1 /* Subroutines used for code generation on the DEC Alpha.
2 Copyright (C) 1992-2015 Free Software Foundation, Inc.
3 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "hash-set.h"
28 #include "vec.h"
29 #include "input.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "inchash.h"
33 #include "tree.h"
34 #include "fold-const.h"
35 #include "stor-layout.h"
36 #include "calls.h"
37 #include "varasm.h"
38 #include "regs.h"
39 #include "hard-reg-set.h"
40 #include "insn-config.h"
41 #include "conditions.h"
42 #include "output.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "recog.h"
46 #include "hashtab.h"
47 #include "function.h"
48 #include "statistics.h"
49 #include "expmed.h"
50 #include "dojump.h"
51 #include "explow.h"
52 #include "emit-rtl.h"
53 #include "stmt.h"
54 #include "expr.h"
55 #include "insn-codes.h"
56 #include "optabs.h"
57 #include "reload.h"
58 #include "obstack.h"
59 #include "except.h"
60 #include "diagnostic-core.h"
61 #include "ggc.h"
62 #include "tm_p.h"
63 #include "target.h"
64 #include "target-def.h"
65 #include "common/common-target.h"
66 #include "debug.h"
67 #include "langhooks.h"
68 #include "hash-map.h"
69 #include "hash-table.h"
70 #include "predict.h"
71 #include "dominance.h"
72 #include "cfg.h"
73 #include "cfgrtl.h"
74 #include "cfganal.h"
75 #include "lcm.h"
76 #include "cfgbuild.h"
77 #include "cfgcleanup.h"
78 #include "basic-block.h"
79 #include "tree-ssa-alias.h"
80 #include "internal-fn.h"
81 #include "gimple-fold.h"
82 #include "tree-eh.h"
83 #include "gimple-expr.h"
84 #include "is-a.h"
85 #include "gimple.h"
86 #include "tree-pass.h"
87 #include "context.h"
88 #include "pass_manager.h"
89 #include "gimple-iterator.h"
90 #include "gimplify.h"
91 #include "gimple-ssa.h"
92 #include "stringpool.h"
93 #include "tree-ssanames.h"
94 #include "tree-stdarg.h"
95 #include "tm-constrs.h"
96 #include "df.h"
97 #include "libfuncs.h"
98 #include "opts.h"
99 #include "params.h"
100 #include "builtins.h"
101 #include "rtl-iter.h"
102
103 /* Specify which cpu to schedule for. */
104 enum processor_type alpha_tune;
105
106 /* Which cpu we're generating code for. */
107 enum processor_type alpha_cpu;
108
109 static const char * const alpha_cpu_name[] =
110 {
111 "ev4", "ev5", "ev6"
112 };
113
114 /* Specify how accurate floating-point traps need to be. */
115
116 enum alpha_trap_precision alpha_tp;
117
118 /* Specify the floating-point rounding mode. */
119
120 enum alpha_fp_rounding_mode alpha_fprm;
121
122 /* Specify which things cause traps. */
123
124 enum alpha_fp_trap_mode alpha_fptm;
125
126 /* Nonzero if inside of a function, because the Alpha asm can't
127 handle .files inside of functions. */
128
129 static int inside_function = FALSE;
130
131 /* The number of cycles of latency we should assume on memory reads. */
132
133 int alpha_memory_latency = 3;
134
135 /* Whether the function needs the GP. */
136
137 static int alpha_function_needs_gp;
138
139 /* The assembler name of the current function. */
140
141 static const char *alpha_fnname;
142
143 /* The next explicit relocation sequence number. */
144 extern GTY(()) int alpha_next_sequence_number;
145 int alpha_next_sequence_number = 1;
146
147 /* The literal and gpdisp sequence numbers for this insn, as printed
148 by %# and %* respectively. */
149 extern GTY(()) int alpha_this_literal_sequence_number;
150 extern GTY(()) int alpha_this_gpdisp_sequence_number;
151 int alpha_this_literal_sequence_number;
152 int alpha_this_gpdisp_sequence_number;
153
154 /* Costs of various operations on the different architectures. */
155
156 struct alpha_rtx_cost_data
157 {
158 unsigned char fp_add;
159 unsigned char fp_mult;
160 unsigned char fp_div_sf;
161 unsigned char fp_div_df;
162 unsigned char int_mult_si;
163 unsigned char int_mult_di;
164 unsigned char int_shift;
165 unsigned char int_cmov;
166 unsigned short int_div;
167 };
168
169 static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
170 {
171 { /* EV4 */
172 COSTS_N_INSNS (6), /* fp_add */
173 COSTS_N_INSNS (6), /* fp_mult */
174 COSTS_N_INSNS (34), /* fp_div_sf */
175 COSTS_N_INSNS (63), /* fp_div_df */
176 COSTS_N_INSNS (23), /* int_mult_si */
177 COSTS_N_INSNS (23), /* int_mult_di */
178 COSTS_N_INSNS (2), /* int_shift */
179 COSTS_N_INSNS (2), /* int_cmov */
180 COSTS_N_INSNS (97), /* int_div */
181 },
182 { /* EV5 */
183 COSTS_N_INSNS (4), /* fp_add */
184 COSTS_N_INSNS (4), /* fp_mult */
185 COSTS_N_INSNS (15), /* fp_div_sf */
186 COSTS_N_INSNS (22), /* fp_div_df */
187 COSTS_N_INSNS (8), /* int_mult_si */
188 COSTS_N_INSNS (12), /* int_mult_di */
189 COSTS_N_INSNS (1) + 1, /* int_shift */
190 COSTS_N_INSNS (1), /* int_cmov */
191 COSTS_N_INSNS (83), /* int_div */
192 },
193 { /* EV6 */
194 COSTS_N_INSNS (4), /* fp_add */
195 COSTS_N_INSNS (4), /* fp_mult */
196 COSTS_N_INSNS (12), /* fp_div_sf */
197 COSTS_N_INSNS (15), /* fp_div_df */
198 COSTS_N_INSNS (7), /* int_mult_si */
199 COSTS_N_INSNS (7), /* int_mult_di */
200 COSTS_N_INSNS (1), /* int_shift */
201 COSTS_N_INSNS (2), /* int_cmov */
202 COSTS_N_INSNS (86), /* int_div */
203 },
204 };
205
206 /* Similar but tuned for code size instead of execution latency. The
207 extra +N is fractional cost tuning based on latency. It's used to
208 encourage use of cheaper insns like shift, but only if there's just
209 one of them. */
210
211 static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
212 {
213 COSTS_N_INSNS (1), /* fp_add */
214 COSTS_N_INSNS (1), /* fp_mult */
215 COSTS_N_INSNS (1), /* fp_div_sf */
216 COSTS_N_INSNS (1) + 1, /* fp_div_df */
217 COSTS_N_INSNS (1) + 1, /* int_mult_si */
218 COSTS_N_INSNS (1) + 2, /* int_mult_di */
219 COSTS_N_INSNS (1), /* int_shift */
220 COSTS_N_INSNS (1), /* int_cmov */
221 COSTS_N_INSNS (6), /* int_div */
222 };
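/* As a rough illustration of the fractional tuning above: for size,
   int_shift is COSTS_N_INSNS (1) while int_mult_si is COSTS_N_INSNS (1) + 1,
   so replacing one multiply with a single shift still looks like a win,
   but replacing it with a two-insn shift/add sequence
   (2 * COSTS_N_INSNS (1)) does not.  */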
223
224 /* Get the number of args of a function in one of two ways. */
225 #if TARGET_ABI_OPEN_VMS
226 #define NUM_ARGS crtl->args.info.num_args
227 #else
228 #define NUM_ARGS crtl->args.info
229 #endif
230
231 #define REG_PV 27
232 #define REG_RA 26
233
234 /* Declarations of static functions. */
235 static struct machine_function *alpha_init_machine_status (void);
236 static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
237 static void alpha_handle_trap_shadows (void);
238 static void alpha_align_insns (void);
239 static void alpha_override_options_after_change (void);
240
241 #if TARGET_ABI_OPEN_VMS
242 static void alpha_write_linkage (FILE *, const char *);
243 static bool vms_valid_pointer_mode (machine_mode);
244 #else
245 #define vms_patch_builtins() gcc_unreachable()
246 #endif
247 \f
248 static unsigned int
249 rest_of_handle_trap_shadows (void)
250 {
251 alpha_handle_trap_shadows ();
252 return 0;
253 }
254
255 namespace {
256
257 const pass_data pass_data_handle_trap_shadows =
258 {
259 RTL_PASS,
260 "trap_shadows", /* name */
261 OPTGROUP_NONE, /* optinfo_flags */
262 TV_NONE, /* tv_id */
263 0, /* properties_required */
264 0, /* properties_provided */
265 0, /* properties_destroyed */
266 0, /* todo_flags_start */
267 TODO_df_finish, /* todo_flags_finish */
268 };
269
270 class pass_handle_trap_shadows : public rtl_opt_pass
271 {
272 public:
273 pass_handle_trap_shadows(gcc::context *ctxt)
274 : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
275 {}
276
277 /* opt_pass methods: */
278 virtual bool gate (function *)
279 {
280 return alpha_tp != ALPHA_TP_PROG || flag_exceptions;
281 }
282
283 virtual unsigned int execute (function *)
284 {
285 return rest_of_handle_trap_shadows ();
286 }
287
288 }; // class pass_handle_trap_shadows
289
290 } // anon namespace
291
292 rtl_opt_pass *
293 make_pass_handle_trap_shadows (gcc::context *ctxt)
294 {
295 return new pass_handle_trap_shadows (ctxt);
296 }
297
298 static unsigned int
299 rest_of_align_insns (void)
300 {
301 alpha_align_insns ();
302 return 0;
303 }
304
305 namespace {
306
307 const pass_data pass_data_align_insns =
308 {
309 RTL_PASS,
310 "align_insns", /* name */
311 OPTGROUP_NONE, /* optinfo_flags */
312 TV_NONE, /* tv_id */
313 0, /* properties_required */
314 0, /* properties_provided */
315 0, /* properties_destroyed */
316 0, /* todo_flags_start */
317 TODO_df_finish, /* todo_flags_finish */
318 };
319
320 class pass_align_insns : public rtl_opt_pass
321 {
322 public:
323 pass_align_insns(gcc::context *ctxt)
324 : rtl_opt_pass(pass_data_align_insns, ctxt)
325 {}
326
327 /* opt_pass methods: */
328 virtual bool gate (function *)
329 {
330 /* Due to the number of extra trapb insns, don't bother fixing up
331 alignment when trap precision is instruction. Moreover, we can
332 only do our job when sched2 is run. */
333 return ((alpha_tune == PROCESSOR_EV4
334 || alpha_tune == PROCESSOR_EV5)
335 && optimize && !optimize_size
336 && alpha_tp != ALPHA_TP_INSN
337 && flag_schedule_insns_after_reload);
338 }
339
340 virtual unsigned int execute (function *)
341 {
342 return rest_of_align_insns ();
343 }
344
345 }; // class pass_align_insns
346
347 } // anon namespace
348
349 rtl_opt_pass *
350 make_pass_align_insns (gcc::context *ctxt)
351 {
352 return new pass_align_insns (ctxt);
353 }
354
355 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
356 /* Implement TARGET_MANGLE_TYPE. */
357
358 static const char *
359 alpha_mangle_type (const_tree type)
360 {
361 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
362 && TARGET_LONG_DOUBLE_128)
363 return "g";
364
365 /* For all other types, use normal C++ mangling. */
366 return NULL;
367 }
368 #endif
369
370 /* Parse target option strings. */
371
372 static void
373 alpha_option_override (void)
374 {
375 static const struct cpu_table {
376 const char *const name;
377 const enum processor_type processor;
378 const int flags;
379 const unsigned short line_size; /* in bytes */
380 const unsigned short l1_size; /* in kb. */
381 const unsigned short l2_size; /* in kb. */
382 } cpu_table[] = {
383 /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
384 EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
385 had 64k to 8M 8-byte direct Bcache. */
386 { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
387 { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
388 { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
389
390 /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
391 and 1M to 16M 64 byte L3 (not modeled).
392 PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
393 PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
394 { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
395 { "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
396 { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
397 { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
398 { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
399 { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
400 { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
401
402 /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
403 { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
404 { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
405 { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
406 64, 64, 16*1024 },
407 { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
408 64, 64, 16*1024 }
409 };
410
411 opt_pass *pass_handle_trap_shadows = make_pass_handle_trap_shadows (g);
412 struct register_pass_info handle_trap_shadows_info
413 = { pass_handle_trap_shadows, "eh_ranges",
414 1, PASS_POS_INSERT_AFTER
415 };
416
417 opt_pass *pass_align_insns = make_pass_align_insns (g);
418 struct register_pass_info align_insns_info
419 = { pass_align_insns, "shorten",
420 1, PASS_POS_INSERT_BEFORE
421 };
422
423 int const ct_size = ARRAY_SIZE (cpu_table);
424 int line_size = 0, l1_size = 0, l2_size = 0;
425 int i;
426
427 #ifdef SUBTARGET_OVERRIDE_OPTIONS
428 SUBTARGET_OVERRIDE_OPTIONS;
429 #endif
430
431 /* Default to full IEEE compliance mode for Go language. */
432 if (strcmp (lang_hooks.name, "GNU Go") == 0
433 && !(target_flags_explicit & MASK_IEEE))
434 target_flags |= MASK_IEEE;
435
436 alpha_fprm = ALPHA_FPRM_NORM;
437 alpha_tp = ALPHA_TP_PROG;
438 alpha_fptm = ALPHA_FPTM_N;
439
440 if (TARGET_IEEE)
441 {
442 alpha_tp = ALPHA_TP_INSN;
443 alpha_fptm = ALPHA_FPTM_SU;
444 }
445 if (TARGET_IEEE_WITH_INEXACT)
446 {
447 alpha_tp = ALPHA_TP_INSN;
448 alpha_fptm = ALPHA_FPTM_SUI;
449 }
450
451 if (alpha_tp_string)
452 {
453 if (! strcmp (alpha_tp_string, "p"))
454 alpha_tp = ALPHA_TP_PROG;
455 else if (! strcmp (alpha_tp_string, "f"))
456 alpha_tp = ALPHA_TP_FUNC;
457 else if (! strcmp (alpha_tp_string, "i"))
458 alpha_tp = ALPHA_TP_INSN;
459 else
460 error ("bad value %qs for -mtrap-precision switch", alpha_tp_string);
461 }
462
463 if (alpha_fprm_string)
464 {
465 if (! strcmp (alpha_fprm_string, "n"))
466 alpha_fprm = ALPHA_FPRM_NORM;
467 else if (! strcmp (alpha_fprm_string, "m"))
468 alpha_fprm = ALPHA_FPRM_MINF;
469 else if (! strcmp (alpha_fprm_string, "c"))
470 alpha_fprm = ALPHA_FPRM_CHOP;
471 else if (! strcmp (alpha_fprm_string,"d"))
472 alpha_fprm = ALPHA_FPRM_DYN;
473 else
474 error ("bad value %qs for -mfp-rounding-mode switch",
475 alpha_fprm_string);
476 }
477
478 if (alpha_fptm_string)
479 {
480 if (strcmp (alpha_fptm_string, "n") == 0)
481 alpha_fptm = ALPHA_FPTM_N;
482 else if (strcmp (alpha_fptm_string, "u") == 0)
483 alpha_fptm = ALPHA_FPTM_U;
484 else if (strcmp (alpha_fptm_string, "su") == 0)
485 alpha_fptm = ALPHA_FPTM_SU;
486 else if (strcmp (alpha_fptm_string, "sui") == 0)
487 alpha_fptm = ALPHA_FPTM_SUI;
488 else
489 error ("bad value %qs for -mfp-trap-mode switch", alpha_fptm_string);
490 }
491
492 if (alpha_cpu_string)
493 {
494 for (i = 0; i < ct_size; i++)
495 if (! strcmp (alpha_cpu_string, cpu_table [i].name))
496 {
497 alpha_tune = alpha_cpu = cpu_table[i].processor;
498 line_size = cpu_table[i].line_size;
499 l1_size = cpu_table[i].l1_size;
500 l2_size = cpu_table[i].l2_size;
501 target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
502 target_flags |= cpu_table[i].flags;
503 break;
504 }
505 if (i == ct_size)
506 error ("bad value %qs for -mcpu switch", alpha_cpu_string);
507 }
508
509 if (alpha_tune_string)
510 {
511 for (i = 0; i < ct_size; i++)
512 if (! strcmp (alpha_tune_string, cpu_table [i].name))
513 {
514 alpha_tune = cpu_table[i].processor;
515 line_size = cpu_table[i].line_size;
516 l1_size = cpu_table[i].l1_size;
517 l2_size = cpu_table[i].l2_size;
518 break;
519 }
520 if (i == ct_size)
521 error ("bad value %qs for -mtune switch", alpha_tune_string);
522 }
523
524 if (line_size)
525 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, line_size,
526 global_options.x_param_values,
527 global_options_set.x_param_values);
528 if (l1_size)
529 maybe_set_param_value (PARAM_L1_CACHE_SIZE, l1_size,
530 global_options.x_param_values,
531 global_options_set.x_param_values);
532 if (l2_size)
533 maybe_set_param_value (PARAM_L2_CACHE_SIZE, l2_size,
534 global_options.x_param_values,
535 global_options_set.x_param_values);
536
537 /* Do some sanity checks on the above options. */
538
539 if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
540 && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
541 {
542 warning (0, "fp software completion requires -mtrap-precision=i");
543 alpha_tp = ALPHA_TP_INSN;
544 }
545
546 if (alpha_cpu == PROCESSOR_EV6)
547 {
548 /* Except for EV6 pass 1 (not released), we always have precise
549 arithmetic traps. Which means we can do software completion
550 without minding trap shadows. */
551 alpha_tp = ALPHA_TP_PROG;
552 }
553
554 if (TARGET_FLOAT_VAX)
555 {
556 if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
557 {
558 warning (0, "rounding mode not supported for VAX floats");
559 alpha_fprm = ALPHA_FPRM_NORM;
560 }
561 if (alpha_fptm == ALPHA_FPTM_SUI)
562 {
563 warning (0, "trap mode not supported for VAX floats");
564 alpha_fptm = ALPHA_FPTM_SU;
565 }
566 if (target_flags_explicit & MASK_LONG_DOUBLE_128)
567 warning (0, "128-bit long double not supported for VAX floats");
568 target_flags &= ~MASK_LONG_DOUBLE_128;
569 }
570
571 {
572 char *end;
573 int lat;
574
575 if (!alpha_mlat_string)
576 alpha_mlat_string = "L1";
577
578 if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
579 && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
580 ;
581 else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
582 && ISDIGIT ((unsigned char)alpha_mlat_string[1])
583 && alpha_mlat_string[2] == '\0')
584 {
585 static int const cache_latency[][4] =
586 {
587 { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
588 { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
589 { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
590 };
591
592 lat = alpha_mlat_string[1] - '0';
593 if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
594 {
595 warning (0, "L%d cache latency unknown for %s",
596 lat, alpha_cpu_name[alpha_tune]);
597 lat = 3;
598 }
599 else
600 lat = cache_latency[alpha_tune][lat-1];
601 }
602 else if (! strcmp (alpha_mlat_string, "main"))
603 {
604 /* Most current memories have about 370ns latency. This is
605 a reasonable guess for a fast cpu. */
606 lat = 150;
607 }
608 else
609 {
610 warning (0, "bad value %qs for -mmemory-latency", alpha_mlat_string);
611 lat = 3;
612 }
613
614 alpha_memory_latency = lat;
615 }
616
617 /* Default the definition of "small data" to 8 bytes. */
618 if (!global_options_set.x_g_switch_value)
619 g_switch_value = 8;
620
621 /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
622 if (flag_pic == 1)
623 target_flags |= MASK_SMALL_DATA;
624 else if (flag_pic == 2)
625 target_flags &= ~MASK_SMALL_DATA;
626
627 alpha_override_options_after_change ();
628
629 /* Register variables and functions with the garbage collector. */
630
631 /* Set up function hooks. */
632 init_machine_status = alpha_init_machine_status;
633
634 /* Tell the compiler when we're using VAX floating point. */
635 if (TARGET_FLOAT_VAX)
636 {
637 REAL_MODE_FORMAT (SFmode) = &vax_f_format;
638 REAL_MODE_FORMAT (DFmode) = &vax_g_format;
639 REAL_MODE_FORMAT (TFmode) = NULL;
640 }
641
642 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
643 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
644 target_flags |= MASK_LONG_DOUBLE_128;
645 #endif
646
647 /* This needs to be done at start up. It's convenient to do it here. */
648 register_pass (&handle_trap_shadows_info);
649 register_pass (&align_insns_info);
650 }
651
652 /* Implement targetm.override_options_after_change. */
653
654 static void
655 alpha_override_options_after_change (void)
656 {
657 /* Align labels and loops for optimal branching. */
658 /* ??? Kludge these by not doing anything if we don't optimize. */
659 if (optimize > 0)
660 {
661 if (align_loops <= 0)
662 align_loops = 16;
663 if (align_jumps <= 0)
664 align_jumps = 16;
665 }
666 if (align_functions <= 0)
667 align_functions = 16;
668 }
669 \f
670 /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
671
672 int
673 zap_mask (HOST_WIDE_INT value)
674 {
675 int i;
676
677 for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
678 i++, value >>= 8)
679 if ((value & 0xff) != 0 && (value & 0xff) != 0xff)
680 return 0;
681
682 return 1;
683 }
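/* For example, zap_mask returns 1 for 0x00000000ffffff00 (every byte is
   either 0x00 or 0xff, so the value is expressible as a ZAP/ZAPNOT byte
   mask) and 0 for 0x00000000ffffff80 (the low byte 0x80 is partial).  */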
684
685 /* Return true if OP is valid for a particular TLS relocation.
686 We are already guaranteed that OP is a CONST. */
687
688 int
689 tls_symbolic_operand_1 (rtx op, int size, int unspec)
690 {
691 op = XEXP (op, 0);
692
693 if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec)
694 return 0;
695 op = XVECEXP (op, 0, 0);
696
697 if (GET_CODE (op) != SYMBOL_REF)
698 return 0;
699
700 switch (SYMBOL_REF_TLS_MODEL (op))
701 {
702 case TLS_MODEL_LOCAL_DYNAMIC:
703 return unspec == UNSPEC_DTPREL && size == alpha_tls_size;
704 case TLS_MODEL_INITIAL_EXEC:
705 return unspec == UNSPEC_TPREL && size == 64;
706 case TLS_MODEL_LOCAL_EXEC:
707 return unspec == UNSPEC_TPREL && size == alpha_tls_size;
708 default:
709 gcc_unreachable ();
710 }
711 }
712
713 /* Used by aligned_memory_operand and unaligned_memory_operand to
714 resolve what reload is going to do with OP if it's a register. */
715
716 rtx
717 resolve_reload_operand (rtx op)
718 {
719 if (reload_in_progress)
720 {
721 rtx tmp = op;
722 if (GET_CODE (tmp) == SUBREG)
723 tmp = SUBREG_REG (tmp);
724 if (REG_P (tmp)
725 && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
726 {
727 op = reg_equiv_memory_loc (REGNO (tmp));
728 if (op == 0)
729 return 0;
730 }
731 }
732 return op;
733 }
734
735 /* The set of scalar modes supported differs from the default
736 check-what-c-supports version in that sometimes TFmode is available
737 even when long double indicates only DFmode.  */
738
739 static bool
740 alpha_scalar_mode_supported_p (machine_mode mode)
741 {
742 switch (mode)
743 {
744 case QImode:
745 case HImode:
746 case SImode:
747 case DImode:
748 case TImode: /* via optabs.c */
749 return true;
750
751 case SFmode:
752 case DFmode:
753 return true;
754
755 case TFmode:
756 return TARGET_HAS_XFLOATING_LIBS;
757
758 default:
759 return false;
760 }
761 }
762
763 /* Alpha implements a couple of integer vector mode operations when
764 TARGET_MAX is enabled. We do not check TARGET_MAX here, however,
765 which allows the vectorizer to operate on e.g. move instructions,
766 or when expand_vector_operations can do something useful. */
767
768 static bool
769 alpha_vector_mode_supported_p (machine_mode mode)
770 {
771 return mode == V8QImode || mode == V4HImode || mode == V2SImode;
772 }
773
774 /* Return 1 if this function can directly return via $26. */
775
776 int
777 direct_return (void)
778 {
779 return (TARGET_ABI_OSF
780 && reload_completed
781 && alpha_sa_size () == 0
782 && get_frame_size () == 0
783 && crtl->outgoing_args_size == 0
784 && crtl->args.pretend_args_size == 0);
785 }
786
787 /* Return the TLS model to use for SYMBOL. */
788
789 static enum tls_model
790 tls_symbolic_operand_type (rtx symbol)
791 {
792 enum tls_model model;
793
794 if (GET_CODE (symbol) != SYMBOL_REF)
795 return TLS_MODEL_NONE;
796 model = SYMBOL_REF_TLS_MODEL (symbol);
797
798 /* Local-exec with a 64-bit size is the same code as initial-exec. */
799 if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
800 model = TLS_MODEL_INITIAL_EXEC;
801
802 return model;
803 }
804 \f
805 /* Return true if the function DECL will share the same GP as any
806 function in the current unit of translation. */
807
808 static bool
809 decl_has_samegp (const_tree decl)
810 {
811 /* Functions that are not local can be overridden, and thus may
812 not share the same gp. */
813 if (!(*targetm.binds_local_p) (decl))
814 return false;
815
816 /* If -msmall-data is in effect, assume that there is only one GP
817 for the module, and so any local symbol has this property. We
818 need explicit relocations to be able to enforce this for symbols
819 not defined in this unit of translation, however. */
820 if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
821 return true;
822
823 /* Functions that are not external are defined in this UoT. */
824 /* ??? Irritatingly, static functions not yet emitted are still
825 marked "external". Apply this to non-static functions only. */
826 return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl);
827 }
828
829 /* Return true if EXP should be placed in the small data section. */
830
831 static bool
832 alpha_in_small_data_p (const_tree exp)
833 {
834 /* We want to merge strings, so we never consider them small data. */
835 if (TREE_CODE (exp) == STRING_CST)
836 return false;
837
838 /* Functions are never in the small data area. Duh. */
839 if (TREE_CODE (exp) == FUNCTION_DECL)
840 return false;
841
842 /* COMMON symbols are never small data. */
843 if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp))
844 return false;
845
846 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
847 {
848 const char *section = DECL_SECTION_NAME (exp);
849 if (strcmp (section, ".sdata") == 0
850 || strcmp (section, ".sbss") == 0)
851 return true;
852 }
853 else
854 {
855 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
856
857 /* If this is an incomplete type with size 0, then we can't put it
858 in sdata because it might be too big when completed. */
859 if (size > 0 && size <= g_switch_value)
860 return true;
861 }
862
863 return false;
864 }
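/* For instance, with the default -G 8 (g_switch_value == 8, set in
   alpha_option_override above), a file-scope "int x[2]" (8 bytes) counts
   as small data, while "int y[4]" (16 bytes) does not.  */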
865
866 #if TARGET_ABI_OPEN_VMS
867 static bool
868 vms_valid_pointer_mode (machine_mode mode)
869 {
870 return (mode == SImode || mode == DImode);
871 }
872
873 static bool
874 alpha_linkage_symbol_p (const char *symname)
875 {
876 int symlen = strlen (symname);
877
878 if (symlen > 4)
879 return strcmp (&symname [symlen - 4], "..lk") == 0;
880
881 return false;
882 }
883
884 #define LINKAGE_SYMBOL_REF_P(X) \
885 ((GET_CODE (X) == SYMBOL_REF \
886 && alpha_linkage_symbol_p (XSTR (X, 0))) \
887 || (GET_CODE (X) == CONST \
888 && GET_CODE (XEXP (X, 0)) == PLUS \
889 && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
890 && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
891 #endif
892
893 /* legitimate_address_p recognizes an RTL expression that is a valid
894 memory address for an instruction. The MODE argument is the
895 machine mode for the MEM expression that wants to use this address.
896
897 For Alpha, we have either a constant address or the sum of a
898 register and a constant address, or just a register. For DImode,
899 any of those forms can be surrounded with an AND that clears the
900 low-order three bits; this is an "unaligned" access. */
901
902 static bool
903 alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
904 {
905 /* If this is an ldq_u type address, discard the outer AND. */
906 if (mode == DImode
907 && GET_CODE (x) == AND
908 && CONST_INT_P (XEXP (x, 1))
909 && INTVAL (XEXP (x, 1)) == -8)
910 x = XEXP (x, 0);
911
912 /* Discard non-paradoxical subregs. */
913 if (GET_CODE (x) == SUBREG
914 && (GET_MODE_SIZE (GET_MODE (x))
915 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
916 x = SUBREG_REG (x);
917
918 /* Unadorned general registers are valid. */
919 if (REG_P (x)
920 && (strict
921 ? STRICT_REG_OK_FOR_BASE_P (x)
922 : NONSTRICT_REG_OK_FOR_BASE_P (x)))
923 return true;
924
925 /* Constant addresses (i.e. +/- 32k) are valid. */
926 if (CONSTANT_ADDRESS_P (x))
927 return true;
928
929 #if TARGET_ABI_OPEN_VMS
930 if (LINKAGE_SYMBOL_REF_P (x))
931 return true;
932 #endif
933
934 /* Register plus a small constant offset is valid. */
935 if (GET_CODE (x) == PLUS)
936 {
937 rtx ofs = XEXP (x, 1);
938 x = XEXP (x, 0);
939
940 /* Discard non-paradoxical subregs. */
941 if (GET_CODE (x) == SUBREG
942 && (GET_MODE_SIZE (GET_MODE (x))
943 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
944 x = SUBREG_REG (x);
945
946 if (REG_P (x))
947 {
948 if (! strict
949 && NONSTRICT_REG_OK_FP_BASE_P (x)
950 && CONST_INT_P (ofs))
951 return true;
952 if ((strict
953 ? STRICT_REG_OK_FOR_BASE_P (x)
954 : NONSTRICT_REG_OK_FOR_BASE_P (x))
955 && CONSTANT_ADDRESS_P (ofs))
956 return true;
957 }
958 }
959
960 /* If we're managing explicit relocations, LO_SUM is valid, as are small
961 data symbols. Avoid explicit relocations of modes larger than word
962 mode since e.g. $LC0+8($1) can fold around a +/- 32k offset. */
963 else if (TARGET_EXPLICIT_RELOCS
964 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
965 {
966 if (small_symbolic_operand (x, Pmode))
967 return true;
968
969 if (GET_CODE (x) == LO_SUM)
970 {
971 rtx ofs = XEXP (x, 1);
972 x = XEXP (x, 0);
973
974 /* Discard non-paradoxical subregs. */
975 if (GET_CODE (x) == SUBREG
976 && (GET_MODE_SIZE (GET_MODE (x))
977 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
978 x = SUBREG_REG (x);
979
980 /* Must have a valid base register. */
981 if (! (REG_P (x)
982 && (strict
983 ? STRICT_REG_OK_FOR_BASE_P (x)
984 : NONSTRICT_REG_OK_FOR_BASE_P (x))))
985 return false;
986
987 /* The symbol must be local. */
988 if (local_symbolic_operand (ofs, Pmode)
989 || dtp32_symbolic_operand (ofs, Pmode)
990 || tp32_symbolic_operand (ofs, Pmode))
991 return true;
992 }
993 }
994
995 return false;
996 }
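/* By way of example, the following DImode addresses are accepted here:
   (reg $1), (plus (reg $1) (const_int 16)), and the ldq_u form
   (and (plus (reg $1) (const_int 7)) (const_int -8)); with explicit
   relocations, (lo_sum (reg $1) (symbol_ref)) for a local symbol is
   accepted as well.  A displacement that does not fit in 16 signed bits,
   such as (plus (reg $1) (const_int 65536)), is rejected and must go
   through alpha_legitimize_address first.  */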
997
998 /* Build the SYMBOL_REF for __tls_get_addr. */
999
1000 static GTY(()) rtx tls_get_addr_libfunc;
1001
1002 static rtx
1003 get_tls_get_addr (void)
1004 {
1005 if (!tls_get_addr_libfunc)
1006 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
1007 return tls_get_addr_libfunc;
1008 }
1009
1010 /* Try machine-dependent ways of modifying an illegitimate address
1011 to be legitimate. If we find one, return the new, valid address. */
1012
1013 static rtx
1014 alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
1015 {
1016 HOST_WIDE_INT addend;
1017
1018 /* If the address is (plus reg const_int) and the CONST_INT is not a
1019 valid offset, compute the high part of the constant and add it to
1020 the register. Then our address is (plus temp low-part-const). */
1021 if (GET_CODE (x) == PLUS
1022 && REG_P (XEXP (x, 0))
1023 && CONST_INT_P (XEXP (x, 1))
1024 && ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
1025 {
1026 addend = INTVAL (XEXP (x, 1));
1027 x = XEXP (x, 0);
1028 goto split_addend;
1029 }
1030
1031 /* If the address is (const (plus FOO const_int)), find the low-order
1032 part of the CONST_INT. Then load FOO plus any high-order part of the
1033 CONST_INT into a register. Our address is (plus reg low-part-const).
1034 This is done to reduce the number of GOT entries. */
1035 if (can_create_pseudo_p ()
1036 && GET_CODE (x) == CONST
1037 && GET_CODE (XEXP (x, 0)) == PLUS
1038 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
1039 {
1040 addend = INTVAL (XEXP (XEXP (x, 0), 1));
1041 x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
1042 goto split_addend;
1043 }
1044
1045 /* If we have a (plus reg const), emit the load as in (2), then add
1046 the two registers, and finally generate (plus reg low-part-const) as
1047 our address. */
1048 if (can_create_pseudo_p ()
1049 && GET_CODE (x) == PLUS
1050 && REG_P (XEXP (x, 0))
1051 && GET_CODE (XEXP (x, 1)) == CONST
1052 && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
1053 && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
1054 {
1055 addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
1056 x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
1057 XEXP (XEXP (XEXP (x, 1), 0), 0),
1058 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1059 goto split_addend;
1060 }
1061
1062 /* If this is a local symbol, split the address into HIGH/LO_SUM parts.
1063 Avoid modes larger than word mode since e.g. $LC0+8($1) can fold
1064 around +/- 32k offset. */
1065 if (TARGET_EXPLICIT_RELOCS
1066 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
1067 && symbolic_operand (x, Pmode))
1068 {
1069 rtx r0, r16, eqv, tga, tp, insn, dest, seq;
1070
1071 switch (tls_symbolic_operand_type (x))
1072 {
1073 case TLS_MODEL_NONE:
1074 break;
1075
1076 case TLS_MODEL_GLOBAL_DYNAMIC:
1077 start_sequence ();
1078
1079 r0 = gen_rtx_REG (Pmode, 0);
1080 r16 = gen_rtx_REG (Pmode, 16);
1081 tga = get_tls_get_addr ();
1082 dest = gen_reg_rtx (Pmode);
1083 seq = GEN_INT (alpha_next_sequence_number++);
1084
1085 emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
1086 insn = gen_call_value_osf_tlsgd (r0, tga, seq);
1087 insn = emit_call_insn (insn);
1088 RTL_CONST_CALL_P (insn) = 1;
1089 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1090
1091 insn = get_insns ();
1092 end_sequence ();
1093
1094 emit_libcall_block (insn, dest, r0, x);
1095 return dest;
1096
1097 case TLS_MODEL_LOCAL_DYNAMIC:
1098 start_sequence ();
1099
1100 r0 = gen_rtx_REG (Pmode, 0);
1101 r16 = gen_rtx_REG (Pmode, 16);
1102 tga = get_tls_get_addr ();
1103 scratch = gen_reg_rtx (Pmode);
1104 seq = GEN_INT (alpha_next_sequence_number++);
1105
1106 emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
1107 insn = gen_call_value_osf_tlsldm (r0, tga, seq);
1108 insn = emit_call_insn (insn);
1109 RTL_CONST_CALL_P (insn) = 1;
1110 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
1111
1112 insn = get_insns ();
1113 end_sequence ();
1114
1115 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
1116 UNSPEC_TLSLDM_CALL);
1117 emit_libcall_block (insn, scratch, r0, eqv);
1118
1119 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
1120 eqv = gen_rtx_CONST (Pmode, eqv);
1121
1122 if (alpha_tls_size == 64)
1123 {
1124 dest = gen_reg_rtx (Pmode);
1125 emit_insn (gen_rtx_SET (dest, eqv));
1126 emit_insn (gen_adddi3 (dest, dest, scratch));
1127 return dest;
1128 }
1129 if (alpha_tls_size == 32)
1130 {
1131 insn = gen_rtx_HIGH (Pmode, eqv);
1132 insn = gen_rtx_PLUS (Pmode, scratch, insn);
1133 scratch = gen_reg_rtx (Pmode);
1134 emit_insn (gen_rtx_SET (scratch, insn));
1135 }
1136 return gen_rtx_LO_SUM (Pmode, scratch, eqv);
1137
1138 case TLS_MODEL_INITIAL_EXEC:
1139 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1140 eqv = gen_rtx_CONST (Pmode, eqv);
1141 tp = gen_reg_rtx (Pmode);
1142 scratch = gen_reg_rtx (Pmode);
1143 dest = gen_reg_rtx (Pmode);
1144
1145 emit_insn (gen_get_thread_pointerdi (tp));
1146 emit_insn (gen_rtx_SET (scratch, eqv));
1147 emit_insn (gen_adddi3 (dest, tp, scratch));
1148 return dest;
1149
1150 case TLS_MODEL_LOCAL_EXEC:
1151 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
1152 eqv = gen_rtx_CONST (Pmode, eqv);
1153 tp = gen_reg_rtx (Pmode);
1154
1155 emit_insn (gen_get_thread_pointerdi (tp));
1156 if (alpha_tls_size == 32)
1157 {
1158 insn = gen_rtx_HIGH (Pmode, eqv);
1159 insn = gen_rtx_PLUS (Pmode, tp, insn);
1160 tp = gen_reg_rtx (Pmode);
1161 emit_insn (gen_rtx_SET (tp, insn));
1162 }
1163 return gen_rtx_LO_SUM (Pmode, tp, eqv);
1164
1165 default:
1166 gcc_unreachable ();
1167 }
1168
1169 if (local_symbolic_operand (x, Pmode))
1170 {
1171 if (small_symbolic_operand (x, Pmode))
1172 return x;
1173 else
1174 {
1175 if (can_create_pseudo_p ())
1176 scratch = gen_reg_rtx (Pmode);
1177 emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
1178 return gen_rtx_LO_SUM (Pmode, scratch, x);
1179 }
1180 }
1181 }
1182
1183 return NULL;
1184
1185 split_addend:
1186 {
1187 HOST_WIDE_INT low, high;
1188
1189 low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
1190 addend -= low;
1191 high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
1192 addend -= high;
1193
1194 if (addend)
1195 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
1196 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1197 1, OPTAB_LIB_WIDEN);
1198 if (high)
1199 x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
1200 (!can_create_pseudo_p () ? scratch : NULL_RTX),
1201 1, OPTAB_LIB_WIDEN);
1202
1203 return plus_constant (Pmode, x, low);
1204 }
1205 }
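/* A worked example of the split_addend path above: legitimizing
   (plus (reg $1) (const_int 0x12345678)) peels off the low 16 bits
   (low = 0x5678), leaving 0x12340000 as the high part, so the result is
   roughly (plus (reg tmp) (const_int 0x5678)) where tmp holds
   $1 + 0x12340000, i.e. one ldah relative to $1 plus a displacement that
   now fits in the memory reference.  */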
1206
1207
1208 /* Try machine-dependent ways of modifying an illegitimate address
1209 to be legitimate. Return X or the new, valid address. */
1210
1211 static rtx
1212 alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
1213 machine_mode mode)
1214 {
1215 rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode);
1216 return new_x ? new_x : x;
1217 }
1218
1219 /* Return true if ADDR has an effect that depends on the machine mode it
1220 is used for. On the Alpha this is true only for the unaligned modes.
1221 We can simplify the test since we know that the address must be valid. */
1222
1223 static bool
1224 alpha_mode_dependent_address_p (const_rtx addr,
1225 addr_space_t as ATTRIBUTE_UNUSED)
1226 {
1227 return GET_CODE (addr) == AND;
1228 }
1229
1230 /* Primarily this is required for TLS symbols, but given that our move
1231 patterns *ought* to be able to handle any symbol at any time, we
1232 should never be spilling symbolic operands to the constant pool, ever. */
1233
1234 static bool
1235 alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
1236 {
1237 enum rtx_code code = GET_CODE (x);
1238 return code == SYMBOL_REF || code == LABEL_REF || code == CONST;
1239 }
1240
1241 /* We do not allow indirect calls to be optimized into sibling calls, nor
1242 can we allow a call to a function with a different GP to be optimized
1243 into a sibcall. */
1244
1245 static bool
1246 alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1247 {
1248 /* Can't do indirect tail calls, since we don't know if the target
1249 uses the same GP. */
1250 if (!decl)
1251 return false;
1252
1253 /* Otherwise, we can make a tail call if the target function shares
1254 the same GP. */
1255 return decl_has_samegp (decl);
1256 }
1257
1258 bool
1259 some_small_symbolic_operand_int (rtx x)
1260 {
1261 subrtx_var_iterator::array_type array;
1262 FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
1263 {
1264 rtx x = *iter;
1265 /* Don't re-split. */
1266 if (GET_CODE (x) == LO_SUM)
1267 iter.skip_subrtxes ();
1268 else if (small_symbolic_operand (x, Pmode))
1269 return true;
1270 }
1271 return false;
1272 }
1273
1274 rtx
1275 split_small_symbolic_operand (rtx x)
1276 {
1277 x = copy_insn (x);
1278 subrtx_ptr_iterator::array_type array;
1279 FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
1280 {
1281 rtx *ptr = *iter;
1282 rtx x = *ptr;
1283 /* Don't re-split. */
1284 if (GET_CODE (x) == LO_SUM)
1285 iter.skip_subrtxes ();
1286 else if (small_symbolic_operand (x, Pmode))
1287 {
1288 *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x);
1289 iter.skip_subrtxes ();
1290 }
1291 }
1292 return x;
1293 }
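/* For instance, (set (reg $1) (mem:DI (symbol_ref "small_var"))) is
   rewritten here as (set (reg $1) (mem:DI (lo_sum (reg $29)
   (symbol_ref "small_var")))), i.e. a single ldq using a 16-bit
   gp-relative relocation into the small data area.  */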
1294
1295 /* Indicate that INSN cannot be duplicated. This is true for any insn
1296 that we've marked with gpdisp relocs, since those have to stay in
1297 1-1 correspondence with one another.
1298
1299 Technically we could copy them if we could set up a mapping from one
1300 sequence number to another, across the set of insns to be duplicated.
1301 This seems overly complicated and error-prone since interblock motion
1302 from sched-ebb could move one of the pair of insns to a different block.
1303
1304 Also cannot allow jsr insns to be duplicated. If they throw exceptions,
1305 then they'll be in a different block from their ldgp. Which could lead
1306 the bb reorder code to think that it would be ok to copy just the block
1307 containing the call and branch to the block containing the ldgp. */
1308
1309 static bool
1310 alpha_cannot_copy_insn_p (rtx_insn *insn)
1311 {
1312 if (!reload_completed || !TARGET_EXPLICIT_RELOCS)
1313 return false;
1314 if (recog_memoized (insn) >= 0)
1315 return get_attr_cannot_copy (insn);
1316 else
1317 return false;
1318 }
1319
1320
1321 /* Try a machine-dependent way of reloading an illegitimate address
1322 operand. If we find one, push the reload and return the new rtx. */
1323
1324 rtx
1325 alpha_legitimize_reload_address (rtx x,
1326 machine_mode mode ATTRIBUTE_UNUSED,
1327 int opnum, int type,
1328 int ind_levels ATTRIBUTE_UNUSED)
1329 {
1330 /* We must recognize output that we have already generated ourselves. */
1331 if (GET_CODE (x) == PLUS
1332 && GET_CODE (XEXP (x, 0)) == PLUS
1333 && REG_P (XEXP (XEXP (x, 0), 0))
1334 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
1335 && CONST_INT_P (XEXP (x, 1)))
1336 {
1337 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1338 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1339 opnum, (enum reload_type) type);
1340 return x;
1341 }
1342
1343 /* We wish to handle large displacements off a base register by
1344 splitting the addend across an ldah and the mem insn. This
1345 cuts the number of extra insns needed from 3 to 1. */
1346 if (GET_CODE (x) == PLUS
1347 && REG_P (XEXP (x, 0))
1348 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
1349 && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0)))
1350 && CONST_INT_P (XEXP (x, 1)))
1351 {
1352 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
1353 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
1354 HOST_WIDE_INT high
1355 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
1356
1357 /* Check for 32-bit overflow. */
1358 if (high + low != val)
1359 return NULL_RTX;
1360
1361 /* Reload the high part into a base reg; leave the low part
1362 in the mem directly. */
1363 x = gen_rtx_PLUS (GET_MODE (x),
1364 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
1365 GEN_INT (high)),
1366 GEN_INT (low));
1367
1368 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
1369 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
1370 opnum, (enum reload_type) type);
1371 return x;
1372 }
1373
1374 return NULL_RTX;
1375 }
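/* For example, a reload of (mem:DI (plus (reg $1) (const_int 0x1234c678)))
   is rewritten so that high = 0x12350000 is reloaded into a base register
   (one ldah) while low = -0x3988 stays as the memory displacement;
   without this split the full constant would have to be built separately,
   costing roughly three extra insns instead of one.  */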
1376 \f
1377 /* Compute a (partial) cost for rtx X. Return true if the complete
1378 cost has been computed, and false if subexpressions should be
1379 scanned. In either case, *TOTAL contains the cost result. */
1380
1381 static bool
1382 alpha_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
1383 bool speed)
1384 {
1385 machine_mode mode = GET_MODE (x);
1386 bool float_mode_p = FLOAT_MODE_P (mode);
1387 const struct alpha_rtx_cost_data *cost_data;
1388
1389 if (!speed)
1390 cost_data = &alpha_rtx_cost_size;
1391 else
1392 cost_data = &alpha_rtx_cost_data[alpha_tune];
1393
1394 switch (code)
1395 {
1396 case CONST_INT:
1397 /* If this is an 8-bit constant, return zero since it can be used
1398 nearly anywhere with no cost. If it is a valid operand for an
1399 ADD or AND, likewise return 0 if we know it will be used in that
1400 context. Otherwise, return 2 since it might be used there later.
1401 All other constants take at least two insns. */
1402 if (INTVAL (x) >= 0 && INTVAL (x) < 256)
1403 {
1404 *total = 0;
1405 return true;
1406 }
1407 /* FALLTHRU */
1408
1409 case CONST_DOUBLE:
1410 case CONST_WIDE_INT:
1411 if (x == CONST0_RTX (mode))
1412 *total = 0;
1413 else if ((outer_code == PLUS && add_operand (x, VOIDmode))
1414 || (outer_code == AND && and_operand (x, VOIDmode)))
1415 *total = 0;
1416 else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
1417 *total = 2;
1418 else
1419 *total = COSTS_N_INSNS (2);
1420 return true;
1421
1422 case CONST:
1423 case SYMBOL_REF:
1424 case LABEL_REF:
1425 if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
1426 *total = COSTS_N_INSNS (outer_code != MEM);
1427 else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
1428 *total = COSTS_N_INSNS (1 + (outer_code != MEM));
1429 else if (tls_symbolic_operand_type (x))
1430 /* Estimate of cost for call_pal rduniq. */
1431 /* ??? How many insns do we emit here? More than one... */
1432 *total = COSTS_N_INSNS (15);
1433 else
1434 /* Otherwise we do a load from the GOT. */
1435 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1436 return true;
1437
1438 case HIGH:
1439 /* This is effectively an add_operand. */
1440 *total = 2;
1441 return true;
1442
1443 case PLUS:
1444 case MINUS:
1445 if (float_mode_p)
1446 *total = cost_data->fp_add;
1447 else if (GET_CODE (XEXP (x, 0)) == MULT
1448 && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
1449 {
1450 *total = (rtx_cost (XEXP (XEXP (x, 0), 0),
1451 (enum rtx_code) outer_code, opno, speed)
1452 + rtx_cost (XEXP (x, 1),
1453 (enum rtx_code) outer_code, opno, speed)
1454 + COSTS_N_INSNS (1));
1455 return true;
1456 }
1457 return false;
1458
1459 case MULT:
1460 if (float_mode_p)
1461 *total = cost_data->fp_mult;
1462 else if (mode == DImode)
1463 *total = cost_data->int_mult_di;
1464 else
1465 *total = cost_data->int_mult_si;
1466 return false;
1467
1468 case ASHIFT:
1469 if (CONST_INT_P (XEXP (x, 1))
1470 && INTVAL (XEXP (x, 1)) <= 3)
1471 {
1472 *total = COSTS_N_INSNS (1);
1473 return false;
1474 }
1475 /* FALLTHRU */
1476
1477 case ASHIFTRT:
1478 case LSHIFTRT:
1479 *total = cost_data->int_shift;
1480 return false;
1481
1482 case IF_THEN_ELSE:
1483 if (float_mode_p)
1484 *total = cost_data->fp_add;
1485 else
1486 *total = cost_data->int_cmov;
1487 return false;
1488
1489 case DIV:
1490 case UDIV:
1491 case MOD:
1492 case UMOD:
1493 if (!float_mode_p)
1494 *total = cost_data->int_div;
1495 else if (mode == SFmode)
1496 *total = cost_data->fp_div_sf;
1497 else
1498 *total = cost_data->fp_div_df;
1499 return false;
1500
1501 case MEM:
1502 *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
1503 return true;
1504
1505 case NEG:
1506 if (! float_mode_p)
1507 {
1508 *total = COSTS_N_INSNS (1);
1509 return false;
1510 }
1511 /* FALLTHRU */
1512
1513 case ABS:
1514 if (! float_mode_p)
1515 {
1516 *total = COSTS_N_INSNS (1) + cost_data->int_cmov;
1517 return false;
1518 }
1519 /* FALLTHRU */
1520
1521 case FLOAT:
1522 case UNSIGNED_FLOAT:
1523 case FIX:
1524 case UNSIGNED_FIX:
1525 case FLOAT_TRUNCATE:
1526 *total = cost_data->fp_add;
1527 return false;
1528
1529 case FLOAT_EXTEND:
1530 if (MEM_P (XEXP (x, 0)))
1531 *total = 0;
1532 else
1533 *total = cost_data->fp_add;
1534 return false;
1535
1536 default:
1537 return false;
1538 }
1539 }
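/* As a concrete reading of the tables above: when optimizing for speed on
   EV6, a DImode MULT is costed at COSTS_N_INSNS (7) while a shift is
   COSTS_N_INSNS (1), so synthesizing multiplies out of shifts and adds
   tends to win; with the size table a multiply is only a fraction of an
   insn more expensive than one shift, so multi-insn replacements do not
   look profitable there.  */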
1540 \f
1541 /* REF is an alignable memory location. Place an aligned SImode
1542 reference into *PALIGNED_MEM and the number of bits to shift into
1543 *PBITNUM. SCRATCH is a free register for use in reloading out
1544 of range stack slots. */
1545
1546 void
1547 get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
1548 {
1549 rtx base;
1550 HOST_WIDE_INT disp, offset;
1551
1552 gcc_assert (MEM_P (ref));
1553
1554 if (reload_in_progress)
1555 {
1556 base = find_replacement (&XEXP (ref, 0));
1557 gcc_assert (memory_address_p (GET_MODE (ref), base));
1558 }
1559 else
1560 base = XEXP (ref, 0);
1561
1562 if (GET_CODE (base) == PLUS)
1563 disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1564 else
1565 disp = 0;
1566
1567 /* Find the byte offset within an aligned word. If the memory itself is
1568 claimed to be aligned, believe it. Otherwise, aligned_memory_operand
1569 will have examined the base register and determined it is aligned, and
1570 thus displacements from it are naturally alignable. */
1571 if (MEM_ALIGN (ref) >= 32)
1572 offset = 0;
1573 else
1574 offset = disp & 3;
1575
1576 /* The location must not cross an aligned word boundary. */
1577 gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref))
1578 <= GET_MODE_SIZE (SImode));
1579
1580 /* Access the entire aligned word. */
1581 *paligned_mem = widen_memory_access (ref, SImode, -offset);
1582
1583 /* Convert the byte offset within the word to a bit offset. */
1584 offset *= BITS_PER_UNIT;
1585 *pbitnum = GEN_INT (offset);
1586 }
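/* For example, for an HImode reference at 6($sp) whose MEM_ALIGN is only
   known to be 16 bits, DISP is 6 and the byte offset is 2, so
   *PALIGNED_MEM becomes the SImode word at 4($sp) and *PBITNUM is 16,
   i.e. the halfword lives in bits 16..31 of that aligned longword.  */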
1587
1588 /* Similar, but just get the address. Handle the two reload cases.
1589 Any displacement already present is folded into the address we return. */
1590
1591 rtx
1592 get_unaligned_address (rtx ref)
1593 {
1594 rtx base;
1595 HOST_WIDE_INT offset = 0;
1596
1597 gcc_assert (MEM_P (ref));
1598
1599 if (reload_in_progress)
1600 {
1601 base = find_replacement (&XEXP (ref, 0));
1602 gcc_assert (memory_address_p (GET_MODE (ref), base));
1603 }
1604 else
1605 base = XEXP (ref, 0);
1606
1607 if (GET_CODE (base) == PLUS)
1608 offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0);
1609
1610 return plus_constant (Pmode, base, offset);
1611 }
1612
1613 /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7.
1614 X is always returned in a register. */
1615
1616 rtx
1617 get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
1618 {
1619 if (GET_CODE (addr) == PLUS)
1620 {
1621 ofs += INTVAL (XEXP (addr, 1));
1622 addr = XEXP (addr, 0);
1623 }
1624
1625 return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7),
1626 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1627 }
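/* For example, with ADDR == (plus (reg $1) (const_int 13)) and OFS == 6,
   this returns a register holding $1 + ((13 + 6) & 7) == $1 + 3, and
   indeed ($1 + 3) & 7 == ($1 + 13 + 6) & 7 for any value of $1.  */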
1628
1629 /* On the Alpha, all (non-symbolic) constants except zero go into
1630 a floating-point register via memory. Note that we cannot
1631 return anything that is not a subset of RCLASS, and that some
1632 symbolic constants cannot be dropped to memory. */
1633
1634 enum reg_class
1635 alpha_preferred_reload_class(rtx x, enum reg_class rclass)
1636 {
1637 /* Zero is present in any register class. */
1638 if (x == CONST0_RTX (GET_MODE (x)))
1639 return rclass;
1640
1641 /* These sorts of constants we can easily drop to memory. */
1642 if (CONST_SCALAR_INT_P (x)
1643 || CONST_DOUBLE_P (x)
1644 || GET_CODE (x) == CONST_VECTOR)
1645 {
1646 if (rclass == FLOAT_REGS)
1647 return NO_REGS;
1648 if (rclass == ALL_REGS)
1649 return GENERAL_REGS;
1650 return rclass;
1651 }
1652
1653 /* All other kinds of constants should not (and in the case of HIGH
1654 cannot) be dropped to memory -- instead we use a GENERAL_REGS
1655 secondary reload. */
1656 if (CONSTANT_P (x))
1657 return (rclass == ALL_REGS ? GENERAL_REGS : rclass);
1658
1659 return rclass;
1660 }
1661
1662 /* Inform reload about cases where moving X with a mode MODE to a register in
1663 RCLASS requires an extra scratch or immediate register. Return the class
1664 needed for the immediate register. */
1665
1666 static reg_class_t
1667 alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
1668 machine_mode mode, secondary_reload_info *sri)
1669 {
1670 enum reg_class rclass = (enum reg_class) rclass_i;
1671
1672 /* Loading and storing HImode or QImode values to and from memory
1673 usually requires a scratch register. */
1674 if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode))
1675 {
1676 if (any_memory_operand (x, mode))
1677 {
1678 if (in_p)
1679 {
1680 if (!aligned_memory_operand (x, mode))
1681 sri->icode = direct_optab_handler (reload_in_optab, mode);
1682 }
1683 else
1684 sri->icode = direct_optab_handler (reload_out_optab, mode);
1685 return NO_REGS;
1686 }
1687 }
1688
1689 /* We also cannot do integral arithmetic into FP regs, as might result
1690 from register elimination into a DImode fp register. */
1691 if (rclass == FLOAT_REGS)
1692 {
1693 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
1694 return GENERAL_REGS;
1695 if (in_p && INTEGRAL_MODE_P (mode)
1696 && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x))
1697 return GENERAL_REGS;
1698 }
1699
1700 return NO_REGS;
1701 }
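/* For example, without -mbwx an input reload of a QImode value from a
   memory location that is not known to be aligned cannot be done
   directly; the reload_inqi expander from alpha.md is used instead,
   which reassembles the byte from the containing aligned quadword with
   the help of the scratch register.  */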
1702 \f
1703 /* Given SEQ, which is an INSN list, look for any MEMs in either
1704 a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and
1705 volatile flags from REF into each of the MEMs found. If REF is not
1706 a MEM, don't do anything. */
1707
1708 void
1709 alpha_set_memflags (rtx seq, rtx ref)
1710 {
1711 rtx_insn *insn;
1712
1713 if (!MEM_P (ref))
1714 return;
1715
1716 /* This is only called from alpha.md, after having had something
1717 generated from one of the insn patterns. So if everything is
1718 zero, the pattern is already up-to-date. */
1719 if (!MEM_VOLATILE_P (ref)
1720 && !MEM_NOTRAP_P (ref)
1721 && !MEM_READONLY_P (ref))
1722 return;
1723
1724 subrtx_var_iterator::array_type array;
1725 for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn))
1726 if (INSN_P (insn))
1727 FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST)
1728 {
1729 rtx x = *iter;
1730 if (MEM_P (x))
1731 {
1732 MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref);
1733 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref);
1734 MEM_READONLY_P (x) = MEM_READONLY_P (ref);
1735 /* Sadly, we cannot use alias sets because the extra
1736 aliasing produced by the AND interferes. Given that
1737 two-byte quantities are the only thing we would be
1738 able to differentiate anyway, there does not seem to
1739 be any point in convoluting the early out of the
1740 alias check. */
1741 iter.skip_subrtxes ();
1742 }
1743 }
1744 else
1745 gcc_unreachable ();
1746 }
1747 \f
1748 static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
1749 int, bool);
1750
1751 /* Internal routine for alpha_emit_set_const to check for N or below insns.
1752 If NO_OUTPUT is true, then we only check to see if N insns are possible,
1753 and return pc_rtx if successful. */
1754
1755 static rtx
1756 alpha_emit_set_const_1 (rtx target, machine_mode mode,
1757 HOST_WIDE_INT c, int n, bool no_output)
1758 {
1759 HOST_WIDE_INT new_const;
1760 int i, bits;
1761 /* Use a pseudo if highly optimizing and still generating RTL. */
1762 rtx subtarget
1763 = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
1764 rtx temp, insn;
1765
1766 /* If this is a sign-extended 32-bit constant, we can do this in at most
1767 three insns, so do it if we have enough insns left. */
1768
1769 if (c >> 31 == -1 || c >> 31 == 0)
1770 {
1771 HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
1772 HOST_WIDE_INT tmp1 = c - low;
1773 HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
1774 HOST_WIDE_INT extra = 0;
1775
1776 /* If HIGH will be interpreted as negative but the constant is
1777 positive, we must adjust it to do two ldha insns. */
1778
1779 if ((high & 0x8000) != 0 && c >= 0)
1780 {
1781 extra = 0x4000;
1782 tmp1 -= 0x40000000;
1783 high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
1784 }
1785
1786 if (c == low || (low == 0 && extra == 0))
1787 {
1788 /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
1789 but that meant that we can't handle INT_MIN on 32-bit machines
1790 (like NT/Alpha), because we recurse indefinitely through
1791 emit_move_insn to gen_movdi. So instead, since we know exactly
1792 what we want, create it explicitly. */
1793
1794 if (no_output)
1795 return pc_rtx;
1796 if (target == NULL)
1797 target = gen_reg_rtx (mode);
1798 emit_insn (gen_rtx_SET (target, GEN_INT (c)));
1799 return target;
1800 }
1801 else if (n >= 2 + (extra != 0))
1802 {
1803 if (no_output)
1804 return pc_rtx;
1805 if (!can_create_pseudo_p ())
1806 {
1807 emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
1808 temp = target;
1809 }
1810 else
1811 temp = copy_to_suggested_reg (GEN_INT (high << 16),
1812 subtarget, mode);
1813
1814 /* As of 2002-02-23, addsi3 is only available when not optimizing.
1815 This means that if we go through expand_binop, we'll try to
1816 generate extensions, etc, which will require new pseudos, which
1817 will fail during some split phases. The SImode add patterns
1818 still exist, but are not named. So build the insns by hand. */
1819
1820 if (extra != 0)
1821 {
1822 if (! subtarget)
1823 subtarget = gen_reg_rtx (mode);
1824 insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
1825 insn = gen_rtx_SET (subtarget, insn);
1826 emit_insn (insn);
1827 temp = subtarget;
1828 }
1829
1830 if (target == NULL)
1831 target = gen_reg_rtx (mode);
1832 insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
1833 insn = gen_rtx_SET (target, insn);
1834 emit_insn (insn);
1835 return target;
1836 }
1837 }
1838
1839 /* If we couldn't do it that way, try some other methods. But if we have
1840 no instructions left, don't bother. Likewise, if this is SImode and
1841 we can't make pseudos, we can't do anything since the expand_binop
1842 and expand_unop calls will widen and try to make pseudos. */
1843
1844 if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
1845 return 0;
1846
1847 /* Next, see if we can load a related constant and then shift and possibly
1848 negate it to get the constant we want. Try this once each increasing
1849 numbers of insns. */
1850
1851 for (i = 1; i < n; i++)
1852 {
1853 /* First, see if, minus some low bits, we have an easy load of the
1854 high bits. */
1855
1856 new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
1857 if (new_const != 0)
1858 {
1859 temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
1860 if (temp)
1861 {
1862 if (no_output)
1863 return temp;
1864 return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
1865 target, 0, OPTAB_WIDEN);
1866 }
1867 }
1868
1869 /* Next try complementing. */
1870 temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
1871 if (temp)
1872 {
1873 if (no_output)
1874 return temp;
1875 return expand_unop (mode, one_cmpl_optab, temp, target, 0);
1876 }
1877
1878 /* Next try to form a constant and do a left shift. We can do this
1879 if some low-order bits are zero; the exact_log2 call below tells
1880 us that information. The bits we are shifting out could be any
1881 value, but here we'll just try the 0- and sign-extended forms of
1882 the constant. To try to increase the chance of having the same
1883 constant in more than one insn, start at the highest number of
1884 bits to shift, but try all possibilities in case a ZAPNOT will
1885 be useful. */
1886
1887 bits = exact_log2 (c & -c);
1888 if (bits > 0)
1889 for (; bits > 0; bits--)
1890 {
1891 new_const = c >> bits;
1892 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1893 if (!temp && c < 0)
1894 {
1895 new_const = (unsigned HOST_WIDE_INT)c >> bits;
1896 temp = alpha_emit_set_const (subtarget, mode, new_const,
1897 i, no_output);
1898 }
1899 if (temp)
1900 {
1901 if (no_output)
1902 return temp;
1903 return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
1904 target, 0, OPTAB_WIDEN);
1905 }
1906 }
1907
1908 /* Now try high-order zero bits. Here we try the shifted-in bits as
1909 all zero and all ones. Be careful to avoid shifting outside the
1910 mode and to avoid shifting outside the host wide int size. */
1911
1912 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1913 - floor_log2 (c) - 1);
1914 if (bits > 0)
1915 for (; bits > 0; bits--)
1916 {
1917 new_const = c << bits;
1918 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1919 if (!temp)
1920 {
1921 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1922 temp = alpha_emit_set_const (subtarget, mode, new_const,
1923 i, no_output);
1924 }
1925 if (temp)
1926 {
1927 if (no_output)
1928 return temp;
1929 return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
1930 target, 1, OPTAB_WIDEN);
1931 }
1932 }
1933
1934 /* Now try high-order 1 bits. We get that with a sign-extension.
1935 But one bit isn't enough here. Be careful to avoid shifting outside
1936 the mode and to avoid shifting outside the host wide int size. */
1937
1938 bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
1939 - floor_log2 (~ c) - 2);
1940 if (bits > 0)
1941 for (; bits > 0; bits--)
1942 {
1943 new_const = c << bits;
1944 temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
1945 if (!temp)
1946 {
1947 new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
1948 temp = alpha_emit_set_const (subtarget, mode, new_const,
1949 i, no_output);
1950 }
1951 if (temp)
1952 {
1953 if (no_output)
1954 return temp;
1955 return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
1956 target, 0, OPTAB_WIDEN);
1957 }
1958 }
1959 }
1960
1961 /* Finally, see if we can load a value into the target that is the same as the
1962 constant except that all bytes that are 0 are changed to be 0xff. If we
1963 can, then we can do a ZAPNOT to obtain the desired constant. */
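/* For example, c = 0x1200000000003400 gives new_const =
   0x12ffffffffff34ff, and the AND mask c | ~new_const is then
   0xff0000000000ff00 -- every byte is 0x00 or 0xff, which is exactly
   what a zapnot with byte mask 0x82 implements. */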
1964
1965 new_const = c;
1966 for (i = 0; i < 64; i += 8)
1967 if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
1968 new_const |= (HOST_WIDE_INT) 0xff << i;
1969
1970 /* We are only called for SImode and DImode. If this is SImode, ensure that
1971 we are sign extended to a full word. */
1972
1973 if (mode == SImode)
1974 new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
1975
1976 if (new_const != c)
1977 {
1978 temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
1979 if (temp)
1980 {
1981 if (no_output)
1982 return temp;
1983 return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
1984 target, 0, OPTAB_WIDEN);
1985 }
1986 }
1987
1988 return 0;
1989 }
1990
1991 /* Try to output insns to set TARGET equal to the constant C if it can be
1992 done in at most N insns. Do all computations in MODE. Returns the place
1993 where the output has been placed if it can be done and the insns have been
1994 emitted. If it would take more than N insns, zero is returned and no
1995 insns are emitted. */
1996
1997 static rtx
1998 alpha_emit_set_const (rtx target, machine_mode mode,
1999 HOST_WIDE_INT c, int n, bool no_output)
2000 {
2001 machine_mode orig_mode = mode;
2002 rtx orig_target = target;
2003 rtx result = 0;
2004 int i;
2005
2006 /* If we can't make any pseudos, TARGET is an SImode hard register, and we
2007 can't load this constant in one insn, do this in DImode. */
2008 if (!can_create_pseudo_p () && mode == SImode
2009 && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
2010 {
2011 result = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
2012 if (result)
2013 return result;
2014
2015 target = no_output ? NULL : gen_lowpart (DImode, target);
2016 mode = DImode;
2017 }
2018 else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
2019 {
2020 target = no_output ? NULL : gen_lowpart (DImode, target);
2021 mode = DImode;
2022 }
2023
2024 /* Try 1 insn, then 2, then up to N. */
2025 for (i = 1; i <= n; i++)
2026 {
2027 result = alpha_emit_set_const_1 (target, mode, c, i, no_output);
2028 if (result)
2029 {
2030 rtx_insn *insn;
2031 rtx set;
2032
2033 if (no_output)
2034 return result;
2035
2036 insn = get_last_insn ();
2037 set = single_set (insn);
2038 if (! CONSTANT_P (SET_SRC (set)))
2039 set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c));
2040 break;
2041 }
2042 }
2043
2044 /* Allow for the case where we changed the mode of TARGET. */
2045 if (result)
2046 {
2047 if (result == target)
2048 result = orig_target;
2049 else if (mode != orig_mode)
2050 result = gen_lowpart (orig_mode, result);
2051 }
2052
2053 return result;
2054 }
2055
2056 /* Having failed to find a 3 insn sequence in alpha_emit_set_const,
2057 fall back to a straightforward decomposition. We do this to avoid
2058 exponential run times encountered when looking for longer sequences
2059 with alpha_emit_set_const. */
2060
2061 static rtx
2062 alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
2063 {
2064 HOST_WIDE_INT d1, d2, d3, d4;
2065
2066 /* Decompose the entire word */
2067
2068 d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2069 c1 -= d1;
2070 d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2071 c1 = (c1 - d2) >> 32;
2072 d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
2073 c1 -= d3;
2074 d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000;
2075 gcc_assert (c1 == d4);
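/* For example, c1 = 0x123456789abcdef0 decomposes into d1 = -0x2110,
   d2 = -0x65430000, d3 = 0x5679 and d4 = 0x12340000; the code below
   then rebuilds it as (((d4 + d3) << 32) + d2) + d1. */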
2076
2077 /* Construct the high word */
2078 if (d4)
2079 {
2080 emit_move_insn (target, GEN_INT (d4));
2081 if (d3)
2082 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3)));
2083 }
2084 else
2085 emit_move_insn (target, GEN_INT (d3));
2086
2087 /* Shift it into place */
2088 emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));
2089
2090 /* Add in the low bits. */
2091 if (d2)
2092 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2)));
2093 if (d1)
2094 emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1)));
2095
2096 return target;
2097 }
2098
2099 /* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */
2100
2101 static HOST_WIDE_INT
2102 alpha_extract_integer (rtx x)
2103 {
2104 if (GET_CODE (x) == CONST_VECTOR)
2105 x = simplify_subreg (DImode, x, GET_MODE (x), 0);
2106
2107 gcc_assert (CONST_INT_P (x));
2108
2109 return INTVAL (x);
2110 }
2111
2112 /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which
2113 we are willing to load the value into a register via a move pattern.
2114 Normally this is all symbolic constants, integral constants that
2115 take three or fewer instructions, and floating-point zero. */
2116
2117 bool
2118 alpha_legitimate_constant_p (machine_mode mode, rtx x)
2119 {
2120 HOST_WIDE_INT i0;
2121
2122 switch (GET_CODE (x))
2123 {
2124 case LABEL_REF:
2125 case HIGH:
2126 return true;
2127
2128 case CONST:
2129 if (GET_CODE (XEXP (x, 0)) == PLUS
2130 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2131 x = XEXP (XEXP (x, 0), 0);
2132 else
2133 return true;
2134
2135 if (GET_CODE (x) != SYMBOL_REF)
2136 return true;
2137 /* FALLTHRU */
2138
2139 case SYMBOL_REF:
2140 /* TLS symbols are never valid. */
2141 return SYMBOL_REF_TLS_MODEL (x) == 0;
2142
2143 case CONST_WIDE_INT:
2144 if (TARGET_BUILD_CONSTANTS)
2145 return true;
2146 if (x == CONST0_RTX (mode))
2147 return true;
2148 mode = DImode;
2149 gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
2150 i0 = CONST_WIDE_INT_ELT (x, 1);
2151 if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL)
2152 return false;
2153 i0 = CONST_WIDE_INT_ELT (x, 0);
2154 goto do_integer;
2155
2156 case CONST_DOUBLE:
2157 if (x == CONST0_RTX (mode))
2158 return true;
2159 return false;
2160
2161 case CONST_VECTOR:
2162 if (x == CONST0_RTX (mode))
2163 return true;
2164 if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
2165 return false;
2166 if (GET_MODE_SIZE (mode) != 8)
2167 return false;
2168 /* FALLTHRU */
2169
2170 case CONST_INT:
2171 if (TARGET_BUILD_CONSTANTS)
2172 return true;
2173 i0 = alpha_extract_integer (x);
2174 do_integer:
2175 return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL;
2176
2177 default:
2178 return false;
2179 }
2180 }
2181
2182 /* Operand 1 is known to be a constant, and should require more than one
2183 instruction to load. Emit that multi-part load. */
2184
2185 bool
2186 alpha_split_const_mov (machine_mode mode, rtx *operands)
2187 {
2188 HOST_WIDE_INT i0;
2189 rtx temp = NULL_RTX;
2190
2191 i0 = alpha_extract_integer (operands[1]);
2192
2193 temp = alpha_emit_set_const (operands[0], mode, i0, 3, false);
2194
2195 if (!temp && TARGET_BUILD_CONSTANTS)
2196 temp = alpha_emit_set_long_const (operands[0], i0);
2197
2198 if (temp)
2199 {
2200 if (!rtx_equal_p (operands[0], temp))
2201 emit_move_insn (operands[0], temp);
2202 return true;
2203 }
2204
2205 return false;
2206 }
2207
2208 /* Expand a move instruction; return true if all work is done.
2209 We don't handle non-bwx subword loads here. */
2210
2211 bool
2212 alpha_expand_mov (machine_mode mode, rtx *operands)
2213 {
2214 rtx tmp;
2215
2216 /* If the output is not a register, the input must be. */
2217 if (MEM_P (operands[0])
2218 && ! reg_or_0_operand (operands[1], mode))
2219 operands[1] = force_reg (mode, operands[1]);
2220
2221 /* Allow legitimize_address to perform some simplifications. */
2222 if (mode == Pmode && symbolic_operand (operands[1], mode))
2223 {
2224 tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode);
2225 if (tmp)
2226 {
2227 if (tmp == operands[0])
2228 return true;
2229 operands[1] = tmp;
2230 return false;
2231 }
2232 }
2233
2234 /* Early out for non-constants and valid constants. */
2235 if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
2236 return false;
2237
2238 /* Split large integers. */
2239 if (CONST_INT_P (operands[1])
2240 || GET_CODE (operands[1]) == CONST_VECTOR)
2241 {
2242 if (alpha_split_const_mov (mode, operands))
2243 return true;
2244 }
2245
2246 /* Otherwise we've nothing left but to drop the thing to memory. */
2247 tmp = force_const_mem (mode, operands[1]);
2248
2249 if (tmp == NULL_RTX)
2250 return false;
2251
2252 if (reload_in_progress)
2253 {
2254 emit_move_insn (operands[0], XEXP (tmp, 0));
2255 operands[1] = replace_equiv_address (tmp, operands[0]);
2256 }
2257 else
2258 operands[1] = validize_mem (tmp);
2259 return false;
2260 }
2261
2262 /* Expand a non-bwx QImode or HImode move instruction;
2263 return true if all work is done. */
2264
2265 bool
2266 alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
2267 {
2268 rtx seq;
2269
2270 /* If the output is not a register, the input must be. */
2271 if (MEM_P (operands[0]))
2272 operands[1] = force_reg (mode, operands[1]);
2273
2274 /* Handle four memory cases, unaligned and aligned for either the input
2275 or the output. The only case where we can be called during reload is
2276 for aligned loads; all other cases require temporaries. */
2277
2278 if (any_memory_operand (operands[1], mode))
2279 {
2280 if (aligned_memory_operand (operands[1], mode))
2281 {
2282 if (reload_in_progress)
2283 {
2284 if (mode == QImode)
2285 seq = gen_reload_inqi_aligned (operands[0], operands[1]);
2286 else
2287 seq = gen_reload_inhi_aligned (operands[0], operands[1]);
2288 emit_insn (seq);
2289 }
2290 else
2291 {
2292 rtx aligned_mem, bitnum;
2293 rtx scratch = gen_reg_rtx (SImode);
2294 rtx subtarget;
2295 bool copyout;
2296
2297 get_aligned_mem (operands[1], &aligned_mem, &bitnum);
2298
2299 subtarget = operands[0];
2300 if (REG_P (subtarget))
2301 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2302 else
2303 subtarget = gen_reg_rtx (DImode), copyout = true;
2304
2305 if (mode == QImode)
2306 seq = gen_aligned_loadqi (subtarget, aligned_mem,
2307 bitnum, scratch);
2308 else
2309 seq = gen_aligned_loadhi (subtarget, aligned_mem,
2310 bitnum, scratch);
2311 emit_insn (seq);
2312
2313 if (copyout)
2314 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2315 }
2316 }
2317 else
2318 {
2319 /* Don't pass these as parameters since that makes the generated
2320 code depend on parameter evaluation order which will cause
2321 bootstrap failures. */
2322
2323 rtx temp1, temp2, subtarget, ua;
2324 bool copyout;
2325
2326 temp1 = gen_reg_rtx (DImode);
2327 temp2 = gen_reg_rtx (DImode);
2328
2329 subtarget = operands[0];
2330 if (REG_P (subtarget))
2331 subtarget = gen_lowpart (DImode, subtarget), copyout = false;
2332 else
2333 subtarget = gen_reg_rtx (DImode), copyout = true;
2334
2335 ua = get_unaligned_address (operands[1]);
2336 if (mode == QImode)
2337 seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
2338 else
2339 seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
2340
2341 alpha_set_memflags (seq, operands[1]);
2342 emit_insn (seq);
2343
2344 if (copyout)
2345 emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
2346 }
2347 return true;
2348 }
2349
2350 if (any_memory_operand (operands[0], mode))
2351 {
2352 if (aligned_memory_operand (operands[0], mode))
2353 {
2354 rtx aligned_mem, bitnum;
2355 rtx temp1 = gen_reg_rtx (SImode);
2356 rtx temp2 = gen_reg_rtx (SImode);
2357
2358 get_aligned_mem (operands[0], &aligned_mem, &bitnum);
2359
2360 emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
2361 temp1, temp2));
2362 }
2363 else
2364 {
2365 rtx temp1 = gen_reg_rtx (DImode);
2366 rtx temp2 = gen_reg_rtx (DImode);
2367 rtx temp3 = gen_reg_rtx (DImode);
2368 rtx ua = get_unaligned_address (operands[0]);
2369
2370 if (mode == QImode)
2371 seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
2372 else
2373 seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
2374
2375 alpha_set_memflags (seq, operands[0]);
2376 emit_insn (seq);
2377 }
2378 return true;
2379 }
2380
2381 return false;
2382 }
2383
2384 /* Implement the movmisalign patterns. One of the operands is a memory
2385 that is not naturally aligned. Emit instructions to load it. */
2386
2387 void
2388 alpha_expand_movmisalign (machine_mode mode, rtx *operands)
2389 {
2390 /* Honor misaligned loads; these are the cases we promised to handle. */
2391 if (MEM_P (operands[1]))
2392 {
2393 rtx tmp;
2394
2395 if (register_operand (operands[0], mode))
2396 tmp = operands[0];
2397 else
2398 tmp = gen_reg_rtx (mode);
2399
2400 alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
2401 if (tmp != operands[0])
2402 emit_move_insn (operands[0], tmp);
2403 }
2404 else if (MEM_P (operands[0]))
2405 {
2406 if (!reg_or_0_operand (operands[1], mode))
2407 operands[1] = force_reg (mode, operands[1]);
2408 alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
2409 }
2410 else
2411 gcc_unreachable ();
2412 }
2413
2414 /* Generate an unsigned DImode to FP conversion. This is the same code
2415 optabs would emit if we didn't have TFmode patterns.
2416
2417 For SFmode, this is the only construction I've found that can pass
2418 gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
2419 intermediates will work, because you'll get intermediate rounding
2420 that ruins the end result. Some of this could be fixed by turning
2421 on round-to-positive-infinity, but that requires diddling the fpsr,
2422 which kills performance. I tried turning this around and converting
2423 to a negative number, so that I could turn on /m, but either I did
2424 it wrong or there's something else going on, because I wound up with the exact
2425 same single-bit error. There is a branch-less form of this same code:
2426
2427 srl $16,1,$1
2428 and $16,1,$2
2429 cmplt $16,0,$3
2430 or $1,$2,$2
2431 cmovge $16,$16,$2
2432 itoft $3,$f10
2433 itoft $2,$f11
2434 cvtqs $f11,$f11
2435 adds $f11,$f11,$f0
2436 fcmoveq $f10,$f11,$f0
2437
2438 I'm not using it because it's the same number of instructions as
2439 this branch-full form, and it has more serialized long latency
2440 instructions on the critical path.
2441
2442 For DFmode, we can avoid rounding errors by breaking up the word
2443 into two pieces, converting them separately, and adding them back:
2444
2445 LC0: .long 0,0x5f800000
2446
2447 itoft $16,$f11
2448 lda $2,LC0
2449 cmplt $16,0,$1
2450 cpyse $f11,$f31,$f10
2451 cpyse $f31,$f11,$f11
2452 s4addq $1,$2,$1
2453 lds $f12,0($1)
2454 cvtqt $f10,$f10
2455 cvtqt $f11,$f11
2456 addt $f12,$f10,$f0
2457 addt $f0,$f11,$f0
2458
2459 This doesn't seem to be a clear-cut win over the optabs form.
2460 It probably all depends on the distribution of numbers being
2461 converted -- in the optabs form, all but high-bit-set has a
2462 much lower minimum execution time. */
2463
2464 void
2465 alpha_emit_floatuns (rtx operands[2])
2466 {
2467 rtx neglab, donelab, i0, i1, f0, in, out;
2468 machine_mode mode;
2469
2470 out = operands[0];
2471 in = force_reg (DImode, operands[1]);
2472 mode = GET_MODE (out);
2473 neglab = gen_label_rtx ();
2474 donelab = gen_label_rtx ();
2475 i0 = gen_reg_rtx (DImode);
2476 i1 = gen_reg_rtx (DImode);
2477 f0 = gen_reg_rtx (mode);
2478
2479 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
2480
2481 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
2482 emit_jump_insn (gen_jump (donelab));
2483 emit_barrier ();
2484
2485 emit_label (neglab);
2486
2487 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
2488 emit_insn (gen_anddi3 (i1, in, const1_rtx));
2489 emit_insn (gen_iordi3 (i0, i0, i1));
2490 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
2491 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
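/* Note: ORing the discarded low bit back into i0 acts as a sticky bit,
   so the halved value rounds exactly as in/2 would; the final doubling
   is exact, which is what avoids the double-rounding problem discussed
   in the comment above. */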
2492
2493 emit_label (donelab);
2494 }
2495
2496 /* Generate the comparison for a conditional branch. */
2497
2498 void
2499 alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
2500 {
2501 enum rtx_code cmp_code, branch_code;
2502 machine_mode branch_mode = VOIDmode;
2503 enum rtx_code code = GET_CODE (operands[0]);
2504 rtx op0 = operands[1], op1 = operands[2];
2505 rtx tem;
2506
2507 if (cmp_mode == TFmode)
2508 {
2509 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2510 op1 = const0_rtx;
2511 cmp_mode = DImode;
2512 }
2513
2514 /* The general case: fold the comparison code to the types of compares
2515 that we have, choosing the branch as necessary. */
2516 switch (code)
2517 {
2518 case EQ: case LE: case LT: case LEU: case LTU:
2519 case UNORDERED:
2520 /* We have these compares. */
2521 cmp_code = code, branch_code = NE;
2522 break;
2523
2524 case NE:
2525 case ORDERED:
2526 /* These must be reversed. */
2527 cmp_code = reverse_condition (code), branch_code = EQ;
2528 break;
2529
2530 case GE: case GT: case GEU: case GTU:
2531 /* For FP, we swap them, for INT, we reverse them. */
2532 if (cmp_mode == DFmode)
2533 {
2534 cmp_code = swap_condition (code);
2535 branch_code = NE;
2536 std::swap (op0, op1);
2537 }
2538 else
2539 {
2540 cmp_code = reverse_condition (code);
2541 branch_code = EQ;
2542 }
2543 break;
2544
2545 default:
2546 gcc_unreachable ();
2547 }
2548
2549 if (cmp_mode == DFmode)
2550 {
2551 if (flag_unsafe_math_optimizations && cmp_code != UNORDERED)
2552 {
2553 /* When we are not as concerned about non-finite values, and we
2554 are comparing against zero, we can branch directly. */
2555 if (op1 == CONST0_RTX (DFmode))
2556 cmp_code = UNKNOWN, branch_code = code;
2557 else if (op0 == CONST0_RTX (DFmode))
2558 {
2559 /* Undo the swap we probably did just above. */
2560 std::swap (op0, op1);
2561 branch_code = swap_condition (cmp_code);
2562 cmp_code = UNKNOWN;
2563 }
2564 }
2565 else
2566 {
2567 /* ??? We mark the branch mode to be CCmode to prevent the
2568 compare and branch from being combined, since the compare
2569 insn follows IEEE rules that the branch does not. */
2570 branch_mode = CCmode;
2571 }
2572 }
2573 else
2574 {
2575 /* The following optimizations are only for signed compares. */
2576 if (code != LEU && code != LTU && code != GEU && code != GTU)
2577 {
2578 /* Whee. Compare and branch against 0 directly. */
2579 if (op1 == const0_rtx)
2580 cmp_code = UNKNOWN, branch_code = code;
2581
2582 /* If the constant doesn't fit into an immediate, but can
2583 be generated by lda/ldah, we adjust the argument and
2584 compare against zero, so we can use beq/bne directly. */
2585 /* ??? Don't do this when comparing against symbols, otherwise
2586 we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will
2587 be declared false out of hand (at least for non-weak). */
2588 else if (CONST_INT_P (op1)
2589 && (code == EQ || code == NE)
2590 && !(symbolic_operand (op0, VOIDmode)
2591 || (REG_P (op0) && REG_POINTER (op0))))
2592 {
2593 rtx n_op1 = GEN_INT (-INTVAL (op1));
2594
2595 if (! satisfies_constraint_I (op1)
2596 && (satisfies_constraint_K (n_op1)
2597 || satisfies_constraint_L (n_op1)))
2598 cmp_code = PLUS, branch_code = code, op1 = n_op1;
2599 }
2600 }
2601
2602 if (!reg_or_0_operand (op0, DImode))
2603 op0 = force_reg (DImode, op0);
2604 if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode))
2605 op1 = force_reg (DImode, op1);
2606 }
2607
2608 /* Emit an initial compare instruction, if necessary. */
2609 tem = op0;
2610 if (cmp_code != UNKNOWN)
2611 {
2612 tem = gen_reg_rtx (cmp_mode);
2613 emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1));
2614 }
2615
2616 /* Emit the branch instruction. */
2617 tem = gen_rtx_SET (pc_rtx,
2618 gen_rtx_IF_THEN_ELSE (VOIDmode,
2619 gen_rtx_fmt_ee (branch_code,
2620 branch_mode, tem,
2621 CONST0_RTX (cmp_mode)),
2622 gen_rtx_LABEL_REF (VOIDmode,
2623 operands[3]),
2624 pc_rtx));
2625 emit_jump_insn (tem);
2626 }
2627
2628 /* Certain simplifications can be done to make invalid setcc operations
2629 valid. Return true if the setcc was emitted, false if the operation cannot be handled. */
2630
2631 bool
2632 alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
2633 {
2634 enum rtx_code cmp_code;
2635 enum rtx_code code = GET_CODE (operands[1]);
2636 rtx op0 = operands[2], op1 = operands[3];
2637 rtx tmp;
2638
2639 if (cmp_mode == TFmode)
2640 {
2641 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2642 op1 = const0_rtx;
2643 cmp_mode = DImode;
2644 }
2645
2646 if (cmp_mode == DFmode && !TARGET_FIX)
2647 return 0;
2648
2649 /* The general case: fold the comparison code to the types of compares
2650 that we have, choosing the branch as necessary. */
2651
2652 cmp_code = UNKNOWN;
2653 switch (code)
2654 {
2655 case EQ: case LE: case LT: case LEU: case LTU:
2656 case UNORDERED:
2657 /* We have these compares. */
2658 if (cmp_mode == DFmode)
2659 cmp_code = code, code = NE;
2660 break;
2661
2662 case NE:
2663 if (cmp_mode == DImode && op1 == const0_rtx)
2664 break;
2665 /* FALLTHRU */
2666
2667 case ORDERED:
2668 cmp_code = reverse_condition (code);
2669 code = EQ;
2670 break;
2671
2672 case GE: case GT: case GEU: case GTU:
2673 /* These normally need swapping, but for integer zero we have
2674 special patterns that recognize swapped operands. */
2675 if (cmp_mode == DImode && op1 == const0_rtx)
2676 break;
2677 code = swap_condition (code);
2678 if (cmp_mode == DFmode)
2679 cmp_code = code, code = NE;
2680 std::swap (op0, op1);
2681 break;
2682
2683 default:
2684 gcc_unreachable ();
2685 }
2686
2687 if (cmp_mode == DImode)
2688 {
2689 if (!register_operand (op0, DImode))
2690 op0 = force_reg (DImode, op0);
2691 if (!reg_or_8bit_operand (op1, DImode))
2692 op1 = force_reg (DImode, op1);
2693 }
2694
2695 /* Emit an initial compare instruction, if necessary. */
2696 if (cmp_code != UNKNOWN)
2697 {
2698 tmp = gen_reg_rtx (cmp_mode);
2699 emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2700 op0, op1)));
2701
2702 op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp;
2703 op1 = const0_rtx;
2704 }
2705
2706 /* Emit the setcc instruction. */
2707 emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode,
2708 op0, op1)));
2709 return true;
2710 }
2711
2712
2713 /* Rewrite a comparison against zero CMP of the form
2714 (CODE (cc0) (const_int 0)) so it can be written validly in
2715 a conditional move (if_then_else CMP ...).
2716 If both of the operands that set cc0 are nonzero we must emit
2717 an insn to perform the compare (it can't be done within
2718 the conditional move). */
2719
2720 rtx
2721 alpha_emit_conditional_move (rtx cmp, machine_mode mode)
2722 {
2723 enum rtx_code code = GET_CODE (cmp);
2724 enum rtx_code cmov_code = NE;
2725 rtx op0 = XEXP (cmp, 0);
2726 rtx op1 = XEXP (cmp, 1);
2727 machine_mode cmp_mode
2728 = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0));
2729 machine_mode cmov_mode = VOIDmode;
2730 int local_fast_math = flag_unsafe_math_optimizations;
2731 rtx tem;
2732
2733 if (cmp_mode == TFmode)
2734 {
2735 op0 = alpha_emit_xfloating_compare (&code, op0, op1);
2736 op1 = const0_rtx;
2737 cmp_mode = DImode;
2738 }
2739
2740 gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);
2741
2742 if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
2743 {
2744 enum rtx_code cmp_code;
2745
2746 if (! TARGET_FIX)
2747 return 0;
2748
2749 /* If we have fp<->int register move instructions, do a cmov by
2750 performing the comparison in fp registers, and move the
2751 zero/nonzero value to integer registers, where we can then
2752 use a normal cmov, or vice-versa. */
2753
2754 switch (code)
2755 {
2756 case EQ: case LE: case LT: case LEU: case LTU:
2757 case UNORDERED:
2758 /* We have these compares. */
2759 cmp_code = code, code = NE;
2760 break;
2761
2762 case NE:
2763 case ORDERED:
2764 /* These must be reversed. */
2765 cmp_code = reverse_condition (code), code = EQ;
2766 break;
2767
2768 case GE: case GT: case GEU: case GTU:
2769 /* These normally need swapping, but for integer zero we have
2770 special patterns that recognize swapped operands. */
2771 if (cmp_mode == DImode && op1 == const0_rtx)
2772 cmp_code = code, code = NE;
2773 else
2774 {
2775 cmp_code = swap_condition (code);
2776 code = NE;
2777 std::swap (op0, op1);
2778 }
2779 break;
2780
2781 default:
2782 gcc_unreachable ();
2783 }
2784
2785 if (cmp_mode == DImode)
2786 {
2787 if (!reg_or_0_operand (op0, DImode))
2788 op0 = force_reg (DImode, op0);
2789 if (!reg_or_8bit_operand (op1, DImode))
2790 op1 = force_reg (DImode, op1);
2791 }
2792
2793 tem = gen_reg_rtx (cmp_mode);
2794 emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode,
2795 op0, op1)));
2796
2797 cmp_mode = cmp_mode == DImode ? DFmode : DImode;
2798 op0 = gen_lowpart (cmp_mode, tem);
2799 op1 = CONST0_RTX (cmp_mode);
2800 cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2801 local_fast_math = 1;
2802 }
2803
2804 if (cmp_mode == DImode)
2805 {
2806 if (!reg_or_0_operand (op0, DImode))
2807 op0 = force_reg (DImode, op0);
2808 if (!reg_or_8bit_operand (op1, DImode))
2809 op1 = force_reg (DImode, op1);
2810 }
2811
2812 /* We may be able to use a conditional move directly.
2813 This avoids emitting spurious compares. */
2814 if (signed_comparison_operator (cmp, VOIDmode)
2815 && (cmp_mode == DImode || local_fast_math)
2816 && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode)))
2817 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2818
2819 /* We can't put the comparison inside the conditional move;
2820 emit a compare instruction and put that inside the
2821 conditional move. Make sure we emit only comparisons we have;
2822 swap or reverse as necessary. */
2823
2824 if (!can_create_pseudo_p ())
2825 return NULL_RTX;
2826
2827 switch (code)
2828 {
2829 case EQ: case LE: case LT: case LEU: case LTU:
2830 case UNORDERED:
2831 /* We have these compares: */
2832 break;
2833
2834 case NE:
2835 case ORDERED:
2836 /* These must be reversed. */
2837 code = reverse_condition (code);
2838 cmov_code = EQ;
2839 break;
2840
2841 case GE: case GT: case GEU: case GTU:
2842 /* These normally need swapping, but for integer zero we have
2843 special patterns that recognize swapped operands. */
2844 if (cmp_mode == DImode && op1 == const0_rtx)
2845 break;
2846 code = swap_condition (code);
2847 std::swap (op0, op1);
2848 break;
2849
2850 default:
2851 gcc_unreachable ();
2852 }
2853
2854 if (cmp_mode == DImode)
2855 {
2856 if (!reg_or_0_operand (op0, DImode))
2857 op0 = force_reg (DImode, op0);
2858 if (!reg_or_8bit_operand (op1, DImode))
2859 op1 = force_reg (DImode, op1);
2860 }
2861
2862 /* ??? We mark the branch mode to be CCmode to prevent the compare
2863 and cmov from being combined, since the compare insn follows IEEE
2864 rules that the cmov does not. */
2865 if (cmp_mode == DFmode && !local_fast_math)
2866 cmov_mode = CCmode;
2867
2868 tem = gen_reg_rtx (cmp_mode);
2869 emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1));
2870 return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode));
2871 }
2872
2873 /* Simplify a conditional move of two constants into a setcc with
2874 arithmetic. This is done with a splitter since combine would
2875 just undo the work if done during code generation. It also catches
2876 cases we wouldn't have before cse. */
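/* For example, (cond ? 4 : 0) becomes a setcc followed by a shift left
   by 2, (cond ? -1 : 0) becomes a setcc followed by a negate, and
   (cond ? 5 : 1) becomes a setcc followed by an s4addq. */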
2877
2878 int
2879 alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
2880 rtx t_rtx, rtx f_rtx)
2881 {
2882 HOST_WIDE_INT t, f, diff;
2883 machine_mode mode;
2884 rtx target, subtarget, tmp;
2885
2886 mode = GET_MODE (dest);
2887 t = INTVAL (t_rtx);
2888 f = INTVAL (f_rtx);
2889 diff = t - f;
2890
2891 if (((code == NE || code == EQ) && diff < 0)
2892 || (code == GE || code == GT))
2893 {
2894 code = reverse_condition (code);
2895 diff = t, t = f, f = diff;
2896 diff = t - f;
2897 }
2898
2899 subtarget = target = dest;
2900 if (mode != DImode)
2901 {
2902 target = gen_lowpart (DImode, dest);
2903 if (can_create_pseudo_p ())
2904 subtarget = gen_reg_rtx (DImode);
2905 else
2906 subtarget = target;
2907 }
2908 /* Below, we must be careful to use copy_rtx on target and subtarget
2909 in intermediate insns, as they may be a subreg rtx, which may not
2910 be shared. */
2911
2912 if (f == 0 && exact_log2 (diff) > 0
2913 /* On EV6, we've got enough shifters to make non-arithmetic shifts
2914 viable over a longer latency cmove. On EV5, the E0 slot is a
2915 scarce resource, and on EV4 shift has the same latency as a cmove. */
2916 && (diff <= 8 || alpha_tune == PROCESSOR_EV6))
2917 {
2918 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2919 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2920
2921 tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
2922 GEN_INT (exact_log2 (t)));
2923 emit_insn (gen_rtx_SET (target, tmp));
2924 }
2925 else if (f == 0 && t == -1)
2926 {
2927 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2928 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2929
2930 emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
2931 }
2932 else if (diff == 1 || diff == 4 || diff == 8)
2933 {
2934 rtx add_op;
2935
2936 tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
2937 emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
2938
2939 if (diff == 1)
2940 emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
2941 else
2942 {
2943 add_op = GEN_INT (f);
2944 if (sext_add_operand (add_op, mode))
2945 {
2946 tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget),
2947 GEN_INT (diff));
2948 tmp = gen_rtx_PLUS (DImode, tmp, add_op);
2949 emit_insn (gen_rtx_SET (target, tmp));
2950 }
2951 else
2952 return 0;
2953 }
2954 }
2955 else
2956 return 0;
2957
2958 return 1;
2959 }
2960 \f
2961 /* Look up the X_floating library function for the
2962 given operation. */
2963
2964 struct GTY(()) xfloating_op
2965 {
2966 const enum rtx_code code;
2967 const char *const GTY((skip)) osf_func;
2968 const char *const GTY((skip)) vms_func;
2969 rtx libcall;
2970 };
2971
2972 static GTY(()) struct xfloating_op xfloating_ops[] =
2973 {
2974 { PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
2975 { MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
2976 { MULT, "_OtsMulX", "OTS$MUL_X", 0 },
2977 { DIV, "_OtsDivX", "OTS$DIV_X", 0 },
2978 { EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
2979 { NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
2980 { LT, "_OtsLssX", "OTS$LSS_X", 0 },
2981 { LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
2982 { GT, "_OtsGtrX", "OTS$GTR_X", 0 },
2983 { GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
2984 { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
2985 { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
2986 { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
2987 { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
2988 { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
2989 };
2990
2991 static GTY(()) struct xfloating_op vax_cvt_ops[] =
2992 {
2993 { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
2994 { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
2995 };
2996
2997 static rtx
2998 alpha_lookup_xfloating_lib_func (enum rtx_code code)
2999 {
3000 struct xfloating_op *ops = xfloating_ops;
3001 long n = ARRAY_SIZE (xfloating_ops);
3002 long i;
3003
3004 gcc_assert (TARGET_HAS_XFLOATING_LIBS);
3005
3006 /* How irritating. Nothing to key off for the main table. */
3007 if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
3008 {
3009 ops = vax_cvt_ops;
3010 n = ARRAY_SIZE (vax_cvt_ops);
3011 }
3012
3013 for (i = 0; i < n; ++i, ++ops)
3014 if (ops->code == code)
3015 {
3016 rtx func = ops->libcall;
3017 if (!func)
3018 {
3019 func = init_one_libfunc (TARGET_ABI_OPEN_VMS
3020 ? ops->vms_func : ops->osf_func);
3021 ops->libcall = func;
3022 }
3023 return func;
3024 }
3025
3026 gcc_unreachable ();
3027 }
3028
3029 /* Most X_floating operations take the rounding mode as an argument.
3030 Compute that here. */
3031
3032 static int
3033 alpha_compute_xfloating_mode_arg (enum rtx_code code,
3034 enum alpha_fp_rounding_mode round)
3035 {
3036 int mode;
3037
3038 switch (round)
3039 {
3040 case ALPHA_FPRM_NORM:
3041 mode = 2;
3042 break;
3043 case ALPHA_FPRM_MINF:
3044 mode = 1;
3045 break;
3046 case ALPHA_FPRM_CHOP:
3047 mode = 0;
3048 break;
3049 case ALPHA_FPRM_DYN:
3050 mode = 4;
3051 break;
3052 default:
3053 gcc_unreachable ();
3054
3055 /* XXX For reference, round to +inf is mode = 3. */
3056 }
3057
3058 if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N)
3059 mode |= 0x10000;
3060
3061 return mode;
3062 }
3063
3064 /* Emit an X_floating library function call.
3065
3066 Note that these functions do not follow normal calling conventions:
3067 TFmode arguments are passed in two integer registers (as opposed to
3068 indirect); TFmode return values appear in R16+R17.
3069
3070 FUNC is the function to call.
3071 TARGET is where the output belongs.
3072 OPERANDS are the inputs.
3073 NOPERANDS is the count of inputs.
3074 EQUIV is the expression equivalent for the function.
3075 */
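/* For example, for a two-operand arithmetic call the TFmode operands
   land in $16/$17 and $18/$19, the rounding-mode integer in $20, and
   the TFmode result comes back in $16/$17. */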
3076
3077 static void
3078 alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
3079 int noperands, rtx equiv)
3080 {
3081 rtx usage = NULL_RTX, tmp, reg;
3082 int regno = 16, i;
3083
3084 start_sequence ();
3085
3086 for (i = 0; i < noperands; ++i)
3087 {
3088 switch (GET_MODE (operands[i]))
3089 {
3090 case TFmode:
3091 reg = gen_rtx_REG (TFmode, regno);
3092 regno += 2;
3093 break;
3094
3095 case DFmode:
3096 reg = gen_rtx_REG (DFmode, regno + 32);
3097 regno += 1;
3098 break;
3099
3100 case VOIDmode:
3101 gcc_assert (CONST_INT_P (operands[i]));
3102 /* FALLTHRU */
3103 case DImode:
3104 reg = gen_rtx_REG (DImode, regno);
3105 regno += 1;
3106 break;
3107
3108 default:
3109 gcc_unreachable ();
3110 }
3111
3112 emit_move_insn (reg, operands[i]);
3113 use_reg (&usage, reg);
3114 }
3115
3116 switch (GET_MODE (target))
3117 {
3118 case TFmode:
3119 reg = gen_rtx_REG (TFmode, 16);
3120 break;
3121 case DFmode:
3122 reg = gen_rtx_REG (DFmode, 32);
3123 break;
3124 case DImode:
3125 reg = gen_rtx_REG (DImode, 0);
3126 break;
3127 default:
3128 gcc_unreachable ();
3129 }
3130
3131 tmp = gen_rtx_MEM (QImode, func);
3132 tmp = emit_call_insn (GEN_CALL_VALUE (reg, tmp, const0_rtx,
3133 const0_rtx, const0_rtx));
3134 CALL_INSN_FUNCTION_USAGE (tmp) = usage;
3135 RTL_CONST_CALL_P (tmp) = 1;
3136
3137 tmp = get_insns ();
3138 end_sequence ();
3139
3140 emit_libcall_block (tmp, target, reg, equiv);
3141 }
3142
3143 /* Emit an X_floating library function call for arithmetic (+,-,*,/). */
3144
3145 void
3146 alpha_emit_xfloating_arith (enum rtx_code code, rtx operands[])
3147 {
3148 rtx func;
3149 int mode;
3150 rtx out_operands[3];
3151
3152 func = alpha_lookup_xfloating_lib_func (code);
3153 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3154
3155 out_operands[0] = operands[1];
3156 out_operands[1] = operands[2];
3157 out_operands[2] = GEN_INT (mode);
3158 alpha_emit_xfloating_libcall (func, operands[0], out_operands, 3,
3159 gen_rtx_fmt_ee (code, TFmode, operands[1],
3160 operands[2]));
3161 }
3162
3163 /* Emit an X_floating library function call for a comparison. */
3164
3165 static rtx
3166 alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
3167 {
3168 enum rtx_code cmp_code, res_code;
3169 rtx func, out, operands[2], note;
3170
3171 /* X_floating library comparison functions return
3172 -1 unordered
3173 0 false
3174 1 true
3175 Convert the compare against the raw return value. */
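/* For example, UNORDERED is implemented by calling the EQ routine and
   testing for a result less than zero, since only an unordered input
   produces -1; EQ, LT, LE, GT and GE simply test for a positive result. */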
3176
3177 cmp_code = *pcode;
3178 switch (cmp_code)
3179 {
3180 case UNORDERED:
3181 cmp_code = EQ;
3182 res_code = LT;
3183 break;
3184 case ORDERED:
3185 cmp_code = EQ;
3186 res_code = GE;
3187 break;
3188 case NE:
3189 res_code = NE;
3190 break;
3191 case EQ:
3192 case LT:
3193 case GT:
3194 case LE:
3195 case GE:
3196 res_code = GT;
3197 break;
3198 default:
3199 gcc_unreachable ();
3200 }
3201 *pcode = res_code;
3202
3203 func = alpha_lookup_xfloating_lib_func (cmp_code);
3204
3205 operands[0] = op0;
3206 operands[1] = op1;
3207 out = gen_reg_rtx (DImode);
3208
3209 /* What's actually returned is -1,0,1, not a proper boolean value. */
3210 note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
3211 note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
3212 alpha_emit_xfloating_libcall (func, out, operands, 2, note);
3213
3214 return out;
3215 }
3216
3217 /* Emit an X_floating library function call for a conversion. */
3218
3219 void
3220 alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
3221 {
3222 int noperands = 1, mode;
3223 rtx out_operands[2];
3224 rtx func;
3225 enum rtx_code code = orig_code;
3226
3227 if (code == UNSIGNED_FIX)
3228 code = FIX;
3229
3230 func = alpha_lookup_xfloating_lib_func (code);
3231
3232 out_operands[0] = operands[1];
3233
3234 switch (code)
3235 {
3236 case FIX:
3237 mode = alpha_compute_xfloating_mode_arg (code, ALPHA_FPRM_CHOP);
3238 out_operands[1] = GEN_INT (mode);
3239 noperands = 2;
3240 break;
3241 case FLOAT_TRUNCATE:
3242 mode = alpha_compute_xfloating_mode_arg (code, alpha_fprm);
3243 out_operands[1] = GEN_INT (mode);
3244 noperands = 2;
3245 break;
3246 default:
3247 break;
3248 }
3249
3250 alpha_emit_xfloating_libcall (func, operands[0], out_operands, noperands,
3251 gen_rtx_fmt_e (orig_code,
3252 GET_MODE (operands[0]),
3253 operands[1]));
3254 }
3255
3256 /* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of
3257 DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true,
3258 guarantee that the sequence
3259 set (OP[0] OP[2])
3260 set (OP[1] OP[3])
3261 is valid. Naturally, output operand ordering is little-endian.
3262 This is used by *movtf_internal and *movti_internal. */
3263
3264 void
3265 alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
3266 bool fixup_overlap)
3267 {
3268 switch (GET_CODE (operands[1]))
3269 {
3270 case REG:
3271 operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
3272 operands[2] = gen_rtx_REG (DImode, REGNO (operands[1]));
3273 break;
3274
3275 case MEM:
3276 operands[3] = adjust_address (operands[1], DImode, 8);
3277 operands[2] = adjust_address (operands[1], DImode, 0);
3278 break;
3279
3280 CASE_CONST_SCALAR_INT:
3281 case CONST_DOUBLE:
3282 gcc_assert (operands[1] == CONST0_RTX (mode));
3283 operands[2] = operands[3] = const0_rtx;
3284 break;
3285
3286 default:
3287 gcc_unreachable ();
3288 }
3289
3290 switch (GET_CODE (operands[0]))
3291 {
3292 case REG:
3293 operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1);
3294 operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
3295 break;
3296
3297 case MEM:
3298 operands[1] = adjust_address (operands[0], DImode, 8);
3299 operands[0] = adjust_address (operands[0], DImode, 0);
3300 break;
3301
3302 default:
3303 gcc_unreachable ();
3304 }
3305
3306 if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3]))
3307 {
3308 std::swap (operands[0], operands[1]);
3309 std::swap (operands[2], operands[3]);
3310 }
3311 }
3312
3313 /* Implement negtf2 or abstf2. Op0 is destination, op1 is source,
3314 op2 is a register containing the sign bit, operation is the
3315 logical operation to be performed. */
3316
3317 void
3318 alpha_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx))
3319 {
3320 rtx high_bit = operands[2];
3321 rtx scratch;
3322 int move;
3323
3324 alpha_split_tmode_pair (operands, TFmode, false);
3325
3326 /* Detect three flavors of operand overlap. */
3327 move = 1;
3328 if (rtx_equal_p (operands[0], operands[2]))
3329 move = 0;
3330 else if (rtx_equal_p (operands[1], operands[2]))
3331 {
3332 if (rtx_equal_p (operands[0], high_bit))
3333 move = 2;
3334 else
3335 move = -1;
3336 }
3337
3338 if (move < 0)
3339 emit_move_insn (operands[0], operands[2]);
3340
3341 /* ??? If the destination overlaps both source tf and high_bit, then
3342 assume source tf is dead in its entirety and use the other half
3343 for a scratch register. Otherwise "scratch" is just the proper
3344 destination register. */
3345 scratch = operands[move < 2 ? 1 : 3];
3346
3347 emit_insn ((*operation) (scratch, high_bit, operands[3]));
3348
3349 if (move > 0)
3350 {
3351 emit_move_insn (operands[0], operands[2]);
3352 if (move > 1)
3353 emit_move_insn (operands[1], scratch);
3354 }
3355 }
3356 \f
3357 /* Use ext[wlq][lh] as the Architecture Handbook describes for extracting
3358 unaligned data:
3359
3360 unsigned: signed:
3361 word: ldq_u r1,X(r11) ldq_u r1,X(r11)
3362 ldq_u r2,X+1(r11) ldq_u r2,X+1(r11)
3363 lda r3,X(r11) lda r3,X+2(r11)
3364 extwl r1,r3,r1 extql r1,r3,r1
3365 extwh r2,r3,r2 extqh r2,r3,r2
3366 or r1,r2,r1 or r1,r2,r1
3367 sra r1,48,r1
3368
3369 long: ldq_u r1,X(r11) ldq_u r1,X(r11)
3370 ldq_u r2,X+3(r11) ldq_u r2,X+3(r11)
3371 lda r3,X(r11) lda r3,X(r11)
3372 extll r1,r3,r1 extll r1,r3,r1
3373 extlh r2,r3,r2 extlh r2,r3,r2
3374 or r1,r2,r1 addl r1,r2,r1
3375
3376 quad: ldq_u r1,X(r11)
3377 ldq_u r2,X+7(r11)
3378 lda r3,X(r11)
3379 extql r1,r3,r1
3380 extqh r2,r3,r2
3381 or r1,r2,r1
3382 */
3383
3384 void
3385 alpha_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size,
3386 HOST_WIDE_INT ofs, int sign)
3387 {
3388 rtx meml, memh, addr, extl, exth, tmp, mema;
3389 machine_mode mode;
3390
3391 if (TARGET_BWX && size == 2)
3392 {
3393 meml = adjust_address (mem, QImode, ofs);
3394 memh = adjust_address (mem, QImode, ofs+1);
3395 extl = gen_reg_rtx (DImode);
3396 exth = gen_reg_rtx (DImode);
3397 emit_insn (gen_zero_extendqidi2 (extl, meml));
3398 emit_insn (gen_zero_extendqidi2 (exth, memh));
3399 exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8),
3400 NULL, 1, OPTAB_LIB_WIDEN);
3401 addr = expand_simple_binop (DImode, IOR, extl, exth,
3402 NULL, 1, OPTAB_LIB_WIDEN);
3403
3404 if (sign && GET_MODE (tgt) != HImode)
3405 {
3406 addr = gen_lowpart (HImode, addr);
3407 emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0));
3408 }
3409 else
3410 {
3411 if (GET_MODE (tgt) != DImode)
3412 addr = gen_lowpart (GET_MODE (tgt), addr);
3413 emit_move_insn (tgt, addr);
3414 }
3415 return;
3416 }
3417
3418 meml = gen_reg_rtx (DImode);
3419 memh = gen_reg_rtx (DImode);
3420 addr = gen_reg_rtx (DImode);
3421 extl = gen_reg_rtx (DImode);
3422 exth = gen_reg_rtx (DImode);
3423
3424 mema = XEXP (mem, 0);
3425 if (GET_CODE (mema) == LO_SUM)
3426 mema = force_reg (Pmode, mema);
3427
3428 /* AND addresses cannot be in any alias set, since they may implicitly
3429 alias surrounding code. Ideally we'd have some alias set that
3430 covered all types except those with alignment 8 or higher. */
3431
3432 tmp = change_address (mem, DImode,
3433 gen_rtx_AND (DImode,
3434 plus_constant (DImode, mema, ofs),
3435 GEN_INT (-8)));
3436 set_mem_alias_set (tmp, 0);
3437 emit_move_insn (meml, tmp);
3438
3439 tmp = change_address (mem, DImode,
3440 gen_rtx_AND (DImode,
3441 plus_constant (DImode, mema,
3442 ofs + size - 1),
3443 GEN_INT (-8)));
3444 set_mem_alias_set (tmp, 0);
3445 emit_move_insn (memh, tmp);
3446
3447 if (sign && size == 2)
3448 {
3449 emit_move_insn (addr, plus_constant (Pmode, mema, ofs+2));
3450
3451 emit_insn (gen_extql (extl, meml, addr));
3452 emit_insn (gen_extqh (exth, memh, addr));
3453
3454 /* We must use tgt here for the target. Alpha-vms port fails if we use
3455 addr for the target, because addr is marked as a pointer and combine
3456 knows that pointers are always sign-extended 32-bit values. */
3457 addr = expand_binop (DImode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN);
3458 addr = expand_binop (DImode, ashr_optab, addr, GEN_INT (48),
3459 addr, 1, OPTAB_WIDEN);
3460 }
3461 else
3462 {
3463 emit_move_insn (addr, plus_constant (Pmode, mema, ofs));
3464 emit_insn (gen_extxl (extl, meml, GEN_INT (size*8), addr));
3465 switch ((int) size)
3466 {
3467 case 2:
3468 emit_insn (gen_extwh (exth, memh, addr));
3469 mode = HImode;
3470 break;
3471 case 4:
3472 emit_insn (gen_extlh (exth, memh, addr));
3473 mode = SImode;
3474 break;
3475 case 8:
3476 emit_insn (gen_extqh (exth, memh, addr));
3477 mode = DImode;
3478 break;
3479 default:
3480 gcc_unreachable ();
3481 }
3482
3483 addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl),
3484 gen_lowpart (mode, exth), gen_lowpart (mode, tgt),
3485 sign, OPTAB_WIDEN);
3486 }
3487
3488 if (addr != tgt)
3489 emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr));
3490 }
3491
3492 /* Similarly, use ins and msk instructions to perform unaligned stores. */
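/* The sequence below loads the two quadwords that cover the destination
   (ldq_u), clears the bytes being replaced (msk*), positions the source
   bytes (ins*), ORs the halves together, and stores the high quadword
   before the low one; see the comment near the end for why that order
   matters in the aligned case. */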
3493
3494 void
3495 alpha_expand_unaligned_store (rtx dst, rtx src,
3496 HOST_WIDE_INT size, HOST_WIDE_INT ofs)
3497 {
3498 rtx dstl, dsth, addr, insl, insh, meml, memh, dsta;
3499
3500 if (TARGET_BWX && size == 2)
3501 {
3502 if (src != const0_rtx)
3503 {
3504 dstl = gen_lowpart (QImode, src);
3505 dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8),
3506 NULL, 1, OPTAB_LIB_WIDEN);
3507 dsth = gen_lowpart (QImode, dsth);
3508 }
3509 else
3510 dstl = dsth = const0_rtx;
3511
3512 meml = adjust_address (dst, QImode, ofs);
3513 memh = adjust_address (dst, QImode, ofs+1);
3514
3515 emit_move_insn (meml, dstl);
3516 emit_move_insn (memh, dsth);
3517 return;
3518 }
3519
3520 dstl = gen_reg_rtx (DImode);
3521 dsth = gen_reg_rtx (DImode);
3522 insl = gen_reg_rtx (DImode);
3523 insh = gen_reg_rtx (DImode);
3524
3525 dsta = XEXP (dst, 0);
3526 if (GET_CODE (dsta) == LO_SUM)
3527 dsta = force_reg (Pmode, dsta);
3528
3529 /* AND addresses cannot be in any alias set, since they may implicitly
3530 alias surrounding code. Ideally we'd have some alias set that
3531 covered all types except those with alignment 8 or higher. */
3532
3533 meml = change_address (dst, DImode,
3534 gen_rtx_AND (DImode,
3535 plus_constant (DImode, dsta, ofs),
3536 GEN_INT (-8)));
3537 set_mem_alias_set (meml, 0);
3538
3539 memh = change_address (dst, DImode,
3540 gen_rtx_AND (DImode,
3541 plus_constant (DImode, dsta,
3542 ofs + size - 1),
3543 GEN_INT (-8)));
3544 set_mem_alias_set (memh, 0);
3545
3546 emit_move_insn (dsth, memh);
3547 emit_move_insn (dstl, meml);
3548
3549 addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
3550
3551 if (src != CONST0_RTX (GET_MODE (src)))
3552 {
3553 emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
3554 GEN_INT (size*8), addr));
3555
3556 switch ((int) size)
3557 {
3558 case 2:
3559 emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
3560 break;
3561 case 4:
3562 emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
3563 break;
3564 case 8:
3565 emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
3566 break;
3567 default:
3568 gcc_unreachable ();
3569 }
3570 }
3571
3572 emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size*8), addr));
3573
3574 switch ((int) size)
3575 {
3576 case 2:
3577 emit_insn (gen_mskwl (dstl, dstl, addr));
3578 break;
3579 case 4:
3580 emit_insn (gen_mskll (dstl, dstl, addr));
3581 break;
3582 case 8:
3583 emit_insn (gen_mskql (dstl, dstl, addr));
3584 break;
3585 default:
3586 gcc_unreachable ();
3587 }
3588
3589 if (src != CONST0_RTX (GET_MODE (src)))
3590 {
3591 dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN);
3592 dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN);
3593 }
3594
3595 /* Must store high before low for degenerate case of aligned. */
3596 emit_move_insn (memh, dsth);
3597 emit_move_insn (meml, dstl);
3598 }
3599
3600 /* The block move code tries to maximize speed by separating loads and
3601 stores at the expense of register pressure: we load all of the data
3602 before we store it back out. There are two secondary effects worth
3603 mentioning: this speeds copying to/from aligned and unaligned
3604 buffers, and it makes the code significantly easier to write. */
3605
3606 #define MAX_MOVE_WORDS 8
3607
3608 /* Load an integral number of consecutive unaligned quadwords. */
3609
3610 static void
3611 alpha_expand_unaligned_load_words (rtx *out_regs, rtx smem,
3612 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3613 {
3614 rtx const im8 = GEN_INT (-8);
3615 rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS+1];
3616 rtx sreg, areg, tmp, smema;
3617 HOST_WIDE_INT i;
3618
3619 smema = XEXP (smem, 0);
3620 if (GET_CODE (smema) == LO_SUM)
3621 smema = force_reg (Pmode, smema);
3622
3623 /* Generate all the tmp registers we need. */
3624 for (i = 0; i < words; ++i)
3625 {
3626 data_regs[i] = out_regs[i];
3627 ext_tmps[i] = gen_reg_rtx (DImode);
3628 }
3629 data_regs[words] = gen_reg_rtx (DImode);
3630
3631 if (ofs != 0)
3632 smem = adjust_address (smem, GET_MODE (smem), ofs);
3633
3634 /* Load up all of the source data. */
3635 for (i = 0; i < words; ++i)
3636 {
3637 tmp = change_address (smem, DImode,
3638 gen_rtx_AND (DImode,
3639 plus_constant (DImode, smema, 8*i),
3640 im8));
3641 set_mem_alias_set (tmp, 0);
3642 emit_move_insn (data_regs[i], tmp);
3643 }
3644
3645 tmp = change_address (smem, DImode,
3646 gen_rtx_AND (DImode,
3647 plus_constant (DImode, smema,
3648 8*words - 1),
3649 im8));
3650 set_mem_alias_set (tmp, 0);
3651 emit_move_insn (data_regs[words], tmp);
3652
3653 /* Extract the half-word fragments. Unfortunately DEC decided to make
3654 extxh with offset zero a noop instead of zeroing the register, so
3655 we must take care of that edge condition ourselves with cmov. */
3656
3657 sreg = copy_addr_to_reg (smema);
3658 areg = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL,
3659 1, OPTAB_WIDEN);
3660 for (i = 0; i < words; ++i)
3661 {
3662 emit_insn (gen_extql (data_regs[i], data_regs[i], sreg));
3663 emit_insn (gen_extqh (ext_tmps[i], data_regs[i+1], sreg));
3664 emit_insn (gen_rtx_SET (ext_tmps[i],
3665 gen_rtx_IF_THEN_ELSE (DImode,
3666 gen_rtx_EQ (DImode, areg,
3667 const0_rtx),
3668 const0_rtx, ext_tmps[i])));
3669 }
3670
3671 /* Merge the half-words into whole words. */
3672 for (i = 0; i < words; ++i)
3673 {
3674 out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i],
3675 ext_tmps[i], data_regs[i], 1, OPTAB_WIDEN);
3676 }
3677 }
3678
3679 /* Store an integral number of consecutive unaligned quadwords. DATA_REGS
3680 may be NULL to store zeros. */
3681
3682 static void
3683 alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem,
3684 HOST_WIDE_INT words, HOST_WIDE_INT ofs)
3685 {
3686 rtx const im8 = GEN_INT (-8);
3687 rtx ins_tmps[MAX_MOVE_WORDS];
3688 rtx st_tmp_1, st_tmp_2, dreg;
3689 rtx st_addr_1, st_addr_2, dmema;
3690 HOST_WIDE_INT i;
3691
3692 dmema = XEXP (dmem, 0);
3693 if (GET_CODE (dmema) == LO_SUM)
3694 dmema = force_reg (Pmode, dmema);
3695
3696 /* Generate all the tmp registers we need. */
3697 if (data_regs != NULL)
3698 for (i = 0; i < words; ++i)
3699 ins_tmps[i] = gen_reg_rtx(DImode);
3700 st_tmp_1 = gen_reg_rtx(DImode);
3701 st_tmp_2 = gen_reg_rtx(DImode);
3702
3703 if (ofs != 0)
3704 dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
3705
3706 st_addr_2 = change_address (dmem, DImode,
3707 gen_rtx_AND (DImode,
3708 plus_constant (DImode, dmema,
3709 words*8 - 1),
3710 im8));
3711 set_mem_alias_set (st_addr_2, 0);
3712
3713 st_addr_1 = change_address (dmem, DImode,
3714 gen_rtx_AND (DImode, dmema, im8));
3715 set_mem_alias_set (st_addr_1, 0);
3716
3717 /* Load up the destination end bits. */
3718 emit_move_insn (st_tmp_2, st_addr_2);
3719 emit_move_insn (st_tmp_1, st_addr_1);
3720
3721 /* Shift the input data into place. */
3722 dreg = copy_addr_to_reg (dmema);
3723 if (data_regs != NULL)
3724 {
3725 for (i = words-1; i >= 0; --i)
3726 {
3727 emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
3728 emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
3729 }
3730 for (i = words-1; i > 0; --i)
3731 {
3732 ins_tmps[i-1] = expand_binop (DImode, ior_optab, data_regs[i],
3733 ins_tmps[i-1], ins_tmps[i-1], 1,
3734 OPTAB_WIDEN);
3735 }
3736 }
3737
3738 /* Split and merge the ends with the destination data. */
3739 emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg));
3740 emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg));
3741
3742 if (data_regs != NULL)
3743 {
3744 st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words-1],
3745 st_tmp_2, 1, OPTAB_WIDEN);
3746 st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0],
3747 st_tmp_1, 1, OPTAB_WIDEN);
3748 }
3749
3750 /* Store it all. */
3751 emit_move_insn (st_addr_2, st_tmp_2);
3752 for (i = words-1; i > 0; --i)
3753 {
3754 rtx tmp = change_address (dmem, DImode,
3755 gen_rtx_AND (DImode,
3756 plus_constant (DImode,
3757 dmema, i*8),
3758 im8));
3759 set_mem_alias_set (tmp, 0);
3760 emit_move_insn (tmp, data_regs ? ins_tmps[i-1] : const0_rtx);
3761 }
3762 emit_move_insn (st_addr_1, st_tmp_1);
3763 }
3764
3765
3766 /* Expand string/block move operations.
3767
3768 operands[0] is the pointer to the destination.
3769 operands[1] is the pointer to the source.
3770 operands[2] is the number of bytes to move.
3771 operands[3] is the alignment. */
3772
3773 int
3774 alpha_expand_block_move (rtx operands[])
3775 {
3776 rtx bytes_rtx = operands[2];
3777 rtx align_rtx = operands[3];
3778 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
3779 HOST_WIDE_INT bytes = orig_bytes;
3780 HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT;
3781 HOST_WIDE_INT dst_align = src_align;
3782 rtx orig_src = operands[1];
3783 rtx orig_dst = operands[0];
3784 rtx data_regs[2 * MAX_MOVE_WORDS + 16];
3785 rtx tmp;
3786 unsigned int i, words, ofs, nregs = 0;
3787
3788 if (orig_bytes <= 0)
3789 return 1;
3790 else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
3791 return 0;
3792
3793 /* Look for additional alignment information from recorded register info. */
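/* For example, a source address of the form reg+4 where reg is known to
   be 64-bit aligned yields a src_align of 32, since the offset is a
   multiple of 4 but not of 8. */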
3794
3795 tmp = XEXP (orig_src, 0);
3796 if (REG_P (tmp))
3797 src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3798 else if (GET_CODE (tmp) == PLUS
3799 && REG_P (XEXP (tmp, 0))
3800 && CONST_INT_P (XEXP (tmp, 1)))
3801 {
3802 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3803 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3804
3805 if (a > src_align)
3806 {
3807 if (a >= 64 && c % 8 == 0)
3808 src_align = 64;
3809 else if (a >= 32 && c % 4 == 0)
3810 src_align = 32;
3811 else if (a >= 16 && c % 2 == 0)
3812 src_align = 16;
3813 }
3814 }
3815
3816 tmp = XEXP (orig_dst, 0);
3817 if (REG_P (tmp))
3818 dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp)));
3819 else if (GET_CODE (tmp) == PLUS
3820 && REG_P (XEXP (tmp, 0))
3821 && CONST_INT_P (XEXP (tmp, 1)))
3822 {
3823 unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
3824 unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
3825
3826 if (a > dst_align)
3827 {
3828 if (a >= 64 && c % 8 == 0)
3829 dst_align = 64;
3830 else if (a >= 32 && c % 4 == 0)
3831 dst_align = 32;
3832 else if (a >= 16 && c % 2 == 0)
3833 dst_align = 16;
3834 }
3835 }
3836
3837 ofs = 0;
3838 if (src_align >= 64 && bytes >= 8)
3839 {
3840 words = bytes / 8;
3841
3842 for (i = 0; i < words; ++i)
3843 data_regs[nregs + i] = gen_reg_rtx (DImode);
3844
3845 for (i = 0; i < words; ++i)
3846 emit_move_insn (data_regs[nregs + i],
3847 adjust_address (orig_src, DImode, ofs + i * 8));
3848
3849 nregs += words;
3850 bytes -= words * 8;
3851 ofs += words * 8;
3852 }
3853
3854 if (src_align >= 32 && bytes >= 4)
3855 {
3856 words = bytes / 4;
3857
3858 for (i = 0; i < words; ++i)
3859 data_regs[nregs + i] = gen_reg_rtx (SImode);
3860
3861 for (i = 0; i < words; ++i)
3862 emit_move_insn (data_regs[nregs + i],
3863 adjust_address (orig_src, SImode, ofs + i * 4));
3864
3865 nregs += words;
3866 bytes -= words * 4;
3867 ofs += words * 4;
3868 }
3869
3870 if (bytes >= 8)
3871 {
3872 words = bytes / 8;
3873
3874 for (i = 0; i < words+1; ++i)
3875 data_regs[nregs + i] = gen_reg_rtx (DImode);
3876
3877 alpha_expand_unaligned_load_words (data_regs + nregs, orig_src,
3878 words, ofs);
3879
3880 nregs += words;
3881 bytes -= words * 8;
3882 ofs += words * 8;
3883 }
3884
3885 if (! TARGET_BWX && bytes >= 4)
3886 {
3887 data_regs[nregs++] = tmp = gen_reg_rtx (SImode);
3888 alpha_expand_unaligned_load (tmp, orig_src, 4, ofs, 0);
3889 bytes -= 4;
3890 ofs += 4;
3891 }
3892
3893 if (bytes >= 2)
3894 {
3895 if (src_align >= 16)
3896 {
3897 do {
3898 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3899 emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs));
3900 bytes -= 2;
3901 ofs += 2;
3902 } while (bytes >= 2);
3903 }
3904 else if (! TARGET_BWX)
3905 {
3906 data_regs[nregs++] = tmp = gen_reg_rtx (HImode);
3907 alpha_expand_unaligned_load (tmp, orig_src, 2, ofs, 0);
3908 bytes -= 2;
3909 ofs += 2;
3910 }
3911 }
3912
3913 while (bytes > 0)
3914 {
3915 data_regs[nregs++] = tmp = gen_reg_rtx (QImode);
3916 emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs));
3917 bytes -= 1;
3918 ofs += 1;
3919 }
3920
3921 gcc_assert (nregs <= ARRAY_SIZE (data_regs));
3922
3923 /* Now save it back out again. */
3924
3925 i = 0, ofs = 0;
3926
3927 /* Write out the data in whatever chunks reading the source allowed. */
3928 if (dst_align >= 64)
3929 {
3930 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3931 {
3932 emit_move_insn (adjust_address (orig_dst, DImode, ofs),
3933 data_regs[i]);
3934 ofs += 8;
3935 i++;
3936 }
3937 }
3938
3939 if (dst_align >= 32)
3940 {
3941 /* If the source has remaining DImode regs, write them out in
3942 two pieces. */
3943 while (i < nregs && GET_MODE (data_regs[i]) == DImode)
3944 {
3945 tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32),
3946 NULL_RTX, 1, OPTAB_WIDEN);
3947
3948 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3949 gen_lowpart (SImode, data_regs[i]));
3950 emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4),
3951 gen_lowpart (SImode, tmp));
3952 ofs += 8;
3953 i++;
3954 }
3955
3956 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3957 {
3958 emit_move_insn (adjust_address (orig_dst, SImode, ofs),
3959 data_regs[i]);
3960 ofs += 4;
3961 i++;
3962 }
3963 }
3964
3965 if (i < nregs && GET_MODE (data_regs[i]) == DImode)
3966 {
3967 /* Write out a remaining block of words using unaligned methods. */
3968
3969 for (words = 1; i + words < nregs; words++)
3970 if (GET_MODE (data_regs[i + words]) != DImode)
3971 break;
3972
3973 if (words == 1)
3974 alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
3975 else
3976 alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
3977 words, ofs);
3978
3979 i += words;
3980 ofs += words * 8;
3981 }
3982
3983 /* Due to the above, this won't be aligned. */
3984 /* ??? If we have more than one of these, consider constructing full
3985 words in registers and using alpha_expand_unaligned_store_words. */
3986 while (i < nregs && GET_MODE (data_regs[i]) == SImode)
3987 {
3988 alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
3989 ofs += 4;
3990 i++;
3991 }
3992
3993 if (dst_align >= 16)
3994 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
3995 {
3996 emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
3997 i++;
3998 ofs += 2;
3999 }
4000 else
4001 while (i < nregs && GET_MODE (data_regs[i]) == HImode)
4002 {
4003 alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
4004 i++;
4005 ofs += 2;
4006 }
4007
4008 /* The remainder must be byte copies. */
4009 while (i < nregs)
4010 {
4011 gcc_assert (GET_MODE (data_regs[i]) == QImode);
4012 emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
4013 i++;
4014 ofs += 1;
4015 }
4016
4017 return 1;
4018 }
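/* Illustrative example (editor's note, not part of the original source):
   for an 11-byte copy from a source known to be 64-bit aligned, the read
   loops above gather the data as one DImode register (bytes 0-7), one
   HImode register (bytes 8-9) and one QImode register (byte 10); the
   write loops then emit whatever mix of aligned and unaligned stores the
   destination alignment permits. */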
4019
4020 int
4021 alpha_expand_block_clear (rtx operands[])
4022 {
4023 rtx bytes_rtx = operands[1];
4024 rtx align_rtx = operands[3];
4025 HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx);
4026 HOST_WIDE_INT bytes = orig_bytes;
4027 HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT;
4028 HOST_WIDE_INT alignofs = 0;
4029 rtx orig_dst = operands[0];
4030 rtx tmp;
4031 int i, words, ofs = 0;
4032
4033 if (orig_bytes <= 0)
4034 return 1;
4035 if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD)
4036 return 0;
4037
4038 /* Look for stricter alignment. */
4039 tmp = XEXP (orig_dst, 0);
4040 if (REG_P (tmp))
4041 align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp)));
4042 else if (GET_CODE (tmp) == PLUS
4043 && REG_P (XEXP (tmp, 0))
4044 && CONST_INT_P (XEXP (tmp, 1)))
4045 {
4046 HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1));
4047 int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0)));
4048
4049 if (a > align)
4050 {
4051 if (a >= 64)
4052 align = a, alignofs = 8 - c % 8;
4053 else if (a >= 32)
4054 align = a, alignofs = 4 - c % 4;
4055 else if (a >= 16)
4056 align = a, alignofs = 2 - c % 2;
4057 }
4058 }
4059
4060 /* Handle an unaligned prefix first. */
4061
4062 if (alignofs > 0)
4063 {
4064 /* Given that alignofs is bounded by align, the only time BWX could
4065 generate three stores is for a 7 byte fill. Prefer two individual
4066 stores over a load/mask/store sequence. */
4067 if ((!TARGET_BWX || alignofs == 7)
4068 && align >= 32
4069 && !(alignofs == 4 && bytes >= 4))
4070 {
4071 machine_mode mode = (align >= 64 ? DImode : SImode);
4072 int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs;
4073 rtx mem, tmp;
4074 HOST_WIDE_INT mask;
4075
4076 mem = adjust_address (orig_dst, mode, ofs - inv_alignofs);
4077 set_mem_alias_set (mem, 0);
4078
4079 mask = ~(~(HOST_WIDE_INT)0 << (inv_alignofs * 8));
4080 if (bytes < alignofs)
4081 {
4082 mask |= ~(HOST_WIDE_INT)0 << ((inv_alignofs + bytes) * 8);
4083 ofs += bytes;
4084 bytes = 0;
4085 }
4086 else
4087 {
4088 bytes -= alignofs;
4089 ofs += alignofs;
4090 }
4091 alignofs = 0;
4092
4093 tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
4094 NULL_RTX, 1, OPTAB_WIDEN);
4095
4096 emit_move_insn (mem, tmp);
4097 }
4098
4099 if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
4100 {
4101 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4102 bytes -= 1;
4103 ofs += 1;
4104 alignofs -= 1;
4105 }
4106 if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2)
4107 {
4108 emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx);
4109 bytes -= 2;
4110 ofs += 2;
4111 alignofs -= 2;
4112 }
4113 if (alignofs == 4 && bytes >= 4)
4114 {
4115 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4116 bytes -= 4;
4117 ofs += 4;
4118 alignofs = 0;
4119 }
4120
4121 /* If we've not used the extra lead alignment information by now,
4122 we won't be able to. Downgrade align to match what's left over. */
4123 if (alignofs > 0)
4124 {
4125 alignofs = alignofs & -alignofs;
4126 align = MIN (align, alignofs * BITS_PER_UNIT);
4127 }
4128 }
4129
4130 /* Handle a block of contiguous long-words. */
4131
4132 if (align >= 64 && bytes >= 8)
4133 {
4134 words = bytes / 8;
4135
4136 for (i = 0; i < words; ++i)
4137 emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8),
4138 const0_rtx);
4139
4140 bytes -= words * 8;
4141 ofs += words * 8;
4142 }
4143
4144 /* If the block is large and appropriately aligned, emit a single
4145 store followed by a sequence of stq_u insns. */
4146
4147 if (align >= 32 && bytes > 16)
4148 {
4149 rtx orig_dsta;
4150
4151 emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx);
4152 bytes -= 4;
4153 ofs += 4;
4154
4155 orig_dsta = XEXP (orig_dst, 0);
4156 if (GET_CODE (orig_dsta) == LO_SUM)
4157 orig_dsta = force_reg (Pmode, orig_dsta);
4158
4159 words = bytes / 8;
4160 for (i = 0; i < words; ++i)
4161 {
4162 rtx mem
4163 = change_address (orig_dst, DImode,
4164 gen_rtx_AND (DImode,
4165 plus_constant (DImode, orig_dsta,
4166 ofs + i*8),
4167 GEN_INT (-8)));
4168 set_mem_alias_set (mem, 0);
4169 emit_move_insn (mem, const0_rtx);
4170 }
4171
4172 /* Depending on the alignment, the first stq_u may have overlapped
4173 with the initial stl, which means that the last stq_u didn't
4174 write as much as it would appear. Leave those questionable bytes
4175 unaccounted for. */
4176 bytes -= words * 8 - 4;
4177 ofs += words * 8 - 4;
4178 }
4179
4180 /* Handle a smaller block of aligned words. */
4181
4182 if ((align >= 64 && bytes == 4)
4183 || (align == 32 && bytes >= 4))
4184 {
4185 words = bytes / 4;
4186
4187 for (i = 0; i < words; ++i)
4188 emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4),
4189 const0_rtx);
4190
4191 bytes -= words * 4;
4192 ofs += words * 4;
4193 }
4194
4195 /* An unaligned block uses stq_u stores to clear as many full quadwords as possible. */
4196
4197 if (bytes >= 8)
4198 {
4199 words = bytes / 8;
4200
4201 alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
4202
4203 bytes -= words * 8;
4204 ofs += words * 8;
4205 }
4206
4207 /* Next clean up any trailing pieces. */
4208
4209 /* Count the number of bits in BYTES for which aligned stores could
4210 be emitted. */
4211 words = 0;
4212 for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align ; i <<= 1)
4213 if (bytes & i)
4214 words += 1;
4215
4216 /* If we have appropriate alignment (and it wouldn't take too many
4217 instructions otherwise), mask out the bytes we need. */
4218 if (TARGET_BWX ? words > 2 : bytes > 0)
4219 {
4220 if (align >= 64)
4221 {
4222 rtx mem, tmp;
4223 HOST_WIDE_INT mask;
4224
4225 mem = adjust_address (orig_dst, DImode, ofs);
4226 set_mem_alias_set (mem, 0);
4227
4228 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4229
4230 tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask),
4231 NULL_RTX, 1, OPTAB_WIDEN);
4232
4233 emit_move_insn (mem, tmp);
4234 return 1;
4235 }
4236 else if (align >= 32 && bytes < 4)
4237 {
4238 rtx mem, tmp;
4239 HOST_WIDE_INT mask;
4240
4241 mem = adjust_address (orig_dst, SImode, ofs);
4242 set_mem_alias_set (mem, 0);
4243
4244 mask = ~(HOST_WIDE_INT)0 << (bytes * 8);
4245
4246 tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask),
4247 NULL_RTX, 1, OPTAB_WIDEN);
4248
4249 emit_move_insn (mem, tmp);
4250 return 1;
4251 }
4252 }
4253
4254 if (!TARGET_BWX && bytes >= 4)
4255 {
4256 alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
4257 bytes -= 4;
4258 ofs += 4;
4259 }
4260
4261 if (bytes >= 2)
4262 {
4263 if (align >= 16)
4264 {
4265 do {
4266 emit_move_insn (adjust_address (orig_dst, HImode, ofs),
4267 const0_rtx);
4268 bytes -= 2;
4269 ofs += 2;
4270 } while (bytes >= 2);
4271 }
4272 else if (! TARGET_BWX)
4273 {
4274 alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
4275 bytes -= 2;
4276 ofs += 2;
4277 }
4278 }
4279
4280 while (bytes > 0)
4281 {
4282 emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
4283 bytes -= 1;
4284 ofs += 1;
4285 }
4286
4287 return 1;
4288 }
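/* Illustrative example (editor's note, not part of the original source):
   clearing 32 bytes at an address of the form REG+5, where REG is known
   to be 64-bit aligned, gives align == 64 and alignofs == 8 - 5 % 8 == 3,
   so the code above first clears the 3 bytes up to the next aligned
   boundary (via byte/word stores or a load/mask/store sequence), then
   falls through to the aligned quadword-store loop for the bulk and the
   trailing-piece code for what remains. */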
4289
4290 /* Returns a mask so that zap(x, value) == x & mask. */
4291
4292 rtx
4293 alpha_expand_zap_mask (HOST_WIDE_INT value)
4294 {
4295 rtx result;
4296 int i;
4297 HOST_WIDE_INT mask = 0;
4298
4299 for (i = 7; i >= 0; --i)
4300 {
4301 mask <<= 8;
4302 if (!((value >> i) & 1))
4303 mask |= 0xff;
4304 }
4305
4306 result = gen_int_mode (mask, DImode);
4307 return result;
4308 }
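/* Illustrative example (editor's note, not part of the original source):
   for VALUE == 0x0f the low four bytes are selected for zapping, so the
   loop above sets 0xff only in bytes 4-7 of MASK; the function returns
   0xffffffff00000000 and zap(x, 0x0f) == x & 0xffffffff00000000. */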
4309
4310 void
4311 alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
4312 machine_mode mode,
4313 rtx op0, rtx op1, rtx op2)
4314 {
4315 op0 = gen_lowpart (mode, op0);
4316
4317 if (op1 == const0_rtx)
4318 op1 = CONST0_RTX (mode);
4319 else
4320 op1 = gen_lowpart (mode, op1);
4321
4322 if (op2 == const0_rtx)
4323 op2 = CONST0_RTX (mode);
4324 else
4325 op2 = gen_lowpart (mode, op2);
4326
4327 emit_insn ((*gen) (op0, op1, op2));
4328 }
4329
4330 /* A subroutine of the atomic operation splitters. Jump to LABEL if
4331 COND is true. Mark the jump as unlikely to be taken. */
4332
4333 static void
4334 emit_unlikely_jump (rtx cond, rtx label)
4335 {
4336 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
4337 rtx x;
4338
4339 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
4340 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
4341 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
4342 }
4343
4344 /* A subroutine of the atomic operation splitters. Emit a load-locked
4345 instruction in MODE. */
4346
4347 static void
4348 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
4349 {
4350 rtx (*fn) (rtx, rtx) = NULL;
4351 if (mode == SImode)
4352 fn = gen_load_locked_si;
4353 else if (mode == DImode)
4354 fn = gen_load_locked_di;
4355 emit_insn (fn (reg, mem));
4356 }
4357
4358 /* A subroutine of the atomic operation splitters. Emit a store-conditional
4359 instruction in MODE. */
4360
4361 static void
4362 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
4363 {
4364 rtx (*fn) (rtx, rtx, rtx) = NULL;
4365 if (mode == SImode)
4366 fn = gen_store_conditional_si;
4367 else if (mode == DImode)
4368 fn = gen_store_conditional_di;
4369 emit_insn (fn (res, mem, val));
4370 }
4371
4372 /* Subroutines of the atomic operation splitters. Emit barriers
4373 as needed for the memory MODEL. */
4374
4375 static void
4376 alpha_pre_atomic_barrier (enum memmodel model)
4377 {
4378 if (need_atomic_barrier_p (model, true))
4379 emit_insn (gen_memory_barrier ());
4380 }
4381
4382 static void
4383 alpha_post_atomic_barrier (enum memmodel model)
4384 {
4385 if (need_atomic_barrier_p (model, false))
4386 emit_insn (gen_memory_barrier ());
4387 }
4388
4389 /* A subroutine of the atomic operation splitters. Emit an insxl
4390 instruction in MODE. */
4391
4392 static rtx
4393 emit_insxl (machine_mode mode, rtx op1, rtx op2)
4394 {
4395 rtx ret = gen_reg_rtx (DImode);
4396 rtx (*fn) (rtx, rtx, rtx);
4397
4398 switch (mode)
4399 {
4400 case QImode:
4401 fn = gen_insbl;
4402 break;
4403 case HImode:
4404 fn = gen_inswl;
4405 break;
4406 case SImode:
4407 fn = gen_insll;
4408 break;
4409 case DImode:
4410 fn = gen_insql;
4411 break;
4412 default:
4413 gcc_unreachable ();
4414 }
4415
4416 op1 = force_reg (mode, op1);
4417 emit_insn (fn (ret, op1, op2));
4418
4419 return ret;
4420 }
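/* For reference (editor's note, illustrative only): the insbl/inswl/
   insll/insql family shifts the low byte/word/longword/quadword of OP1
   left by 8 * (OP2 & 7) bits. For example, an inswl of 0x1234 with an
   address whose low three bits are 2 produces 0x0000000012340000, i.e.
   the value positioned at its byte offset within the enclosing aligned
   quadword. */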
4421
4422 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
4423 to perform. MEM is the memory on which to operate. VAL is the second
4424 operand of the binary operator. BEFORE and AFTER are optional locations to
4425 return the value of MEM either before of after the operation. SCRATCH is
4426 a scratch register. */
4427
4428 void
4429 alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before,
4430 rtx after, rtx scratch, enum memmodel model)
4431 {
4432 machine_mode mode = GET_MODE (mem);
4433 rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch));
4434
4435 alpha_pre_atomic_barrier (model);
4436
4437 label = gen_label_rtx ();
4438 emit_label (label);
4439 label = gen_rtx_LABEL_REF (DImode, label);
4440
4441 if (before == NULL)
4442 before = scratch;
4443 emit_load_locked (mode, before, mem);
4444
4445 if (code == NOT)
4446 {
4447 x = gen_rtx_AND (mode, before, val);
4448 emit_insn (gen_rtx_SET (val, x));
4449
4450 x = gen_rtx_NOT (mode, val);
4451 }
4452 else
4453 x = gen_rtx_fmt_ee (code, mode, before, val);
4454 if (after)
4455 emit_insn (gen_rtx_SET (after, copy_rtx (x)));
4456 emit_insn (gen_rtx_SET (scratch, x));
4457
4458 emit_store_conditional (mode, cond, mem, scratch);
4459
4460 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4461 emit_unlikely_jump (x, label);
4462
4463 alpha_post_atomic_barrier (model);
4464 }
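/* For reference (editor's note, illustrative only): for a DImode atomic
   fetch-and-add, the splitter above produces a retry loop of roughly
   this shape, with memory barriers added before and/or after as MODEL
   requires:

	1:	ldq_l	scratch,0(mem)		# load-locked
		addq	scratch,val,scratch
		stq_c	scratch,0(mem)		# store-conditional
		beq	scratch,1b		# retry if the reservation was lost
   */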
4465
4466 /* Expand a compare and swap operation. */
4467
4468 void
4469 alpha_split_compare_and_swap (rtx operands[])
4470 {
4471 rtx cond, retval, mem, oldval, newval;
4472 bool is_weak;
4473 enum memmodel mod_s, mod_f;
4474 machine_mode mode;
4475 rtx label1, label2, x;
4476
4477 cond = operands[0];
4478 retval = operands[1];
4479 mem = operands[2];
4480 oldval = operands[3];
4481 newval = operands[4];
4482 is_weak = (operands[5] != const0_rtx);
4483 mod_s = memmodel_from_int (INTVAL (operands[6]));
4484 mod_f = memmodel_from_int (INTVAL (operands[7]));
4485 mode = GET_MODE (mem);
4486
4487 alpha_pre_atomic_barrier (mod_s);
4488
4489 label1 = NULL_RTX;
4490 if (!is_weak)
4491 {
4492 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4493 emit_label (XEXP (label1, 0));
4494 }
4495 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4496
4497 emit_load_locked (mode, retval, mem);
4498
4499 x = gen_lowpart (DImode, retval);
4500 if (oldval == const0_rtx)
4501 {
4502 emit_move_insn (cond, const0_rtx);
4503 x = gen_rtx_NE (DImode, x, const0_rtx);
4504 }
4505 else
4506 {
4507 x = gen_rtx_EQ (DImode, x, oldval);
4508 emit_insn (gen_rtx_SET (cond, x));
4509 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4510 }
4511 emit_unlikely_jump (x, label2);
4512
4513 emit_move_insn (cond, newval);
4514 emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
4515
4516 if (!is_weak)
4517 {
4518 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4519 emit_unlikely_jump (x, label1);
4520 }
4521
4522 if (!is_mm_relaxed (mod_f))
4523 emit_label (XEXP (label2, 0));
4524
4525 alpha_post_atomic_barrier (mod_s);
4526
4527 if (is_mm_relaxed (mod_f))
4528 emit_label (XEXP (label2, 0));
4529 }
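/* For reference (editor's note, illustrative only): for a strong DImode
   compare-and-swap, the splitter above emits roughly

	1:	ldq_l	retval,0(mem)
		cmpeq	retval,oldval,cond
		beq	cond,2f			# values differ: fail
		mov	newval,cond
		stq_c	cond,0(mem)
		beq	cond,1b			# reservation lost: retry
	2:

   bracketed by the barriers implied by the success/failure memory
   models. */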
4530
4531 void
4532 alpha_expand_compare_and_swap_12 (rtx operands[])
4533 {
4534 rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
4535 machine_mode mode;
4536 rtx addr, align, wdst;
4537 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
4538
4539 cond = operands[0];
4540 dst = operands[1];
4541 mem = operands[2];
4542 oldval = operands[3];
4543 newval = operands[4];
4544 is_weak = operands[5];
4545 mod_s = operands[6];
4546 mod_f = operands[7];
4547 mode = GET_MODE (mem);
4548
4549 /* We forced the address into a register via mem_noofs_operand. */
4550 addr = XEXP (mem, 0);
4551 gcc_assert (register_operand (addr, DImode));
4552
4553 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4554 NULL_RTX, 1, OPTAB_DIRECT);
4555
4556 oldval = convert_modes (DImode, mode, oldval, 1);
4557
4558 if (newval != const0_rtx)
4559 newval = emit_insxl (mode, newval, addr);
4560
4561 wdst = gen_reg_rtx (DImode);
4562 if (mode == QImode)
4563 gen = gen_atomic_compare_and_swapqi_1;
4564 else
4565 gen = gen_atomic_compare_and_swaphi_1;
4566 emit_insn (gen (cond, wdst, mem, oldval, newval, align,
4567 is_weak, mod_s, mod_f));
4568
4569 emit_move_insn (dst, gen_lowpart (mode, wdst));
4570 }
4571
4572 void
4573 alpha_split_compare_and_swap_12 (rtx operands[])
4574 {
4575 rtx cond, dest, orig_mem, oldval, newval, align, scratch;
4576 machine_mode mode;
4577 bool is_weak;
4578 enum memmodel mod_s, mod_f;
4579 rtx label1, label2, mem, addr, width, mask, x;
4580
4581 cond = operands[0];
4582 dest = operands[1];
4583 orig_mem = operands[2];
4584 oldval = operands[3];
4585 newval = operands[4];
4586 align = operands[5];
4587 is_weak = (operands[6] != const0_rtx);
4588 mod_s = memmodel_from_int (INTVAL (operands[7]));
4589 mod_f = memmodel_from_int (INTVAL (operands[8]));
4590 scratch = operands[9];
4591 mode = GET_MODE (orig_mem);
4592 addr = XEXP (orig_mem, 0);
4593
4594 mem = gen_rtx_MEM (DImode, align);
4595 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4596 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4597 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4598
4599 alpha_pre_atomic_barrier (mod_s);
4600
4601 label1 = NULL_RTX;
4602 if (!is_weak)
4603 {
4604 label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4605 emit_label (XEXP (label1, 0));
4606 }
4607 label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4608
4609 emit_load_locked (DImode, scratch, mem);
4610
4611 width = GEN_INT (GET_MODE_BITSIZE (mode));
4612 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4613 emit_insn (gen_extxl (dest, scratch, width, addr));
4614
4615 if (oldval == const0_rtx)
4616 {
4617 emit_move_insn (cond, const0_rtx);
4618 x = gen_rtx_NE (DImode, dest, const0_rtx);
4619 }
4620 else
4621 {
4622 x = gen_rtx_EQ (DImode, dest, oldval);
4623 emit_insn (gen_rtx_SET (cond, x));
4624 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4625 }
4626 emit_unlikely_jump (x, label2);
4627
4628 emit_insn (gen_mskxl (cond, scratch, mask, addr));
4629
4630 if (newval != const0_rtx)
4631 emit_insn (gen_iordi3 (cond, cond, newval));
4632
4633 emit_store_conditional (DImode, cond, mem, cond);
4634
4635 if (!is_weak)
4636 {
4637 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4638 emit_unlikely_jump (x, label1);
4639 }
4640
4641 if (!is_mm_relaxed (mod_f))
4642 emit_label (XEXP (label2, 0));
4643
4644 alpha_post_atomic_barrier (mod_s);
4645
4646 if (is_mm_relaxed (mod_f))
4647 emit_label (XEXP (label2, 0));
4648 }
4649
4650 /* Expand an atomic exchange operation. */
4651
4652 void
4653 alpha_split_atomic_exchange (rtx operands[])
4654 {
4655 rtx retval, mem, val, scratch;
4656 enum memmodel model;
4657 machine_mode mode;
4658 rtx label, x, cond;
4659
4660 retval = operands[0];
4661 mem = operands[1];
4662 val = operands[2];
4663 model = (enum memmodel) INTVAL (operands[3]);
4664 scratch = operands[4];
4665 mode = GET_MODE (mem);
4666 cond = gen_lowpart (DImode, scratch);
4667
4668 alpha_pre_atomic_barrier (model);
4669
4670 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4671 emit_label (XEXP (label, 0));
4672
4673 emit_load_locked (mode, retval, mem);
4674 emit_move_insn (scratch, val);
4675 emit_store_conditional (mode, cond, mem, scratch);
4676
4677 x = gen_rtx_EQ (DImode, cond, const0_rtx);
4678 emit_unlikely_jump (x, label);
4679
4680 alpha_post_atomic_barrier (model);
4681 }
4682
4683 void
4684 alpha_expand_atomic_exchange_12 (rtx operands[])
4685 {
4686 rtx dst, mem, val, model;
4687 machine_mode mode;
4688 rtx addr, align, wdst;
4689 rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
4690
4691 dst = operands[0];
4692 mem = operands[1];
4693 val = operands[2];
4694 model = operands[3];
4695 mode = GET_MODE (mem);
4696
4697 /* We forced the address into a register via mem_noofs_operand. */
4698 addr = XEXP (mem, 0);
4699 gcc_assert (register_operand (addr, DImode));
4700
4701 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8),
4702 NULL_RTX, 1, OPTAB_DIRECT);
4703
4704 /* Insert val into the correct byte location within the word. */
4705 if (val != const0_rtx)
4706 val = emit_insxl (mode, val, addr);
4707
4708 wdst = gen_reg_rtx (DImode);
4709 if (mode == QImode)
4710 gen = gen_atomic_exchangeqi_1;
4711 else
4712 gen = gen_atomic_exchangehi_1;
4713 emit_insn (gen (wdst, mem, val, align, model));
4714
4715 emit_move_insn (dst, gen_lowpart (mode, wdst));
4716 }
4717
4718 void
4719 alpha_split_atomic_exchange_12 (rtx operands[])
4720 {
4721 rtx dest, orig_mem, addr, val, align, scratch;
4722 rtx label, mem, width, mask, x;
4723 machine_mode mode;
4724 enum memmodel model;
4725
4726 dest = operands[0];
4727 orig_mem = operands[1];
4728 val = operands[2];
4729 align = operands[3];
4730 model = (enum memmodel) INTVAL (operands[4]);
4731 scratch = operands[5];
4732 mode = GET_MODE (orig_mem);
4733 addr = XEXP (orig_mem, 0);
4734
4735 mem = gen_rtx_MEM (DImode, align);
4736 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
4737 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
4738 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
4739
4740 alpha_pre_atomic_barrier (model);
4741
4742 label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
4743 emit_label (XEXP (label, 0));
4744
4745 emit_load_locked (DImode, scratch, mem);
4746
4747 width = GEN_INT (GET_MODE_BITSIZE (mode));
4748 mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
4749 emit_insn (gen_extxl (dest, scratch, width, addr));
4750 emit_insn (gen_mskxl (scratch, scratch, mask, addr));
4751 if (val != const0_rtx)
4752 emit_insn (gen_iordi3 (scratch, scratch, val));
4753
4754 emit_store_conditional (DImode, scratch, mem, scratch);
4755
4756 x = gen_rtx_EQ (DImode, scratch, const0_rtx);
4757 emit_unlikely_jump (x, label);
4758
4759 alpha_post_atomic_barrier (model);
4760 }
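/* For reference (editor's note, illustrative only): for an HImode
   exchange, the splitter above emits roughly

	1:	ldq_l	scratch,0(aligned_addr)
		extwl	scratch,addr,dest	# extract the old halfword
		mskwl	scratch,addr,scratch	# clear that halfword
		bis	scratch,val,scratch	# merge in the new value
		stq_c	scratch,0(aligned_addr)
		beq	scratch,1b

   where VAL has already been positioned within the quadword by the
   inswl emitted from alpha_expand_atomic_exchange_12. */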
4761 \f
4762 /* Adjust the cost of a scheduling dependency. Return the new cost of
4763 the dependency described by LINK of INSN on DEP_INSN. COST is the current cost. */
4764
4765 static int
4766 alpha_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
4767 {
4768 enum attr_type dep_insn_type;
4769
4770 /* If the dependence is an anti-dependence, there is no cost. For an
4771 output dependence, there is sometimes a cost, but it doesn't seem
4772 worth handling those few cases. */
4773 if (REG_NOTE_KIND (link) != 0)
4774 return cost;
4775
4776 /* If we can't recognize the insns, we can't really do anything. */
4777 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
4778 return cost;
4779
4780 dep_insn_type = get_attr_type (dep_insn);
4781
4782 /* Bring in the user-defined memory latency. */
4783 if (dep_insn_type == TYPE_ILD
4784 || dep_insn_type == TYPE_FLD
4785 || dep_insn_type == TYPE_LDSYM)
4786 cost += alpha_memory_latency - 1;
4787
4788 /* Everything else handled in DFA bypasses now. */
4789
4790 return cost;
4791 }
4792
4793 /* The number of instructions that can be issued per cycle. */
4794
4795 static int
4796 alpha_issue_rate (void)
4797 {
4798 return (alpha_tune == PROCESSOR_EV4 ? 2 : 4);
4799 }
4800
4801 /* How many alternative schedules to try. This should be as wide as the
4802 scheduling freedom in the DFA, but no wider. Making this value too
4803 large results in extra work for the scheduler.
4804
4805 For EV4, loads can be issued to either IB0 or IB1, thus we have 2
4806 alternative schedules. For EV5, we can choose between E0/E1 and
4807 FA/FM. For EV6, an arithmetic insn can be issued to U0/U1/L0/L1. */
4808
4809 static int
4810 alpha_multipass_dfa_lookahead (void)
4811 {
4812 return (alpha_tune == PROCESSOR_EV6 ? 4 : 2);
4813 }
4814 \f
4815 /* Machine-specific function data. */
4816
4817 struct GTY(()) alpha_links;
4818
4819 struct string_traits : default_hashmap_traits
4820 {
4821 static bool equal_keys (const char *const &a, const char *const &b)
4822 {
4823 return strcmp (a, b) == 0;
4824 }
4825 };
4826
4827 struct GTY(()) machine_function
4828 {
4829 /* For flag_reorder_blocks_and_partition. */
4830 rtx gp_save_rtx;
4831
4832 /* For VMS condition handlers. */
4833 bool uses_condition_handler;
4834
4835 /* Linkage entries. */
4836 hash_map<const char *, alpha_links *, string_traits> *links;
4837 };
4838
4839 /* How to allocate a 'struct machine_function'. */
4840
4841 static struct machine_function *
4842 alpha_init_machine_status (void)
4843 {
4844 return ggc_cleared_alloc<machine_function> ();
4845 }
4846
4847 /* Support for frame based VMS condition handlers. */
4848
4849 /* A VMS condition handler may be established for a function with a call to
4850 __builtin_establish_vms_condition_handler, and cancelled with a call to
4851 __builtin_revert_vms_condition_handler.
4852
4853 The VMS Condition Handling Facility knows about the existence of a handler
4854 from the procedure descriptor .handler field. As with the VMS native compilers,
4855 we store the user-specified handler's address at a fixed location in the
4856 stack frame and point the procedure descriptor at a common wrapper which
4857 fetches the real handler's address and issues an indirect call.
4858
4859 The indirection wrapper is "__gcc_shell_handler", provided by libgcc.
4860
4861 We force the procedure kind to PT_STACK, and the fixed frame location is
4862 fp+8, just before the register save area. We use the handler_data field in
4863 the procedure descriptor to state the fp offset at which the installed
4864 handler address can be found. */
4865
4866 #define VMS_COND_HANDLER_FP_OFFSET 8
4867
4868 /* Expand code to store the currently installed user VMS condition handler
4869 into TARGET and install HANDLER as the new condition handler. */
4870
4871 void
4872 alpha_expand_builtin_establish_vms_condition_handler (rtx target, rtx handler)
4873 {
4874 rtx handler_slot_address = plus_constant (Pmode, hard_frame_pointer_rtx,
4875 VMS_COND_HANDLER_FP_OFFSET);
4876
4877 rtx handler_slot
4878 = gen_rtx_MEM (DImode, handler_slot_address);
4879
4880 emit_move_insn (target, handler_slot);
4881 emit_move_insn (handler_slot, handler);
4882
4883 /* Notify the start/prologue/epilogue emitters that the condition handler
4884 slot is needed. In addition to reserving the slot space, this will force
4885 the procedure kind to PT_STACK, ensuring that the hard_frame_pointer_rtx
4886 use above is correct. */
4887 cfun->machine->uses_condition_handler = true;
4888 }
4889
4890 /* Expand code to store the current VMS condition handler into TARGET and
4891 nullify it. */
4892
4893 void
4894 alpha_expand_builtin_revert_vms_condition_handler (rtx target)
4895 {
4896 /* We implement this by establishing a null condition handler, with the tiny
4897 side effect of setting uses_condition_handler. This is a little bit
4898 pessimistic if no actual builtin_establish call is ever issued, which is
4899 not a real problem and is expected never to happen anyway. */
4900
4901 alpha_expand_builtin_establish_vms_condition_handler (target, const0_rtx);
4902 }
4903
4904 /* Functions to save and restore alpha_return_addr_rtx. */
4905
4906 /* Start the ball rolling with RETURN_ADDR_RTX. */
4907
4908 rtx
4909 alpha_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4910 {
4911 if (count != 0)
4912 return const0_rtx;
4913
4914 return get_hard_reg_initial_val (Pmode, REG_RA);
4915 }
4916
4917 /* Return or create a memory slot containing the gp value for the current
4918 function. Needed only if TARGET_LD_BUGGY_LDGP. */
4919
4920 rtx
4921 alpha_gp_save_rtx (void)
4922 {
4923 rtx_insn *seq;
4924 rtx m = cfun->machine->gp_save_rtx;
4925
4926 if (m == NULL)
4927 {
4928 start_sequence ();
4929
4930 m = assign_stack_local (DImode, UNITS_PER_WORD, BITS_PER_WORD);
4931 m = validize_mem (m);
4932 emit_move_insn (m, pic_offset_table_rtx);
4933
4934 seq = get_insns ();
4935 end_sequence ();
4936
4937 /* We used to simply emit the sequence after entry_of_function.
4938 However this breaks the CFG if the first instruction in the
4939 first block is not the NOTE_INSN_BASIC_BLOCK, for example a
4940 label. Emit the sequence properly on the edge. We are only
4941 invoked from dw2_build_landing_pads and finish_eh_generation
4942 will call commit_edge_insertions thanks to a kludge. */
4943 insert_insn_on_edge (seq,
4944 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
4945
4946 cfun->machine->gp_save_rtx = m;
4947 }
4948
4949 return m;
4950 }
4951
4952 static void
4953 alpha_instantiate_decls (void)
4954 {
4955 if (cfun->machine->gp_save_rtx != NULL_RTX)
4956 instantiate_decl_rtl (cfun->machine->gp_save_rtx);
4957 }
4958
4959 static int
4960 alpha_ra_ever_killed (void)
4961 {
4962 rtx_insn *top;
4963
4964 if (!has_hard_reg_initial_val (Pmode, REG_RA))
4965 return (int)df_regs_ever_live_p (REG_RA);
4966
4967 push_topmost_sequence ();
4968 top = get_insns ();
4969 pop_topmost_sequence ();
4970
4971 return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL);
4972 }
4973
4974 \f
4975 /* Return the trap mode suffix applicable to the current
4976 instruction, or NULL. */
4977
4978 static const char *
4979 get_trap_mode_suffix (void)
4980 {
4981 enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn);
4982
4983 switch (s)
4984 {
4985 case TRAP_SUFFIX_NONE:
4986 return NULL;
4987
4988 case TRAP_SUFFIX_SU:
4989 if (alpha_fptm >= ALPHA_FPTM_SU)
4990 return "su";
4991 return NULL;
4992
4993 case TRAP_SUFFIX_SUI:
4994 if (alpha_fptm >= ALPHA_FPTM_SUI)
4995 return "sui";
4996 return NULL;
4997
4998 case TRAP_SUFFIX_V_SV:
4999 switch (alpha_fptm)
5000 {
5001 case ALPHA_FPTM_N:
5002 return NULL;
5003 case ALPHA_FPTM_U:
5004 return "v";
5005 case ALPHA_FPTM_SU:
5006 case ALPHA_FPTM_SUI:
5007 return "sv";
5008 default:
5009 gcc_unreachable ();
5010 }
5011
5012 case TRAP_SUFFIX_V_SV_SVI:
5013 switch (alpha_fptm)
5014 {
5015 case ALPHA_FPTM_N:
5016 return NULL;
5017 case ALPHA_FPTM_U:
5018 return "v";
5019 case ALPHA_FPTM_SU:
5020 return "sv";
5021 case ALPHA_FPTM_SUI:
5022 return "svi";
5023 default:
5024 gcc_unreachable ();
5025 }
5026 break;
5027
5028 case TRAP_SUFFIX_U_SU_SUI:
5029 switch (alpha_fptm)
5030 {
5031 case ALPHA_FPTM_N:
5032 return NULL;
5033 case ALPHA_FPTM_U:
5034 return "u";
5035 case ALPHA_FPTM_SU:
5036 return "su";
5037 case ALPHA_FPTM_SUI:
5038 return "sui";
5039 default:
5040 gcc_unreachable ();
5041 }
5042 break;
5043
5044 default:
5045 gcc_unreachable ();
5046 }
5047 gcc_unreachable ();
5048 }
5049
5050 /* Return the rounding mode suffix applicable to the current
5051 instruction, or NULL. */
5052
5053 static const char *
5054 get_round_mode_suffix (void)
5055 {
5056 enum attr_round_suffix s = get_attr_round_suffix (current_output_insn);
5057
5058 switch (s)
5059 {
5060 case ROUND_SUFFIX_NONE:
5061 return NULL;
5062 case ROUND_SUFFIX_NORMAL:
5063 switch (alpha_fprm)
5064 {
5065 case ALPHA_FPRM_NORM:
5066 return NULL;
5067 case ALPHA_FPRM_MINF:
5068 return "m";
5069 case ALPHA_FPRM_CHOP:
5070 return "c";
5071 case ALPHA_FPRM_DYN:
5072 return "d";
5073 default:
5074 gcc_unreachable ();
5075 }
5076 break;
5077
5078 case ROUND_SUFFIX_C:
5079 return "c";
5080
5081 default:
5082 gcc_unreachable ();
5083 }
5084 gcc_unreachable ();
5085 }
5086
5087 /* Print an operand. Recognize special options, documented below. */
5088
5089 void
5090 print_operand (FILE *file, rtx x, int code)
5091 {
5092 int i;
5093
5094 switch (code)
5095 {
5096 case '~':
5097 /* Print the assembler name of the current function. */
5098 assemble_name (file, alpha_fnname);
5099 break;
5100
5101 case '&':
5102 if (const char *name = get_some_local_dynamic_name ())
5103 assemble_name (file, name);
5104 else
5105 output_operand_lossage ("'%%&' used without any "
5106 "local dynamic TLS references");
5107 break;
5108
5109 case '/':
5110 {
5111 const char *trap = get_trap_mode_suffix ();
5112 const char *round = get_round_mode_suffix ();
5113
5114 if (trap || round)
5115 fprintf (file, "/%s%s", (trap ? trap : ""), (round ? round : ""));
5116 break;
5117 }
5118
5119 case ',':
5120 /* Generates single precision instruction suffix. */
5121 fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file);
5122 break;
5123
5124 case '-':
5125 /* Generates double precision instruction suffix. */
5126 fputc ((TARGET_FLOAT_VAX ? 'g' : 't'), file);
5127 break;
5128
5129 case '#':
5130 if (alpha_this_literal_sequence_number == 0)
5131 alpha_this_literal_sequence_number = alpha_next_sequence_number++;
5132 fprintf (file, "%d", alpha_this_literal_sequence_number);
5133 break;
5134
5135 case '*':
5136 if (alpha_this_gpdisp_sequence_number == 0)
5137 alpha_this_gpdisp_sequence_number = alpha_next_sequence_number++;
5138 fprintf (file, "%d", alpha_this_gpdisp_sequence_number);
5139 break;
5140
5141 case 'J':
5142 {
5143 const char *lituse;
5144
5145 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL)
5146 {
5147 x = XVECEXP (x, 0, 0);
5148 lituse = "lituse_tlsgd";
5149 }
5150 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL)
5151 {
5152 x = XVECEXP (x, 0, 0);
5153 lituse = "lituse_tlsldm";
5154 }
5155 else if (CONST_INT_P (x))
5156 lituse = "lituse_jsr";
5157 else
5158 {
5159 output_operand_lossage ("invalid %%J value");
5160 break;
5161 }
5162
5163 if (x != const0_rtx)
5164 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5165 }
5166 break;
5167
5168 case 'j':
5169 {
5170 const char *lituse;
5171
5172 #ifdef HAVE_AS_JSRDIRECT_RELOCS
5173 lituse = "lituse_jsrdirect";
5174 #else
5175 lituse = "lituse_jsr";
5176 #endif
5177
5178 gcc_assert (INTVAL (x) != 0);
5179 fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x));
5180 }
5181 break;
5182 case 'r':
5183 /* If this operand is the constant zero, write it as "$31". */
5184 if (REG_P (x))
5185 fprintf (file, "%s", reg_names[REGNO (x)]);
5186 else if (x == CONST0_RTX (GET_MODE (x)))
5187 fprintf (file, "$31");
5188 else
5189 output_operand_lossage ("invalid %%r value");
5190 break;
5191
5192 case 'R':
5193 /* Similar, but for floating-point. */
5194 if (REG_P (x))
5195 fprintf (file, "%s", reg_names[REGNO (x)]);
5196 else if (x == CONST0_RTX (GET_MODE (x)))
5197 fprintf (file, "$f31");
5198 else
5199 output_operand_lossage ("invalid %%R value");
5200 break;
5201
5202 case 'N':
5203 /* Write the 1's complement of a constant. */
5204 if (!CONST_INT_P (x))
5205 output_operand_lossage ("invalid %%N value");
5206
5207 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
5208 break;
5209
5210 case 'P':
5211 /* Write 1 << C, for a constant C. */
5212 if (!CONST_INT_P (x))
5213 output_operand_lossage ("invalid %%P value");
5214
5215 fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x));
5216 break;
5217
5218 case 'h':
5219 /* Write the high-order 16 bits of a constant, sign-extended. */
5220 if (!CONST_INT_P (x))
5221 output_operand_lossage ("invalid %%h value");
5222
5223 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16);
5224 break;
5225
5226 case 'L':
5227 /* Write the low-order 16 bits of a constant, sign-extended. */
5228 if (!CONST_INT_P (x))
5229 output_operand_lossage ("invalid %%L value");
5230
5231 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5232 (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000));
5233 break;
5234
5235 case 'm':
5236 /* Write mask for ZAP insn. */
5237 if (CONST_INT_P (x))
5238 {
5239 HOST_WIDE_INT mask = 0, value = INTVAL (x);
5240
5241 for (i = 0; i < 8; i++, value >>= 8)
5242 if (value & 0xff)
5243 mask |= (1 << i);
5244
5245 fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask);
5246 }
5247 else
5248 output_operand_lossage ("invalid %%m value");
5249 break;
5250
5251 case 'M':
5252 /* 'b', 'w', 'l', or 'q' as the value of the constant. */
5253 if (!mode_width_operand (x, VOIDmode))
5254 output_operand_lossage ("invalid %%M value");
5255
5256 fprintf (file, "%s",
5257 (INTVAL (x) == 8 ? "b"
5258 : INTVAL (x) == 16 ? "w"
5259 : INTVAL (x) == 32 ? "l"
5260 : "q"));
5261 break;
5262
5263 case 'U':
5264 /* Similar, except do it from the mask. */
5265 if (CONST_INT_P (x))
5266 {
5267 HOST_WIDE_INT value = INTVAL (x);
5268
5269 if (value == 0xff)
5270 {
5271 fputc ('b', file);
5272 break;
5273 }
5274 if (value == 0xffff)
5275 {
5276 fputc ('w', file);
5277 break;
5278 }
5279 if (value == 0xffffffff)
5280 {
5281 fputc ('l', file);
5282 break;
5283 }
5284 if (value == -1)
5285 {
5286 fputc ('q', file);
5287 break;
5288 }
5289 }
5290
5291 output_operand_lossage ("invalid %%U value");
5292 break;
5293
5294 case 's':
5295 /* Write the constant value divided by 8. */
5296 if (!CONST_INT_P (x)
5297 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5298 || (INTVAL (x) & 7) != 0)
5299 output_operand_lossage ("invalid %%s value");
5300
5301 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8);
5302 break;
5303
5304 case 'S':
5305 /* Same, except compute (64 - c) / 8. */
5306
5307 if (!CONST_INT_P (x)
5308 || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64
5309 || (INTVAL (x) & 7) != 0)
5310 output_operand_lossage ("invalid %%S value");
5311
5312 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (64 - INTVAL (x)) / 8);
5313 break;
5314
5315 case 'C': case 'D': case 'c': case 'd':
5316 /* Write out comparison name. */
5317 {
5318 enum rtx_code c = GET_CODE (x);
5319
5320 if (!COMPARISON_P (x))
5321 output_operand_lossage ("invalid %%C value");
5322
5323 else if (code == 'D')
5324 c = reverse_condition (c);
5325 else if (code == 'c')
5326 c = swap_condition (c);
5327 else if (code == 'd')
5328 c = swap_condition (reverse_condition (c));
5329
5330 if (c == LEU)
5331 fprintf (file, "ule");
5332 else if (c == LTU)
5333 fprintf (file, "ult");
5334 else if (c == UNORDERED)
5335 fprintf (file, "un");
5336 else
5337 fprintf (file, "%s", GET_RTX_NAME (c));
5338 }
5339 break;
5340
5341 case 'E':
5342 /* Write the divide or modulus operator. */
5343 switch (GET_CODE (x))
5344 {
5345 case DIV:
5346 fprintf (file, "div%s", GET_MODE (x) == SImode ? "l" : "q");
5347 break;
5348 case UDIV:
5349 fprintf (file, "div%su", GET_MODE (x) == SImode ? "l" : "q");
5350 break;
5351 case MOD:
5352 fprintf (file, "rem%s", GET_MODE (x) == SImode ? "l" : "q");
5353 break;
5354 case UMOD:
5355 fprintf (file, "rem%su", GET_MODE (x) == SImode ? "l" : "q");
5356 break;
5357 default:
5358 output_operand_lossage ("invalid %%E value");
5359 break;
5360 }
5361 break;
5362
5363 case 'A':
5364 /* Write "_u" for unaligned access. */
5365 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND)
5366 fprintf (file, "_u");
5367 break;
5368
5369 case 0:
5370 if (REG_P (x))
5371 fprintf (file, "%s", reg_names[REGNO (x)]);
5372 else if (MEM_P (x))
5373 output_address (XEXP (x, 0));
5374 else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
5375 {
5376 switch (XINT (XEXP (x, 0), 1))
5377 {
5378 case UNSPEC_DTPREL:
5379 case UNSPEC_TPREL:
5380 output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0));
5381 break;
5382 default:
5383 output_operand_lossage ("unknown relocation unspec");
5384 break;
5385 }
5386 }
5387 else
5388 output_addr_const (file, x);
5389 break;
5390
5391 default:
5392 output_operand_lossage ("invalid %%xn code");
5393 }
5394 }
5395
5396 void
5397 print_operand_address (FILE *file, rtx addr)
5398 {
5399 int basereg = 31;
5400 HOST_WIDE_INT offset = 0;
5401
5402 if (GET_CODE (addr) == AND)
5403 addr = XEXP (addr, 0);
5404
5405 if (GET_CODE (addr) == PLUS
5406 && CONST_INT_P (XEXP (addr, 1)))
5407 {
5408 offset = INTVAL (XEXP (addr, 1));
5409 addr = XEXP (addr, 0);
5410 }
5411
5412 if (GET_CODE (addr) == LO_SUM)
5413 {
5414 const char *reloc16, *reloclo;
5415 rtx op1 = XEXP (addr, 1);
5416
5417 if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC)
5418 {
5419 op1 = XEXP (op1, 0);
5420 switch (XINT (op1, 1))
5421 {
5422 case UNSPEC_DTPREL:
5423 reloc16 = NULL;
5424 reloclo = (alpha_tls_size == 16 ? "dtprel" : "dtprello");
5425 break;
5426 case UNSPEC_TPREL:
5427 reloc16 = NULL;
5428 reloclo = (alpha_tls_size == 16 ? "tprel" : "tprello");
5429 break;
5430 default:
5431 output_operand_lossage ("unknown relocation unspec");
5432 return;
5433 }
5434
5435 output_addr_const (file, XVECEXP (op1, 0, 0));
5436 }
5437 else
5438 {
5439 reloc16 = "gprel";
5440 reloclo = "gprellow";
5441 output_addr_const (file, op1);
5442 }
5443
5444 if (offset)
5445 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
5446
5447 addr = XEXP (addr, 0);
5448 switch (GET_CODE (addr))
5449 {
5450 case REG:
5451 basereg = REGNO (addr);
5452 break;
5453
5454 case SUBREG:
5455 basereg = subreg_regno (addr);
5456 break;
5457
5458 default:
5459 gcc_unreachable ();
5460 }
5461
5462 fprintf (file, "($%d)\t\t!%s", basereg,
5463 (basereg == 29 ? reloc16 : reloclo));
5464 return;
5465 }
5466
5467 switch (GET_CODE (addr))
5468 {
5469 case REG:
5470 basereg = REGNO (addr);
5471 break;
5472
5473 case SUBREG:
5474 basereg = subreg_regno (addr);
5475 break;
5476
5477 case CONST_INT:
5478 offset = INTVAL (addr);
5479 break;
5480
5481 case SYMBOL_REF:
5482 gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5483 fprintf (file, "%s", XSTR (addr, 0));
5484 return;
5485
5486 case CONST:
5487 gcc_assert (TARGET_ABI_OPEN_VMS || this_is_asm_operands);
5488 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS
5489 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF);
5490 fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC,
5491 XSTR (XEXP (XEXP (addr, 0), 0), 0),
5492 INTVAL (XEXP (XEXP (addr, 0), 1)));
5493 return;
5494
5495 default:
5496 output_operand_lossage ("invalid operand address");
5497 return;
5498 }
5499
5500 fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg);
5501 }
5502 \f
5503 /* Emit RTL insns to initialize the variable parts of a trampoline at
5504 M_TRAMP. FNDECL is the target function's decl. CHAIN_VALUE is an rtx
5505 for the static chain value for the function. */
5506
5507 static void
5508 alpha_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
5509 {
5510 rtx fnaddr, mem, word1, word2;
5511
5512 fnaddr = XEXP (DECL_RTL (fndecl), 0);
5513
5514 #ifdef POINTERS_EXTEND_UNSIGNED
5515 fnaddr = convert_memory_address (Pmode, fnaddr);
5516 chain_value = convert_memory_address (Pmode, chain_value);
5517 #endif
5518
5519 if (TARGET_ABI_OPEN_VMS)
5520 {
5521 const char *fnname;
5522 char *trname;
5523
5524 /* Construct the name of the trampoline entry point. */
5525 fnname = XSTR (fnaddr, 0);
5526 trname = (char *) alloca (strlen (fnname) + 5);
5527 strcpy (trname, fnname);
5528 strcat (trname, "..tr");
5529 fnname = ggc_alloc_string (trname, strlen (trname) + 1);
5530 word2 = gen_rtx_SYMBOL_REF (Pmode, fnname);
5531
5532 /* Trampoline (or "bounded") procedure descriptor is constructed from
5533 the function's procedure descriptor with certain fields zeroed in accordance
5534 with the VMS calling standard. This is stored in the first quadword. */
5535 word1 = force_reg (DImode, gen_const_mem (DImode, fnaddr));
5536 word1 = expand_and (DImode, word1,
5537 GEN_INT (HOST_WIDE_INT_C (0xffff0fff0000fff0)),
5538 NULL);
5539 }
5540 else
5541 {
5542 /* These 4 instructions are:
5543 ldq $1,24($27)
5544 ldq $27,16($27)
5545 jmp $31,($27),0
5546 nop
5547 We don't bother setting the HINT field of the jump; the nop
5548 is merely there for padding. */
5549 word1 = GEN_INT (HOST_WIDE_INT_C (0xa77b0010a43b0018));
5550 word2 = GEN_INT (HOST_WIDE_INT_C (0x47ff041f6bfb0000));
5551 }
5552
5553 /* Store the first two words, as computed above. */
5554 mem = adjust_address (m_tramp, DImode, 0);
5555 emit_move_insn (mem, word1);
5556 mem = adjust_address (m_tramp, DImode, 8);
5557 emit_move_insn (mem, word2);
5558
5559 /* Store function address and static chain value. */
5560 mem = adjust_address (m_tramp, Pmode, 16);
5561 emit_move_insn (mem, fnaddr);
5562 mem = adjust_address (m_tramp, Pmode, 24);
5563 emit_move_insn (mem, chain_value);
5564
5565 if (TARGET_ABI_OSF)
5566 {
5567 emit_insn (gen_imb ());
5568 #ifdef HAVE_ENABLE_EXECUTE_STACK
5569 emit_library_call (init_one_libfunc ("__enable_execute_stack"),
5570 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
5571 #endif
5572 }
5573 }
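/* For reference (editor's note, illustrative only): on OSF the resulting
   32-byte trampoline therefore looks like

	 0:	ldq $1,24($27)		# load static chain
	 4:	ldq $27,16($27)		# load target address
	 8:	jmp $31,($27),0
	12:	nop
	16:	<function address>
	24:	<static chain value>

   relying on $27 (the procedure value register) pointing at the
   trampoline itself when it is entered. */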
5574 \f
5575 /* Determine where to put an argument to a function.
5576 Value is zero to push the argument on the stack,
5577 or a hard register in which to store the argument.
5578
5579 MODE is the argument's machine mode.
5580 TYPE is the data type of the argument (as a tree).
5581 This is null for libcalls where that information may
5582 not be available.
5583 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5584 the preceding args and about the function being called.
5585 NAMED is nonzero if this argument is a named parameter
5586 (otherwise it is an extra parameter matching an ellipsis).
5587
5588 On Alpha the first 6 words of args are normally in registers
5589 and the rest are pushed. */
5590
5591 static rtx
5592 alpha_function_arg (cumulative_args_t cum_v, machine_mode mode,
5593 const_tree type, bool named ATTRIBUTE_UNUSED)
5594 {
5595 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5596 int basereg;
5597 int num_args;
5598
5599 /* Don't get confused and pass small structures in FP registers. */
5600 if (type && AGGREGATE_TYPE_P (type))
5601 basereg = 16;
5602 else
5603 {
5604 #ifdef ENABLE_CHECKING
5605 /* With alpha_split_complex_arg, we shouldn't see any raw complex
5606 values here. */
5607 gcc_assert (!COMPLEX_MODE_P (mode));
5608 #endif
5609
5610 /* Set up defaults for FP operands passed in FP registers, and
5611 integral operands passed in integer registers. */
5612 if (TARGET_FPREGS && GET_MODE_CLASS (mode) == MODE_FLOAT)
5613 basereg = 32 + 16;
5614 else
5615 basereg = 16;
5616 }
5617
5618 /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for
5619 the two platforms, so we can't avoid conditional compilation. */
5620 #if TARGET_ABI_OPEN_VMS
5621 {
5622 if (mode == VOIDmode)
5623 return alpha_arg_info_reg_val (*cum);
5624
5625 num_args = cum->num_args;
5626 if (num_args >= 6
5627 || targetm.calls.must_pass_in_stack (mode, type))
5628 return NULL_RTX;
5629 }
5630 #elif TARGET_ABI_OSF
5631 {
5632 if (*cum >= 6)
5633 return NULL_RTX;
5634 num_args = *cum;
5635
5636 /* VOID is passed as a special flag for "last argument". */
5637 if (type == void_type_node)
5638 basereg = 16;
5639 else if (targetm.calls.must_pass_in_stack (mode, type))
5640 return NULL_RTX;
5641 }
5642 #else
5643 #error Unhandled ABI
5644 #endif
5645
5646 return gen_rtx_REG (mode, num_args + basereg);
5647 }
5648
5649 /* Update the data in CUM to advance over an argument
5650 of mode MODE and data type TYPE.
5651 (TYPE is null for libcalls where that information may not be available.) */
5652
5653 static void
5654 alpha_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
5655 const_tree type, bool named ATTRIBUTE_UNUSED)
5656 {
5657 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5658 bool onstack = targetm.calls.must_pass_in_stack (mode, type);
5659 int increment = onstack ? 6 : ALPHA_ARG_SIZE (mode, type, named);
5660
5661 #if TARGET_ABI_OSF
5662 *cum += increment;
5663 #else
5664 if (!onstack && cum->num_args < 6)
5665 cum->atypes[cum->num_args] = alpha_arg_type (mode);
5666 cum->num_args += increment;
5667 #endif
5668 }
5669
5670 static int
5671 alpha_arg_partial_bytes (cumulative_args_t cum_v,
5672 machine_mode mode ATTRIBUTE_UNUSED,
5673 tree type ATTRIBUTE_UNUSED,
5674 bool named ATTRIBUTE_UNUSED)
5675 {
5676 int words = 0;
5677 CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v);
5678
5679 #if TARGET_ABI_OPEN_VMS
5680 if (cum->num_args < 6
5681 && 6 < cum->num_args + ALPHA_ARG_SIZE (mode, type, named))
5682 words = 6 - cum->num_args;
5683 #elif TARGET_ABI_OSF
5684 if (*cum < 6 && 6 < *cum + ALPHA_ARG_SIZE (mode, type, named))
5685 words = 6 - *cum;
5686 #else
5687 #error Unhandled ABI
5688 #endif
5689
5690 return words * UNITS_PER_WORD;
5691 }
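/* Illustrative example (editor's note, not part of the original source):
   on OSF, an argument occupying three words that starts in the sixth
   argument slot (*cum == 5) has only one word left in registers, so this
   hook reports 8 partial bytes; the remaining two words are passed on
   the stack. */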
5692
5693
5694 /* Return true if TYPE must be returned in memory, instead of in registers. */
5695
5696 static bool
5697 alpha_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
5698 {
5699 machine_mode mode = VOIDmode;
5700 int size;
5701
5702 if (type)
5703 {
5704 mode = TYPE_MODE (type);
5705
5706 /* All aggregates are returned in memory, except on OpenVMS where
5707 records that fit 64 bits should be returned by immediate value
5708 as required by section 3.8.7.1 of the OpenVMS Calling Standard. */
5709 if (TARGET_ABI_OPEN_VMS
5710 && TREE_CODE (type) != ARRAY_TYPE
5711 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 8)
5712 return false;
5713
5714 if (AGGREGATE_TYPE_P (type))
5715 return true;
5716 }
5717
5718 size = GET_MODE_SIZE (mode);
5719 switch (GET_MODE_CLASS (mode))
5720 {
5721 case MODE_VECTOR_FLOAT:
5722 /* Pass all float vectors in memory, like an aggregate. */
5723 return true;
5724
5725 case MODE_COMPLEX_FLOAT:
5726 /* We judge complex floats on the size of their element,
5727 not the size of the whole type. */
5728 size = GET_MODE_UNIT_SIZE (mode);
5729 break;
5730
5731 case MODE_INT:
5732 case MODE_FLOAT:
5733 case MODE_COMPLEX_INT:
5734 case MODE_VECTOR_INT:
5735 break;
5736
5737 default:
5738 /* ??? We get called on all sorts of random stuff from
5739 aggregate_value_p. We must return something, but it's not
5740 clear what's safe to return. Pretend it's a struct I
5741 guess. */
5742 return true;
5743 }
5744
5745 /* Otherwise types must fit in one register. */
5746 return size > UNITS_PER_WORD;
5747 }
5748
5749 /* Return true if TYPE should be passed by invisible reference. */
5750
5751 static bool
5752 alpha_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
5753 machine_mode mode,
5754 const_tree type ATTRIBUTE_UNUSED,
5755 bool named ATTRIBUTE_UNUSED)
5756 {
5757 return mode == TFmode || mode == TCmode;
5758 }
5759
5760 /* Define how to find the value returned by a function. VALTYPE is the
5761 data type of the value (as a tree). If the precise function being
5762 called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0.
5763 MODE is set instead of VALTYPE for libcalls.
5764
5765 On Alpha the value is found in $0 for integer functions and
5766 $f0 for floating-point functions. */
5767
5768 rtx
5769 function_value (const_tree valtype, const_tree func ATTRIBUTE_UNUSED,
5770 machine_mode mode)
5771 {
5772 unsigned int regnum, dummy ATTRIBUTE_UNUSED;
5773 enum mode_class mclass;
5774
5775 gcc_assert (!valtype || !alpha_return_in_memory (valtype, func));
5776
5777 if (valtype)
5778 mode = TYPE_MODE (valtype);
5779
5780 mclass = GET_MODE_CLASS (mode);
5781 switch (mclass)
5782 {
5783 case MODE_INT:
5784 /* Do the same thing as PROMOTE_MODE except for libcalls on VMS,
5785 where we have them returning both SImode and DImode. */
5786 if (!(TARGET_ABI_OPEN_VMS && valtype && AGGREGATE_TYPE_P (valtype)))
5787 PROMOTE_MODE (mode, dummy, valtype);
5788 /* FALLTHRU */
5789
5790 case MODE_COMPLEX_INT:
5791 case MODE_VECTOR_INT:
5792 regnum = 0;
5793 break;
5794
5795 case MODE_FLOAT:
5796 regnum = 32;
5797 break;
5798
5799 case MODE_COMPLEX_FLOAT:
5800 {
5801 machine_mode cmode = GET_MODE_INNER (mode);
5802
5803 return gen_rtx_PARALLEL
5804 (VOIDmode,
5805 gen_rtvec (2,
5806 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32),
5807 const0_rtx),
5808 gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33),
5809 GEN_INT (GET_MODE_SIZE (cmode)))));
5810 }
5811
5812 case MODE_RANDOM:
5813 /* We should only reach here for BLKmode on VMS. */
5814 gcc_assert (TARGET_ABI_OPEN_VMS && mode == BLKmode);
5815 regnum = 0;
5816 break;
5817
5818 default:
5819 gcc_unreachable ();
5820 }
5821
5822 return gen_rtx_REG (mode, regnum);
5823 }
5824
5825 /* TCmode complex values are passed by invisible reference. We
5826 should not split these values. */
5827
5828 static bool
5829 alpha_split_complex_arg (const_tree type)
5830 {
5831 return TYPE_MODE (type) != TCmode;
5832 }
5833
5834 static tree
5835 alpha_build_builtin_va_list (void)
5836 {
5837 tree base, ofs, space, record, type_decl;
5838
5839 if (TARGET_ABI_OPEN_VMS)
5840 return ptr_type_node;
5841
5842 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5843 type_decl = build_decl (BUILTINS_LOCATION,
5844 TYPE_DECL, get_identifier ("__va_list_tag"), record);
5845 TYPE_STUB_DECL (record) = type_decl;
5846 TYPE_NAME (record) = type_decl;
5847
5848 /* C++? SET_IS_AGGR_TYPE (record, 1); */
5849
5850 /* Dummy field to prevent alignment warnings. */
5851 space = build_decl (BUILTINS_LOCATION,
5852 FIELD_DECL, NULL_TREE, integer_type_node);
5853 DECL_FIELD_CONTEXT (space) = record;
5854 DECL_ARTIFICIAL (space) = 1;
5855 DECL_IGNORED_P (space) = 1;
5856
5857 ofs = build_decl (BUILTINS_LOCATION,
5858 FIELD_DECL, get_identifier ("__offset"),
5859 integer_type_node);
5860 DECL_FIELD_CONTEXT (ofs) = record;
5861 DECL_CHAIN (ofs) = space;
5862
5863 base = build_decl (BUILTINS_LOCATION,
5864 FIELD_DECL, get_identifier ("__base"),
5865 ptr_type_node);
5866 DECL_FIELD_CONTEXT (base) = record;
5867 DECL_CHAIN (base) = ofs;
5868
5869 TYPE_FIELDS (record) = base;
5870 layout_type (record);
5871
5872 va_list_gpr_counter_field = ofs;
5873 return record;
5874 }
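/* Roughly (editor's note, illustrative only), the record built above is
   the OSF va_list:

	struct __va_list_tag {
	    void *__base;	// pointer into the argument area
	    int   __offset;	// byte offset of the next argument
	    int   _pad;		// unnamed dummy field in the real layout
	};

   The field semantics noted in the trailing comments are an editorial
   gloss; only the field names and the dummy member come from the code. */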
5875
5876 #if TARGET_ABI_OSF
5877 /* Helper function for alpha_stdarg_optimize_hook. Skip over casts
5878 and constant additions. */
5879
5880 static gimple
5881 va_list_skip_additions (tree lhs)
5882 {
5883 gimple stmt;
5884
5885 for (;;)
5886 {
5887 enum tree_code code;
5888
5889 stmt = SSA_NAME_DEF_STMT (lhs);
5890
5891 if (gimple_code (stmt) == GIMPLE_PHI)
5892 return stmt;
5893
5894 if (!is_gimple_assign (stmt)
5895 || gimple_assign_lhs (stmt) != lhs)
5896 return NULL;
5897
5898 if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME)
5899 return stmt;
5900 code = gimple_assign_rhs_code (stmt);
5901 if (!CONVERT_EXPR_CODE_P (code)
5902 && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
5903 || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST
5904 || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt))))
5905 return stmt;
5906
5907 lhs = gimple_assign_rhs1 (stmt);
5908 }
5909 }
5910
5911 /* Check if LHS = RHS statement is
5912 LHS = *(ap.__base + ap.__offset + cst)
5913 or
5914 LHS = *(ap.__base
5915 + ((ap.__offset + cst <= 47)
5916 ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2).
5917 If the former, indicate that GPR registers are needed,
5918 if the latter, indicate that FPR registers are needed.
5919
5920 Also look for LHS = (*ptr).field, where ptr is one of the forms
5921 listed above.
5922
5923 On alpha, cfun->va_list_gpr_size is used as size of the needed
5924 regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR
5925 registers are needed and bit 1 set if FPR registers are needed.
5926 Return true if va_list references should not be scanned for the
5927 current statement. */
5928
5929 static bool
5930 alpha_stdarg_optimize_hook (struct stdarg_info *si, const_gimple stmt)
5931 {
5932 tree base, offset, rhs;
5933 int offset_arg = 1;
5934 gimple base_stmt;
5935
5936 if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
5937 != GIMPLE_SINGLE_RHS)
5938 return false;
5939
5940 rhs = gimple_assign_rhs1 (stmt);
5941 while (handled_component_p (rhs))
5942 rhs = TREE_OPERAND (rhs, 0);
5943 if (TREE_CODE (rhs) != MEM_REF
5944 || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME)
5945 return false;
5946
5947 stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0));
5948 if (stmt == NULL
5949 || !is_gimple_assign (stmt)
5950 || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR)
5951 return false;
5952
5953 base = gimple_assign_rhs1 (stmt);
5954 if (TREE_CODE (base) == SSA_NAME)
5955 {
5956 base_stmt = va_list_skip_additions (base);
5957 if (base_stmt
5958 && is_gimple_assign (base_stmt)
5959 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5960 base = gimple_assign_rhs1 (base_stmt);
5961 }
5962
5963 if (TREE_CODE (base) != COMPONENT_REF
5964 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5965 {
5966 base = gimple_assign_rhs2 (stmt);
5967 if (TREE_CODE (base) == SSA_NAME)
5968 {
5969 base_stmt = va_list_skip_additions (base);
5970 if (base_stmt
5971 && is_gimple_assign (base_stmt)
5972 && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF)
5973 base = gimple_assign_rhs1 (base_stmt);
5974 }
5975
5976 if (TREE_CODE (base) != COMPONENT_REF
5977 || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node))
5978 return false;
5979
5980 offset_arg = 0;
5981 }
5982
5983 base = get_base_address (base);
5984 if (TREE_CODE (base) != VAR_DECL
5985 || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names))
5986 return false;
5987
5988 offset = gimple_op (stmt, 1 + offset_arg);
5989 if (TREE_CODE (offset) == SSA_NAME)
5990 {
5991 gimple offset_stmt = va_list_skip_additions (offset);
5992
5993 if (offset_stmt
5994 && gimple_code (offset_stmt) == GIMPLE_PHI)
5995 {
5996 HOST_WIDE_INT sub;
5997 gimple arg1_stmt, arg2_stmt;
5998 tree arg1, arg2;
5999 enum tree_code code1, code2;
6000
6001 if (gimple_phi_num_args (offset_stmt) != 2)
6002 goto escapes;
6003
6004 arg1_stmt
6005 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0));
6006 arg2_stmt
6007 = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1));
6008 if (arg1_stmt == NULL
6009 || !is_gimple_assign (arg1_stmt)
6010 || arg2_stmt == NULL
6011 || !is_gimple_assign (arg2_stmt))
6012 goto escapes;
6013
6014 code1 = gimple_assign_rhs_code (arg1_stmt);
6015 code2 = gimple_assign_rhs_code (arg2_stmt);
6016 if (code1 == COMPONENT_REF
6017 && (code2 == MINUS_EXPR || code2 == PLUS_EXPR))
6018 /* Do nothing. */;
6019 else if (code2 == COMPONENT_REF
6020 && (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
6021 {
6022 gimple tem = arg1_stmt;
6023 code2 = code1;
6024 arg1_stmt = arg2_stmt;
6025 arg2_stmt = tem;
6026 }
6027 else
6028 goto escapes;
6029
6030 if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt)))
6031 goto escapes;
6032
6033 sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt));
6034 if (code2 == MINUS_EXPR)
6035 sub = -sub;
6036 if (sub < -48 || sub > -32)
6037 goto escapes;
6038
6039 arg1 = gimple_assign_rhs1 (arg1_stmt);
6040 arg2 = gimple_assign_rhs1 (arg2_stmt);
6041 if (TREE_CODE (arg2) == SSA_NAME)
6042 {
6043 arg2_stmt = va_list_skip_additions (arg2);
6044 if (arg2_stmt == NULL
6045 || !is_gimple_assign (arg2_stmt)
6046 || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF)
6047 goto escapes;
6048 arg2 = gimple_assign_rhs1 (arg2_stmt);
6049 }
6050 if (arg1 != arg2)
6051 goto escapes;
6052
6053 if (TREE_CODE (arg1) != COMPONENT_REF
6054 || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field
6055 || get_base_address (arg1) != base)
6056 goto escapes;
6057
6058 /* Need floating point regs. */
6059 cfun->va_list_fpr_size |= 2;
6060 return false;
6061 }
6062 if (offset_stmt
6063 && is_gimple_assign (offset_stmt)
6064 && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF)
6065 offset = gimple_assign_rhs1 (offset_stmt);
6066 }
6067 if (TREE_CODE (offset) != COMPONENT_REF
6068 || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field
6069 || get_base_address (offset) != base)
6070 goto escapes;
6071 else
6072 /* Need general regs. */
6073 cfun->va_list_fpr_size |= 1;
6074 return false;
6075
6076 escapes:
6077 si->va_list_escapes = true;
6078 return false;
6079 }
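/* Illustrative sketch (user-level C, not part of the compiler): given

       #include <stdarg.h>

       long
       sum_ints (int n, ...)
       {
         va_list ap;
         long s = 0;
         va_start (ap, n);
         while (n--)
           s += va_arg (ap, int);
         va_end (ap);
         return s;
       }

   each va_arg (ap, int) gimplifies to the *(ap.__base + ap.__offset + cst)
   form matched above, so the hook sets bit 0 of cfun->va_list_fpr_size and
   only the integer argument registers get spilled by the varargs prologue.
   A va_arg (ap, double) instead produces the conditional "offset - 48"
   form and sets bit 1.  */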
6080 #endif
6081
6082 /* Perform any actions needed for a function that is receiving a
6083 variable number of arguments. */
6084
6085 static void
6086 alpha_setup_incoming_varargs (cumulative_args_t pcum, machine_mode mode,
6087 tree type, int *pretend_size, int no_rtl)
6088 {
6089 CUMULATIVE_ARGS cum = *get_cumulative_args (pcum);
6090
6091 /* Skip the current argument. */
6092 targetm.calls.function_arg_advance (pack_cumulative_args (&cum), mode, type,
6093 true);
6094
6095 #if TARGET_ABI_OPEN_VMS
6096 /* For VMS, we allocate space for all 6 arg registers plus a count.
6097
6098 However, if NO registers need to be saved, don't allocate any space.
6099 This is not only because we won't need the space, but because AP
6100 includes the current_pretend_args_size and we don't want to mess up
6101 any ap-relative addresses already made. */
6102 if (cum.num_args < 6)
6103 {
6104 if (!no_rtl)
6105 {
6106 emit_move_insn (gen_rtx_REG (DImode, 1), virtual_incoming_args_rtx);
6107 emit_insn (gen_arg_home ());
6108 }
6109 *pretend_size = 7 * UNITS_PER_WORD;
6110 }
6111 #else
6112 /* On OSF/1 and friends, we allocate space for all 12 arg registers, but
6113 only push those that are remaining. However, if NO registers need to
6114 be saved, don't allocate any space. This is not only because we won't
6115 need the space, but because AP includes the current_pretend_args_size
6116 and we don't want to mess up any ap-relative addresses already made.
6117
6118 If we are not to use the floating-point registers, save the integer
6119 registers where we would put the floating-point registers. This is
6120 not the most efficient way to implement varargs with just one register
6121 class, but it isn't worth doing anything more efficient in this rare
6122 case. */
6123 if (cum >= 6)
6124 return;
6125
6126 if (!no_rtl)
6127 {
6128 int count;
6129 alias_set_type set = get_varargs_alias_set ();
6130 rtx tmp;
6131
6132 count = cfun->va_list_gpr_size / UNITS_PER_WORD;
6133 if (count > 6 - cum)
6134 count = 6 - cum;
6135
6136 /* Detect whether integer registers or floating-point registers
6137 are needed by the detected va_arg statements. See above for
6138 how these values are computed. Note that the "escape" value
6139 is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
6140 these bits set. */
6141 gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
6142
6143 if (cfun->va_list_fpr_size & 1)
6144 {
6145 tmp = gen_rtx_MEM (BLKmode,
6146 plus_constant (Pmode, virtual_incoming_args_rtx,
6147 (cum + 6) * UNITS_PER_WORD));
6148 MEM_NOTRAP_P (tmp) = 1;
6149 set_mem_alias_set (tmp, set);
6150 move_block_from_reg (16 + cum, tmp, count);
6151 }
6152
6153 if (cfun->va_list_fpr_size & 2)
6154 {
6155 tmp = gen_rtx_MEM (BLKmode,
6156 plus_constant (Pmode, virtual_incoming_args_rtx,
6157 cum * UNITS_PER_WORD));
6158 MEM_NOTRAP_P (tmp) = 1;
6159 set_mem_alias_set (tmp, set);
6160 move_block_from_reg (16 + cum + TARGET_FPREGS*32, tmp, count);
6161 }
6162 }
6163 *pretend_size = 12 * UNITS_PER_WORD;
6164 #endif
6165 }
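/* Sketch of the resulting OSF save-area layout, assuming CUM < 6 named
   arguments and both bits of cfun->va_list_fpr_size set; AP stands for
   virtual_incoming_args_rtx and every slot is 8 bytes:

       AP + CUM*8       up to AP + 48   remaining FP arg regs, from $f(16+CUM)
       AP + (CUM+6)*8   up to AP + 96   remaining int arg regs, from $(16+CUM)
       AP + 96 ...                      caller-pushed stack arguments

   *pretend_size is always the full 12 * 8 = 96 bytes even when only part
   of the block is actually written.  */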
6166
6167 static void
6168 alpha_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6169 {
6170 HOST_WIDE_INT offset;
6171 tree t, offset_field, base_field;
6172
6173 if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK)
6174 return;
6175
6176 /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base
6177 up by 48, storing fp arg registers in the first 48 bytes, and the
6178 integer arg registers in the next 48 bytes. This is only done,
6179 however, if any integer registers need to be stored.
6180
6181 If no integer registers need be stored, then we must subtract 48
6182 in order to account for the integer arg registers which are counted
6183 in argsize above, but which are not actually stored on the stack.
6184 Must further be careful here about structures straddling the last
6185 integer argument register; that futzes with pretend_args_size,
6186 which changes the meaning of AP. */
6187
6188 if (NUM_ARGS < 6)
6189 offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
6190 else
6191 offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size;
6192
6193 if (TARGET_ABI_OPEN_VMS)
6194 {
6195 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6196 t = fold_build_pointer_plus_hwi (t, offset + NUM_ARGS * UNITS_PER_WORD);
6197 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
6198 TREE_SIDE_EFFECTS (t) = 1;
6199 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6200 }
6201 else
6202 {
6203 base_field = TYPE_FIELDS (TREE_TYPE (valist));
6204 offset_field = DECL_CHAIN (base_field);
6205
6206 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6207 valist, base_field, NULL_TREE);
6208 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6209 valist, offset_field, NULL_TREE);
6210
6211 t = make_tree (ptr_type_node, virtual_incoming_args_rtx);
6212 t = fold_build_pointer_plus_hwi (t, offset);
6213 t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t);
6214 TREE_SIDE_EFFECTS (t) = 1;
6215 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6216
6217 t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD);
6218 t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t);
6219 TREE_SIDE_EFFECTS (t) = 1;
6220 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6221 }
6222 }
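/* Worked example (OSF, illustrative): for "int f (int a, ...)" we have
   NUM_ARGS == 1, so the code above stores

       ap.__base   = AP + 48
       ap.__offset = 1 * 8 = 8

   The first va_arg (ap, int) then reads *(__base + __offset) = *(AP + 56),
   the slot where $17 was spilled by the varargs prologue; va_arg (ap, double)
   reads *(__base + __offset - 48) = *(AP + 8), the slot holding $f17.  Once
   __offset reaches 48 the same formula walks the caller-pushed stack
   arguments starting at AP + 96.  */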
6223
6224 static tree
6225 alpha_gimplify_va_arg_1 (tree type, tree base, tree offset,
6226 gimple_seq *pre_p)
6227 {
6228 tree type_size, ptr_type, addend, t, addr;
6229 gimple_seq internal_post;
6230
6231 /* If the type could not be passed in registers, skip the block
6232 reserved for the registers. */
6233 if (targetm.calls.must_pass_in_stack (TYPE_MODE (type), type))
6234 {
6235 t = build_int_cst (TREE_TYPE (offset), 6*8);
6236 gimplify_assign (offset,
6237 build2 (MAX_EXPR, TREE_TYPE (offset), offset, t),
6238 pre_p);
6239 }
6240
6241 addend = offset;
6242 ptr_type = build_pointer_type_for_mode (type, ptr_mode, true);
6243
6244 if (TREE_CODE (type) == COMPLEX_TYPE)
6245 {
6246 tree real_part, imag_part, real_temp;
6247
6248 real_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6249 offset, pre_p);
6250
6251 /* Copy the value into a new temporary, lest the formal temporary
6252 be reused out from under us. */
6253 real_temp = get_initialized_tmp_var (real_part, pre_p, NULL);
6254
6255 imag_part = alpha_gimplify_va_arg_1 (TREE_TYPE (type), base,
6256 offset, pre_p);
6257
6258 return build2 (COMPLEX_EXPR, type, real_temp, imag_part);
6259 }
6260 else if (TREE_CODE (type) == REAL_TYPE)
6261 {
6262 tree fpaddend, cond, fourtyeight;
6263
6264 fourtyeight = build_int_cst (TREE_TYPE (addend), 6*8);
6265 fpaddend = fold_build2 (MINUS_EXPR, TREE_TYPE (addend),
6266 addend, fourtyeight);
6267 cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight);
6268 addend = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond,
6269 fpaddend, addend);
6270 }
6271
6272 /* Build the final address and force that value into a temporary. */
6273 addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend);
6274 internal_post = NULL;
6275 gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue);
6276 gimple_seq_add_seq (pre_p, internal_post);
6277
6278 /* Update the offset field. */
6279 type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type));
6280 if (type_size == NULL || TREE_OVERFLOW (type_size))
6281 t = size_zero_node;
6282 else
6283 {
6284 t = size_binop (PLUS_EXPR, type_size, size_int (7));
6285 t = size_binop (TRUNC_DIV_EXPR, t, size_int (8));
6286 t = size_binop (MULT_EXPR, t, size_int (8));
6287 }
6288 t = fold_convert (TREE_TYPE (offset), t);
6289 gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t),
6290 pre_p);
6291
6292 return build_va_arg_indirect_ref (addr);
6293 }
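/* For instance, a 12-byte struct advances the offset by
   ((12 + 7) / 8) * 8 = 16, so every argument consumes whole 8-byte
   slots.  */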
6294
6295 static tree
6296 alpha_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6297 gimple_seq *post_p)
6298 {
6299 tree offset_field, base_field, offset, base, t, r;
6300 bool indirect;
6301
6302 if (TARGET_ABI_OPEN_VMS)
6303 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6304
6305 base_field = TYPE_FIELDS (va_list_type_node);
6306 offset_field = DECL_CHAIN (base_field);
6307 base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field),
6308 valist, base_field, NULL_TREE);
6309 offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field),
6310 valist, offset_field, NULL_TREE);
6311
6312 /* Pull the fields of the structure out into temporaries. Since we never
6313 modify the base field, we can use a formal temporary. Sign-extend the
6314 offset field so that it's the proper width for pointer arithmetic. */
6315 base = get_formal_tmp_var (base_field, pre_p);
6316
6317 t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field);
6318 offset = get_initialized_tmp_var (t, pre_p, NULL);
6319
6320 indirect = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6321 if (indirect)
6322 type = build_pointer_type_for_mode (type, ptr_mode, true);
6323
6324 /* Find the value. Note that this will be a stable indirection, or
6325 a composite of stable indirections in the case of complex. */
6326 r = alpha_gimplify_va_arg_1 (type, base, offset, pre_p);
6327
6328 /* Stuff the offset temporary back into its field. */
6329 gimplify_assign (unshare_expr (offset_field),
6330 fold_convert (TREE_TYPE (offset_field), offset), pre_p);
6331
6332 if (indirect)
6333 r = build_va_arg_indirect_ref (r);
6334
6335 return r;
6336 }
6337 \f
6338 /* Builtins. */
6339
6340 enum alpha_builtin
6341 {
6342 ALPHA_BUILTIN_CMPBGE,
6343 ALPHA_BUILTIN_EXTBL,
6344 ALPHA_BUILTIN_EXTWL,
6345 ALPHA_BUILTIN_EXTLL,
6346 ALPHA_BUILTIN_EXTQL,
6347 ALPHA_BUILTIN_EXTWH,
6348 ALPHA_BUILTIN_EXTLH,
6349 ALPHA_BUILTIN_EXTQH,
6350 ALPHA_BUILTIN_INSBL,
6351 ALPHA_BUILTIN_INSWL,
6352 ALPHA_BUILTIN_INSLL,
6353 ALPHA_BUILTIN_INSQL,
6354 ALPHA_BUILTIN_INSWH,
6355 ALPHA_BUILTIN_INSLH,
6356 ALPHA_BUILTIN_INSQH,
6357 ALPHA_BUILTIN_MSKBL,
6358 ALPHA_BUILTIN_MSKWL,
6359 ALPHA_BUILTIN_MSKLL,
6360 ALPHA_BUILTIN_MSKQL,
6361 ALPHA_BUILTIN_MSKWH,
6362 ALPHA_BUILTIN_MSKLH,
6363 ALPHA_BUILTIN_MSKQH,
6364 ALPHA_BUILTIN_UMULH,
6365 ALPHA_BUILTIN_ZAP,
6366 ALPHA_BUILTIN_ZAPNOT,
6367 ALPHA_BUILTIN_AMASK,
6368 ALPHA_BUILTIN_IMPLVER,
6369 ALPHA_BUILTIN_RPCC,
6370 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6371 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER,
6372
6373 /* TARGET_MAX */
6374 ALPHA_BUILTIN_MINUB8,
6375 ALPHA_BUILTIN_MINSB8,
6376 ALPHA_BUILTIN_MINUW4,
6377 ALPHA_BUILTIN_MINSW4,
6378 ALPHA_BUILTIN_MAXUB8,
6379 ALPHA_BUILTIN_MAXSB8,
6380 ALPHA_BUILTIN_MAXUW4,
6381 ALPHA_BUILTIN_MAXSW4,
6382 ALPHA_BUILTIN_PERR,
6383 ALPHA_BUILTIN_PKLB,
6384 ALPHA_BUILTIN_PKWB,
6385 ALPHA_BUILTIN_UNPKBL,
6386 ALPHA_BUILTIN_UNPKBW,
6387
6388 /* TARGET_CIX */
6389 ALPHA_BUILTIN_CTTZ,
6390 ALPHA_BUILTIN_CTLZ,
6391 ALPHA_BUILTIN_CTPOP,
6392
6393 ALPHA_BUILTIN_max
6394 };
6395
6396 static enum insn_code const code_for_builtin[ALPHA_BUILTIN_max] = {
6397 CODE_FOR_builtin_cmpbge,
6398 CODE_FOR_extbl,
6399 CODE_FOR_extwl,
6400 CODE_FOR_extll,
6401 CODE_FOR_extql,
6402 CODE_FOR_extwh,
6403 CODE_FOR_extlh,
6404 CODE_FOR_extqh,
6405 CODE_FOR_builtin_insbl,
6406 CODE_FOR_builtin_inswl,
6407 CODE_FOR_builtin_insll,
6408 CODE_FOR_insql,
6409 CODE_FOR_inswh,
6410 CODE_FOR_inslh,
6411 CODE_FOR_insqh,
6412 CODE_FOR_mskbl,
6413 CODE_FOR_mskwl,
6414 CODE_FOR_mskll,
6415 CODE_FOR_mskql,
6416 CODE_FOR_mskwh,
6417 CODE_FOR_msklh,
6418 CODE_FOR_mskqh,
6419 CODE_FOR_umuldi3_highpart,
6420 CODE_FOR_builtin_zap,
6421 CODE_FOR_builtin_zapnot,
6422 CODE_FOR_builtin_amask,
6423 CODE_FOR_builtin_implver,
6424 CODE_FOR_builtin_rpcc,
6425 CODE_FOR_builtin_establish_vms_condition_handler,
6426 CODE_FOR_builtin_revert_vms_condition_handler,
6427
6428 /* TARGET_MAX */
6429 CODE_FOR_builtin_minub8,
6430 CODE_FOR_builtin_minsb8,
6431 CODE_FOR_builtin_minuw4,
6432 CODE_FOR_builtin_minsw4,
6433 CODE_FOR_builtin_maxub8,
6434 CODE_FOR_builtin_maxsb8,
6435 CODE_FOR_builtin_maxuw4,
6436 CODE_FOR_builtin_maxsw4,
6437 CODE_FOR_builtin_perr,
6438 CODE_FOR_builtin_pklb,
6439 CODE_FOR_builtin_pkwb,
6440 CODE_FOR_builtin_unpkbl,
6441 CODE_FOR_builtin_unpkbw,
6442
6443 /* TARGET_CIX */
6444 CODE_FOR_ctzdi2,
6445 CODE_FOR_clzdi2,
6446 CODE_FOR_popcountdi2
6447 };
6448
6449 struct alpha_builtin_def
6450 {
6451 const char *name;
6452 enum alpha_builtin code;
6453 unsigned int target_mask;
6454 bool is_const;
6455 };
6456
6457 static struct alpha_builtin_def const zero_arg_builtins[] = {
6458 { "__builtin_alpha_implver", ALPHA_BUILTIN_IMPLVER, 0, true },
6459 { "__builtin_alpha_rpcc", ALPHA_BUILTIN_RPCC, 0, false }
6460 };
6461
6462 static struct alpha_builtin_def const one_arg_builtins[] = {
6463 { "__builtin_alpha_amask", ALPHA_BUILTIN_AMASK, 0, true },
6464 { "__builtin_alpha_pklb", ALPHA_BUILTIN_PKLB, MASK_MAX, true },
6465 { "__builtin_alpha_pkwb", ALPHA_BUILTIN_PKWB, MASK_MAX, true },
6466 { "__builtin_alpha_unpkbl", ALPHA_BUILTIN_UNPKBL, MASK_MAX, true },
6467 { "__builtin_alpha_unpkbw", ALPHA_BUILTIN_UNPKBW, MASK_MAX, true },
6468 { "__builtin_alpha_cttz", ALPHA_BUILTIN_CTTZ, MASK_CIX, true },
6469 { "__builtin_alpha_ctlz", ALPHA_BUILTIN_CTLZ, MASK_CIX, true },
6470 { "__builtin_alpha_ctpop", ALPHA_BUILTIN_CTPOP, MASK_CIX, true }
6471 };
6472
6473 static struct alpha_builtin_def const two_arg_builtins[] = {
6474 { "__builtin_alpha_cmpbge", ALPHA_BUILTIN_CMPBGE, 0, true },
6475 { "__builtin_alpha_extbl", ALPHA_BUILTIN_EXTBL, 0, true },
6476 { "__builtin_alpha_extwl", ALPHA_BUILTIN_EXTWL, 0, true },
6477 { "__builtin_alpha_extll", ALPHA_BUILTIN_EXTLL, 0, true },
6478 { "__builtin_alpha_extql", ALPHA_BUILTIN_EXTQL, 0, true },
6479 { "__builtin_alpha_extwh", ALPHA_BUILTIN_EXTWH, 0, true },
6480 { "__builtin_alpha_extlh", ALPHA_BUILTIN_EXTLH, 0, true },
6481 { "__builtin_alpha_extqh", ALPHA_BUILTIN_EXTQH, 0, true },
6482 { "__builtin_alpha_insbl", ALPHA_BUILTIN_INSBL, 0, true },
6483 { "__builtin_alpha_inswl", ALPHA_BUILTIN_INSWL, 0, true },
6484 { "__builtin_alpha_insll", ALPHA_BUILTIN_INSLL, 0, true },
6485 { "__builtin_alpha_insql", ALPHA_BUILTIN_INSQL, 0, true },
6486 { "__builtin_alpha_inswh", ALPHA_BUILTIN_INSWH, 0, true },
6487 { "__builtin_alpha_inslh", ALPHA_BUILTIN_INSLH, 0, true },
6488 { "__builtin_alpha_insqh", ALPHA_BUILTIN_INSQH, 0, true },
6489 { "__builtin_alpha_mskbl", ALPHA_BUILTIN_MSKBL, 0, true },
6490 { "__builtin_alpha_mskwl", ALPHA_BUILTIN_MSKWL, 0, true },
6491 { "__builtin_alpha_mskll", ALPHA_BUILTIN_MSKLL, 0, true },
6492 { "__builtin_alpha_mskql", ALPHA_BUILTIN_MSKQL, 0, true },
6493 { "__builtin_alpha_mskwh", ALPHA_BUILTIN_MSKWH, 0, true },
6494 { "__builtin_alpha_msklh", ALPHA_BUILTIN_MSKLH, 0, true },
6495 { "__builtin_alpha_mskqh", ALPHA_BUILTIN_MSKQH, 0, true },
6496 { "__builtin_alpha_umulh", ALPHA_BUILTIN_UMULH, 0, true },
6497 { "__builtin_alpha_zap", ALPHA_BUILTIN_ZAP, 0, true },
6498 { "__builtin_alpha_zapnot", ALPHA_BUILTIN_ZAPNOT, 0, true },
6499 { "__builtin_alpha_minub8", ALPHA_BUILTIN_MINUB8, MASK_MAX, true },
6500 { "__builtin_alpha_minsb8", ALPHA_BUILTIN_MINSB8, MASK_MAX, true },
6501 { "__builtin_alpha_minuw4", ALPHA_BUILTIN_MINUW4, MASK_MAX, true },
6502 { "__builtin_alpha_minsw4", ALPHA_BUILTIN_MINSW4, MASK_MAX, true },
6503 { "__builtin_alpha_maxub8", ALPHA_BUILTIN_MAXUB8, MASK_MAX, true },
6504 { "__builtin_alpha_maxsb8", ALPHA_BUILTIN_MAXSB8, MASK_MAX, true },
6505 { "__builtin_alpha_maxuw4", ALPHA_BUILTIN_MAXUW4, MASK_MAX, true },
6506 { "__builtin_alpha_maxsw4", ALPHA_BUILTIN_MAXSW4, MASK_MAX, true },
6507 { "__builtin_alpha_perr", ALPHA_BUILTIN_PERR, MASK_MAX, true }
6508 };
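/* User-level sketch (illustrative, not part of this file): the builtins
   registered below are called like ordinary functions operating on 64-bit
   unsigned integers, e.g.

       unsigned long
       first_ge_byte (unsigned long a, unsigned long b)
       {
         unsigned long m = __builtin_alpha_cmpbge (a, b);
         return __builtin_alpha_cttz (m);   -- CIX only, e.g. -mcpu=ev67
       }

   which returns the index of the first byte of A that compares >= the
   corresponding byte of B, or 64 if there is none.  */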
6509
6510 static GTY(()) tree alpha_dimode_u;
6511 static GTY(()) tree alpha_v8qi_u;
6512 static GTY(()) tree alpha_v8qi_s;
6513 static GTY(()) tree alpha_v4hi_u;
6514 static GTY(()) tree alpha_v4hi_s;
6515
6516 static GTY(()) tree alpha_builtins[(int) ALPHA_BUILTIN_max];
6517
6518 /* Return the alpha builtin for CODE. */
6519
6520 static tree
6521 alpha_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
6522 {
6523 if (code >= ALPHA_BUILTIN_max)
6524 return error_mark_node;
6525 return alpha_builtins[code];
6526 }
6527
6528 /* Helper function of alpha_init_builtins. Add the built-in specified
6529 by NAME, TYPE, CODE, and ECF. */
6530
6531 static void
6532 alpha_builtin_function (const char *name, tree ftype,
6533 enum alpha_builtin code, unsigned ecf)
6534 {
6535 tree decl = add_builtin_function (name, ftype, (int) code,
6536 BUILT_IN_MD, NULL, NULL_TREE);
6537
6538 if (ecf & ECF_CONST)
6539 TREE_READONLY (decl) = 1;
6540 if (ecf & ECF_NOTHROW)
6541 TREE_NOTHROW (decl) = 1;
6542
6543 alpha_builtins [(int) code] = decl;
6544 }
6545
6546 /* Helper function of alpha_init_builtins. Add the COUNT built-in
6547 functions pointed to by P, with function type FTYPE. */
6548
6549 static void
6550 alpha_add_builtins (const struct alpha_builtin_def *p, size_t count,
6551 tree ftype)
6552 {
6553 size_t i;
6554
6555 for (i = 0; i < count; ++i, ++p)
6556 if ((target_flags & p->target_mask) == p->target_mask)
6557 alpha_builtin_function (p->name, ftype, p->code,
6558 (p->is_const ? ECF_CONST : 0) | ECF_NOTHROW);
6559 }
6560
6561 static void
6562 alpha_init_builtins (void)
6563 {
6564 tree ftype;
6565
6566 alpha_dimode_u = lang_hooks.types.type_for_mode (DImode, 1);
6567 alpha_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8);
6568 alpha_v8qi_s = build_vector_type (intQI_type_node, 8);
6569 alpha_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4);
6570 alpha_v4hi_s = build_vector_type (intHI_type_node, 4);
6571
6572 ftype = build_function_type_list (alpha_dimode_u, NULL_TREE);
6573 alpha_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype);
6574
6575 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u, NULL_TREE);
6576 alpha_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype);
6577
6578 ftype = build_function_type_list (alpha_dimode_u, alpha_dimode_u,
6579 alpha_dimode_u, NULL_TREE);
6580 alpha_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype);
6581
6582 if (TARGET_ABI_OPEN_VMS)
6583 {
6584 ftype = build_function_type_list (ptr_type_node, ptr_type_node,
6585 NULL_TREE);
6586 alpha_builtin_function ("__builtin_establish_vms_condition_handler",
6587 ftype,
6588 ALPHA_BUILTIN_ESTABLISH_VMS_CONDITION_HANDLER,
6589 0);
6590
6591 ftype = build_function_type_list (ptr_type_node, void_type_node,
6592 NULL_TREE);
6593 alpha_builtin_function ("__builtin_revert_vms_condition_handler", ftype,
6594 ALPHA_BUILTIN_REVERT_VMS_CONDITION_HANDLER, 0);
6595
6596 vms_patch_builtins ();
6597 }
6598 }
6599
6600 /* Expand an expression EXP that calls a built-in function,
6601 with result going to TARGET if that's convenient
6602 (and in mode MODE if that's convenient).
6603 SUBTARGET may be used as the target for computing one of EXP's operands.
6604 IGNORE is nonzero if the value is to be ignored. */
6605
6606 static rtx
6607 alpha_expand_builtin (tree exp, rtx target,
6608 rtx subtarget ATTRIBUTE_UNUSED,
6609 machine_mode mode ATTRIBUTE_UNUSED,
6610 int ignore ATTRIBUTE_UNUSED)
6611 {
6612 #define MAX_ARGS 2
6613
6614 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6615 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6616 tree arg;
6617 call_expr_arg_iterator iter;
6618 enum insn_code icode;
6619 rtx op[MAX_ARGS], pat;
6620 int arity;
6621 bool nonvoid;
6622
6623 if (fcode >= ALPHA_BUILTIN_max)
6624 internal_error ("bad builtin fcode");
6625 icode = code_for_builtin[fcode];
6626 if (icode == 0)
6627 internal_error ("bad builtin fcode");
6628
6629 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
6630
6631 arity = 0;
6632 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
6633 {
6634 const struct insn_operand_data *insn_op;
6635
6636 if (arg == error_mark_node)
6637 return NULL_RTX;
6638 if (arity > MAX_ARGS)
6639 return NULL_RTX;
6640
6641 insn_op = &insn_data[icode].operand[arity + nonvoid];
6642
6643 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
6644
6645 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
6646 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
6647 arity++;
6648 }
6649
6650 if (nonvoid)
6651 {
6652 machine_mode tmode = insn_data[icode].operand[0].mode;
6653 if (!target
6654 || GET_MODE (target) != tmode
6655 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
6656 target = gen_reg_rtx (tmode);
6657 }
6658
6659 switch (arity)
6660 {
6661 case 0:
6662 pat = GEN_FCN (icode) (target);
6663 break;
6664 case 1:
6665 if (nonvoid)
6666 pat = GEN_FCN (icode) (target, op[0]);
6667 else
6668 pat = GEN_FCN (icode) (op[0]);
6669 break;
6670 case 2:
6671 pat = GEN_FCN (icode) (target, op[0], op[1]);
6672 break;
6673 default:
6674 gcc_unreachable ();
6675 }
6676 if (!pat)
6677 return NULL_RTX;
6678 emit_insn (pat);
6679
6680 if (nonvoid)
6681 return target;
6682 else
6683 return const0_rtx;
6684 }
6685
6686 /* Fold the builtin for the CMPBGE instruction. This is a vector comparison
6687 with an 8-bit output vector. OPINT contains the integer operands; bit N
6688 of OP_CONST is set if OPINT[N] is valid. */
6689
6690 static tree
6691 alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
6692 {
6693 if (op_const == 3)
6694 {
6695 int i, val;
6696 for (i = 0, val = 0; i < 8; ++i)
6697 {
6698 unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff;
6699 unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff;
6700 if (c0 >= c1)
6701 val |= 1 << i;
6702 }
6703 return build_int_cst (alpha_dimode_u, val);
6704 }
6705 else if (op_const == 2 && opint[1] == 0)
6706 return build_int_cst (alpha_dimode_u, 0xff);
6707 return NULL;
6708 }
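/* Worked example of the fold above: with opint[0] = 0x0102030405060708 and
   opint[1] = 0x0404040404040404, bytes 0-4 (0x08, 0x07, 0x06, 0x05, 0x04)
   compare >= 0x04 while bytes 5-7 do not, so the result is 0x1f.  With only
   the second operand known and equal to zero, every byte trivially compares
   >=, hence the 0xff result.  */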
6709
6710 /* Fold the builtin for the ZAPNOT instruction. This is essentially a
6711 specialized form of an AND operation. Other byte manipulation instructions
6712 are defined in terms of this instruction, so this is also used as a
6713 subroutine for other builtins.
6714
6715 OP contains the tree operands; OPINT contains the extracted integer values.
6716 Bit N of OP_CONST is set if OPINT[N] is valid. OP may be null if only
6717 OPINT may be considered. */
6718
6719 static tree
6720 alpha_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[],
6721 long op_const)
6722 {
6723 if (op_const & 2)
6724 {
6725 unsigned HOST_WIDE_INT mask = 0;
6726 int i;
6727
6728 for (i = 0; i < 8; ++i)
6729 if ((opint[1] >> i) & 1)
6730 mask |= (unsigned HOST_WIDE_INT)0xff << (i * 8);
6731
6732 if (op_const & 1)
6733 return build_int_cst (alpha_dimode_u, opint[0] & mask);
6734
6735 if (op)
6736 return fold_build2 (BIT_AND_EXPR, alpha_dimode_u, op[0],
6737 build_int_cst (alpha_dimode_u, mask));
6738 }
6739 else if ((op_const & 1) && opint[0] == 0)
6740 return build_int_cst (alpha_dimode_u, 0);
6741 return NULL;
6742 }
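/* Example of the fold above: a constant selector of 0x0f expands to the
   byte mask 0x00000000ffffffff, so __builtin_alpha_zapnot (x, 0x0f) folds
   to x & 0xffffffff, and __builtin_alpha_zap (x, 0x0f), whose selector is
   complemented by the caller, folds to x & 0xffffffff00000000.  */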
6743
6744 /* Fold the builtins for the EXT family of instructions. */
6745
6746 static tree
6747 alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
6748 long op_const, unsigned HOST_WIDE_INT bytemask,
6749 bool is_high)
6750 {
6751 long zap_const = 2;
6752 tree *zap_op = NULL;
6753
6754 if (op_const & 2)
6755 {
6756 unsigned HOST_WIDE_INT loc;
6757
6758 loc = opint[1] & 7;
6759 loc *= BITS_PER_UNIT;
6760
6761 if (loc != 0)
6762 {
6763 if (op_const & 1)
6764 {
6765 unsigned HOST_WIDE_INT temp = opint[0];
6766 if (is_high)
6767 temp <<= loc;
6768 else
6769 temp >>= loc;
6770 opint[0] = temp;
6771 zap_const = 3;
6772 }
6773 }
6774 else
6775 zap_op = op;
6776 }
6777
6778 opint[1] = bytemask;
6779 return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
6780 }
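/* Examples of the fold above: with both operands constant,
   __builtin_alpha_extwl (x, 5) is computed as (x >> 40) & 0xffff; with only
   the position operand constant and equal to 0 (loc == 0), it folds to the
   expression x & 0xffff via alpha_fold_builtin_zapnot.  */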
6781
6782 /* Fold the builtins for the INS family of instructions. */
6783
6784 static tree
6785 alpha_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[],
6786 long op_const, unsigned HOST_WIDE_INT bytemask,
6787 bool is_high)
6788 {
6789 if ((op_const & 1) && opint[0] == 0)
6790 return build_int_cst (alpha_dimode_u, 0);
6791
6792 if (op_const & 2)
6793 {
6794 unsigned HOST_WIDE_INT temp, loc, byteloc;
6795 tree *zap_op = NULL;
6796
6797 loc = opint[1] & 7;
6798 bytemask <<= loc;
6799
6800 temp = opint[0];
6801 if (is_high)
6802 {
6803 byteloc = (64 - (loc * 8)) & 0x3f;
6804 if (byteloc == 0)
6805 zap_op = op;
6806 else
6807 temp >>= byteloc;
6808 bytemask >>= 8;
6809 }
6810 else
6811 {
6812 byteloc = loc * 8;
6813 if (byteloc == 0)
6814 zap_op = op;
6815 else
6816 temp <<= byteloc;
6817 }
6818
6819 opint[0] = temp;
6820 opint[1] = bytemask;
6821 return alpha_fold_builtin_zapnot (zap_op, opint, op_const);
6822 }
6823
6824 return NULL;
6825 }
6826
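/* Fold the builtins for the MSK family of instructions.  */
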
6827 static tree
6828 alpha_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[],
6829 long op_const, unsigned HOST_WIDE_INT bytemask,
6830 bool is_high)
6831 {
6832 if (op_const & 2)
6833 {
6834 unsigned HOST_WIDE_INT loc;
6835
6836 loc = opint[1] & 7;
6837 bytemask <<= loc;
6838
6839 if (is_high)
6840 bytemask >>= 8;
6841
6842 opint[1] = bytemask ^ 0xff;
6843 }
6844
6845 return alpha_fold_builtin_zapnot (op, opint, op_const);
6846 }
6847
6848 static tree
6849 alpha_fold_vector_minmax (enum tree_code code, tree op[], tree vtype)
6850 {
6851 tree op0 = fold_convert (vtype, op[0]);
6852 tree op1 = fold_convert (vtype, op[1]);
6853 tree val = fold_build2 (code, vtype, op0, op1);
6854 return fold_build1 (VIEW_CONVERT_EXPR, alpha_dimode_u, val);
6855 }
6856
6857 static tree
6858 alpha_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const)
6859 {
6860 unsigned HOST_WIDE_INT temp = 0;
6861 int i;
6862
6863 if (op_const != 3)
6864 return NULL;
6865
6866 for (i = 0; i < 8; ++i)
6867 {
6868 unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff;
6869 unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff;
6870 if (a >= b)
6871 temp += a - b;
6872 else
6873 temp += b - a;
6874 }
6875
6876 return build_int_cst (alpha_dimode_u, temp);
6877 }
6878
6879 static tree
6880 alpha_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const)
6881 {
6882 unsigned HOST_WIDE_INT temp;
6883
6884 if (op_const == 0)
6885 return NULL;
6886
6887 temp = opint[0] & 0xff;
6888 temp |= (opint[0] >> 24) & 0xff00;
6889
6890 return build_int_cst (alpha_dimode_u, temp);
6891 }
6892
6893 static tree
6894 alpha_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const)
6895 {
6896 unsigned HOST_WIDE_INT temp;
6897
6898 if (op_const == 0)
6899 return NULL;
6900
6901 temp = opint[0] & 0xff;
6902 temp |= (opint[0] >> 8) & 0xff00;
6903 temp |= (opint[0] >> 16) & 0xff0000;
6904 temp |= (opint[0] >> 24) & 0xff000000;
6905
6906 return build_int_cst (alpha_dimode_u, temp);
6907 }
6908
6909 static tree
6910 alpha_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const)
6911 {
6912 unsigned HOST_WIDE_INT temp;
6913
6914 if (op_const == 0)
6915 return NULL;
6916
6917 temp = opint[0] & 0xff;
6918 temp |= (opint[0] & 0xff00) << 24;
6919
6920 return build_int_cst (alpha_dimode_u, temp);
6921 }
6922
6923 static tree
6924 alpha_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const)
6925 {
6926 unsigned HOST_WIDE_INT temp;
6927
6928 if (op_const == 0)
6929 return NULL;
6930
6931 temp = opint[0] & 0xff;
6932 temp |= (opint[0] & 0x0000ff00) << 8;
6933 temp |= (opint[0] & 0x00ff0000) << 16;
6934 temp |= (opint[0] & 0xff000000) << 24;
6935
6936 return build_int_cst (alpha_dimode_u, temp);
6937 }
6938
6939 static tree
6940 alpha_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const)
6941 {
6942 unsigned HOST_WIDE_INT temp;
6943
6944 if (op_const == 0)
6945 return NULL;
6946
6947 if (opint[0] == 0)
6948 temp = 64;
6949 else
6950 temp = exact_log2 (opint[0] & -opint[0]);
6951
6952 return build_int_cst (alpha_dimode_u, temp);
6953 }
6954
6955 static tree
6956 alpha_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const)
6957 {
6958 unsigned HOST_WIDE_INT temp;
6959
6960 if (op_const == 0)
6961 return NULL;
6962
6963 if (opint[0] == 0)
6964 temp = 64;
6965 else
6966 temp = 64 - floor_log2 (opint[0]) - 1;
6967
6968 return build_int_cst (alpha_dimode_u, temp);
6969 }
6970
6971 static tree
6972 alpha_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const)
6973 {
6974 unsigned HOST_WIDE_INT temp, op;
6975
6976 if (op_const == 0)
6977 return NULL;
6978
6979 op = opint[0];
6980 temp = 0;
6981 while (op)
6982 temp++, op &= op - 1;
6983
6984 return build_int_cst (alpha_dimode_u, temp);
6985 }
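/* Quick checks of the three folds above (illustrative):
     cttz  (0x50) = exact_log2 (0x50 & -0x50) = exact_log2 (0x10) = 4
     ctlz  (0x50) = 64 - floor_log2 (0x50) - 1 = 64 - 6 - 1 = 57
     ctpop (0x50) = 2   (two iterations of op &= op - 1)
   and for a zero operand they return 64, 64 and 0 respectively.  */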
6986
6987 /* Fold one of our builtin functions. */
6988
6989 static tree
6990 alpha_fold_builtin (tree fndecl, int n_args, tree *op,
6991 bool ignore ATTRIBUTE_UNUSED)
6992 {
6993 unsigned HOST_WIDE_INT opint[MAX_ARGS];
6994 long op_const = 0;
6995 int i;
6996
6997 if (n_args > MAX_ARGS)
6998 return NULL;
6999
7000 for (i = 0; i < n_args; i++)
7001 {
7002 tree arg = op[i];
7003 if (arg == error_mark_node)
7004 return NULL;
7005
7006 opint[i] = 0;
7007 if (TREE_CODE (arg) == INTEGER_CST)
7008 {
7009 op_const |= 1L << i;
7010 opint[i] = int_cst_value (arg);
7011 }
7012 }
7013
7014 switch (DECL_FUNCTION_CODE (fndecl))
7015 {
7016 case ALPHA_BUILTIN_CMPBGE:
7017 return alpha_fold_builtin_cmpbge (opint, op_const);
7018
7019 case ALPHA_BUILTIN_EXTBL:
7020 return alpha_fold_builtin_extxx (op, opint, op_const, 0x01, false);
7021 case ALPHA_BUILTIN_EXTWL:
7022 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, false);
7023 case ALPHA_BUILTIN_EXTLL:
7024 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, false);
7025 case ALPHA_BUILTIN_EXTQL:
7026 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, false);
7027 case ALPHA_BUILTIN_EXTWH:
7028 return alpha_fold_builtin_extxx (op, opint, op_const, 0x03, true);
7029 case ALPHA_BUILTIN_EXTLH:
7030 return alpha_fold_builtin_extxx (op, opint, op_const, 0x0f, true);
7031 case ALPHA_BUILTIN_EXTQH:
7032 return alpha_fold_builtin_extxx (op, opint, op_const, 0xff, true);
7033
7034 case ALPHA_BUILTIN_INSBL:
7035 return alpha_fold_builtin_insxx (op, opint, op_const, 0x01, false);
7036 case ALPHA_BUILTIN_INSWL:
7037 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, false);
7038 case ALPHA_BUILTIN_INSLL:
7039 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, false);
7040 case ALPHA_BUILTIN_INSQL:
7041 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, false);
7042 case ALPHA_BUILTIN_INSWH:
7043 return alpha_fold_builtin_insxx (op, opint, op_const, 0x03, true);
7044 case ALPHA_BUILTIN_INSLH:
7045 return alpha_fold_builtin_insxx (op, opint, op_const, 0x0f, true);
7046 case ALPHA_BUILTIN_INSQH:
7047 return alpha_fold_builtin_insxx (op, opint, op_const, 0xff, true);
7048
7049 case ALPHA_BUILTIN_MSKBL:
7050 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x01, false);
7051 case ALPHA_BUILTIN_MSKWL:
7052 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, false);
7053 case ALPHA_BUILTIN_MSKLL:
7054 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, false);
7055 case ALPHA_BUILTIN_MSKQL:
7056 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, false);
7057 case ALPHA_BUILTIN_MSKWH:
7058 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x03, true);
7059 case ALPHA_BUILTIN_MSKLH:
7060 return alpha_fold_builtin_mskxx (op, opint, op_const, 0x0f, true);
7061 case ALPHA_BUILTIN_MSKQH:
7062 return alpha_fold_builtin_mskxx (op, opint, op_const, 0xff, true);
7063
7064 case ALPHA_BUILTIN_ZAP:
7065 opint[1] ^= 0xff;
7066 /* FALLTHRU */
7067 case ALPHA_BUILTIN_ZAPNOT:
7068 return alpha_fold_builtin_zapnot (op, opint, op_const);
7069
7070 case ALPHA_BUILTIN_MINUB8:
7071 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_u);
7072 case ALPHA_BUILTIN_MINSB8:
7073 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v8qi_s);
7074 case ALPHA_BUILTIN_MINUW4:
7075 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_u);
7076 case ALPHA_BUILTIN_MINSW4:
7077 return alpha_fold_vector_minmax (MIN_EXPR, op, alpha_v4hi_s);
7078 case ALPHA_BUILTIN_MAXUB8:
7079 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_u);
7080 case ALPHA_BUILTIN_MAXSB8:
7081 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v8qi_s);
7082 case ALPHA_BUILTIN_MAXUW4:
7083 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_u);
7084 case ALPHA_BUILTIN_MAXSW4:
7085 return alpha_fold_vector_minmax (MAX_EXPR, op, alpha_v4hi_s);
7086
7087 case ALPHA_BUILTIN_PERR:
7088 return alpha_fold_builtin_perr (opint, op_const);
7089 case ALPHA_BUILTIN_PKLB:
7090 return alpha_fold_builtin_pklb (opint, op_const);
7091 case ALPHA_BUILTIN_PKWB:
7092 return alpha_fold_builtin_pkwb (opint, op_const);
7093 case ALPHA_BUILTIN_UNPKBL:
7094 return alpha_fold_builtin_unpkbl (opint, op_const);
7095 case ALPHA_BUILTIN_UNPKBW:
7096 return alpha_fold_builtin_unpkbw (opint, op_const);
7097
7098 case ALPHA_BUILTIN_CTTZ:
7099 return alpha_fold_builtin_cttz (opint, op_const);
7100 case ALPHA_BUILTIN_CTLZ:
7101 return alpha_fold_builtin_ctlz (opint, op_const);
7102 case ALPHA_BUILTIN_CTPOP:
7103 return alpha_fold_builtin_ctpop (opint, op_const);
7104
7105 case ALPHA_BUILTIN_AMASK:
7106 case ALPHA_BUILTIN_IMPLVER:
7107 case ALPHA_BUILTIN_RPCC:
7108 /* None of these are foldable at compile-time. */
7109 default:
7110 return NULL;
7111 }
7112 }
7113
7114 bool
7115 alpha_gimple_fold_builtin (gimple_stmt_iterator *gsi)
7116 {
7117 bool changed = false;
7118 gimple stmt = gsi_stmt (*gsi);
7119 tree call = gimple_call_fn (stmt);
7120 gimple new_stmt = NULL;
7121
7122 if (call)
7123 {
7124 tree fndecl = gimple_call_fndecl (stmt);
7125
7126 if (fndecl)
7127 {
7128 tree arg0, arg1;
7129
7130 switch (DECL_FUNCTION_CODE (fndecl))
7131 {
7132 case ALPHA_BUILTIN_UMULH:
7133 arg0 = gimple_call_arg (stmt, 0);
7134 arg1 = gimple_call_arg (stmt, 1);
7135
7136 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
7137 MULT_HIGHPART_EXPR, arg0, arg1);
7138 break;
7139 default:
7140 break;
7141 }
7142 }
7143 }
7144
7145 if (new_stmt)
7146 {
7147 gsi_replace (gsi, new_stmt, true);
7148 changed = true;
7149 }
7150
7151 return changed;
7152 }
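/* For reference (illustrative): the UMULH rewrite above is semantically

       unsigned long
       umulh (unsigned long a, unsigned long b)
       {
         return (unsigned long) (((unsigned __int128) a * b) >> 64);
       }

   i.e. the high 64 bits of the unsigned 128-bit product, expressed as
   MULT_HIGHPART_EXPR so the middle end can optimize it and later expand it
   back to the umulh instruction.  */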
7153 \f
7154 /* This page contains routines that are used to determine what the function
7155 prologue and epilogue code will do and write them out. */
7156
7157 /* Compute the size of the save area in the stack. */
7158
7159 /* These variables are used for communication between the following functions.
7160 They indicate various things about the current function being compiled
7161 that are used to tell what kind of prologue, epilogue and procedure
7162 descriptor to generate. */
7163
7164 /* Nonzero if we need a stack procedure. */
7165 enum alpha_procedure_types {PT_NULL = 0, PT_REGISTER = 1, PT_STACK = 2};
7166 static enum alpha_procedure_types alpha_procedure_type;
7167
7168 /* Register number (either FP or SP) that is used to unwind the frame. */
7169 static int vms_unwind_regno;
7170
7171 /* Register number used to save FP. We need not have one for RA since
7172 we don't modify it for register procedures. This is only defined
7173 for register frame procedures. */
7174 static int vms_save_fp_regno;
7175
7176 /* Register number used to reference objects off our PV. */
7177 static int vms_base_regno;
7178
7179 /* Compute register masks for saved registers. */
7180
7181 static void
7182 alpha_sa_mask (unsigned long *imaskP, unsigned long *fmaskP)
7183 {
7184 unsigned long imask = 0;
7185 unsigned long fmask = 0;
7186 unsigned int i;
7187
7188 /* When outputting a thunk, we don't have valid register life info,
7189 but assemble_start_function wants to output .frame and .mask
7190 directives. */
7191 if (cfun->is_thunk)
7192 {
7193 *imaskP = 0;
7194 *fmaskP = 0;
7195 return;
7196 }
7197
7198 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7199 imask |= (1UL << HARD_FRAME_POINTER_REGNUM);
7200
7201 /* One for every register we have to save. */
7202 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
7203 if (! fixed_regs[i] && ! call_used_regs[i]
7204 && df_regs_ever_live_p (i) && i != REG_RA)
7205 {
7206 if (i < 32)
7207 imask |= (1UL << i);
7208 else
7209 fmask |= (1UL << (i - 32));
7210 }
7211
7212 /* We need to restore these for the handler. */
7213 if (crtl->calls_eh_return)
7214 {
7215 for (i = 0; ; ++i)
7216 {
7217 unsigned regno = EH_RETURN_DATA_REGNO (i);
7218 if (regno == INVALID_REGNUM)
7219 break;
7220 imask |= 1UL << regno;
7221 }
7222 }
7223
7224 /* If any register spilled, then spill the return address also. */
7225 /* ??? This is required by the Digital stack unwind specification
7226 and isn't needed if we're doing Dwarf2 unwinding. */
7227 if (imask || fmask || alpha_ra_ever_killed ())
7228 imask |= (1UL << REG_RA);
7229
7230 *imaskP = imask;
7231 *fmaskP = fmask;
7232 }
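/* Example (illustrative): a function that clobbers the call-saved
   registers $9, $10 and $f2 ends up with

       imask = (1UL << 9) | (1UL << 10) | (1UL << REG_RA)
       fmask = 1UL << 2

   since any saved register forces the return address to be saved as
   well.  */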
7233
7234 int
7235 alpha_sa_size (void)
7236 {
7237 unsigned long mask[2];
7238 int sa_size = 0;
7239 int i, j;
7240
7241 alpha_sa_mask (&mask[0], &mask[1]);
7242
7243 for (j = 0; j < 2; ++j)
7244 for (i = 0; i < 32; ++i)
7245 if ((mask[j] >> i) & 1)
7246 sa_size++;
7247
7248 if (TARGET_ABI_OPEN_VMS)
7249 {
7250 /* Start with a stack procedure if we make any calls (REG_RA used), or
7251 need a frame pointer, with a register procedure if we otherwise need
7252 at least a slot, and with a null procedure in other cases. */
7253 if ((mask[0] >> REG_RA) & 1 || frame_pointer_needed)
7254 alpha_procedure_type = PT_STACK;
7255 else if (get_frame_size() != 0)
7256 alpha_procedure_type = PT_REGISTER;
7257 else
7258 alpha_procedure_type = PT_NULL;
7259
7260 /* Don't reserve space for saving FP & RA yet. Do that later after we've
7261 made the final decision on stack procedure vs register procedure. */
7262 if (alpha_procedure_type == PT_STACK)
7263 sa_size -= 2;
7264
7265 /* Decide whether to refer to objects off our PV via FP or PV.
7266 If we need FP for something else or if we receive a nonlocal
7267 goto (which expects PV to contain the value), we must use PV.
7268 Otherwise, start by assuming we can use FP. */
7269
7270 vms_base_regno
7271 = (frame_pointer_needed
7272 || cfun->has_nonlocal_label
7273 || alpha_procedure_type == PT_STACK
7274 || crtl->outgoing_args_size)
7275 ? REG_PV : HARD_FRAME_POINTER_REGNUM;
7276
7277 /* If we want to copy PV into FP, we need to find some register
7278 in which to save FP. */
7279
7280 vms_save_fp_regno = -1;
7281 if (vms_base_regno == HARD_FRAME_POINTER_REGNUM)
7282 for (i = 0; i < 32; i++)
7283 if (! fixed_regs[i] && call_used_regs[i] && ! df_regs_ever_live_p (i))
7284 vms_save_fp_regno = i;
7285
7286 /* A VMS condition handler requires a stack procedure in our
7287 implementation (this is not required by the calling standard).  */
7288 if ((vms_save_fp_regno == -1 && alpha_procedure_type == PT_REGISTER)
7289 || cfun->machine->uses_condition_handler)
7290 vms_base_regno = REG_PV, alpha_procedure_type = PT_STACK;
7291 else if (alpha_procedure_type == PT_NULL)
7292 vms_base_regno = REG_PV;
7293
7294 /* Stack unwinding should be done via FP unless we use it for PV. */
7295 vms_unwind_regno = (vms_base_regno == REG_PV
7296 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
7297
7298 /* If this is a stack procedure, allow space for saving FP, RA and
7299 a condition handler slot if needed. */
7300 if (alpha_procedure_type == PT_STACK)
7301 sa_size += 2 + cfun->machine->uses_condition_handler;
7302 }
7303 else
7304 {
7305 /* Our size must be even (multiple of 16 bytes). */
7306 if (sa_size & 1)
7307 sa_size++;
7308 }
7309
7310 return sa_size * 8;
7311 }
7312
7313 /* Define the offset between two registers, one to be eliminated,
7314 and the other its replacement, at the start of a routine. */
7315
7316 HOST_WIDE_INT
7317 alpha_initial_elimination_offset (unsigned int from,
7318 unsigned int to ATTRIBUTE_UNUSED)
7319 {
7320 HOST_WIDE_INT ret;
7321
7322 ret = alpha_sa_size ();
7323 ret += ALPHA_ROUND (crtl->outgoing_args_size);
7324
7325 switch (from)
7326 {
7327 case FRAME_POINTER_REGNUM:
7328 break;
7329
7330 case ARG_POINTER_REGNUM:
7331 ret += (ALPHA_ROUND (get_frame_size ()
7332 + crtl->args.pretend_args_size)
7333 - crtl->args.pretend_args_size);
7334 break;
7335
7336 default:
7337 gcc_unreachable ();
7338 }
7339
7340 return ret;
7341 }
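/* Worked example (OSF, illustrative numbers): with a 48-byte register save
   area, 32 bytes of outgoing arguments, a 100-byte frame and no pretended
   arguments,

       FRAME_POINTER  elimination offset = 48 + 32                = 80
       ARG_POINTER    elimination offset = 80 + ALPHA_ROUND (100) = 192

   ALPHA_ROUND rounding everything up to 16-byte multiples.  */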
7342
7343 #if TARGET_ABI_OPEN_VMS
7344
7345 /* Worker function for TARGET_CAN_ELIMINATE. */
7346
7347 static bool
7348 alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
7349 {
7350 /* We need the alpha_procedure_type to decide. Evaluate it now. */
7351 alpha_sa_size ();
7352
7353 switch (alpha_procedure_type)
7354 {
7355 case PT_NULL:
7356 /* NULL procedures have no frame of their own and we only
7357 know how to resolve from the current stack pointer. */
7358 return to == STACK_POINTER_REGNUM;
7359
7360 case PT_REGISTER:
7361 case PT_STACK:
7362 /* We always eliminate except to the stack pointer if there is no
7363 usable frame pointer at hand. */
7364 return (to != STACK_POINTER_REGNUM
7365 || vms_unwind_regno != HARD_FRAME_POINTER_REGNUM);
7366 }
7367
7368 gcc_unreachable ();
7369 }
7370
7371 /* FROM is to be eliminated for TO. Return the offset so that TO+offset
7372 designates the same location as FROM. */
7373
7374 HOST_WIDE_INT
7375 alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
7376 {
7377 /* The only possible attempts we ever expect are ARG or FRAME_PTR to
7378 HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
7379 on the proper computations and will need the register save area size
7380 in most cases. */
7381
7382 HOST_WIDE_INT sa_size = alpha_sa_size ();
7383
7384 /* PT_NULL procedures have no frame of their own and we only allow
7385 elimination to the stack pointer. This is the argument pointer and we
7386 resolve the soft frame pointer to that as well. */
7387
7388 if (alpha_procedure_type == PT_NULL)
7389 return 0;
7390
7391 /* For a PT_STACK procedure the frame layout looks as follows
7392
7393 -----> decreasing addresses
7394
7395 < size rounded up to 16 | likewise >
7396 --------------#------------------------------+++--------------+++-------#
7397 incoming args # pretended args | "frame" | regs sa | PV | outgoing args #
7398 --------------#---------------------------------------------------------#
7399 ^ ^ ^ ^
7400 ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR
7401
7402
7403 PT_REGISTER procedures are similar in that they may have a frame of their
7404 own. They have no regs-sa/pv/outgoing-args area.
7405
7406 We first compute offset to HARD_FRAME_PTR, then add what we need to get
7407 to STACK_PTR if need be. */
7408
7409 {
7410 HOST_WIDE_INT offset;
7411 HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
7412
7413 switch (from)
7414 {
7415 case FRAME_POINTER_REGNUM:
7416 offset = ALPHA_ROUND (sa_size + pv_save_size);
7417 break;
7418 case ARG_POINTER_REGNUM:
7419 offset = (ALPHA_ROUND (sa_size + pv_save_size
7420 + get_frame_size ()
7421 + crtl->args.pretend_args_size)
7422 - crtl->args.pretend_args_size);
7423 break;
7424 default:
7425 gcc_unreachable ();
7426 }
7427
7428 if (to == STACK_POINTER_REGNUM)
7429 offset += ALPHA_ROUND (crtl->outgoing_args_size);
7430
7431 return offset;
7432 }
7433 }
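/* Worked example (VMS PT_STACK, illustrative numbers): sa_size 48,
   pv_save_size 8, a 100-byte frame, no pretend args and 24 bytes of
   outgoing arguments give

       FRAME_POINTER  offset = ALPHA_ROUND (48 + 8)       = 64
       ARG_POINTER    offset = ALPHA_ROUND (48 + 8 + 100) = 160

   plus ALPHA_ROUND (24) = 32 more when eliminating to STACK_PTR.  */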
7434
7435 #define COMMON_OBJECT "common_object"
7436
7437 static tree
7438 common_object_handler (tree *node, tree name ATTRIBUTE_UNUSED,
7439 tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED,
7440 bool *no_add_attrs ATTRIBUTE_UNUSED)
7441 {
7442 tree decl = *node;
7443 gcc_assert (DECL_P (decl));
7444
7445 DECL_COMMON (decl) = 1;
7446 return NULL_TREE;
7447 }
7448
7449 static const struct attribute_spec vms_attribute_table[] =
7450 {
7451 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7452 affects_type_identity } */
7453 { COMMON_OBJECT, 0, 1, true, false, false, common_object_handler, false },
7454 { NULL, 0, 0, false, false, false, NULL, false }
7455 };
7456
7457 void
7458 vms_output_aligned_decl_common(FILE *file, tree decl, const char *name,
7459 unsigned HOST_WIDE_INT size,
7460 unsigned int align)
7461 {
7462 tree attr = DECL_ATTRIBUTES (decl);
7463 fprintf (file, "%s", COMMON_ASM_OP);
7464 assemble_name (file, name);
7465 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED, size);
7466 /* ??? Unlike on OSF/1, the alignment factor is not in log units. */
7467 fprintf (file, ",%u", align / BITS_PER_UNIT);
7468 if (attr)
7469 {
7470 attr = lookup_attribute (COMMON_OBJECT, attr);
7471 if (attr)
7472 fprintf (file, ",%s",
7473 IDENTIFIER_POINTER (TREE_VALUE (TREE_VALUE (attr))));
7474 }
7475 fputc ('\n', file);
7476 }
7477
7478 #undef COMMON_OBJECT
7479
7480 #endif
7481
7482 bool
7483 alpha_find_lo_sum_using_gp (rtx insn)
7484 {
7485 subrtx_iterator::array_type array;
7486 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
7487 {
7488 const_rtx x = *iter;
7489 if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx)
7490 return true;
7491 }
7492 return false;
7493 }
7494
7495 static int
7496 alpha_does_function_need_gp (void)
7497 {
7498 rtx_insn *insn;
7499
7500 /* The GP being variable is an OSF abi thing. */
7501 if (! TARGET_ABI_OSF)
7502 return 0;
7503
7504 /* We need the gp to load the address of __mcount. */
7505 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7506 return 1;
7507
7508 /* The code emitted by alpha_output_mi_thunk_osf uses the gp. */
7509 if (cfun->is_thunk)
7510 return 1;
7511
7512 /* The nonlocal receiver pattern assumes that the gp is valid for
7513 the nested function. Reasonable because it's almost always set
7514 correctly already. For the cases where that's wrong, make sure
7515 the nested function loads its gp on entry. */
7516 if (crtl->has_nonlocal_goto)
7517 return 1;
7518
7519 /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first.
7520 Even if we are a static function, we still need to do this in case
7521 our address is taken and passed to something like qsort. */
7522
7523 push_topmost_sequence ();
7524 insn = get_insns ();
7525 pop_topmost_sequence ();
7526
7527 for (; insn; insn = NEXT_INSN (insn))
7528 if (NONDEBUG_INSN_P (insn)
7529 && GET_CODE (PATTERN (insn)) != USE
7530 && GET_CODE (PATTERN (insn)) != CLOBBER
7531 && get_attr_usegp (insn))
7532 return 1;
7533
7534 return 0;
7535 }
7536
7537 \f
7538 /* Helper function to set RTX_FRAME_RELATED_P on instructions, including
7539 sequences. */
7540
7541 static rtx_insn *
7542 set_frame_related_p (void)
7543 {
7544 rtx_insn *seq = get_insns ();
7545 rtx_insn *insn;
7546
7547 end_sequence ();
7548
7549 if (!seq)
7550 return NULL;
7551
7552 if (INSN_P (seq))
7553 {
7554 insn = seq;
7555 while (insn != NULL_RTX)
7556 {
7557 RTX_FRAME_RELATED_P (insn) = 1;
7558 insn = NEXT_INSN (insn);
7559 }
7560 seq = emit_insn (seq);
7561 }
7562 else
7563 {
7564 seq = emit_insn (seq);
7565 RTX_FRAME_RELATED_P (seq) = 1;
7566 }
7567 return seq;
7568 }
7569
7570 #define FRP(exp) (start_sequence (), exp, set_frame_related_p ())
7571
7572 /* Generates a store with the proper unwind info attached. VALUE is
7573 stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG
7574 contains SP+FRAME_BIAS, and that is the unwind info that should be
7575 generated. If FRAME_REG != VALUE, then VALUE is being stored on
7576 behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */
7577
7578 static void
7579 emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias,
7580 HOST_WIDE_INT base_ofs, rtx frame_reg)
7581 {
7582 rtx addr, mem;
7583 rtx_insn *insn;
7584
7585 addr = plus_constant (Pmode, base_reg, base_ofs);
7586 mem = gen_frame_mem (DImode, addr);
7587
7588 insn = emit_move_insn (mem, value);
7589 RTX_FRAME_RELATED_P (insn) = 1;
7590
7591 if (frame_bias || value != frame_reg)
7592 {
7593 if (frame_bias)
7594 {
7595 addr = plus_constant (Pmode, stack_pointer_rtx,
7596 frame_bias + base_ofs);
7597 mem = gen_rtx_MEM (DImode, addr);
7598 }
7599
7600 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
7601 gen_rtx_SET (mem, frame_reg));
7602 }
7603 }
7604
7605 static void
7606 emit_frame_store (unsigned int regno, rtx base_reg,
7607 HOST_WIDE_INT frame_bias, HOST_WIDE_INT base_ofs)
7608 {
7609 rtx reg = gen_rtx_REG (DImode, regno);
7610 emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg);
7611 }
7612
7613 /* Compute the frame size. SIZE is the size of the "naked" frame
7614 and SA_SIZE is the size of the register save area. */
7615
7616 static HOST_WIDE_INT
7617 compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size)
7618 {
7619 if (TARGET_ABI_OPEN_VMS)
7620 return ALPHA_ROUND (sa_size
7621 + (alpha_procedure_type == PT_STACK ? 8 : 0)
7622 + size
7623 + crtl->args.pretend_args_size);
7624 else
7625 return ALPHA_ROUND (crtl->outgoing_args_size)
7626 + sa_size
7627 + ALPHA_ROUND (size
7628 + crtl->args.pretend_args_size);
7629 }
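/* E.g. (OSF, illustrative): 24 bytes of outgoing args, a 48-byte save
   area, a 100-byte frame and no pretend args give
   ALPHA_ROUND (24) + 48 + ALPHA_ROUND (100) = 32 + 48 + 112 = 192,
   always a multiple of 16.  */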
7630
7631 /* Write function prologue. */
7632
7633 /* On vms we have two kinds of functions:
7634
7635 - stack frame (PROC_STACK)
7636 these are 'normal' functions with local vars and which call
7637 other functions
7638 - register frame (PROC_REGISTER)
7639 keeps all data in registers, needs no stack
7640
7641 We must pass this to the assembler so it can generate the
7642 proper pdsc (procedure descriptor).
7643 This is done with the '.pdesc' command.
7644
7645 On non-VMS targets, we don't really differentiate between the two, as we can
7646 simply allocate stack without saving registers. */
7647
7648 void
7649 alpha_expand_prologue (void)
7650 {
7651 /* Registers to save. */
7652 unsigned long imask = 0;
7653 unsigned long fmask = 0;
7654 /* Stack space needed for pushing registers clobbered by us. */
7655 HOST_WIDE_INT sa_size, sa_bias;
7656 /* Complete stack size needed. */
7657 HOST_WIDE_INT frame_size;
7658 /* Probed stack size; it additionally includes the size of
7659 the "reserve region" if any. */
7660 HOST_WIDE_INT probed_size;
7661 /* Offset from base reg to register save area. */
7662 HOST_WIDE_INT reg_offset;
7663 rtx sa_reg;
7664 int i;
7665
7666 sa_size = alpha_sa_size ();
7667 frame_size = compute_frame_size (get_frame_size (), sa_size);
7668
7669 if (flag_stack_usage_info)
7670 current_function_static_stack_size = frame_size;
7671
7672 if (TARGET_ABI_OPEN_VMS)
7673 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7674 else
7675 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7676
7677 alpha_sa_mask (&imask, &fmask);
7678
7679 /* Emit an insn to reload GP, if needed. */
7680 if (TARGET_ABI_OSF)
7681 {
7682 alpha_function_needs_gp = alpha_does_function_need_gp ();
7683 if (alpha_function_needs_gp)
7684 emit_insn (gen_prologue_ldgp ());
7685 }
7686
7687 /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert
7688 the call to mcount ourselves, rather than having the linker do it
7689 magically in response to -pg. Since _mcount has special linkage,
7690 don't represent the call as a call. */
7691 if (TARGET_PROFILING_NEEDS_GP && crtl->profile)
7692 emit_insn (gen_prologue_mcount ());
7693
7694 /* Adjust the stack by the frame size. If the frame size is > 4096
7695 bytes, we need to be sure we probe somewhere in the first and last
7696 4096 bytes (we can probably get away without the latter test) and
7697 every 8192 bytes in between. If the frame size is > 32768, we
7698 do this in a loop. Otherwise, we generate the explicit probe
7699 instructions.
7700
7701 Note that we are only allowed to adjust sp once in the prologue. */
7702
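/* Illustrative schedule, assuming no -fstack-check so probed_size equals
   frame_size: for frame_size == 20000 the code below emits explicit probes
   at sp-4096 and sp-12288, a final probe at sp-20000 when no registers are
   being saved, and then a single sp adjustment of -20000.  */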
7703 probed_size = frame_size;
7704 if (flag_stack_check)
7705 probed_size += STACK_CHECK_PROTECT;
7706
7707 if (probed_size <= 32768)
7708 {
7709 if (probed_size > 4096)
7710 {
7711 int probed;
7712
7713 for (probed = 4096; probed < probed_size; probed += 8192)
7714 emit_insn (gen_probe_stack (GEN_INT (-probed)));
7715
7716 /* We only have to do this probe if we aren't saving registers or
7717 if we are probing beyond the frame because of -fstack-check. */
7718 if ((sa_size == 0 && probed_size > probed - 4096)
7719 || flag_stack_check)
7720 emit_insn (gen_probe_stack (GEN_INT (-probed_size)));
7721 }
7722
7723 if (frame_size != 0)
7724 FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
7725 GEN_INT (-frame_size))));
7726 }
7727 else
7728 {
7729 /* Here we generate code to set R22 to SP + 4096 and set R23 to the
7730 number of 8192 byte blocks to probe. We then probe each block
7731 in the loop and then set SP to the proper location. If the
7732 amount remaining is > 4096, we have to do one more probe if we
7733 are not saving any registers or if we are probing beyond the
7734 frame because of -fstack-check. */
7735
7736 HOST_WIDE_INT blocks = (probed_size + 4096) / 8192;
7737 HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192;
7738 rtx ptr = gen_rtx_REG (DImode, 22);
7739 rtx count = gen_rtx_REG (DImode, 23);
7740 rtx seq;
7741
7742 emit_move_insn (count, GEN_INT (blocks));
7743 emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096)));
7744
7745 /* Because of the difficulty in emitting a new basic block this
7746 late in the compilation, generate the loop as a single insn. */
7747 emit_insn (gen_prologue_stack_probe_loop (count, ptr));
7748
7749 if ((leftover > 4096 && sa_size == 0) || flag_stack_check)
7750 {
7751 rtx last = gen_rtx_MEM (DImode,
7752 plus_constant (Pmode, ptr, -leftover));
7753 MEM_VOLATILE_P (last) = 1;
7754 emit_move_insn (last, const0_rtx);
7755 }
7756
7757 if (flag_stack_check)
7758 {
7759 /* If -fstack-check is specified we have to load the entire
7760 constant into a register and subtract from the sp in one go,
7761 because the probed stack size is not equal to the frame size. */
7762 HOST_WIDE_INT lo, hi;
7763 lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
7764 hi = frame_size - lo;
7765
7766 emit_move_insn (ptr, GEN_INT (hi));
7767 emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo)));
7768 seq = emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx,
7769 ptr));
7770 }
7771 else
7772 {
7773 seq = emit_insn (gen_adddi3 (stack_pointer_rtx, ptr,
7774 GEN_INT (-leftover)));
7775 }
7776
7777 /* This alternative is special, because the DWARF code cannot
7778 possibly intuit through the loop above. So we invent this
7779 note for it to look at instead. */
7780 RTX_FRAME_RELATED_P (seq) = 1;
7781 add_reg_note (seq, REG_FRAME_RELATED_EXPR,
7782 gen_rtx_SET (stack_pointer_rtx,
7783 plus_constant (Pmode, stack_pointer_rtx,
7784 -frame_size)));
7785 }
7786
7787 /* Cope with very large offsets to the register save area. */
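/* For example, with reg_offset == 40000 and sa_size == 64 the code below
computes low == -25536, so sa_bias becomes 65536 and reg_offset becomes
-25536: the save area is then addressed as (SP + 65536) - 25536 == SP + 40000,
keeping every displacement within the signed 16-bit range. */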
7788 sa_bias = 0;
7789 sa_reg = stack_pointer_rtx;
7790 if (reg_offset + sa_size > 0x8000)
7791 {
7792 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
7793 rtx sa_bias_rtx;
7794
7795 if (low + sa_size <= 0x8000)
7796 sa_bias = reg_offset - low, reg_offset = low;
7797 else
7798 sa_bias = reg_offset, reg_offset = 0;
7799
7800 sa_reg = gen_rtx_REG (DImode, 24);
7801 sa_bias_rtx = GEN_INT (sa_bias);
7802
7803 if (add_operand (sa_bias_rtx, DImode))
7804 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx));
7805 else
7806 {
7807 emit_move_insn (sa_reg, sa_bias_rtx);
7808 emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg));
7809 }
7810 }
7811
7812 /* Save regs in stack order. Beginning with VMS PV. */
7813 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_STACK)
7814 emit_frame_store (REG_PV, stack_pointer_rtx, 0, 0);
7815
7816 /* Save register RA next. */
7817 if (imask & (1UL << REG_RA))
7818 {
7819 emit_frame_store (REG_RA, sa_reg, sa_bias, reg_offset);
7820 imask &= ~(1UL << REG_RA);
7821 reg_offset += 8;
7822 }
7823
7824 /* Now save any other registers required to be saved. */
7825 for (i = 0; i < 31; i++)
7826 if (imask & (1UL << i))
7827 {
7828 emit_frame_store (i, sa_reg, sa_bias, reg_offset);
7829 reg_offset += 8;
7830 }
7831
7832 for (i = 0; i < 31; i++)
7833 if (fmask & (1UL << i))
7834 {
7835 emit_frame_store (i+32, sa_reg, sa_bias, reg_offset);
7836 reg_offset += 8;
7837 }
7838
7839 if (TARGET_ABI_OPEN_VMS)
7840 {
7841 /* Register frame procedures save the fp. */
7842 if (alpha_procedure_type == PT_REGISTER)
7843 {
7844 rtx_insn *insn =
7845 emit_move_insn (gen_rtx_REG (DImode, vms_save_fp_regno),
7846 hard_frame_pointer_rtx);
7847 add_reg_note (insn, REG_CFA_REGISTER, NULL);
7848 RTX_FRAME_RELATED_P (insn) = 1;
7849 }
7850
7851 if (alpha_procedure_type != PT_NULL && vms_base_regno != REG_PV)
7852 emit_insn (gen_force_movdi (gen_rtx_REG (DImode, vms_base_regno),
7853 gen_rtx_REG (DImode, REG_PV)));
7854
7855 if (alpha_procedure_type != PT_NULL
7856 && vms_unwind_regno == HARD_FRAME_POINTER_REGNUM)
7857 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7858
7859 /* If we have to allocate space for outgoing args, do it now. */
7860 if (crtl->outgoing_args_size != 0)
7861 {
7862 rtx_insn *seq
7863 = emit_move_insn (stack_pointer_rtx,
7864 plus_constant
7865 (Pmode, hard_frame_pointer_rtx,
7866 - (ALPHA_ROUND
7867 (crtl->outgoing_args_size))));
7868
7869 /* Only set FRAME_RELATED_P on the stack adjustment we just emitted
7870 if ! frame_pointer_needed. Setting the bit will change the CFA
7871 computation rule to use sp again, which would be wrong if we had
7872 frame_pointer_needed, as this means sp might move unpredictably
7873 later on.
7874
7875 Also, note that
7876 frame_pointer_needed
7877 => vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
7878 and
7879 crtl->outgoing_args_size != 0
7880 => alpha_procedure_type != PT_NULL,
7881
7882 so when we are not setting the bit here, we are guaranteed to
7883 have emitted an FRP frame pointer update just before. */
7884 RTX_FRAME_RELATED_P (seq) = ! frame_pointer_needed;
7885 }
7886 }
7887 else
7888 {
7889 /* If we need a frame pointer, set it from the stack pointer. */
7890 if (frame_pointer_needed)
7891 {
7892 if (TARGET_CAN_FAULT_IN_PROLOGUE)
7893 FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
7894 else
7895 /* This must always be the last instruction in the
7896 prologue, thus we emit a special move + clobber. */
7897 FRP (emit_insn (gen_init_fp (hard_frame_pointer_rtx,
7898 stack_pointer_rtx, sa_reg)));
7899 }
7900 }
7901
7902 /* The ABIs for VMS and OSF/1 say that while we can schedule insns into
7903 the prologue, for exception handling reasons, we cannot do this for
7904 any insn that might fault. We could prevent this for mems with a
7905 (clobber:BLK (scratch)), but this doesn't work for fp insns. So we
7906 have to prevent all such scheduling with a blockage.
7907
7908 Linux, on the other hand, never bothered to implement OSF/1's
7909 exception handling, and so doesn't care about such things. Anyone
7910 planning to use dwarf2 frame-unwind info can also omit the blockage. */
7911
7912 if (! TARGET_CAN_FAULT_IN_PROLOGUE)
7913 emit_insn (gen_blockage ());
7914 }
7915
7916 /* Count the number of .file directives, so that .loc is up to date. */
7917 int num_source_filenames = 0;
7918
7919 /* Output the textual info surrounding the prologue. */
7920
7921 void
7922 alpha_start_function (FILE *file, const char *fnname,
7923 tree decl ATTRIBUTE_UNUSED)
7924 {
7925 unsigned long imask = 0;
7926 unsigned long fmask = 0;
7927 /* Stack space needed for pushing registers clobbered by us. */
7928 HOST_WIDE_INT sa_size;
7929 /* Complete stack size needed. */
7930 unsigned HOST_WIDE_INT frame_size;
7931 /* The maximum debuggable frame size. */
7932 unsigned HOST_WIDE_INT max_frame_size = 1UL << 31;
7933 /* Offset from base reg to register save area. */
7934 HOST_WIDE_INT reg_offset;
7935 char *entry_label = (char *) alloca (strlen (fnname) + 6);
7936 char *tramp_label = (char *) alloca (strlen (fnname) + 6);
7937 int i;
7938
7939 #if TARGET_ABI_OPEN_VMS
7940 vms_start_function (fnname);
7941 #endif
7942
7943 alpha_fnname = fnname;
7944 sa_size = alpha_sa_size ();
7945 frame_size = compute_frame_size (get_frame_size (), sa_size);
7946
7947 if (TARGET_ABI_OPEN_VMS)
7948 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
7949 else
7950 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
7951
7952 alpha_sa_mask (&imask, &fmask);
7953
7954 /* Issue function start and label. */
7955 if (TARGET_ABI_OPEN_VMS || !flag_inhibit_size_directive)
7956 {
7957 fputs ("\t.ent ", file);
7958 assemble_name (file, fnname);
7959 putc ('\n', file);
7960
7961 /* If the function needs GP, we'll write the "..ng" label there.
7962 Otherwise, do it here. */
7963 if (TARGET_ABI_OSF
7964 && ! alpha_function_needs_gp
7965 && ! cfun->is_thunk)
7966 {
7967 putc ('$', file);
7968 assemble_name (file, fnname);
7969 fputs ("..ng:\n", file);
7970 }
7971 }
7972 /* Nested functions on VMS that are potentially called via trampoline
7973 get a special transfer entry point that loads the called function's
7974 procedure descriptor and static chain. */
7975 if (TARGET_ABI_OPEN_VMS
7976 && !TREE_PUBLIC (decl)
7977 && DECL_CONTEXT (decl)
7978 && !TYPE_P (DECL_CONTEXT (decl))
7979 && TREE_CODE (DECL_CONTEXT (decl)) != TRANSLATION_UNIT_DECL)
7980 {
7981 strcpy (tramp_label, fnname);
7982 strcat (tramp_label, "..tr");
7983 ASM_OUTPUT_LABEL (file, tramp_label);
7984 fprintf (file, "\tldq $1,24($27)\n");
7985 fprintf (file, "\tldq $27,16($27)\n");
7986 }
7987
7988 strcpy (entry_label, fnname);
7989 if (TARGET_ABI_OPEN_VMS)
7990 strcat (entry_label, "..en");
7991
7992 ASM_OUTPUT_LABEL (file, entry_label);
7993 inside_function = TRUE;
7994
7995 if (TARGET_ABI_OPEN_VMS)
7996 fprintf (file, "\t.base $%d\n", vms_base_regno);
7997
7998 if (TARGET_ABI_OSF
7999 && TARGET_IEEE_CONFORMANT
8000 && !flag_inhibit_size_directive)
8001 {
8002 /* Set flags in procedure descriptor to request IEEE-conformant
8003 math-library routines. The value we set it to is PDSC_EXC_IEEE
8004 (/usr/include/pdsc.h). */
8005 fputs ("\t.eflag 48\n", file);
8006 }
8007
8008 /* Set up offsets to alpha virtual arg/local debugging pointer. */
8009 alpha_auto_offset = -frame_size + crtl->args.pretend_args_size;
8010 alpha_arg_offset = -frame_size + 48;
8011
8012 /* Describe our frame. If the frame size does not fit in a signed 32-bit
8013 integer, print it as zero to avoid an assembler error. We won't be
8014 properly describing such a frame, but that's the best we can do. */
8015 if (TARGET_ABI_OPEN_VMS)
8016 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,"
8017 HOST_WIDE_INT_PRINT_DEC "\n",
8018 vms_unwind_regno,
8019 frame_size >= (1UL << 31) ? 0 : frame_size,
8020 reg_offset);
8021 else if (!flag_inhibit_size_directive)
8022 fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n",
8023 (frame_pointer_needed
8024 ? HARD_FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM),
8025 frame_size >= max_frame_size ? 0 : frame_size,
8026 crtl->args.pretend_args_size);
8027
8028 /* Describe which registers were spilled. */
8029 if (TARGET_ABI_OPEN_VMS)
8030 {
8031 if (imask)
8032 /* ??? Does VMS care if mask contains ra? The old code didn't
8033 set it, so I don't here. */
8034 fprintf (file, "\t.mask 0x%lx,0\n", imask & ~(1UL << REG_RA));
8035 if (fmask)
8036 fprintf (file, "\t.fmask 0x%lx,0\n", fmask);
8037 if (alpha_procedure_type == PT_REGISTER)
8038 fprintf (file, "\t.fp_save $%d\n", vms_save_fp_regno);
8039 }
8040 else if (!flag_inhibit_size_directive)
8041 {
8042 if (imask)
8043 {
8044 fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask,
8045 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8046
8047 for (i = 0; i < 32; ++i)
8048 if (imask & (1UL << i))
8049 reg_offset += 8;
8050 }
8051
8052 if (fmask)
8053 fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask,
8054 frame_size >= max_frame_size ? 0 : reg_offset - frame_size);
8055 }
8056
8057 #if TARGET_ABI_OPEN_VMS
8058 /* If a user condition handler has been installed at some point, emit
8059 the procedure descriptor bits to point the Condition Handling Facility
8060 at the indirection wrapper, and state the fp offset at which the user
8061 handler may be found. */
8062 if (cfun->machine->uses_condition_handler)
8063 {
8064 fprintf (file, "\t.handler __gcc_shell_handler\n");
8065 fprintf (file, "\t.handler_data %d\n", VMS_COND_HANDLER_FP_OFFSET);
8066 }
8067
8068 #ifdef TARGET_VMS_CRASH_DEBUG
8069 /* Support of minimal traceback info. */
8070 switch_to_section (readonly_data_section);
8071 fprintf (file, "\t.align 3\n");
8072 assemble_name (file, fnname); fputs ("..na:\n", file);
8073 fputs ("\t.ascii \"", file);
8074 assemble_name (file, fnname);
8075 fputs ("\\0\"\n", file);
8076 switch_to_section (text_section);
8077 #endif
8078 #endif /* TARGET_ABI_OPEN_VMS */
8079 }
8080
8081 /* Emit the .prologue note at the scheduled end of the prologue. */
8082
8083 static void
8084 alpha_output_function_end_prologue (FILE *file)
8085 {
8086 if (TARGET_ABI_OPEN_VMS)
8087 fputs ("\t.prologue\n", file);
8088 else if (!flag_inhibit_size_directive)
8089 fprintf (file, "\t.prologue %d\n",
8090 alpha_function_needs_gp || cfun->is_thunk);
8091 }
8092
8093 /* Write function epilogue. */
8094
8095 void
8096 alpha_expand_epilogue (void)
8097 {
8098 /* Registers to save. */
8099 unsigned long imask = 0;
8100 unsigned long fmask = 0;
8101 /* Stack space needed for pushing registers clobbered by us. */
8102 HOST_WIDE_INT sa_size;
8103 /* Complete stack size needed. */
8104 HOST_WIDE_INT frame_size;
8105 /* Offset from base reg to register save area. */
8106 HOST_WIDE_INT reg_offset;
8107 int fp_is_frame_pointer, fp_offset;
8108 rtx sa_reg, sa_reg_exp = NULL;
8109 rtx sp_adj1, sp_adj2, mem, reg, insn;
8110 rtx eh_ofs;
8111 rtx cfa_restores = NULL_RTX;
8112 int i;
8113
8114 sa_size = alpha_sa_size ();
8115 frame_size = compute_frame_size (get_frame_size (), sa_size);
8116
8117 if (TARGET_ABI_OPEN_VMS)
8118 {
8119 if (alpha_procedure_type == PT_STACK)
8120 reg_offset = 8 + 8 * cfun->machine->uses_condition_handler;
8121 else
8122 reg_offset = 0;
8123 }
8124 else
8125 reg_offset = ALPHA_ROUND (crtl->outgoing_args_size);
8126
8127 alpha_sa_mask (&imask, &fmask);
8128
8129 fp_is_frame_pointer
8130 = (TARGET_ABI_OPEN_VMS
8131 ? alpha_procedure_type == PT_STACK
8132 : frame_pointer_needed);
8133 fp_offset = 0;
8134 sa_reg = stack_pointer_rtx;
8135
8136 if (crtl->calls_eh_return)
8137 eh_ofs = EH_RETURN_STACKADJ_RTX;
8138 else
8139 eh_ofs = NULL_RTX;
8140
8141 if (sa_size)
8142 {
8143 /* If we have a frame pointer, restore SP from it. */
8144 if (TARGET_ABI_OPEN_VMS
8145 ? vms_unwind_regno == HARD_FRAME_POINTER_REGNUM
8146 : frame_pointer_needed)
8147 emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
8148
8149 /* Cope with very large offsets to the register save area. */
8150 if (reg_offset + sa_size > 0x8000)
8151 {
8152 int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000;
8153 HOST_WIDE_INT bias;
8154
8155 if (low + sa_size <= 0x8000)
8156 bias = reg_offset - low, reg_offset = low;
8157 else
8158 bias = reg_offset, reg_offset = 0;
8159
8160 sa_reg = gen_rtx_REG (DImode, 22);
8161 sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias);
8162
8163 emit_move_insn (sa_reg, sa_reg_exp);
8164 }
8165
8166 /* Restore registers in order, excepting a true frame pointer. */
8167
8168 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg, reg_offset));
8169 reg = gen_rtx_REG (DImode, REG_RA);
8170 emit_move_insn (reg, mem);
8171 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8172
8173 reg_offset += 8;
8174 imask &= ~(1UL << REG_RA);
8175
8176 for (i = 0; i < 31; ++i)
8177 if (imask & (1UL << i))
8178 {
8179 if (i == HARD_FRAME_POINTER_REGNUM && fp_is_frame_pointer)
8180 fp_offset = reg_offset;
8181 else
8182 {
8183 mem = gen_frame_mem (DImode,
8184 plus_constant (Pmode, sa_reg,
8185 reg_offset));
8186 reg = gen_rtx_REG (DImode, i);
8187 emit_move_insn (reg, mem);
8188 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
8189 cfa_restores);
8190 }
8191 reg_offset += 8;
8192 }
8193
8194 for (i = 0; i < 31; ++i)
8195 if (fmask & (1UL << i))
8196 {
8197 mem = gen_frame_mem (DFmode, plus_constant (Pmode, sa_reg,
8198 reg_offset));
8199 reg = gen_rtx_REG (DFmode, i+32);
8200 emit_move_insn (reg, mem);
8201 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
8202 reg_offset += 8;
8203 }
8204 }
8205
8206 if (frame_size || eh_ofs)
8207 {
8208 sp_adj1 = stack_pointer_rtx;
8209
8210 if (eh_ofs)
8211 {
8212 sp_adj1 = gen_rtx_REG (DImode, 23);
8213 emit_move_insn (sp_adj1,
8214 gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs));
8215 }
8216
8217 /* If the stack size is large, begin computation into a temporary
8218 register so as not to interfere with a potential fp restore,
8219 which must be consecutive with an SP restore. */
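/* For example, frame_size == 100000 takes the middle branch below:
low == -31072, so $23 is first set to SP + 131072 and SP then becomes
$23 - 31072, i.e. the original SP plus exactly 100000. */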
8220 if (frame_size < 32768 && !cfun->calls_alloca)
8221 sp_adj2 = GEN_INT (frame_size);
8222 else if (frame_size < 0x40007fffL)
8223 {
8224 int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000;
8225
8226 sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low);
8227 if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2))
8228 sp_adj1 = sa_reg;
8229 else
8230 {
8231 sp_adj1 = gen_rtx_REG (DImode, 23);
8232 emit_move_insn (sp_adj1, sp_adj2);
8233 }
8234 sp_adj2 = GEN_INT (low);
8235 }
8236 else
8237 {
8238 rtx tmp = gen_rtx_REG (DImode, 23);
8239 sp_adj2 = alpha_emit_set_const (tmp, DImode, frame_size, 3, false);
8240 if (!sp_adj2)
8241 {
8242 /* We can't drop new constants to memory this late, as far as we know,
8243 so build the value up in pieces. */
8244 sp_adj2 = alpha_emit_set_long_const (tmp, frame_size);
8245 gcc_assert (sp_adj2);
8246 }
8247 }
8248
8249 /* From now on, things must be in order. So emit blockages. */
8250
8251 /* Restore the frame pointer. */
8252 if (fp_is_frame_pointer)
8253 {
8254 emit_insn (gen_blockage ());
8255 mem = gen_frame_mem (DImode, plus_constant (Pmode, sa_reg,
8256 fp_offset));
8257 emit_move_insn (hard_frame_pointer_rtx, mem);
8258 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8259 hard_frame_pointer_rtx, cfa_restores);
8260 }
8261 else if (TARGET_ABI_OPEN_VMS)
8262 {
8263 emit_insn (gen_blockage ());
8264 emit_move_insn (hard_frame_pointer_rtx,
8265 gen_rtx_REG (DImode, vms_save_fp_regno));
8266 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
8267 hard_frame_pointer_rtx, cfa_restores);
8268 }
8269
8270 /* Restore the stack pointer. */
8271 emit_insn (gen_blockage ());
8272 if (sp_adj2 == const0_rtx)
8273 insn = emit_move_insn (stack_pointer_rtx, sp_adj1);
8274 else
8275 insn = emit_move_insn (stack_pointer_rtx,
8276 gen_rtx_PLUS (DImode, sp_adj1, sp_adj2));
8277 REG_NOTES (insn) = cfa_restores;
8278 add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
8279 RTX_FRAME_RELATED_P (insn) = 1;
8280 }
8281 else
8282 {
8283 gcc_assert (cfa_restores == NULL);
8284
8285 if (TARGET_ABI_OPEN_VMS && alpha_procedure_type == PT_REGISTER)
8286 {
8287 emit_insn (gen_blockage ());
8288 insn = emit_move_insn (hard_frame_pointer_rtx,
8289 gen_rtx_REG (DImode, vms_save_fp_regno));
8290 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8291 RTX_FRAME_RELATED_P (insn) = 1;
8292 }
8293 }
8294 }
8295 \f
8296 /* Output the rest of the textual info surrounding the epilogue. */
8297
8298 void
8299 alpha_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED)
8300 {
8301 rtx_insn *insn;
8302
8303 /* We output a nop after noreturn calls at the very end of the function to
8304 ensure that the return address always remains in the caller's code range,
8305 as not doing so might confuse unwinding engines. */
8306 insn = get_last_insn ();
8307 if (!INSN_P (insn))
8308 insn = prev_active_insn (insn);
8309 if (insn && CALL_P (insn))
8310 output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL);
8311
8312 #if TARGET_ABI_OPEN_VMS
8313 /* Write the linkage entries. */
8314 alpha_write_linkage (file, fnname);
8315 #endif
8316
8317 /* End the function. */
8318 if (TARGET_ABI_OPEN_VMS
8319 || !flag_inhibit_size_directive)
8320 {
8321 fputs ("\t.end ", file);
8322 assemble_name (file, fnname);
8323 putc ('\n', file);
8324 }
8325 inside_function = FALSE;
8326 }
8327
8328 #if TARGET_ABI_OSF
8329 /* Emit a tail call to FUNCTION after adjusting THIS by DELTA.
8330
8331 In order to avoid the hordes of differences between generated code
8332 with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating
8333 lots of code loading up large constants, generate rtl and emit it
8334 instead of going straight to text.
8335
8336 Not sure why this idea hasn't been explored before... */
8337
8338 static void
8339 alpha_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
8340 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
8341 tree function)
8342 {
8343 HOST_WIDE_INT hi, lo;
8344 rtx this_rtx, funexp;
8345 rtx_insn *insn;
8346
8347 /* We always require a valid GP. */
8348 emit_insn (gen_prologue_ldgp ());
8349 emit_note (NOTE_INSN_PROLOGUE_END);
8350
8351 /* Find the "this" pointer. If the function returns a structure,
8352 the structure return pointer is in $16. */
8353 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
8354 this_rtx = gen_rtx_REG (Pmode, 17);
8355 else
8356 this_rtx = gen_rtx_REG (Pmode, 16);
8357
8358 /* Add DELTA. When possible we use ldah+lda. Otherwise load the
8359 entire constant for the add. */
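/* For example, delta == 0x1a000 splits into lo == -0x6000 and hi == 0x20000
(lo is the sign-extended low 16 bits; hi compensates), and hi + lo == delta,
so the two adds below suffice; otherwise the full 64-bit constant is built
in a scratch register first. */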
8360 lo = ((delta & 0xffff) ^ 0x8000) - 0x8000;
8361 hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8362 if (hi + lo == delta)
8363 {
8364 if (hi)
8365 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi)));
8366 if (lo)
8367 emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo)));
8368 }
8369 else
8370 {
8371 rtx tmp = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta);
8372 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8373 }
8374
8375 /* Add a delta stored in the vtable at VCALL_OFFSET. */
8376 if (vcall_offset)
8377 {
8378 rtx tmp, tmp2;
8379
8380 tmp = gen_rtx_REG (Pmode, 0);
8381 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
8382
8383 lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000;
8384 hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000;
8385 if (hi + lo == vcall_offset)
8386 {
8387 if (hi)
8388 emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi)));
8389 }
8390 else
8391 {
8392 tmp2 = alpha_emit_set_long_const (gen_rtx_REG (Pmode, 1),
8393 vcall_offset);
8394 emit_insn (gen_adddi3 (tmp, tmp, tmp2));
8395 lo = 0;
8396 }
8397 if (lo)
8398 tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo));
8399 else
8400 tmp2 = tmp;
8401 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2));
8402
8403 emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
8404 }
8405
8406 /* Generate a tail call to the target function. */
8407 if (! TREE_USED (function))
8408 {
8409 assemble_external (function);
8410 TREE_USED (function) = 1;
8411 }
8412 funexp = XEXP (DECL_RTL (function), 0);
8413 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
8414 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
8415 SIBLING_CALL_P (insn) = 1;
8416
8417 /* Run just enough of rest_of_compilation to get the insns emitted.
8418 There's not really enough bulk here to make other passes such as
8419 instruction scheduling worthwhile. Note that use_thunk calls
8420 assemble_start_function and assemble_end_function. */
8421 insn = get_insns ();
8422 shorten_branches (insn);
8423 final_start_function (insn, file, 1);
8424 final (insn, file, 1);
8425 final_end_function ();
8426 }
8427 #endif /* TARGET_ABI_OSF */
8428 \f
8429 /* Debugging support. */
8430
8431 #include "gstab.h"
8432
8433 /* Name of the file containing the current function. */
8434
8435 static const char *current_function_file = "";
8436
8437 /* Offsets to alpha virtual arg/local debugging pointers. */
8438
8439 long alpha_arg_offset;
8440 long alpha_auto_offset;
8441 \f
8442 /* Emit a new filename to a stream. */
8443
8444 void
8445 alpha_output_filename (FILE *stream, const char *name)
8446 {
8447 static int first_time = TRUE;
8448
8449 if (first_time)
8450 {
8451 first_time = FALSE;
8452 ++num_source_filenames;
8453 current_function_file = name;
8454 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8455 output_quoted_string (stream, name);
8456 fprintf (stream, "\n");
8457 }
8458
8459 else if (name != current_function_file
8460 && strcmp (name, current_function_file) != 0)
8461 {
8462 ++num_source_filenames;
8463 current_function_file = name;
8464 fprintf (stream, "\t.file\t%d ", num_source_filenames);
8465
8466 output_quoted_string (stream, name);
8467 fprintf (stream, "\n");
8468 }
8469 }
8470 \f
8471 /* Structure to show the current status of registers and memory. */
8472
8473 struct shadow_summary
8474 {
8475 struct {
8476 unsigned int i : 31; /* Mask of int regs */
8477 unsigned int fp : 31; /* Mask of fp regs */
8478 unsigned int mem : 1; /* mem == imem | fpmem */
8479 } used, defd;
8480 };
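/* Bit N of the I field stands for integer register N and bit N of the FP
field for register 32 + N; the zero registers 31 and 63 are never recorded,
which is why 31 bits per mask suffice. For instance, a store to $f10
(hard register 42) sets bit 10 of defd.fp. */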
8481
8482 /* Summarize the effects of expression X on the machine. Update SUM, a pointer
8483 to the summary structure. SET is nonzero if the insn is setting the
8484 object, otherwise zero. */
8485
8486 static void
8487 summarize_insn (rtx x, struct shadow_summary *sum, int set)
8488 {
8489 const char *format_ptr;
8490 int i, j;
8491
8492 if (x == 0)
8493 return;
8494
8495 switch (GET_CODE (x))
8496 {
8497 /* ??? Note that this case would be incorrect if the Alpha had a
8498 ZERO_EXTRACT in SET_DEST. */
8499 case SET:
8500 summarize_insn (SET_SRC (x), sum, 0);
8501 summarize_insn (SET_DEST (x), sum, 1);
8502 break;
8503
8504 case CLOBBER:
8505 summarize_insn (XEXP (x, 0), sum, 1);
8506 break;
8507
8508 case USE:
8509 summarize_insn (XEXP (x, 0), sum, 0);
8510 break;
8511
8512 case ASM_OPERANDS:
8513 for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--)
8514 summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0);
8515 break;
8516
8517 case PARALLEL:
8518 for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
8519 summarize_insn (XVECEXP (x, 0, i), sum, 0);
8520 break;
8521
8522 case SUBREG:
8523 summarize_insn (SUBREG_REG (x), sum, 0);
8524 break;
8525
8526 case REG:
8527 {
8528 int regno = REGNO (x);
8529 unsigned long mask = ((unsigned long) 1) << (regno % 32);
8530
8531 if (regno == 31 || regno == 63)
8532 break;
8533
8534 if (set)
8535 {
8536 if (regno < 32)
8537 sum->defd.i |= mask;
8538 else
8539 sum->defd.fp |= mask;
8540 }
8541 else
8542 {
8543 if (regno < 32)
8544 sum->used.i |= mask;
8545 else
8546 sum->used.fp |= mask;
8547 }
8548 }
8549 break;
8550
8551 case MEM:
8552 if (set)
8553 sum->defd.mem = 1;
8554 else
8555 sum->used.mem = 1;
8556
8557 /* Find the regs used in memory address computation: */
8558 summarize_insn (XEXP (x, 0), sum, 0);
8559 break;
8560
8561 case CONST_INT: case CONST_WIDE_INT: case CONST_DOUBLE:
8562 case SYMBOL_REF: case LABEL_REF: case CONST:
8563 case SCRATCH: case ASM_INPUT:
8564 break;
8565
8566 /* Handle common unary and binary ops for efficiency. */
8567 case COMPARE: case PLUS: case MINUS: case MULT: case DIV:
8568 case MOD: case UDIV: case UMOD: case AND: case IOR:
8569 case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT:
8570 case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX:
8571 case NE: case EQ: case GE: case GT: case LE:
8572 case LT: case GEU: case GTU: case LEU: case LTU:
8573 summarize_insn (XEXP (x, 0), sum, 0);
8574 summarize_insn (XEXP (x, 1), sum, 0);
8575 break;
8576
8577 case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND:
8578 case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT:
8579 case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS:
8580 case SQRT: case FFS:
8581 summarize_insn (XEXP (x, 0), sum, 0);
8582 break;
8583
8584 default:
8585 format_ptr = GET_RTX_FORMAT (GET_CODE (x));
8586 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8587 switch (format_ptr[i])
8588 {
8589 case 'e':
8590 summarize_insn (XEXP (x, i), sum, 0);
8591 break;
8592
8593 case 'E':
8594 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8595 summarize_insn (XVECEXP (x, i, j), sum, 0);
8596 break;
8597
8598 case 'i':
8599 break;
8600
8601 default:
8602 gcc_unreachable ();
8603 }
8604 }
8605 }
8606
8607 /* Ensure a sufficient number of `trapb' insns are in the code when
8608 the user requests code with a trap precision of functions or
8609 instructions.
8610
8611 In naive mode, when the user requests a trap-precision of
8612 "instruction", a trapb is needed after every instruction that may
8613 generate a trap. This ensures that the code is resumption safe but
8614 it is also slow.
8615
8616 When optimizations are turned on, we delay issuing a trapb as long
8617 as possible. In this context, a trap shadow is the sequence of
8618 instructions that starts with a (potentially) trap generating
8619 instruction and extends to the next trapb or call_pal instruction
8620 (but GCC never generates call_pal by itself). We can delay (and
8621 therefore sometimes omit) a trapb subject to the following
8622 conditions:
8623
8624 (a) On entry to the trap shadow, if any Alpha register or memory
8625 location contains a value that is used as an operand value by some
8626 instruction in the trap shadow (live on entry), then no instruction
8627 in the trap shadow may modify the register or memory location.
8628
8629 (b) Within the trap shadow, the computation of the base register
8630 for a memory load or store instruction may not involve using the
8631 result of an instruction that might generate an UNPREDICTABLE
8632 result.
8633
8634 (c) Within the trap shadow, no register may be used more than once
8635 as a destination register. (This is to make life easier for the
8636 trap-handler.)
8637
8638 (d) The trap shadow may not include any branch instructions. */
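/* For instance, condition (c) is what the (sum.defd & shadow.defd) tests
below enforce: if an insn inside the shadow writes a register that an
earlier insn in the same shadow already wrote, we close the shadow by
emitting a trapb in front of it. */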
8639
8640 static void
8641 alpha_handle_trap_shadows (void)
8642 {
8643 struct shadow_summary shadow;
8644 int trap_pending, exception_nesting;
8645 rtx_insn *i, *n;
8646
8647 trap_pending = 0;
8648 exception_nesting = 0;
8649 shadow.used.i = 0;
8650 shadow.used.fp = 0;
8651 shadow.used.mem = 0;
8652 shadow.defd = shadow.used;
8653
8654 for (i = get_insns (); i ; i = NEXT_INSN (i))
8655 {
8656 if (NOTE_P (i))
8657 {
8658 switch (NOTE_KIND (i))
8659 {
8660 case NOTE_INSN_EH_REGION_BEG:
8661 exception_nesting++;
8662 if (trap_pending)
8663 goto close_shadow;
8664 break;
8665
8666 case NOTE_INSN_EH_REGION_END:
8667 exception_nesting--;
8668 if (trap_pending)
8669 goto close_shadow;
8670 break;
8671
8672 case NOTE_INSN_EPILOGUE_BEG:
8673 if (trap_pending && alpha_tp >= ALPHA_TP_FUNC)
8674 goto close_shadow;
8675 break;
8676 }
8677 }
8678 else if (trap_pending)
8679 {
8680 if (alpha_tp == ALPHA_TP_FUNC)
8681 {
8682 if (JUMP_P (i)
8683 && GET_CODE (PATTERN (i)) == RETURN)
8684 goto close_shadow;
8685 }
8686 else if (alpha_tp == ALPHA_TP_INSN)
8687 {
8688 if (optimize > 0)
8689 {
8690 struct shadow_summary sum;
8691
8692 sum.used.i = 0;
8693 sum.used.fp = 0;
8694 sum.used.mem = 0;
8695 sum.defd = sum.used;
8696
8697 switch (GET_CODE (i))
8698 {
8699 case INSN:
8700 /* Annoyingly, get_attr_trap will die on these. */
8701 if (GET_CODE (PATTERN (i)) == USE
8702 || GET_CODE (PATTERN (i)) == CLOBBER)
8703 break;
8704
8705 summarize_insn (PATTERN (i), &sum, 0);
8706
8707 if ((sum.defd.i & shadow.defd.i)
8708 || (sum.defd.fp & shadow.defd.fp))
8709 {
8710 /* (c) would be violated */
8711 goto close_shadow;
8712 }
8713
8714 /* Combine shadow with summary of current insn: */
8715 shadow.used.i |= sum.used.i;
8716 shadow.used.fp |= sum.used.fp;
8717 shadow.used.mem |= sum.used.mem;
8718 shadow.defd.i |= sum.defd.i;
8719 shadow.defd.fp |= sum.defd.fp;
8720 shadow.defd.mem |= sum.defd.mem;
8721
8722 if ((sum.defd.i & shadow.used.i)
8723 || (sum.defd.fp & shadow.used.fp)
8724 || (sum.defd.mem & shadow.used.mem))
8725 {
8726 /* (a) would be violated (also takes care of (b)) */
8727 gcc_assert (get_attr_trap (i) != TRAP_YES
8728 || (!(sum.defd.i & sum.used.i)
8729 && !(sum.defd.fp & sum.used.fp)));
8730
8731 goto close_shadow;
8732 }
8733 break;
8734
8735 case BARRIER:
8736 /* __builtin_unreachable can expand to no code at all,
8737 leaving (barrier) RTXes in the instruction stream. */
8738 goto close_shadow_notrapb;
8739
8740 case JUMP_INSN:
8741 case CALL_INSN:
8742 case CODE_LABEL:
8743 goto close_shadow;
8744
8745 default:
8746 gcc_unreachable ();
8747 }
8748 }
8749 else
8750 {
8751 close_shadow:
8752 n = emit_insn_before (gen_trapb (), i);
8753 PUT_MODE (n, TImode);
8754 PUT_MODE (i, TImode);
8755 close_shadow_notrapb:
8756 trap_pending = 0;
8757 shadow.used.i = 0;
8758 shadow.used.fp = 0;
8759 shadow.used.mem = 0;
8760 shadow.defd = shadow.used;
8761 }
8762 }
8763 }
8764
8765 if ((exception_nesting > 0 || alpha_tp >= ALPHA_TP_FUNC)
8766 && NONJUMP_INSN_P (i)
8767 && GET_CODE (PATTERN (i)) != USE
8768 && GET_CODE (PATTERN (i)) != CLOBBER
8769 && get_attr_trap (i) == TRAP_YES)
8770 {
8771 if (optimize && !trap_pending)
8772 summarize_insn (PATTERN (i), &shadow, 0);
8773 trap_pending = 1;
8774 }
8775 }
8776 }
8777 \f
8778 /* The Alpha can only issue the instructions of a group simultaneously if
8779 the group is suitably aligned. This is very processor-specific. */
8780 /* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
8781 that are marked "fake". These instructions do not exist on that target,
8782 but it is possible to see these insns with deranged combinations of
8783 command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
8784 choose a result at random. */
8785
8786 enum alphaev4_pipe {
8787 EV4_STOP = 0,
8788 EV4_IB0 = 1,
8789 EV4_IB1 = 2,
8790 EV4_IBX = 4
8791 };
8792
8793 enum alphaev5_pipe {
8794 EV5_STOP = 0,
8795 EV5_NONE = 1,
8796 EV5_E01 = 2,
8797 EV5_E0 = 4,
8798 EV5_E1 = 8,
8799 EV5_FAM = 16,
8800 EV5_FA = 32,
8801 EV5_FM = 64
8802 };
8803
8804 static enum alphaev4_pipe
8805 alphaev4_insn_pipe (rtx_insn *insn)
8806 {
8807 if (recog_memoized (insn) < 0)
8808 return EV4_STOP;
8809 if (get_attr_length (insn) != 4)
8810 return EV4_STOP;
8811
8812 switch (get_attr_type (insn))
8813 {
8814 case TYPE_ILD:
8815 case TYPE_LDSYM:
8816 case TYPE_FLD:
8817 case TYPE_LD_L:
8818 return EV4_IBX;
8819
8820 case TYPE_IADD:
8821 case TYPE_ILOG:
8822 case TYPE_ICMOV:
8823 case TYPE_ICMP:
8824 case TYPE_FST:
8825 case TYPE_SHIFT:
8826 case TYPE_IMUL:
8827 case TYPE_FBR:
8828 case TYPE_MVI: /* fake */
8829 return EV4_IB0;
8830
8831 case TYPE_IST:
8832 case TYPE_MISC:
8833 case TYPE_IBR:
8834 case TYPE_JSR:
8835 case TYPE_CALLPAL:
8836 case TYPE_FCPYS:
8837 case TYPE_FCMOV:
8838 case TYPE_FADD:
8839 case TYPE_FDIV:
8840 case TYPE_FMUL:
8841 case TYPE_ST_C:
8842 case TYPE_MB:
8843 case TYPE_FSQRT: /* fake */
8844 case TYPE_FTOI: /* fake */
8845 case TYPE_ITOF: /* fake */
8846 return EV4_IB1;
8847
8848 default:
8849 gcc_unreachable ();
8850 }
8851 }
8852
8853 static enum alphaev5_pipe
8854 alphaev5_insn_pipe (rtx_insn *insn)
8855 {
8856 if (recog_memoized (insn) < 0)
8857 return EV5_STOP;
8858 if (get_attr_length (insn) != 4)
8859 return EV5_STOP;
8860
8861 switch (get_attr_type (insn))
8862 {
8863 case TYPE_ILD:
8864 case TYPE_FLD:
8865 case TYPE_LDSYM:
8866 case TYPE_IADD:
8867 case TYPE_ILOG:
8868 case TYPE_ICMOV:
8869 case TYPE_ICMP:
8870 return EV5_E01;
8871
8872 case TYPE_IST:
8873 case TYPE_FST:
8874 case TYPE_SHIFT:
8875 case TYPE_IMUL:
8876 case TYPE_MISC:
8877 case TYPE_MVI:
8878 case TYPE_LD_L:
8879 case TYPE_ST_C:
8880 case TYPE_MB:
8881 case TYPE_FTOI: /* fake */
8882 case TYPE_ITOF: /* fake */
8883 return EV5_E0;
8884
8885 case TYPE_IBR:
8886 case TYPE_JSR:
8887 case TYPE_CALLPAL:
8888 return EV5_E1;
8889
8890 case TYPE_FCPYS:
8891 return EV5_FAM;
8892
8893 case TYPE_FBR:
8894 case TYPE_FCMOV:
8895 case TYPE_FADD:
8896 case TYPE_FDIV:
8897 case TYPE_FSQRT: /* fake */
8898 return EV5_FA;
8899
8900 case TYPE_FMUL:
8901 return EV5_FM;
8902
8903 default:
8904 gcc_unreachable ();
8905 }
8906 }
8907
8908 /* IN_USE is a mask of the slots currently filled within the insn group.
8909 The mask bits come from alphaev4_pipe above. If EV4_IBX is set, then
8910 the insn in EV4_IB0 can be swapped by the hardware into EV4_IB1.
8911
8912 LEN is, of course, the length of the group in bytes. */
8913
8914 static rtx_insn *
8915 alphaev4_next_group (rtx_insn *insn, int *pin_use, int *plen)
8916 {
8917 int len, in_use;
8918
8919 len = in_use = 0;
8920
8921 if (! INSN_P (insn)
8922 || GET_CODE (PATTERN (insn)) == CLOBBER
8923 || GET_CODE (PATTERN (insn)) == USE)
8924 goto next_and_done;
8925
8926 while (1)
8927 {
8928 enum alphaev4_pipe pipe;
8929
8930 pipe = alphaev4_insn_pipe (insn);
8931 switch (pipe)
8932 {
8933 case EV4_STOP:
8934 /* Force complex instructions to start new groups. */
8935 if (in_use)
8936 goto done;
8937
8938 /* If this is a completely unrecognized insn, it's an asm.
8939 We don't know how long it is, so record length as -1 to
8940 signal a needed realignment. */
8941 if (recog_memoized (insn) < 0)
8942 len = -1;
8943 else
8944 len = get_attr_length (insn);
8945 goto next_and_done;
8946
8947 case EV4_IBX:
8948 if (in_use & EV4_IB0)
8949 {
8950 if (in_use & EV4_IB1)
8951 goto done;
8952 in_use |= EV4_IB1;
8953 }
8954 else
8955 in_use |= EV4_IB0 | EV4_IBX;
8956 break;
8957
8958 case EV4_IB0:
8959 if (in_use & EV4_IB0)
8960 {
8961 if (!(in_use & EV4_IBX) || (in_use & EV4_IB1))
8962 goto done;
8963 in_use |= EV4_IB1;
8964 }
8965 in_use |= EV4_IB0;
8966 break;
8967
8968 case EV4_IB1:
8969 if (in_use & EV4_IB1)
8970 goto done;
8971 in_use |= EV4_IB1;
8972 break;
8973
8974 default:
8975 gcc_unreachable ();
8976 }
8977 len += 4;
8978
8979 /* Haifa doesn't do well scheduling branches. */
8980 if (JUMP_P (insn))
8981 goto next_and_done;
8982
8983 next:
8984 insn = next_nonnote_insn (insn);
8985
8986 if (!insn || ! INSN_P (insn))
8987 goto done;
8988
8989 /* Let Haifa tell us where it thinks insn group boundaries are. */
8990 if (GET_MODE (insn) == TImode)
8991 goto done;
8992
8993 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
8994 goto next;
8995 }
8996
8997 next_and_done:
8998 insn = next_nonnote_insn (insn);
8999
9000 done:
9001 *plen = len;
9002 *pin_use = in_use;
9003 return insn;
9004 }
9005
9006 /* IN_USE is a mask of the slots currently filled within the insn group.
9007 The mask bits come from alphaev5_pipe above. If EV5_E01 is set, then
9008 the insn in EV5_E0 can be swapped by the hardware into EV5_E1.
9009
9010 LEN is, of course, the length of the group in bytes. */
9011
9012 static rtx_insn *
9013 alphaev5_next_group (rtx_insn *insn, int *pin_use, int *plen)
9014 {
9015 int len, in_use;
9016
9017 len = in_use = 0;
9018
9019 if (! INSN_P (insn)
9020 || GET_CODE (PATTERN (insn)) == CLOBBER
9021 || GET_CODE (PATTERN (insn)) == USE)
9022 goto next_and_done;
9023
9024 while (1)
9025 {
9026 enum alphaev5_pipe pipe;
9027
9028 pipe = alphaev5_insn_pipe (insn);
9029 switch (pipe)
9030 {
9031 case EV5_STOP:
9032 /* Force complex instructions to start new groups. */
9033 if (in_use)
9034 goto done;
9035
9036 /* If this is a completely unrecognized insn, it's an asm.
9037 We don't know how long it is, so record length as -1 to
9038 signal a needed realignment. */
9039 if (recog_memoized (insn) < 0)
9040 len = -1;
9041 else
9042 len = get_attr_length (insn);
9043 goto next_and_done;
9044
9045 /* ??? In most of the places below, we would like to assert that this
9046 never happens, as it would indicate an error either in Haifa or
9047 in the scheduling description. Unfortunately, Haifa never
9048 schedules the last instruction of the BB, so we don't have
9049 an accurate TI bit to go off of. */
9050 case EV5_E01:
9051 if (in_use & EV5_E0)
9052 {
9053 if (in_use & EV5_E1)
9054 goto done;
9055 in_use |= EV5_E1;
9056 }
9057 else
9058 in_use |= EV5_E0 | EV5_E01;
9059 break;
9060
9061 case EV5_E0:
9062 if (in_use & EV5_E0)
9063 {
9064 if (!(in_use & EV5_E01) || (in_use & EV5_E1))
9065 goto done;
9066 in_use |= EV5_E1;
9067 }
9068 in_use |= EV5_E0;
9069 break;
9070
9071 case EV5_E1:
9072 if (in_use & EV5_E1)
9073 goto done;
9074 in_use |= EV5_E1;
9075 break;
9076
9077 case EV5_FAM:
9078 if (in_use & EV5_FA)
9079 {
9080 if (in_use & EV5_FM)
9081 goto done;
9082 in_use |= EV5_FM;
9083 }
9084 else
9085 in_use |= EV5_FA | EV5_FAM;
9086 break;
9087
9088 case EV5_FA:
9089 if (in_use & EV5_FA)
9090 goto done;
9091 in_use |= EV5_FA;
9092 break;
9093
9094 case EV5_FM:
9095 if (in_use & EV5_FM)
9096 goto done;
9097 in_use |= EV5_FM;
9098 break;
9099
9100 case EV5_NONE:
9101 break;
9102
9103 default:
9104 gcc_unreachable ();
9105 }
9106 len += 4;
9107
9108 /* Haifa doesn't do well scheduling branches. */
9109 /* ??? If this is predicted not-taken, slotting continues, except
9110 that no more IBR, FBR, or JSR insns may be slotted. */
9111 if (JUMP_P (insn))
9112 goto next_and_done;
9113
9114 next:
9115 insn = next_nonnote_insn (insn);
9116
9117 if (!insn || ! INSN_P (insn))
9118 goto done;
9119
9120 /* Let Haifa tell us where it thinks insn group boundaries are. */
9121 if (GET_MODE (insn) == TImode)
9122 goto done;
9123
9124 if (GET_CODE (insn) == CLOBBER || GET_CODE (insn) == USE)
9125 goto next;
9126 }
9127
9128 next_and_done:
9129 insn = next_nonnote_insn (insn);
9130
9131 done:
9132 *plen = len;
9133 *pin_use = in_use;
9134 return insn;
9135 }
9136
9137 static rtx
9138 alphaev4_next_nop (int *pin_use)
9139 {
9140 int in_use = *pin_use;
9141 rtx nop;
9142
9143 if (!(in_use & EV4_IB0))
9144 {
9145 in_use |= EV4_IB0;
9146 nop = gen_nop ();
9147 }
9148 else if ((in_use & (EV4_IBX|EV4_IB1)) == EV4_IBX)
9149 {
9150 in_use |= EV4_IB1;
9151 nop = gen_nop ();
9152 }
9153 else if (TARGET_FP && !(in_use & EV4_IB1))
9154 {
9155 in_use |= EV4_IB1;
9156 nop = gen_fnop ();
9157 }
9158 else
9159 nop = gen_unop ();
9160
9161 *pin_use = in_use;
9162 return nop;
9163 }
9164
9165 static rtx
9166 alphaev5_next_nop (int *pin_use)
9167 {
9168 int in_use = *pin_use;
9169 rtx nop;
9170
9171 if (!(in_use & EV5_E1))
9172 {
9173 in_use |= EV5_E1;
9174 nop = gen_nop ();
9175 }
9176 else if (TARGET_FP && !(in_use & EV5_FA))
9177 {
9178 in_use |= EV5_FA;
9179 nop = gen_fnop ();
9180 }
9181 else if (TARGET_FP && !(in_use & EV5_FM))
9182 {
9183 in_use |= EV5_FM;
9184 nop = gen_fnop ();
9185 }
9186 else
9187 nop = gen_unop ();
9188
9189 *pin_use = in_use;
9190 return nop;
9191 }
9192
9193 /* The instruction group alignment main loop. */
9194
9195 static void
9196 alpha_align_insns_1 (unsigned int max_align,
9197 rtx_insn *(*next_group) (rtx_insn *, int *, int *),
9198 rtx (*next_nop) (int *))
9199 {
9200 /* ALIGN is the known alignment for the insn group. */
9201 unsigned int align;
9202 /* OFS is the offset of the current insn in the insn group. */
9203 int ofs;
9204 int prev_in_use, in_use, len, ldgp;
9205 rtx_insn *i, *next;
9206
9207 /* Let shorten_branches take care of assigning alignments to code labels. */
9208 shorten_branches (get_insns ());
9209
9210 if (align_functions < 4)
9211 align = 4;
9212 else if ((unsigned int) align_functions < max_align)
9213 align = align_functions;
9214 else
9215 align = max_align;
9216
9217 ofs = prev_in_use = 0;
9218 i = get_insns ();
9219 if (NOTE_P (i))
9220 i = next_nonnote_insn (i);
9221
9222 ldgp = alpha_function_needs_gp ? 8 : 0;
9223
9224 while (i)
9225 {
9226 next = (*next_group) (i, &in_use, &len);
9227
9228 /* When we see a label, resync alignment etc. */
9229 if (LABEL_P (i))
9230 {
9231 unsigned int new_align = 1 << label_to_alignment (i);
9232
9233 if (new_align >= align)
9234 {
9235 align = new_align < max_align ? new_align : max_align;
9236 ofs = 0;
9237 }
9238
9239 else if (ofs & (new_align-1))
9240 ofs = (ofs | (new_align-1)) + 1;
9241 gcc_assert (!len);
9242 }
9243
9244 /* Handle complex instructions specially. */
9245 else if (in_use == 0)
9246 {
9247 /* Asms will have length < 0. This is a signal that we have
9248 lost alignment knowledge. Assume, however, that the asm
9249 will not mis-align instructions. */
9250 if (len < 0)
9251 {
9252 ofs = 0;
9253 align = 4;
9254 len = 0;
9255 }
9256 }
9257
9258 /* If the known alignment is smaller than the recognized insn group,
9259 realign the output. */
9260 else if ((int) align < len)
9261 {
9262 unsigned int new_log_align = len > 8 ? 4 : 3;
9263 rtx_insn *prev, *where;
9264
9265 where = prev = prev_nonnote_insn (i);
9266 if (!where || !LABEL_P (where))
9267 where = i;
9268
9269 /* Can't realign between a call and its gp reload. */
9270 if (! (TARGET_EXPLICIT_RELOCS
9271 && prev && CALL_P (prev)))
9272 {
9273 emit_insn_before (gen_realign (GEN_INT (new_log_align)), where);
9274 align = 1 << new_log_align;
9275 ofs = 0;
9276 }
9277 }
9278
9279 /* We may not insert padding inside the initial ldgp sequence. */
9280 else if (ldgp > 0)
9281 ldgp -= len;
9282
9283 /* If the group won't fit in the same INT16 as the previous,
9284 we need to add padding to keep the group together. Rather
9285 than simply leaving the insn filling to the assembler, we
9286 can make use of the knowledge of what sorts of instructions
9287 were issued in the previous group to make sure that all of
9288 the added nops are really free. */
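/* For example, on EV4 with align == 8 and ofs == 4, a group of len == 8
does not fit (4 + 8 > 8), so (8 - 4) / 4 == 1 nop is emitted, and next_nop
picks a nop, fnop or unop that occupies an issue slot the previous group
left unused. */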
9289 else if (ofs + len > (int) align)
9290 {
9291 int nop_count = (align - ofs) / 4;
9292 rtx_insn *where;
9293
9294 /* Insert nops before labels, branches, and calls to truly merge
9295 the execution of the nops with the previous instruction group. */
9296 where = prev_nonnote_insn (i);
9297 if (where)
9298 {
9299 if (LABEL_P (where))
9300 {
9301 rtx_insn *where2 = prev_nonnote_insn (where);
9302 if (where2 && JUMP_P (where2))
9303 where = where2;
9304 }
9305 else if (NONJUMP_INSN_P (where))
9306 where = i;
9307 }
9308 else
9309 where = i;
9310
9311 do
9312 emit_insn_before ((*next_nop)(&prev_in_use), where);
9313 while (--nop_count);
9314 ofs = 0;
9315 }
9316
9317 ofs = (ofs + len) & (align - 1);
9318 prev_in_use = in_use;
9319 i = next;
9320 }
9321 }
9322
9323 static void
9324 alpha_align_insns (void)
9325 {
9326 if (alpha_tune == PROCESSOR_EV4)
9327 alpha_align_insns_1 (8, alphaev4_next_group, alphaev4_next_nop);
9328 else if (alpha_tune == PROCESSOR_EV5)
9329 alpha_align_insns_1 (16, alphaev5_next_group, alphaev5_next_nop);
9330 else
9331 gcc_unreachable ();
9332 }
9333
9334 /* Insert an unop between a sibcall or noreturn function call and the GP load. */
9335
9336 static void
9337 alpha_pad_function_end (void)
9338 {
9339 rtx_insn *insn, *next;
9340
9341 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9342 {
9343 if (!CALL_P (insn)
9344 || !(SIBLING_CALL_P (insn)
9345 || find_reg_note (insn, REG_NORETURN, NULL_RTX)))
9346 continue;
9347
9348 /* Make sure we do not split a call and its corresponding
9349 CALL_ARG_LOCATION note. */
9350 next = NEXT_INSN (insn);
9351 if (next == NULL)
9352 continue;
9353 if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
9354 insn = next;
9355
9356 next = next_active_insn (insn);
9357 if (next)
9358 {
9359 rtx pat = PATTERN (next);
9360
9361 if (GET_CODE (pat) == SET
9362 && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
9363 && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1)
9364 emit_insn_after (gen_unop (), insn);
9365 }
9366 }
9367 }
9368 \f
9369 /* Machine dependent reorg pass. */
9370
9371 static void
9372 alpha_reorg (void)
9373 {
9374 /* Workaround for a linker error that triggers when an exception
9375 handler immediately follows a sibcall or a noreturn function.
9376
9377 In the sibcall case:
9378
9379 The instruction stream from an object file:
9380
9381 1d8: 00 00 fb 6b jmp (t12)
9382 1dc: 00 00 ba 27 ldah gp,0(ra)
9383 1e0: 00 00 bd 23 lda gp,0(gp)
9384 1e4: 00 00 7d a7 ldq t12,0(gp)
9385 1e8: 00 40 5b 6b jsr ra,(t12),1ec <__funcZ+0x1ec>
9386
9387 was converted in the final link pass to:
9388
9389 12003aa88: 67 fa ff c3 br 120039428 <...>
9390 12003aa8c: 00 00 fe 2f unop
9391 12003aa90: 00 00 fe 2f unop
9392 12003aa94: 48 83 7d a7 ldq t12,-31928(gp)
9393 12003aa98: 00 40 5b 6b jsr ra,(t12),12003aa9c <__func+0x1ec>
9394
9395 And in the noreturn case:
9396
9397 The instruction stream from an object file:
9398
9399 54: 00 40 5b 6b jsr ra,(t12),58 <__func+0x58>
9400 58: 00 00 ba 27 ldah gp,0(ra)
9401 5c: 00 00 bd 23 lda gp,0(gp)
9402 60: 00 00 7d a7 ldq t12,0(gp)
9403 64: 00 40 5b 6b jsr ra,(t12),68 <__func+0x68>
9404
9405 was converted in the final link pass to:
9406
9407 fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8>
9408 fdb28: 00 00 fe 2f unop
9409 fdb2c: 00 00 fe 2f unop
9410 fdb30: 30 82 7d a7 ldq t12,-32208(gp)
9411 fdb34: 00 40 5b 6b jsr ra,(t12),fdb38 <__func+0x68>
9412
9413 GP load instructions were wrongly cleared by the linker relaxation
9414 pass. This workaround prevents removal of GP loads by inserting
9415 an unop instruction between a sibcall or noreturn function call and
9416 the exception handler prologue. */
9417
9418 if (current_function_has_exception_handlers ())
9419 alpha_pad_function_end ();
9420 }
9421 \f
9422 static void
9423 alpha_file_start (void)
9424 {
9425 default_file_start ();
9426
9427 fputs ("\t.set noreorder\n", asm_out_file);
9428 fputs ("\t.set volatile\n", asm_out_file);
9429 if (TARGET_ABI_OSF)
9430 fputs ("\t.set noat\n", asm_out_file);
9431 if (TARGET_EXPLICIT_RELOCS)
9432 fputs ("\t.set nomacro\n", asm_out_file);
9433 if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX)
9434 {
9435 const char *arch;
9436
9437 if (alpha_cpu == PROCESSOR_EV6 || TARGET_FIX || TARGET_CIX)
9438 arch = "ev6";
9439 else if (TARGET_MAX)
9440 arch = "pca56";
9441 else if (TARGET_BWX)
9442 arch = "ev56";
9443 else if (alpha_cpu == PROCESSOR_EV5)
9444 arch = "ev5";
9445 else
9446 arch = "ev4";
9447
9448 fprintf (asm_out_file, "\t.arch %s\n", arch);
9449 }
9450 }
9451
9452 /* Since we don't have a .dynbss section, we should not allow global
9453 relocations in the .rodata section. */
9454
9455 static int
9456 alpha_elf_reloc_rw_mask (void)
9457 {
9458 return flag_pic ? 3 : 2;
9459 }
9460
9461 /* Return a section for X. The only special thing we do here is to
9462 honor small data. */
9463
9464 static section *
9465 alpha_elf_select_rtx_section (machine_mode mode, rtx x,
9466 unsigned HOST_WIDE_INT align)
9467 {
9468 if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value)
9469 /* ??? Consider using mergeable sdata sections. */
9470 return sdata_section;
9471 else
9472 return default_elf_select_rtx_section (mode, x, align);
9473 }
9474
9475 static unsigned int
9476 alpha_elf_section_type_flags (tree decl, const char *name, int reloc)
9477 {
9478 unsigned int flags = 0;
9479
9480 if (strcmp (name, ".sdata") == 0
9481 || strncmp (name, ".sdata.", 7) == 0
9482 || strncmp (name, ".gnu.linkonce.s.", 16) == 0
9483 || strcmp (name, ".sbss") == 0
9484 || strncmp (name, ".sbss.", 6) == 0
9485 || strncmp (name, ".gnu.linkonce.sb.", 17) == 0)
9486 flags = SECTION_SMALL;
9487
9488 flags |= default_section_type_flags (decl, name, reloc);
9489 return flags;
9490 }
9491 \f
9492 /* Structure to collect function names for final output in link section. */
9493 /* Note that items marked with GTY can't be ifdef'ed out. */
9494
9495 enum reloc_kind
9496 {
9497 KIND_LINKAGE,
9498 KIND_CODEADDR
9499 };
9500
9501 struct GTY(()) alpha_links
9502 {
9503 rtx func;
9504 rtx linkage;
9505 enum reloc_kind rkind;
9506 };
9507
9508 #if TARGET_ABI_OPEN_VMS
9509
9510 /* Return the VMS argument type corresponding to MODE. */
9511
9512 enum avms_arg_type
9513 alpha_arg_type (machine_mode mode)
9514 {
9515 switch (mode)
9516 {
9517 case SFmode:
9518 return TARGET_FLOAT_VAX ? FF : FS;
9519 case DFmode:
9520 return TARGET_FLOAT_VAX ? FD : FT;
9521 default:
9522 return I64;
9523 }
9524 }
9525
9526 /* Return an rtx for an integer representing the VMS Argument Information
9527 register value. */
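/* The argument count occupies the low bits and each of the six 3-bit type
codes is shifted to bit position i * 3 + 8; e.g. the type code of the third
argument (i == 2) lands in bits 14..16. */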
9528
9529 rtx
9530 alpha_arg_info_reg_val (CUMULATIVE_ARGS cum)
9531 {
9532 unsigned HOST_WIDE_INT regval = cum.num_args;
9533 int i;
9534
9535 for (i = 0; i < 6; i++)
9536 regval |= ((int) cum.atypes[i]) << (i * 3 + 8);
9537
9538 return GEN_INT (regval);
9539 }
9540 \f
9541
9542 /* Return a SYMBOL_REF representing the reference to the .linkage entry
9543 of function FUNC built for calls made from CFUNDECL. LFLAG is 1 if
9544 this is the reference to the linkage pointer value, 0 if this is the
9545 reference to the function entry value. RFLAG is 1 if this a reduced
9546 reference (code address only), 0 if this is a full reference. */
9547
9548 rtx
9549 alpha_use_linkage (rtx func, bool lflag, bool rflag)
9550 {
9551 struct alpha_links *al = NULL;
9552 const char *name = XSTR (func, 0);
9553
9554 if (cfun->machine->links)
9555 {
9556 /* Is this name already defined? */
9557 alpha_links **slot = cfun->machine->links->get (name);
9558 if (slot)
9559 al = *slot;
9560 }
9561 else
9562 cfun->machine->links
9563 = hash_map<const char *, alpha_links *, string_traits>::create_ggc (64);
9564
9565 if (al == NULL)
9566 {
9567 size_t buf_len;
9568 char *linksym;
9569 tree id;
9570
9571 if (name[0] == '*')
9572 name++;
9573
9574 /* Follow transparent alias, as this is used for CRTL translations. */
9575 id = maybe_get_identifier (name);
9576 if (id)
9577 {
9578 while (IDENTIFIER_TRANSPARENT_ALIAS (id))
9579 id = TREE_CHAIN (id);
9580 name = IDENTIFIER_POINTER (id);
9581 }
9582
9583 buf_len = strlen (name) + 8 + 9;
9584 linksym = (char *) alloca (buf_len);
9585 snprintf (linksym, buf_len, "$%d..%s..lk", cfun->funcdef_no, name);
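/* E.g. with cfun->funcdef_no == 5 and name "foo" this yields the linkage
symbol "$5..foo..lk". */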
9586
9587 al = ggc_alloc<alpha_links> ();
9588 al->func = func;
9589 al->linkage = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (linksym));
9590
9591 cfun->machine->links->put (ggc_strdup (name), al);
9592 }
9593
9594 al->rkind = rflag ? KIND_CODEADDR : KIND_LINKAGE;
9595
9596 if (lflag)
9597 return gen_rtx_MEM (Pmode, plus_constant (Pmode, al->linkage, 8));
9598 else
9599 return al->linkage;
9600 }
9601
9602 static int
9603 alpha_write_one_linkage (const char *name, alpha_links *link, FILE *stream)
9604 {
9605 ASM_OUTPUT_INTERNAL_LABEL (stream, XSTR (link->linkage, 0));
9606 if (link->rkind == KIND_CODEADDR)
9607 {
9608 /* External and used, request code address. */
9609 fprintf (stream, "\t.code_address ");
9610 }
9611 else
9612 {
9613 if (!SYMBOL_REF_EXTERNAL_P (link->func)
9614 && SYMBOL_REF_LOCAL_P (link->func))
9615 {
9616 /* Locally defined, build linkage pair. */
9617 fprintf (stream, "\t.quad %s..en\n", name);
9618 fprintf (stream, "\t.quad ");
9619 }
9620 else
9621 {
9622 /* External, request linkage pair. */
9623 fprintf (stream, "\t.linkage ");
9624 }
9625 }
9626 assemble_name (stream, name);
9627 fputs ("\n", stream);
9628
9629 return 0;
9630 }
9631
9632 static void
9633 alpha_write_linkage (FILE *stream, const char *funname)
9634 {
9635 fprintf (stream, "\t.link\n");
9636 fprintf (stream, "\t.align 3\n");
9637 in_section = NULL;
9638
9639 #ifdef TARGET_VMS_CRASH_DEBUG
9640 fputs ("\t.name ", stream);
9641 assemble_name (stream, funname);
9642 fputs ("..na\n", stream);
9643 #endif
9644
9645 ASM_OUTPUT_LABEL (stream, funname);
9646 fprintf (stream, "\t.pdesc ");
9647 assemble_name (stream, funname);
9648 fprintf (stream, "..en,%s\n",
9649 alpha_procedure_type == PT_STACK ? "stack"
9650 : alpha_procedure_type == PT_REGISTER ? "reg" : "null");
9651
9652 if (cfun->machine->links)
9653 {
9654 hash_map<const char *, alpha_links *, string_traits>::iterator iter
9655 = cfun->machine->links->begin ();
9656 for (; iter != cfun->machine->links->end (); ++iter)
9657 alpha_write_one_linkage ((*iter).first, (*iter).second, stream);
9658 }
9659 }
9660
9661 /* Switch to an arbitrary section NAME with attributes as specified
9662 by FLAGS. */
9664
9665 static void
9666 vms_asm_named_section (const char *name, unsigned int flags,
9667 tree decl ATTRIBUTE_UNUSED)
9668 {
9669 fputc ('\n', asm_out_file);
9670 fprintf (asm_out_file, ".section\t%s", name);
9671
9672 if (flags & SECTION_DEBUG)
9673 fprintf (asm_out_file, ",NOWRT");
9674
9675 fputc ('\n', asm_out_file);
9676 }
9677
9678 /* Record an element in the table of global constructors. SYMBOL is
9679 a SYMBOL_REF of the function to be called; PRIORITY is a number
9680 between 0 and MAX_INIT_PRIORITY.
9681
9682 Differs from default_ctors_section_asm_out_constructor in that the
9683 width of the .ctors entry is always 64 bits, rather than the 32 bits
9684 used by a normal pointer. */
9685
9686 static void
9687 vms_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9688 {
9689 switch_to_section (ctors_section);
9690 assemble_align (BITS_PER_WORD);
9691 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9692 }
9693
9694 static void
9695 vms_asm_out_destructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
9696 {
9697 switch_to_section (dtors_section);
9698 assemble_align (BITS_PER_WORD);
9699 assemble_integer (symbol, UNITS_PER_WORD, BITS_PER_WORD, 1);
9700 }
9701 #else
9702 rtx
9703 alpha_use_linkage (rtx func ATTRIBUTE_UNUSED,
9704 bool lflag ATTRIBUTE_UNUSED,
9705 bool rflag ATTRIBUTE_UNUSED)
9706 {
9707 return NULL_RTX;
9708 }
9709
9710 #endif /* TARGET_ABI_OPEN_VMS */
9711 \f
9712 static void
9713 alpha_init_libfuncs (void)
9714 {
9715 if (TARGET_ABI_OPEN_VMS)
9716 {
9717 /* Use the VMS runtime library functions for division and
9718 remainder. */
9719 set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
9720 set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
9721 set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
9722 set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
9723 set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
9724 set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
9725 set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
9726 set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
9727 abort_libfunc = init_one_libfunc ("decc$abort");
9728 memcmp_libfunc = init_one_libfunc ("decc$memcmp");
9729 #ifdef MEM_LIBFUNCS_INIT
9730 MEM_LIBFUNCS_INIT;
9731 #endif
9732 }
9733 }
9734
9735 /* On the Alpha, we use this to disable the floating-point registers
9736 when they don't exist. */
9737
9738 static void
9739 alpha_conditional_register_usage (void)
9740 {
9741 int i;
9742 if (! TARGET_FPREGS)
9743 for (i = 32; i < 63; i++)
9744 fixed_regs[i] = call_used_regs[i] = 1;
9745 }
9746
9747 /* Canonicalize a comparison from one we don't have to one we do have. */
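/* E.g. (gt x y) becomes (lt y x) when the operands may be swapped, and
(lt x 256) becomes (le x 255), 255 (unlike 256) presumably fitting the
8-bit literal field of the compare instructions. */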
9748
9749 static void
9750 alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
9751 bool op0_preserve_value)
9752 {
9753 if (!op0_preserve_value
9754 && (*code == GE || *code == GT || *code == GEU || *code == GTU)
9755 && (REG_P (*op1) || *op1 == const0_rtx))
9756 {
9757 rtx tem = *op0;
9758 *op0 = *op1;
9759 *op1 = tem;
9760 *code = (int)swap_condition ((enum rtx_code)*code);
9761 }
9762
9763 if ((*code == LT || *code == LTU)
9764 && CONST_INT_P (*op1) && INTVAL (*op1) == 256)
9765 {
9766 *code = *code == LT ? LE : LEU;
9767 *op1 = GEN_INT (255);
9768 }
9769 }
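
/* Worked example, added editorially: with op0_preserve_value false,
   "x > y" (GT) is rewritten as "y < x" (LT), which the Alpha compare
   patterns handle directly.  Similarly "x < 256" becomes "x <= 255",
   since 255 fits in the 8-bit literal field of the Alpha compare
   instructions while 256 does not.  */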
9770
9771 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
9772
9773 static void
9774 alpha_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
9775 {
9776 const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17);
9777
9778 tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv;
9779 tree new_fenv_var, reload_fenv, restore_fnenv;
9780 tree update_call, atomic_feraiseexcept, hold_fnclex;
9781
9782 /* Assume OSF/1 compatible interfaces. */
9783 if (!TARGET_ABI_OSF)
9784 return;
9785
9786 /* Generate the equivalent of:
9787 unsigned long fenv_var;
9788 fenv_var = __ieee_get_fp_control ();
9789
9790 unsigned long masked_fenv;
9791 masked_fenv = fenv_var & mask;
9792
9793 __ieee_set_fp_control (masked_fenv); */
9794
9795 fenv_var = create_tmp_var (long_unsigned_type_node);
9796 get_fpscr
9797 = build_fn_decl ("__ieee_get_fp_control",
9798 build_function_type_list (long_unsigned_type_node, NULL));
9799 set_fpscr
9800 = build_fn_decl ("__ieee_set_fp_control",
9801 build_function_type_list (void_type_node, NULL));
9802 mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK);
9803 ld_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node,
9804 fenv_var, build_call_expr (get_fpscr, 0));
9805 masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask);
9806 hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv);
9807 *hold = build2 (COMPOUND_EXPR, void_type_node,
9808 build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
9809 hold_fnclex);
9810
9811 /* Store masked_fenv into the FP control register to clear the exceptions:
9812 __ieee_set_fp_control (masked_fenv); */
9813
9814 *clear = build_call_expr (set_fpscr, 1, masked_fenv);
9815
9816 /* Generate the equivalent of:
9817 unsigned long new_fenv_var;
9818 new_fenv_var = __ieee_get_fp_control ();
9819
9820 __ieee_set_fp_control (fenv_var);
9821
9822 __atomic_feraiseexcept (new_fenv_var); */
9823
9824 new_fenv_var = create_tmp_var (long_unsigned_type_node);
9825 reload_fenv = build2 (MODIFY_EXPR, long_unsigned_type_node, new_fenv_var,
9826 build_call_expr (get_fpscr, 0));
9827 restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var);
9828 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
9829 update_call
9830 = build_call_expr (atomic_feraiseexcept, 1,
9831 fold_convert (integer_type_node, new_fenv_var));
9832 *update = build2 (COMPOUND_EXPR, void_type_node,
9833 build2 (COMPOUND_EXPR, void_type_node,
9834 reload_fenv, restore_fnenv), update_call);
9835 }
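
/* Context note, added editorially: the three sequences built above bracket
   the compare-and-exchange loop that implements a C11 atomic compound
   assignment on a floating-point object, e.g.

	_Atomic double d;
	d += 1.0;

   *hold saves and masks the FP status before the loop (like feholdexcept),
   *clear re-clears the accrued exceptions when an iteration must be
   retried (like feclearexcept), and *update restores the saved state and
   re-raises the exceptions of the successful iteration through
   __atomic_feraiseexcept (like feupdateenv).  */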
9836 \f
9837 /* Initialize the GCC target structure. */
9838 #if TARGET_ABI_OPEN_VMS
9839 # undef TARGET_ATTRIBUTE_TABLE
9840 # define TARGET_ATTRIBUTE_TABLE vms_attribute_table
9841 # undef TARGET_CAN_ELIMINATE
9842 # define TARGET_CAN_ELIMINATE alpha_vms_can_eliminate
9843 #endif
9844
9845 #undef TARGET_IN_SMALL_DATA_P
9846 #define TARGET_IN_SMALL_DATA_P alpha_in_small_data_p
9847
9848 #undef TARGET_ASM_ALIGNED_HI_OP
9849 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
9850 #undef TARGET_ASM_ALIGNED_DI_OP
9851 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
9852
9853 /* Default unaligned ops are provided for ELF systems. To get unaligned
9854 data for non-ELF systems, we have to turn off auto alignment. */
9855 #if TARGET_ABI_OPEN_VMS
9856 #undef TARGET_ASM_UNALIGNED_HI_OP
9857 #define TARGET_ASM_UNALIGNED_HI_OP "\t.align 0\n\t.word\t"
9858 #undef TARGET_ASM_UNALIGNED_SI_OP
9859 #define TARGET_ASM_UNALIGNED_SI_OP "\t.align 0\n\t.long\t"
9860 #undef TARGET_ASM_UNALIGNED_DI_OP
9861 #define TARGET_ASM_UNALIGNED_DI_OP "\t.align 0\n\t.quad\t"
9862 #endif
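
/* Note added for clarity: with the definitions above, an unaligned 64-bit
   datum is emitted as something like

	.align 0
	.quad	sym

   where the ".align 0" keeps the assembler from padding the location to
   the datum's natural alignment.  */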
9863
9864 #undef TARGET_ASM_RELOC_RW_MASK
9865 #define TARGET_ASM_RELOC_RW_MASK alpha_elf_reloc_rw_mask
9866 #undef TARGET_ASM_SELECT_RTX_SECTION
9867 #define TARGET_ASM_SELECT_RTX_SECTION alpha_elf_select_rtx_section
9868 #undef TARGET_SECTION_TYPE_FLAGS
9869 #define TARGET_SECTION_TYPE_FLAGS alpha_elf_section_type_flags
9870
9871 #undef TARGET_ASM_FUNCTION_END_PROLOGUE
9872 #define TARGET_ASM_FUNCTION_END_PROLOGUE alpha_output_function_end_prologue
9873
9874 #undef TARGET_INIT_LIBFUNCS
9875 #define TARGET_INIT_LIBFUNCS alpha_init_libfuncs
9876
9877 #undef TARGET_LEGITIMIZE_ADDRESS
9878 #define TARGET_LEGITIMIZE_ADDRESS alpha_legitimize_address
9879 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
9880 #define TARGET_MODE_DEPENDENT_ADDRESS_P alpha_mode_dependent_address_p
9881
9882 #undef TARGET_ASM_FILE_START
9883 #define TARGET_ASM_FILE_START alpha_file_start
9884
9885 #undef TARGET_SCHED_ADJUST_COST
9886 #define TARGET_SCHED_ADJUST_COST alpha_adjust_cost
9887 #undef TARGET_SCHED_ISSUE_RATE
9888 #define TARGET_SCHED_ISSUE_RATE alpha_issue_rate
9889 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
9890 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
9891 alpha_multipass_dfa_lookahead
9892
9893 #undef TARGET_HAVE_TLS
9894 #define TARGET_HAVE_TLS HAVE_AS_TLS
9895
9896 #undef TARGET_BUILTIN_DECL
9897 #define TARGET_BUILTIN_DECL alpha_builtin_decl
9898 #undef TARGET_INIT_BUILTINS
9899 #define TARGET_INIT_BUILTINS alpha_init_builtins
9900 #undef TARGET_EXPAND_BUILTIN
9901 #define TARGET_EXPAND_BUILTIN alpha_expand_builtin
9902 #undef TARGET_FOLD_BUILTIN
9903 #define TARGET_FOLD_BUILTIN alpha_fold_builtin
9904 #undef TARGET_GIMPLE_FOLD_BUILTIN
9905 #define TARGET_GIMPLE_FOLD_BUILTIN alpha_gimple_fold_builtin
9906
9907 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9908 #define TARGET_FUNCTION_OK_FOR_SIBCALL alpha_function_ok_for_sibcall
9909 #undef TARGET_CANNOT_COPY_INSN_P
9910 #define TARGET_CANNOT_COPY_INSN_P alpha_cannot_copy_insn_p
9911 #undef TARGET_LEGITIMATE_CONSTANT_P
9912 #define TARGET_LEGITIMATE_CONSTANT_P alpha_legitimate_constant_p
9913 #undef TARGET_CANNOT_FORCE_CONST_MEM
9914 #define TARGET_CANNOT_FORCE_CONST_MEM alpha_cannot_force_const_mem
9915
9916 #if TARGET_ABI_OSF
9917 #undef TARGET_ASM_OUTPUT_MI_THUNK
9918 #define TARGET_ASM_OUTPUT_MI_THUNK alpha_output_mi_thunk_osf
9919 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9920 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
9921 #undef TARGET_STDARG_OPTIMIZE_HOOK
9922 #define TARGET_STDARG_OPTIMIZE_HOOK alpha_stdarg_optimize_hook
9923 #endif
9924
9925 /* Use 16-bit section anchors. */
9926 #undef TARGET_MIN_ANCHOR_OFFSET
9927 #define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1
9928 #undef TARGET_MAX_ANCHOR_OFFSET
9929 #define TARGET_MAX_ANCHOR_OFFSET 0x7fff
9930 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9931 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
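
/* Note added for clarity: the anchor range above matches the signed 16-bit
   displacement of the Alpha memory-format instructions, so an object
   within 32 KiB of an anchor symbol can be addressed with a single memory
   instruction off the anchor's address.  */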
9932
9933 #undef TARGET_RTX_COSTS
9934 #define TARGET_RTX_COSTS alpha_rtx_costs
9935 #undef TARGET_ADDRESS_COST
9936 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
9937
9938 #undef TARGET_MACHINE_DEPENDENT_REORG
9939 #define TARGET_MACHINE_DEPENDENT_REORG alpha_reorg
9940
9941 #undef TARGET_PROMOTE_FUNCTION_MODE
9942 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
9943 #undef TARGET_PROMOTE_PROTOTYPES
9944 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
9945 #undef TARGET_RETURN_IN_MEMORY
9946 #define TARGET_RETURN_IN_MEMORY alpha_return_in_memory
9947 #undef TARGET_PASS_BY_REFERENCE
9948 #define TARGET_PASS_BY_REFERENCE alpha_pass_by_reference
9949 #undef TARGET_SETUP_INCOMING_VARARGS
9950 #define TARGET_SETUP_INCOMING_VARARGS alpha_setup_incoming_varargs
9951 #undef TARGET_STRICT_ARGUMENT_NAMING
9952 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
9953 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
9954 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
9955 #undef TARGET_SPLIT_COMPLEX_ARG
9956 #define TARGET_SPLIT_COMPLEX_ARG alpha_split_complex_arg
9957 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9958 #define TARGET_GIMPLIFY_VA_ARG_EXPR alpha_gimplify_va_arg
9959 #undef TARGET_ARG_PARTIAL_BYTES
9960 #define TARGET_ARG_PARTIAL_BYTES alpha_arg_partial_bytes
9961 #undef TARGET_FUNCTION_ARG
9962 #define TARGET_FUNCTION_ARG alpha_function_arg
9963 #undef TARGET_FUNCTION_ARG_ADVANCE
9964 #define TARGET_FUNCTION_ARG_ADVANCE alpha_function_arg_advance
9965 #undef TARGET_TRAMPOLINE_INIT
9966 #define TARGET_TRAMPOLINE_INIT alpha_trampoline_init
9967
9968 #undef TARGET_INSTANTIATE_DECLS
9969 #define TARGET_INSTANTIATE_DECLS alpha_instantiate_decls
9970
9971 #undef TARGET_SECONDARY_RELOAD
9972 #define TARGET_SECONDARY_RELOAD alpha_secondary_reload
9973
9974 #undef TARGET_SCALAR_MODE_SUPPORTED_P
9975 #define TARGET_SCALAR_MODE_SUPPORTED_P alpha_scalar_mode_supported_p
9976 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9977 #define TARGET_VECTOR_MODE_SUPPORTED_P alpha_vector_mode_supported_p
9978
9979 #undef TARGET_BUILD_BUILTIN_VA_LIST
9980 #define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
9981
9982 #undef TARGET_EXPAND_BUILTIN_VA_START
9983 #define TARGET_EXPAND_BUILTIN_VA_START alpha_va_start
9984
9985 #undef TARGET_OPTION_OVERRIDE
9986 #define TARGET_OPTION_OVERRIDE alpha_option_override
9987
9988 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9989 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9990 alpha_override_options_after_change
9991
9992 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
9993 #undef TARGET_MANGLE_TYPE
9994 #define TARGET_MANGLE_TYPE alpha_mangle_type
9995 #endif
9996
9997 #undef TARGET_LEGITIMATE_ADDRESS_P
9998 #define TARGET_LEGITIMATE_ADDRESS_P alpha_legitimate_address_p
9999
10000 #undef TARGET_CONDITIONAL_REGISTER_USAGE
10001 #define TARGET_CONDITIONAL_REGISTER_USAGE alpha_conditional_register_usage
10002
10003 #undef TARGET_CANONICALIZE_COMPARISON
10004 #define TARGET_CANONICALIZE_COMPARISON alpha_canonicalize_comparison
10005
10006 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10007 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV alpha_atomic_assign_expand_fenv
10008
10009 struct gcc_target targetm = TARGET_INITIALIZER;
10010
10011 \f
10012 #include "gt-alpha.h"