/* Output routines for GCC for ARM.
- Copyright (C) 1991-2018 Free Software Foundation, Inc.
+ Copyright (C) 1991-2019 Free Software Foundation, Inc.
Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
and Martin Simmons (@harleqn.co.uk).
More major hacks by Richard Earnshaw (rearnsha@arm.com).
/* The following are used in the arm.md file as equivalents to bits
in the above two flag variables. */
-/* Nonzero if this chip supports the ARM Architecture 3M extensions. */
-int arm_arch3m = 0;
-
/* Nonzero if this chip supports the ARM Architecture 4 extensions. */
int arm_arch4 = 0;
/* Nonzero if this chip supports the ARM Architecture 4t extensions. */
int arm_arch4t = 0;
-/* Nonzero if this chip supports the ARM Architecture 5 extensions. */
-int arm_arch5 = 0;
-
-/* Nonzero if this chip supports the ARM Architecture 5E extensions. */
-int arm_arch5e = 0;
+/* Nonzero if this chip supports the ARM Architecture 5T extensions. */
+int arm_arch5t = 0;
/* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
int arm_arch5te = 0;
/* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
int arm_arch8_2 = 0;
+/* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
+int arm_arch8_3 = 0;
+
+/* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
+int arm_arch8_4 = 0;
+
/* Nonzero if this chip supports the FP16 instructions extension of ARM
Architecture 8.2. */
int arm_fp16_inst = 0;
set_conv_libfunc (optable, to, from, buffer);
}
-/* Set up library functions unique to ARM. */
+static GTY(()) rtx speculation_barrier_libfunc;
+/* Set up library functions unique to ARM. */
static void
arm_init_libfuncs (void)
{
if (TARGET_AAPCS_BASED)
synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
+
+ speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}
/* On AAPCS systems, this is the "struct __va_list". */
&& write_symbols != NO_DEBUG
&& !TARGET_APCS_FRAME
&& (TARGET_DEFAULT & MASK_APCS_FRAME))
- warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
+ warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
+ "debugging");
/* iWMMXt unsupported under Thumb mode. */
if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
error ("iWMMXt unsupported under Thumb mode");
if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
- error ("can not use -mtp=cp15 with 16-bit Thumb");
+ error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
{
flag_pic = 0;
}
- /* We only support -mpure-code and -mslow-flash-data on M-profile targets
- with MOVT. */
- if ((target_pure_code || target_slow_flash_data)
- && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
+ if (target_pure_code || target_slow_flash_data)
{
const char *flag = (target_pure_code ? "-mpure-code" :
"-mslow-flash-data");
- error ("%s only supports non-pic code on M-profile targets with the "
- "MOVT instruction", flag);
- }
+ /* We only support -mpure-code and -mslow-flash-data on M-profile targets
+ with MOVT. */
+ if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
+ error ("%s only supports non-pic code on M-profile targets with the "
+ "MOVT instruction", flag);
+
+ /* Cannot load addresses: -mslow-flash-data forbids literal pool and
+ -mword-relocations forbids relocation of MOVT/MOVW. */
+ if (target_word_relocations)
+ error ("%s incompatible with %<-mword-relocations%>", flag);
+ }
}
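/* For example (illustrative): "-mpure-code -fpic" is rejected by the
   first error above, and "-mslow-flash-data -mword-relocations" by the
   second, since both features rely on MOVW/MOVT immediates to build
   addresses without a literal pool.  */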
/* Recompute the global settings depending on target attribute options. */
static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
- if (opts->x_align_functions <= 0)
- opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
- && opts->x_optimize_size ? 2 : 4;
+ /* -falign-functions without argument: supply one. */
+ if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+ opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
+ && opts->x_optimize_size ? "2" : "4";
}
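/* For example (illustrative): a plain -falign-functions now behaves like
   -falign-functions=2 when compiling Thumb code with -Os, and like
   -falign-functions=4 otherwise.  */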
/* Implement targetm.override_options_after_change. */
if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
{
/* The default is to enable interworking, so this warning message would
- be confusing to users who have just compiled with, eg, -march=armv3. */
+      be confusing to users who have just compiled with,
+      e.g., -march=armv4.  */
/* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
opts->x_target_flags &= ~MASK_INTERWORK;
}
/* Thumb2 inline assembly code should always use unified syntax.
This will apply to ARM and Thumb1 eventually. */
- opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
+ if (TARGET_THUMB2_P (opts->x_target_flags))
+ opts->x_inline_asm_unified = true;
#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
if (!bitmap_empty_p (isa_delta))
{
if (warn_compatible)
- warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
+ warning (0, "switch %<-mcpu=%s%> conflicts "
+ "with %<-march=%s%> switch",
arm_selected_cpu->common.name,
arm_selected_arch->common.name);
/* -march wins for code generation.
switches that require certain abilities from the cpu. */
if (TARGET_INTERWORK || TARGET_THUMB)
- {
- bitmap_set_bit (sought_isa, isa_bit_thumb);
- bitmap_set_bit (sought_isa, isa_bit_mode32);
-
- /* There are no ARM processors that support both APCS-26 and
- interworking. Therefore we forcibly remove MODE26 from
- from the isa features here (if it was set), so that the
- search below will always be able to find a compatible
- processor. */
- bitmap_clear_bit (default_isa, isa_bit_mode26);
- }
+ bitmap_set_bit (sought_isa, isa_bit_thumb);
/* If there are such requirements and the default CPU does not
satisfy them, we need to run over the complete list of
if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
{
- warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
+ warning (0, "%<-mapcs-stack-check%> incompatible with "
+ "%<-mno-apcs-frame%>");
target_flags |= MASK_APCS_FRAME;
}
target_flags |= MASK_APCS_FRAME;
if (TARGET_APCS_REENT && flag_pic)
- error ("-fpic and -mapcs-reent are incompatible");
+ error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
if (TARGET_APCS_REENT)
warning (0, "APCS reentrant code not supported. Ignored");
if (flag_pic && TARGET_SINGLE_PIC_BASE)
{
if (TARGET_VXWORKS_RTP)
- warning (0, "RTP PIC is incompatible with -msingle-pic-base");
+ warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
}
int pic_register = decode_reg_name (arm_pic_register_string);
if (!flag_pic)
- warning (0, "-mpic-register= is useless without -fpic");
+ warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
/* Prevent the user from choosing an obviously stupid PIC register. */
else if (pic_register < 0 || call_used_regs[pic_register]
arm_pic_register = pic_register;
}
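+  /* PIC addresses symbols through the GOT or with PC-relative sequences;
+     a MOVW/MOVT immediate would need an absolute relocation against the
+     symbol, so behave as if -mword-relocations had also been given.  */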
+ if (flag_pic)
+ target_word_relocations = 1;
+
/* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
if (fix_cm3_ldrd == 2)
{
if (flag_reorder_blocks_and_partition)
{
inform (input_location,
- "-freorder-blocks-and-partition not supported on this architecture");
+ "%<-freorder-blocks-and-partition%> not supported "
+ "on this architecture");
flag_reorder_blocks_and_partition = 0;
flag_reorder_blocks = 1;
}
/* Initialize boolean versions of the architectural flags, for use
in the arm.md file. */
- arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
- arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
- arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
- arm_arch5te = arm_arch5e
- && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
+ arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
+ arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
+ arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
+ arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
void
arm_options_perform_arch_sanity_checks (void)
{
- /* V5 code we generate is completely interworking capable, so we turn off
+ /* V5T code we generate is completely interworking capable, so we turn off
TARGET_INTERWORK here to avoid many tests later on. */
/* XXX However, we must pass the right pre-processor defines to CPP
if (TARGET_INTERWORK)
arm_cpp_interwork = 1;
- if (arm_arch5)
+ if (arm_arch5t)
target_flags &= ~MASK_INTERWORK;
if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
if (TARGET_AAPCS_BASED)
{
if (TARGET_CALLER_INTERWORKING)
- error ("AAPCS does not support -mcaller-super-interworking");
+ error ("AAPCS does not support %<-mcaller-super-interworking%>");
else
if (TARGET_CALLEE_INTERWORKING)
- error ("AAPCS does not support -mcallee-super-interworking");
+ error ("AAPCS does not support %<-mcallee-super-interworking%>");
}
/* __fp16 support currently assumes the core has ldrh. */
{
arm_pcs_default = ARM_PCS_AAPCS_VFP;
if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
- error ("-mfloat-abi=hard: selected processor lacks an FPU");
+ error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
}
else
arm_pcs_default = ARM_PCS_AAPCS;
else
{
if (arm_float_abi == ARM_FLOAT_ABI_HARD)
- sorry ("-mfloat-abi=hard and VFP");
+ sorry ("%<-mfloat-abi=hard%> and VFP");
if (arm_abi == ARM_ABI_APCS)
arm_pcs_default = ARM_PCS_APCS;
the other registers, since that is never slower than executing
another instruction.
- We test for !arm_arch5 here, because code for any architecture
+ We test for !arm_arch5t here, because code for any architecture
less than this could potentially be run on one of the buggy
chips. */
- if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
+ if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
{
/* Validate that r3 is a call-clobbered register (always true in
the default abi) ... */
}
}
-/* Return 1 if double word alignment is required for argument passing.
+/* Return 2 if double word alignment is required for argument passing,
+ but wasn't required before the fix for PR88469.
+ Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before the PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
int ret = 0;
- /* Record/aggregate types: Use greatest member alignment of any member. */
+ int ret2 = 0;
+ /* Record/aggregate types: Use greatest member alignment of any member. */
for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
if (DECL_ALIGN (field) > PARM_BOUNDARY)
{
Make sure we can warn about that with -Wpsabi. */
ret = -1;
}
+ else if (TREE_CODE (field) == FIELD_DECL
+ && DECL_BIT_FIELD_TYPE (field)
+ && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
+ ret2 = 1;
+
+ if (ret2)
+ return 2;
return ret;
}
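/* Illustration of the new return value 2 (hypothetical user code,
   patterned on the PR88469 scenario): in

     struct bf { int a; long long b : 3; };

   no FIELD_DECL carries DECL_ALIGN above PARM_BOUNDARY, but the declared
   type of the bit-field B is 64-bit aligned, so the function now returns
   2: the argument is passed with doubleword alignment and -Wpsabi notes
   the GCC 9.1 change.  */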
inform (input_location, "parameter passing for argument of type "
"%qT changed in GCC 7.1", type);
else if (res > 0)
- pcum->nregs++;
+ {
+ pcum->nregs++;
+ if (res > 1 && warn_psabi)
+ inform (input_location, "parameter passing for argument of type "
+ "%qT changed in GCC 9.1", type);
+ }
}
/* Only allow splitting an arg between regs and memory if all preceding
if (res < 0 && warn_psabi)
inform (input_location, "parameter passing for argument of type %qT "
"changed in GCC 7.1", type);
+ if (res > 1 && warn_psabi)
+ inform (input_location, "parameter passing for argument of type "
+ "%qT changed in GCC 9.1", type);
return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}
if (!use_cmse)
{
*no_add_attrs = true;
- warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
- name);
+ warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
+ "option.", name);
return NULL_TREE;
}
if (!use_cmse)
{
*no_add_attrs = true;
- warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
- name);
+ warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
+ "option.", name);
return NULL_TREE;
}
return 1;
}
-/* Record that the current function needs a PIC register. Initialize
- cfun->machine->pic_reg if we have not already done so. */
+/* Record that the current function needs a PIC register.  If PIC_REG is null,
+   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
+   both cases cfun->machine->pic_reg is initialized if we have not already
+   done so.  COMPUTE_NOW decides whether and where to set the PIC register.
+   If true, the PIC register is reloaded in the current position of the
+   instruction stream regardless of whether it was loaded before.  Otherwise,
+   it is only loaded if not already done so (crtl->uses_pic_offset_table is
+   null).  Note that a nonnull PIC_REG is supported iff COMPUTE_NOW is true,
+   and a null PIC_REG iff COMPUTE_NOW is false.  */
static void
-require_pic_register (void)
+require_pic_register (rtx pic_reg, bool compute_now)
{
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
/* A lot of the logic here is made obscure by the fact that this
routine gets called as part of the rtx cost estimation process.
We don't want those calls to affect any assumptions about the real
function; and further, we can't call entry_of_function() until we
start the real expansion process. */
- if (!crtl->uses_pic_offset_table)
+ if (!crtl->uses_pic_offset_table || compute_now)
{
- gcc_assert (can_create_pseudo_p ());
+ gcc_assert (can_create_pseudo_p ()
+ || (pic_reg != NULL_RTX
+ && REG_P (pic_reg)
+ && GET_MODE (pic_reg) == Pmode));
if (arm_pic_register != INVALID_REGNUM
+ && !compute_now
&& !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
{
if (!cfun->machine->pic_reg)
{
rtx_insn *seq, *insn;
+ if (pic_reg == NULL_RTX)
+ pic_reg = gen_reg_rtx (Pmode);
if (!cfun->machine->pic_reg)
- cfun->machine->pic_reg = gen_reg_rtx (Pmode);
+ cfun->machine->pic_reg = pic_reg;
/* Play games to avoid marking the function as needing pic
if we are being called as part of the cost-estimation
start_sequence ();
if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
- && arm_pic_register > LAST_LO_REGNUM)
+ && arm_pic_register > LAST_LO_REGNUM
+ && !compute_now)
emit_move_insn (cfun->machine->pic_reg,
gen_rtx_REG (Pmode, arm_pic_register));
else
- arm_load_pic_register (0UL);
+ arm_load_pic_register (0UL, pic_reg);
seq = get_insns ();
end_sequence ();
we can't yet emit instructions directly in the final
insn stream. Queue the insns on the entry edge, they will
be committed after everything else is expanded. */
- insert_insn_on_edge (seq,
- single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+ if (currently_expanding_to_rtl)
+ insert_insn_on_edge (seq,
+ single_succ_edge
+ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
+ else
+ emit_insn (seq);
}
}
}
}
+/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
+   created to hold the result of the load.  If not NULL, PIC_REG indicates
+   which register to use as PIC register, otherwise that is decided by the
+   register allocator.  COMPUTE_NOW forces the PIC register to be loaded at
+   the current location in the instruction stream, regardless of whether it
+   was loaded previously.  Note that a nonnull PIC_REG is supported iff
+   COMPUTE_NOW is true, and a null PIC_REG iff COMPUTE_NOW is false.
+
+ Returns the register REG into which the PIC load is performed. */
+
rtx
-legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
+legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
+ bool compute_now)
{
+ gcc_assert (compute_now == (pic_reg != NULL_RTX));
+
if (GET_CODE (orig) == SYMBOL_REF
|| GET_CODE (orig) == LABEL_REF)
{
rtx mem;
/* If this function doesn't have a pic register, create one now. */
- require_pic_register ();
+ require_pic_register (pic_reg, compute_now);
+
+ if (pic_reg == NULL_RTX)
+ pic_reg = cfun->machine->pic_reg;
- pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
+ pat = gen_calculate_pic_address (reg, pic_reg, orig);
/* Make the MEM as close to a constant as possible. */
mem = SET_SRC (pat);
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
- base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
+ base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
+ pic_reg, compute_now);
offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
- base == reg ? 0 : reg);
+ base == reg ? 0 : reg, pic_reg,
+ compute_now);
if (CONST_INT_P (offset))
{
low register. */
void
-arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
+arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
- rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
+ rtx l1, labelno, pic_tmp, pic_rtx;
if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
return;
gcc_assert (flag_pic);
- pic_reg = cfun->machine->pic_reg;
+ if (pic_reg == NULL_RTX)
+ pic_reg = cfun->machine->pic_reg;
if (TARGET_VXWORKS_RTP)
{
pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
{
/* We need to find and carefully transform any SYMBOL and LABEL
references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+ false /*compute_now*/);
if (new_x != orig_x)
x = new_x;
{
/* We need to find and carefully transform any SYMBOL and LABEL
references; so go back to the original address expression. */
- rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
+ rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
+ false /*compute_now*/);
if (new_x != orig_x)
x = new_x;
currently implement these if a literal pool is disabled. */
if (arm_disable_literal_pool)
sorry ("accessing thread-local storage is not currently supported "
- "with -mpure-code or -mslow-flash-data");
+ "with %<-mpure-code%> or %<-mslow-flash-data%>");
return true;
}
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
rtx base, offset;
+ split_const (x, &base, &offset);
- if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+ if (SYMBOL_REF_P (base))
{
- split_const (x, &base, &offset);
- if (GET_CODE (base) == SYMBOL_REF
+ /* Function symbols cannot have an offset due to the Thumb bit. */
+ if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
+ && INTVAL (offset) != 0)
+ return true;
+
+ if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
&& !offset_within_block_p (base, INTVAL (offset)))
return true;
}
if (mode == DImode)
{
- if (arm_arch3m
- && GET_CODE (XEXP (x, 0)) == MULT
+ if (GET_CODE (XEXP (x, 0)) == MULT
&& ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
|| (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
if (mode == DImode)
{
- if (arm_arch3m
- && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
- && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
- || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
- && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
+ if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
+ || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
+ && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
{
if (speed_p)
*cost += extra_cost->mult[1].extend;
/* Generate code to load VALS, which is a PARALLEL containing only
constants (for vec_init) or CONST_VECTOR, efficiently into a
register. Returns an RTX to copy into the register, or NULL_RTX
- for a PARALLEL that can not be converted into a CONST_VECTOR. */
+ for a PARALLEL that cannot be converted into a CONST_VECTOR. */
rtx
neon_make_constant (rtx vals)
return target;
else if (const_vec != NULL_RTX)
/* Load from constant pool. On Cortex-A8 this takes two cycles
- (for either double or quad vectors). We can not take advantage
+ (for either double or quad vectors). We cannot take advantage
of single-cycle VLD1 because we need a PC-relative addressing
mode. */
return const_vec;
else
/* A PARALLEL containing something not valid inside CONST_VECTOR.
- We can not construct an initializer. */
+ We cannot construct an initializer. */
return NULL_RTX;
}
return FALSE;
}
+/* Prepare the operands for the VCMLA by lane instruction such that the right
+   register number is selected.  This instruction is special in that it
+   always requires a D register; however, there is a choice to be made
+   between Dn[0], Dn[1], D(n+1)[0] and D(n+1)[1] depending on the mode of
+   the registers.
+
+   The VCMLA by lane function always selects two values.  For instance,
+   given D0 and a V2SF, the only valid index is 0, as the values in S0 and
+   S1 will be used by the instruction.  However, given V4SF, indices 0 and
+   1 are both valid, as either D0[0] or D1[0] may be selected.
+
+   This function centralizes that information based on OPERANDS:
+   OPERANDS[3] will be changed from a REG into a CONST_INT RTX and
+   OPERANDS[4] will be updated to contain the right index.  */
+
+rtx *
+neon_vcmla_lane_prepare_operands (rtx *operands)
+{
+ int lane = INTVAL (operands[4]);
+ machine_mode constmode = SImode;
+ machine_mode mode = GET_MODE (operands[3]);
+ int regno = REGNO (operands[3]);
+ regno = ((regno - FIRST_VFP_REGNUM) >> 1);
+ if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
+ {
+ operands[3] = gen_int_mode (regno + 1, constmode);
+ operands[4]
+ = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
+ }
+ else
+ {
+ operands[3] = gen_int_mode (regno, constmode);
+ operands[4] = gen_int_mode (lane, constmode);
+ }
+ return operands;
+}
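+/* Worked example for the function above (illustrative): take a V4SF
+   multiplier living in d0-d1 (so the computed D register number is 0)
+   with lane index 1.  GET_MODE_NUNITS / 4 is 1, so the index folds onto
+   the next D register: operands[3] becomes the constant 1 and
+   operands[4] becomes 0, i.e. the instruction reads D1[0].  */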
+
+
/* Return true if X is a register that will be eliminated later on. */
int
arm_eliminable_register (rtx x)
if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
return false;
+ if (regno == REGNO (addr))
+ addr_reg_in_reglist = true;
+
for (; i < count; i++)
{
elt = XVECEXP (op, 0, i);
int unsorted_regs[MAX_LDM_STM_OPS];
HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
int order[MAX_LDM_STM_OPS];
- rtx base_reg_rtx = NULL;
int base_reg = -1;
int i, ldm_case;
if (i == 0)
{
base_reg = REGNO (reg);
- base_reg_rtx = reg;
if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
return 0;
}
*load_offset = unsorted_offsets[order[0]];
}
- if (TARGET_THUMB1
- && !peep2_reg_dead_p (nops, base_reg_rtx))
- return 0;
-
if (unsorted_offsets[order[0]] == 0)
ldm_case = 1; /* ldmia */
else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
if (TARGET_THUMB1)
{
- gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
gcc_assert (ldm_case == 1 || ldm_case == 5);
- write_back = TRUE;
+
+ /* Thumb-1 ldm uses writeback except if the base is loaded. */
+ write_back = true;
+ for (i = 0; i < nops; i++)
+ if (base_reg == regs[i])
+ write_back = false;
+
+ /* Ensure the base is dead if it is updated. */
+ if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
+ return false;
}
if (ldm_case == 5)
rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
offset = 0;
- if (!TARGET_THUMB1)
- base_reg_rtx = newbase;
+ base_reg_rtx = newbase;
}
for (i = 0; i < nops; i++)
*base = addr;
return true;
}
- else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+ else if (GET_CODE (addr) == PLUS)
{
*base = XEXP (addr, 0);
*offset = XEXP (addr, 1);
}
/* Make sure accesses are to consecutive memory locations. */
- if (gap != 4)
+ if (gap != GET_MODE_SIZE (SImode))
return false;
if (!align_ok_ldrd_strd (align[0], offset))
}
+/* Return true if parallel execution of the two word-size accesses provided
+   could be satisfied with a single LDRD/STRD instruction.  Two word-size
+   accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
+   register operands and OPERANDS[2,3] are the corresponding memory
+   operands.  */
+bool
+valid_operands_ldrd_strd (rtx *operands, bool load)
+{
+ int nops = 2;
+ HOST_WIDE_INT offsets[2], offset, align[2];
+ rtx base = NULL_RTX;
+ rtx cur_base, cur_offset;
+ int i, gap;
+
+ /* Check that the memory references are immediate offsets from the
+ same base register. Extract the base register, the destination
+ registers, and the corresponding memory offsets. */
+ for (i = 0; i < nops; i++)
+ {
+ if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
+ &align[i]))
+ return false;
+
+ if (i == 0)
+ base = cur_base;
+ else if (REGNO (base) != REGNO (cur_base))
+ return false;
+
+ offsets[i] = INTVAL (cur_offset);
+ if (GET_CODE (operands[i]) == SUBREG)
+ return false;
+ }
+
+ if (offsets[0] > offsets[1])
+ return false;
+
+ gap = offsets[1] - offsets[0];
+ offset = offsets[0];
+
+ /* Make sure accesses are to consecutive memory locations. */
+ if (gap != GET_MODE_SIZE (SImode))
+ return false;
+
+ if (!align_ok_ldrd_strd (align[0], offset))
+ return false;
+
+ return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
+ false, load);
+}
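+/* For instance (illustrative), with operands[0] = r0, operands[1] = r1,
+   operands[2] = [r2] and operands[3] = [r2, #4], the two accesses
+
+	ldr	r0, [r2]
+	ldr	r1, [r2, #4]
+
+   share a base, touch consecutive words and start on an even register,
+   so the pair can be emitted as a single "ldrd r0, r1, [r2]".  */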
\f
/* Print a symbolic form of X to the debug file, F. */
{
HOST_WIDE_INT align, min_insn_size;
- align = 1 << label_to_alignment (label);
+ align = 1 << label_to_alignment (label).levels[0].log;
min_insn_size = TARGET_THUMB ? 2 : 4;
return align > min_insn_size ? align - min_insn_size : 0;
}
Mnode * mp;
/* If the minipool starts before the end of FIX->INSN then this FIX
- can not be placed into the current pool. Furthermore, adding the
+ cannot be placed into the current pool. Furthermore, adding the
new constant pool entry may cause the pool to start FIX_SIZE bytes
earlier. */
if (minipool_vector_head &&
if (use_cmse)
cmse_nonsecure_call_clear_caller_saved ();
- if (TARGET_THUMB1)
+
+ /* We cannot run the Thumb passes for thunks because there is no CFG. */
+ if (cfun->is_thunk)
+ ;
+ else if (TARGET_THUMB1)
thumb1_reorg ();
else if (TARGET_THUMB2)
thumb2_reorg ();
? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
: !SYMBOL_REF_LOCAL_P (addr)))
{
- require_pic_register ();
+ require_pic_register (NULL_RTX, false /*compute_now*/);
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
}
const char *
output_call (rtx *operands)
{
- gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
+ gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
/* Handle calls to lr using ip (which may be clobbered in subr anyway). */
if (REGNO (operands[0]) == LR_REGNUM)
gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
|| (TARGET_ARM && TARGET_LDRD));
+ /* For TARGET_ARM the first source register of an STRD
+ must be even. This is usually the case for double-word
+ values but user assembly constraints can force an odd
+ starting register. */
+ bool allow_strd = TARGET_LDRD
+ && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
switch (GET_CODE (XEXP (operands[0], 0)))
{
case REG:
if (emit)
{
- if (TARGET_LDRD)
+ if (allow_strd)
output_asm_insn ("strd%?\t%1, [%m0]", operands);
else
output_asm_insn ("stm%?\t%m0, %M1", operands);
break;
case PRE_INC:
- gcc_assert (TARGET_LDRD);
+ gcc_assert (allow_strd);
if (emit)
output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
break;
case PRE_DEC:
if (emit)
{
- if (TARGET_LDRD)
+ if (allow_strd)
output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
else
output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
case POST_INC:
if (emit)
{
- if (TARGET_LDRD)
+ if (allow_strd)
output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
else
output_asm_insn ("stm%?\t%m0!, %M1", operands);
break;
case POST_DEC:
- gcc_assert (TARGET_LDRD);
+ gcc_assert (allow_strd);
if (emit)
output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
break;
otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
- /* IWMMXT allows offsets larger than ldrd can handle,
- fix these up with a pair of ldr. */
+ /* IWMMXT allows offsets larger than strd can handle,
+ fix these up with a pair of str. */
if (!TARGET_THUMB2
&& CONST_INT_P (otherops[2])
&& (INTVAL(otherops[2]) <= -256
return "";
}
}
- if (TARGET_LDRD
+ if (allow_strd
&& (REG_P (otherops[2])
|| TARGET_THUMB2
|| (CONST_INT_P (otherops[2])
stack_adjust = offsets->outgoing_args - offsets->saved_regs;
gcc_assert (stack_adjust == 0 || stack_adjust == 4);
- if (stack_adjust && arm_arch5 && TARGET_ARM)
+ if (stack_adjust && arm_arch5t && TARGET_ARM)
sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
else
{
break;
case ARM_FT_INTERWORKED:
- gcc_assert (arm_arch5 || arm_arch4t);
+ gcc_assert (arm_arch5t || arm_arch4t);
sprintf (instr, "bx%s\t%%|lr", conditional);
break;
"msr%s\tAPSR_nzcvq, %%|lr", conditional);
output_asm_insn (instr, & operand);
- if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
+ if (TARGET_HARD_FLOAT)
{
/* Clear the cumulative exception-status bits (0-4,7) and the
condition code bits (28-31) of the FPSCR. We need to
snprintf (instr, sizeof (instr), "bxns\t%%|lr");
}
/* Use bx if it's available. */
- else if (arm_arch5 || arm_arch4t)
+ else if (arm_arch5t || arm_arch4t)
sprintf (instr, "bx%s\t%%|lr", conditional);
else
sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
mask &= THUMB2_WORK_REGS;
if (!IS_NESTED (func_type))
mask |= (1 << IP_REGNUM);
- arm_load_pic_register (mask);
+ arm_load_pic_register (mask, NULL_RTX);
}
/* If we are profiling, make sure no instructions are scheduled before
used since they make interworking inefficient (the
linker can't transform BL<cond> into BLX). That's
only a problem if the machine has BLX. */
- if (arm_arch5)
+ if (arm_arch5t)
{
fail = TRUE;
break;
/* Load the pic register before setting the frame pointer,
so we can use r7 as a temporary work register. */
if (flag_pic && arm_pic_register != INVALID_REGNUM)
- arm_load_pic_register (live_regs_mask);
+ arm_load_pic_register (live_regs_mask, NULL_RTX);
if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
|| flag_stack_clash_protection)
&& size)
- sorry ("-fstack-check=specific for Thumb-1");
+ sorry ("%<-fstack-check=specific%> for Thumb-1");
amount = offsets->outgoing_args - offsets->saved_regs;
amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset, tree function)
{
+ const bool long_call_p = arm_is_long_call_p (function);
+
/* On ARM, this_regno is R0 or R1 depending on
whether the function returns an aggregate or not.
*/
TREE_USED (function) = 1;
}
rtx funexp = XEXP (DECL_RTL (function), 0);
+ if (long_call_p)
+ {
+ emit_move_insn (temp, funexp);
+ funexp = temp;
+ }
funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
- rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
+ rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
SIBLING_CALL_P (insn) = 1;
+ emit_barrier ();
+
+ /* Indirect calls require a bit of fixup in PIC mode. */
+ if (long_call_p)
+ {
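+      /* The address move emitted above may still need splitting, e.g.
+         into a movw/movt pair or a literal-pool load, and the thunk is
+         expanded outside the normal pass pipeline, so run the split and
+         reorg steps by hand.  */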
+ split_all_insns_noflow ();
+ arm_reorg ();
+ }
insn = get_insns ();
shorten_branches (insn);
inform (input_location, "parameter passing for argument of "
"type %qT changed in GCC 7.1", type);
else if (res > 0)
- nregs++;
+ {
+ nregs++;
+ if (res > 1 && warn_psabi)
+ inform (input_location,
+ "parameter passing for argument of type "
+ "%qT changed in GCC 9.1", type);
+ }
}
}
else
return count;
}
+/* Same as above, but operands are a register/memory pair in SImode.
+   Assumes OPERANDS has the register operand in position 0 and the memory
+   operand in position 2 (which is the order provided by the arm_{ldrd,strd}
+   patterns).  */
+int
+arm_count_ldrdstrd_insns (rtx *operands, bool load)
+{
+ int count;
+ rtx ops[2];
+ int regnum, memnum;
+ if (load)
+ regnum = 0, memnum = 1;
+ else
+ regnum = 1, memnum = 0;
+ ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ ops[memnum] = adjust_address (operands[2], DImode, 0);
+ output_move_double (ops, false, &count);
+ return count;
+}
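+/* This counting variant is intended for computing the "length" attribute
+   of the ldrd/strd patterns; presumably the md side uses something like
+   (set (attr "length")
+	(symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4")),
+   though that exact expression is an assumption, not part of this file.  */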
+
+
int
vfp3_const_double_for_fract_bits (rtx operand)
{
arm_expand_compare_and_swap (rtx operands[])
{
rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
- machine_mode mode;
- rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+ machine_mode mode, cmp_mode;
bval = operands[0];
rval = operands[1];
}
if (TARGET_THUMB1)
- {
- switch (mode)
- {
- case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
- case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
- case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
- case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
- default:
- gcc_unreachable ();
- }
- }
+ cmp_mode = E_SImode;
else
- {
- switch (mode)
- {
- case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
- case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
- case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
- case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
- default:
- gcc_unreachable ();
- }
- }
+ cmp_mode = CC_Zmode;
bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
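+  /* gen_atomic_compare_and_swap_1 dispatches on both the CC mode and the
+     data mode; this assumes the sync.md patterns use the '@'
+     parameterized-name syntax (something like
+     "@atomic_compare_and_swap<CCSI:arch><NARROW:mode>_1"), replacing the
+     two switch tables deleted above.  */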
- emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
+ emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
+ oldval, newval, is_weak, mod_s, mod_f));
if (mode == QImode || mode == HImode)
emit_move_insn (operands[1], gen_lowpart (mode, rval));
void
arm_split_compare_and_swap (rtx operands[])
{
- rtx rval, mem, oldval, newval, neg_bval;
+ rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
machine_mode mode;
enum memmodel mod_s, mod_f;
bool is_weak;
oldval = operands[3];
newval = operands[4];
is_weak = (operands[5] != const0_rtx);
- mod_s = memmodel_from_int (INTVAL (operands[6]));
+ mod_s_rtx = operands[6];
+ mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
mod_f = memmodel_from_int (INTVAL (operands[7]));
neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
mode = GET_MODE (mem);
bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
- bool use_acquire = TARGET_HAVE_LDACQ
- && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
- || is_mm_release (mod_s));
-
- bool use_release = TARGET_HAVE_LDACQ
- && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
- || is_mm_acquire (mod_s));
+ bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
+ bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
/* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
a full barrier is emitted after the store-release. */
bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
- bool use_acquire = TARGET_HAVE_LDACQ
- && !(is_mm_relaxed (model) || is_mm_consume (model)
- || is_mm_release (model));
-
- bool use_release = TARGET_HAVE_LDACQ
- && !(is_mm_relaxed (model) || is_mm_consume (model)
- || is_mm_acquire (model));
+ bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
+ bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
/* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
a full barrier is emitted after the store-release. */
{
unsigned int i, odd, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
- rtx (*gen)(rtx, rtx, rtx, rtx);
int first_elem;
int swap_nelt;
if (d->testing_p)
return true;
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
- case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
- case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
- case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
- case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
- case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
- case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
- case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
- case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
- case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
- default:
- gcc_unreachable ();
- }
-
in0 = d->op0;
in1 = d->op1;
if (swap_nelt != 0)
if (odd)
std::swap (out0, out1);
- emit_insn (gen (out0, in0, in1, out1));
+ emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
return true;
}
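/* Note on the generators used above and in the functions that follow:
   gen_neon_vuzp_internal, gen_neon_vzip_internal, gen_neon_vrev*,
   gen_neon_vtrn_internal and gen_neon_vext take the machine mode as their
   first argument.  This assumes the corresponding arm md patterns were
   renamed to use the '@' parameterized-name syntax (e.g.
   "@neon_vuzp<mode>_internal"), which makes genemit produce a single
   mode-dispatching generator and lets the per-mode switch tables go.  */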
{
unsigned int i, high, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
- rtx (*gen)(rtx, rtx, rtx, rtx);
int first_elem;
bool is_swapped;
if (d->testing_p)
return true;
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
- case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
- case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
- case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
- case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
- case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
- case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
- case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
- case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
- case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
- default:
- gcc_unreachable ();
- }
-
in0 = d->op0;
in1 = d->op1;
if (is_swapped)
if (high)
std::swap (out0, out1);
- emit_insn (gen (out0, in0, in1, out1));
+ emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
return true;
}
/* Recognize patterns for the VREV insns. */
-
static bool
arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
{
unsigned int i, j, diff, nelt = d->perm.length ();
- rtx (*gen)(rtx, rtx);
+ rtx (*gen) (machine_mode, rtx, rtx);
if (!d->one_vector_p)
return false;
switch (diff)
{
case 7:
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
- case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
- default:
- return false;
- }
- break;
+ switch (d->vmode)
+ {
+ case E_V16QImode:
+ case E_V8QImode:
+ gen = gen_neon_vrev64;
+ break;
+ default:
+ return false;
+ }
+ break;
case 3:
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
- case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
- case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
- case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
- case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
- case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
+ switch (d->vmode)
+ {
+ case E_V16QImode:
+ case E_V8QImode:
+ gen = gen_neon_vrev32;
+ break;
+ case E_V8HImode:
+ case E_V4HImode:
+ case E_V8HFmode:
+ case E_V4HFmode:
+ gen = gen_neon_vrev64;
+ break;
default:
return false;
}
case 1:
switch (d->vmode)
{
- case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
- case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
- case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
- case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
- case E_V4SImode: gen = gen_neon_vrev64v4si; break;
- case E_V2SImode: gen = gen_neon_vrev64v2si; break;
- case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
- case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
- default:
+ case E_V16QImode:
+ case E_V8QImode:
+ gen = gen_neon_vrev16;
+ break;
+ case E_V8HImode:
+ case E_V4HImode:
+ gen = gen_neon_vrev32;
+ break;
+ case E_V4SImode:
+ case E_V2SImode:
+ case E_V4SFmode:
+ case E_V2SFmode:
+ gen = gen_neon_vrev64;
+ break;
+ default:
return false;
}
break;
if (d->testing_p)
return true;
- emit_insn (gen (d->target, d->op0));
+ emit_insn (gen (d->vmode, d->target, d->op0));
return true;
}
{
unsigned int i, odd, mask, nelt = d->perm.length ();
rtx out0, out1, in0, in1;
- rtx (*gen)(rtx, rtx, rtx, rtx);
if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
return false;
if (d->testing_p)
return true;
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
- case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
- case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
- case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
- case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
- case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
- case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
- case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
- case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
- case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
- default:
- gcc_unreachable ();
- }
-
in0 = d->op0;
in1 = d->op1;
if (BYTES_BIG_ENDIAN)
if (odd)
std::swap (out0, out1);
- emit_insn (gen (out0, in0, in1, out1));
+ emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
return true;
}
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
{
unsigned int i, nelt = d->perm.length ();
- rtx (*gen) (rtx, rtx, rtx, rtx);
rtx offset;
unsigned int location;
location = d->perm[0];
- switch (d->vmode)
- {
- case E_V16QImode: gen = gen_neon_vextv16qi; break;
- case E_V8QImode: gen = gen_neon_vextv8qi; break;
- case E_V4HImode: gen = gen_neon_vextv4hi; break;
- case E_V8HImode: gen = gen_neon_vextv8hi; break;
- case E_V2SImode: gen = gen_neon_vextv2si; break;
- case E_V4SImode: gen = gen_neon_vextv4si; break;
- case E_V4HFmode: gen = gen_neon_vextv4hf; break;
- case E_V8HFmode: gen = gen_neon_vextv8hf; break;
- case E_V2SFmode: gen = gen_neon_vextv2sf; break;
- case E_V4SFmode: gen = gen_neon_vextv4sf; break;
- case E_V2DImode: gen = gen_neon_vextv2di; break;
- default:
- return false;
- }
-
/* Success! */
if (d->testing_p)
return true;
offset = GEN_INT (location);
- emit_insn (gen (d->target, d->op0, d->op1, offset));
+
+  if (d->vmode == E_DImode)
+ return false;
+
+ emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
return true;
}
rtx xop0, xop1 = NULL_RTX;
rtx tmp = addr;
+ if (target_word_relocations)
+ return false;
+
if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
return true;
rtx dst, addr, mem;
rtx val_vec, reg;
machine_mode mode;
- unsigned HOST_WIDE_INT v = value;
unsigned int offset = 0;
gcc_assert ((align & 0x3) == 0);
dst = copy_addr_to_reg (XEXP (dstbase, 0));
- v = sext_hwi (v, BITS_PER_WORD);
-
reg = gen_reg_rtx (mode);
- val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
+ val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
/* Emit instruction loading the constant value. */
emit_move_insn (reg, val_vec);
return;
if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
- || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
+ || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
return;
/* Nested definitions must inherit mode. */
case VUNSPEC_MRC2:
/* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
ARMv8-{A,M}. */
- if (arm_arch5)
+ if (arm_arch5t)
return true;
break;
case VUNSPEC_MCRR:
return align;
}
+/* Emit a speculation barrier on target architectures that do not have
+ DSB/ISB directly. Such systems probably don't need a barrier
+ themselves, but if the code is ever run on a later architecture, it
+ might become a problem. */
+void
+arm_emit_speculation_barrier_function ()
+{
+ emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
+}
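+/* A typical trigger (illustrative) is user code such as
+
+     int f (int *p, unsigned i, unsigned n)
+     {
+       return i < n ? __builtin_speculation_safe_value (p[i], 0) : 0;
+     }
+
+   compiled for an architecture without DSB/ISB: expanding the builtin's
+   speculation barrier then falls back to the __speculation_barrier
+   library function registered in arm_init_libfuncs.  */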
+
#if CHECKING_P
namespace selftest {