sim: profile: disconnect from watchpoint core

[thirdparty/binutils-gdb.git] / gas / config / tc-arm.c
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index fc86db4e070e333e35eabbde3bf23b3a415ffa0b..e97036a4223128f8704607aa7732c620ca6e03ba 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -1,5 +1,5 @@
  /* tc-arm.c -- Assemble for the ARM
-   Copyright (C) 1994-2014 Free Software Foundation, Inc.
+   Copyright (C) 1994-2015 Free Software Foundation, Inc.
     Contributed by Richard Earnshaw (rwe@pegasus.esprit.ec.org)
         Modified by David Taylor (dtaylor@armltd.co.uk)
         Cirrus coprocessor mods by Aldy Hernandez (aldyh@redhat.com)
@@ -236,6 +236,8 @@ static const arm_feature_set fpu_neon_ext_fma = ARM_FEATURE (0, FPU_NEON_EXT_FMA
  static const arm_feature_set fpu_vfp_ext_fma = ARM_FEATURE (0, FPU_VFP_EXT_FMA);
  static const arm_feature_set fpu_vfp_ext_armv8 =
    ARM_FEATURE (0, FPU_VFP_EXT_ARMV8);
+static const arm_feature_set fpu_vfp_ext_armv8xd =
+  ARM_FEATURE (0, FPU_VFP_EXT_ARMV8xD);
  static const arm_feature_set fpu_neon_ext_armv8 =
    ARM_FEATURE (0, FPU_NEON_EXT_ARMV8);
  static const arm_feature_set fpu_crypto_ext_armv8 =
@@ -249,6 +251,8 @@ static arm_feature_set selected_cpu = ARM_ARCH_NONE;
  /* Must be long enough to hold any of the names in arm_cpus.  */
  static char selected_cpu_name[16];
  
+extern FLONUM_TYPE generic_floating_point_number;
+
  /* Return if no cpu was selected on command-line.  */
  static bfd_boolean
  no_cpu_selected (void)
@@ -630,6 +634,7 @@ struct asm_opcode
  #define LITERAL_MASK   0xf000f000
  #define OPCODE_MASK    0xfe1fffff
  #define V4_STR_BIT     0x00000020
+#define VLDR_VMOV_SAME 0x0040f000
  
  #define T2_SUBS_PC_LR  0xf3de8f00
  
@@ -792,6 +797,7 @@ typedef struct literal_pool
    struct dwarf2_line_info locs [MAX_LITERAL_POOL_SIZE];
  #endif
    struct literal_pool *  next;
+  unsigned int          alignment;
  } literal_pool;
  
  /* Pointer to a linked list of literal pools.  */
@@ -2626,13 +2632,12 @@ static void mapping_state_2 (enum mstate state, int max_chars);
  /* Set the mapping state to STATE.  Only call this when about to
     emit some STATE bytes to the file.  */
  
+#define TRANSITION(from, to) (mapstate == (from) && state == (to))
  void
  mapping_state (enum mstate state)
  {
    enum mstate mapstate = seg_info (now_seg)->tc_segment_info_data.mapstate;
  
-#define TRANSITION(from, to) (mapstate == (from) && state == (to))
-
    if (mapstate == state)
      /* The mapping symbol has already been emitted.
         There is nothing else to do.  */
@@ -2655,24 +2660,10 @@ mapping_state (enum mstate state)
      record_alignment (now_seg, state == MAP_ARM ? 2 : 1);
  
    if (TRANSITION (MAP_UNDEFINED, MAP_DATA))
-    /* This case will be evaluated later in the next else.  */
+    /* This case will be evaluated later.  */
      return;
-  else if (TRANSITION (MAP_UNDEFINED, MAP_ARM)
-         || TRANSITION (MAP_UNDEFINED, MAP_THUMB))
-    {
-      /* Only add the symbol if the offset is > 0:
-        if we're at the first frag, check it's size > 0;
-        if we're not at the first frag, then for sure
-           the offset is > 0.  */
-      struct frag * const frag_first = seg_info (now_seg)->frchainP->frch_root;
-      const int add_symbol = (frag_now != frag_first) || (frag_now_fix () > 0);
-
-      if (add_symbol)
-       make_mapping_symbol (MAP_DATA, (valueT) 0, frag_first);
-    }
  
    mapping_state_2 (state, 0);
-#undef TRANSITION
  }
  
  /* Same as mapping_state, but MAX_CHARS bytes have already been
@@ -2691,9 +2682,20 @@ mapping_state_2 (enum mstate state, int max_chars)
         There is nothing else to do.  */
      return;
  
+  if (TRANSITION (MAP_UNDEFINED, MAP_ARM)
+         || TRANSITION (MAP_UNDEFINED, MAP_THUMB))
+    {
+      struct frag * const frag_first = seg_info (now_seg)->frchainP->frch_root;
+      const int add_symbol = (frag_now != frag_first) || (frag_now_fix () > 0);
+
+      if (add_symbol)
+       make_mapping_symbol (MAP_DATA, (valueT) 0, frag_first);
+    }
+
    seg_info (now_seg)->tc_segment_info_data.mapstate = state;
    make_mapping_symbol (state, (valueT) frag_now_fix () - max_chars, frag_now);
  }
+#undef TRANSITION
  #else
  #define mapping_state(x) ((void)0)
  #define mapping_state_2(x, y) ((void)0)
@@ -3159,6 +3161,7 @@ find_or_make_literal_pool (void)
        pool->sub_section            = now_subseg;
        pool->next           = list_of_pools;
        pool->symbol         = NULL;
+      pool->alignment      = 2;
  
        /* Add it to the list.  */
        list_of_pools = pool;
@@ -3180,33 +3183,74 @@ find_or_make_literal_pool (void)
     structure to the relevant literal pool.  */
  
  static int
-add_to_lit_pool (void)
+add_to_lit_pool (unsigned int nbytes)
  {
+#define PADDING_SLOT 0x1
+#define LIT_ENTRY_SIZE_MASK 0xFF
    literal_pool * pool;
-  unsigned int entry;
+  unsigned int entry, pool_size = 0;
+  bfd_boolean padding_slot_p = FALSE;
+  unsigned imm1 = 0;
+  unsigned imm2 = 0;
+
+  if (nbytes == 8)
+    {
+      imm1 = inst.operands[1].imm;
+      imm2 = (inst.operands[1].regisimm ? inst.operands[1].reg
+              : inst.reloc.exp.X_unsigned ? 0
+              : ((bfd_int64_t) inst.operands[1].imm) >> 32);
+      if (target_big_endian)
+       {
+         imm1 = imm2;
+         imm2 = inst.operands[1].imm;
+       }
+    }
  
    pool = find_or_make_literal_pool ();
  
    /* Check if this literal value is already in the pool.  */
    for (entry = 0; entry < pool->next_free_entry; entry ++)
      {
-      if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
-         && (inst.reloc.exp.X_op == O_constant)
-         && (pool->literals[entry].X_add_number
-             == inst.reloc.exp.X_add_number)
-         && (pool->literals[entry].X_unsigned
-             == inst.reloc.exp.X_unsigned))
+      if (nbytes == 4)
+       {
+         if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
+             && (inst.reloc.exp.X_op == O_constant)
+             && (pool->literals[entry].X_add_number
+                 == inst.reloc.exp.X_add_number)
+             && (pool->literals[entry].X_md == nbytes)
+             && (pool->literals[entry].X_unsigned
+                 == inst.reloc.exp.X_unsigned))
+           break;
+
+         if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
+             && (inst.reloc.exp.X_op == O_symbol)
+             && (pool->literals[entry].X_add_number
+                 == inst.reloc.exp.X_add_number)
+             && (pool->literals[entry].X_add_symbol
+                 == inst.reloc.exp.X_add_symbol)
+             && (pool->literals[entry].X_op_symbol
+                 == inst.reloc.exp.X_op_symbol)
+             && (pool->literals[entry].X_md == nbytes))
+           break;
+       }
+      else if ((nbytes == 8)
+              && !(pool_size & 0x7)
+              && ((entry + 1) != pool->next_free_entry)
+              && (pool->literals[entry].X_op == O_constant)
+              && (pool->literals[entry].X_add_number == (offsetT) imm1)
+              && (pool->literals[entry].X_unsigned
+                  == inst.reloc.exp.X_unsigned)
+              && (pool->literals[entry + 1].X_op == O_constant)
+              && (pool->literals[entry + 1].X_add_number == (offsetT) imm2)
+              && (pool->literals[entry + 1].X_unsigned
+                  == inst.reloc.exp.X_unsigned))
         break;
  
-      if ((pool->literals[entry].X_op == inst.reloc.exp.X_op)
-         && (inst.reloc.exp.X_op == O_symbol)
-         && (pool->literals[entry].X_add_number
-             == inst.reloc.exp.X_add_number)
-         && (pool->literals[entry].X_add_symbol
-             == inst.reloc.exp.X_add_symbol)
-         && (pool->literals[entry].X_op_symbol
-             == inst.reloc.exp.X_op_symbol))
+      padding_slot_p = ((pool->literals[entry].X_md >> 8) == PADDING_SLOT);
+      if (padding_slot_p && (nbytes == 4))
         break;
+
+      pool_size += 4;
      }
  
    /* Do we need to create a new entry? */
@@ -3218,7 +3262,64 @@ add_to_lit_pool (void)
           return FAIL;
         }
  
-      pool->literals[entry] = inst.reloc.exp;
+      if (nbytes == 8)
+       {
+         /* For 8-byte entries, we align to an 8-byte boundary,
+            and split it into two 4-byte entries, because on 32-bit
+            host, 8-byte constants are treated as big num, thus
+            saved in "generic_bignum" which will be overwritten
+            by later assignments.
+
+            We also need to make sure there is enough space for
+            the split.
+
+            We also check to make sure the literal operand is a
+            constant number.  */
+         if (!(inst.reloc.exp.X_op == O_constant
+               || inst.reloc.exp.X_op == O_big))
+           {
+             inst.error = _("invalid type for literal pool");
+             return FAIL;
+           }
+         else if (pool_size & 0x7)
+           {
+             if ((entry + 2) >= MAX_LITERAL_POOL_SIZE)
+               {
+                 inst.error = _("literal pool overflow");
+                 return FAIL;
+               }
+
+             pool->literals[entry] = inst.reloc.exp;
+             pool->literals[entry].X_add_number = 0;
+             pool->literals[entry++].X_md = (PADDING_SLOT << 8) | 4;
+             pool->next_free_entry += 1;
+             pool_size += 4;
+           }
+         else if ((entry + 1) >= MAX_LITERAL_POOL_SIZE)
+           {
+             inst.error = _("literal pool overflow");
+             return FAIL;
+           }
+
+         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry].X_op = O_constant;
+         pool->literals[entry].X_add_number = imm1;
+         pool->literals[entry].X_unsigned = inst.reloc.exp.X_unsigned;
+         pool->literals[entry++].X_md = 4;
+         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry].X_op = O_constant;
+         pool->literals[entry].X_add_number = imm2;
+         pool->literals[entry].X_unsigned = inst.reloc.exp.X_unsigned;
+         pool->literals[entry].X_md = 4;
+         pool->alignment = 3;
+         pool->next_free_entry += 1;
+       }
+      else
+       {
+         pool->literals[entry] = inst.reloc.exp;
+         pool->literals[entry].X_md = 4;
+       }
+
  #ifdef OBJ_ELF
        /* PR ld/12974: Record the location of the first source line to reference
          this entry in the literal pool.  If it turns out during linking that the
@@ -3229,9 +3330,14 @@ add_to_lit_pool (void)
  #endif
        pool->next_free_entry += 1;
      }
+  else if (padding_slot_p)
+    {
+      pool->literals[entry] = inst.reloc.exp;
+      pool->literals[entry].X_md = nbytes;
+    }
  
    inst.reloc.exp.X_op        = O_symbol;
-  inst.reloc.exp.X_add_number = ((int) entry) * 4;
+  inst.reloc.exp.X_add_number = pool_size;
    inst.reloc.exp.X_add_symbol = pool->symbol;
  
    return SUCCESS;
@@ -3273,7 +3379,7 @@ symbol_locate (symbolS *    symbolP,
                valueT       valu,       /* Symbol value.  */
                fragS *      frag)       /* Associated fragment.  */
  {
-  unsigned int name_length;
+  size_t name_length;
    char * preserved_copy_of_name;
  
    name_length = strlen (name) + 1;   /* +1 for \0.  */
@@ -3314,7 +3420,6 @@ symbol_locate (symbolS *    symbolP,
  #endif /* DEBUG_SYMS  */
  }
  
-
  static void
  s_ltorg (int ignored ATTRIBUTE_UNUSED)
  {
@@ -3331,7 +3436,7 @@ s_ltorg (int ignored ATTRIBUTE_UNUSED)
    /* Align pool as you have word accesses.
       Only make a frag if we have to.  */
    if (!need_pass_2)
-    frag_align (2, 0, 0);
+    frag_align (pool->alignment, 0, 0);
  
    record_alignment (now_seg, 2);
  
@@ -3358,7 +3463,8 @@ s_ltorg (int ignored ATTRIBUTE_UNUSED)
         dwarf2_gen_line_info (frag_now_fix (), pool->locs + entry);
  #endif
        /* First output the expression in the instruction to the pool.  */
-      emit_expr (&(pool->literals[entry]), 4); /* .word  */
+      emit_expr (&(pool->literals[entry]),
+                pool->literals[entry].X_md & LIT_ENTRY_SIZE_MASK);
      }
  
    /* Mark the pool as empty.  */
@@ -4669,28 +4775,31 @@ parse_immediate (char **str, int *val, int min, int max,
     instructions. Puts the result directly in inst.operands[i].  */
  
  static int
-parse_big_immediate (char **str, int i)
+parse_big_immediate (char **str, int i, expressionS *in_exp,
+                    bfd_boolean allow_symbol_p)
  {
    expressionS exp;
+  expressionS *exp_p = in_exp ? in_exp : &exp;
    char *ptr = *str;
  
-  my_get_expression (&exp, &ptr, GE_OPT_PREFIX_BIG);
+  my_get_expression (exp_p, &ptr, GE_OPT_PREFIX_BIG);
  
-  if (exp.X_op == O_constant)
+  if (exp_p->X_op == O_constant)
      {
-      inst.operands[i].imm = exp.X_add_number & 0xffffffff;
+      inst.operands[i].imm = exp_p->X_add_number & 0xffffffff;
        /* If we're on a 64-bit host, then a 64-bit number can be returned using
          O_constant.  We have to be careful not to break compilation for
          32-bit X_add_number, though.  */
-      if ((exp.X_add_number & ~(offsetT)(0xffffffffU)) != 0)
+      if ((exp_p->X_add_number & ~(offsetT)(0xffffffffU)) != 0)
         {
-         /* X >> 32 is illegal if sizeof (exp.X_add_number) == 4.  */
-         inst.operands[i].reg = ((exp.X_add_number >> 16) >> 16) & 0xffffffff;
+         /* X >> 32 is illegal if sizeof (exp_p->X_add_number) == 4.  */
+         inst.operands[i].reg = (((exp_p->X_add_number >> 16) >> 16)
+                                 & 0xffffffff);
           inst.operands[i].regisimm = 1;
         }
      }
-  else if (exp.X_op == O_big
-          && LITTLENUM_NUMBER_OF_BITS * exp.X_add_number > 32)
+  else if (exp_p->X_op == O_big
+          && LITTLENUM_NUMBER_OF_BITS * exp_p->X_add_number > 32)
      {
        unsigned parts = 32 / LITTLENUM_NUMBER_OF_BITS, j, idx = 0;
  
@@ -4703,7 +4812,7 @@ parse_big_immediate (char **str, int i)
          PR 11972: Bignums can now be sign-extended to the
          size of a .octa so check that the out of range bits
          are all zero or all one.  */
-      if (LITTLENUM_NUMBER_OF_BITS * exp.X_add_number > 64)
+      if (LITTLENUM_NUMBER_OF_BITS * exp_p->X_add_number > 64)
         {
           LITTLENUM_TYPE m = -1;
  
@@ -4711,7 +4820,7 @@ parse_big_immediate (char **str, int i)
               && generic_bignum[parts * 2] != m)
             return FAIL;
  
-         for (j = parts * 2 + 1; j < (unsigned) exp.X_add_number; j++)
+         for (j = parts * 2 + 1; j < (unsigned) exp_p->X_add_number; j++)
             if (generic_bignum[j] != generic_bignum[j-1])
               return FAIL;
         }
@@ -4726,7 +4835,7 @@ parse_big_immediate (char **str, int i)
                                 << (LITTLENUM_NUMBER_OF_BITS * j);
        inst.operands[i].regisimm = 1;
      }
-  else
+  else if (!(exp_p->X_op == O_symbol && allow_symbol_p))
      return FAIL;
  
    *str = ptr;
@@ -4835,6 +4944,41 @@ is_quarter_float (unsigned imm)
    return (imm & 0x7ffff) == 0 && ((imm & 0x7e000000) ^ bs) == 0;
  }
  
+
+/* Check whether *IN starts with an immediate whose value is zero,
+   spelled as an integer (#0, #0x0) or as a float (#0.0).  */
+
+static bfd_boolean
+parse_ifimm_zero (char **in)
+{
+  int status;
+
+  if (!is_immediate_prefix (**in))
+    return FALSE;
+
+  /* Step over the immediate prefix character.  */
+  ++*in;
+
+  if (strncmp (*in, "0x", 2) == 0)
+    {
+      /* Hexadecimal spelling: #0x0 is treated like #0.  */
+      int hexval;
+
+      return parse_immediate (in, &hexval, 0, 0, TRUE) != FAIL;
+    }
+
+  status = atof_generic (in, ".", EXP_CHARS,
+                         &generic_floating_point_number);
+  if (status != 0)
+    return FALSE;
+
+  if (generic_floating_point_number.sign != '+')
+    return FALSE;
+
+  return (generic_floating_point_number.low
+          > generic_floating_point_number.leader);
+}
+
  /* Parse an 8-bit "quarter-precision" floating point number of the form:
     0baBbbbbbc defgh000 00000000 00000000.
     The zero and minus-zero cases need special handling, since they can't be
@@ -5319,10 +5463,12 @@ parse_address_main (char **str, int i, int group_relocations,
           inst.operands[i].reg = REG_PC;
           inst.operands[i].isreg = 1;
           inst.operands[i].preind = 1;
-       }
-      /* Otherwise a load-constant pseudo op, no special treatment needed here.  */
  
-      if (my_get_expression (&inst.reloc.exp, &p, GE_NO_PREFIX))
+         if (my_get_expression (&inst.reloc.exp, &p, GE_OPT_PREFIX_BIG))
+           return PARSE_OPERAND_FAIL;
+       }
+      else if (parse_big_immediate (&p, i, &inst.reloc.exp,
+                                   /*allow_symbol_p=*/TRUE))
         return PARSE_OPERAND_FAIL;
  
        *str = p;
@@ -6152,7 +6298,8 @@ parse_neon_mov (char **str, int *which_operand)
              Case 10: VMOV.F32 <Sd>, #<imm>
              Case 11: VMOV.F64 <Dd>, #<imm>  */
         inst.operands[i].immisfloat = 1;
-      else if (parse_big_immediate (&ptr, i) == SUCCESS)
+      else if (parse_big_immediate (&ptr, i, NULL, /*allow_symbol_p=*/FALSE)
+              == SUCCESS)
           /* Case 2: VMOV<c><q>.<dt> <Qd>, #<imm>
              Case 3: VMOV<c><q>.<dt> <Dd>, #<imm>  */
         ;
@@ -6305,6 +6452,7 @@ enum operand_parse_code
  
    OP_RNDQ_I0,   /* Neon D or Q reg, or immediate zero.  */
    OP_RVSD_I0,  /* VFP S or D reg, or immediate zero.  */
+  OP_RSVD_FI0, /* VFP S or D reg, or floating point immediate zero.  */
    OP_RR_RNSC,   /* ARM reg or Neon scalar.  */
    OP_RNSDQ_RNSC, /* Vector S, D or Q reg, or Neon scalar.  */
    OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar.  */
@@ -6588,6 +6736,22 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           po_reg_or_goto (REG_TYPE_VFSD, try_imm0);
           break;
  
+       case OP_RSVD_FI0:
+         {
+           po_reg_or_goto (REG_TYPE_VFSD, try_ifimm0);
+           break;
+           try_ifimm0:
+           if (parse_ifimm_zero (&str))
+             inst.operands[i].imm = 0;
+           else
+           {
+             inst.error
+               = _("only floating point zero is allowed as immediate value");
+             goto failure;
+           }
+         }
+         break;
+
         case OP_RR_RNSC:
           {
             po_scalar_or_goto (8, try_rr);
@@ -6637,7 +6801,8 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
             try_immbig:
             /* There's a possibility of getting a 64-bit immediate here, so
                we need special handling.  */
-           if (parse_big_immediate (&str, i) == FAIL)
+           if (parse_big_immediate (&str, i, NULL, /*allow_symbol_p=*/FALSE)
+               == FAIL)
               {
                 inst.error = _("immediate value is out of range");
                 goto failure;
@@ -6873,7 +7038,7 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
           val = parse_reg_list (&str);
           if (*str == '^')
             {
-             inst.operands[1].writeback = 1;
+             inst.operands[i].writeback = 1;
               str++;
             }
           break;
@@ -7089,12 +7254,12 @@ parse_operands (char *str, const unsigned int *pattern, bfd_boolean thumb)
  #define warn_deprecated_sp(reg)                        \
    do                                           \
      if (warn_on_deprecated && reg == REG_SP)   \
-       as_warn (_("use of r13 is deprecated"));        \
+       as_tsktsk (_("use of r13 is deprecated"));      \
    while (0)
  
  /* Functions for operand encoding.  ARM, then Thumb.  */
  
-#define rotate_left(v, n) (v << n | v >> (32 - n))
+#define rotate_left(v, n) ((v) << ((n) & 31) | (v) >> ((32 - (n)) & 31))
  
  /* If VAL can be encoded in the immediate field of an ARM instruction,
     return the encoded form.  Otherwise, return FAIL.  */
@@ -7321,7 +7486,7 @@ encode_arm_addr_mode_2 (int i, bfd_boolean is_t)
           if (warn_on_deprecated
               && !is_load
               && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v7))
-           as_warn (_("use of PC in this instruction is deprecated"));
+           as_tsktsk (_("use of PC in this instruction is deprecated"));
         }
  
        if (inst.reloc.type == BFD_RELOC_UNUSED)
@@ -7378,71 +7543,204 @@ encode_arm_addr_mode_3 (int i, bfd_boolean is_t)
      }
  }
  
-/* inst.operands[i] was set up by parse_address.  Encode it into an
-   ARM-format instruction.  Reject all forms which cannot be encoded
-   into a coprocessor load/store instruction.  If wb_ok is false,
-   reject use of writeback; if unind_ok is false, reject use of
-   unindexed addressing.  If reloc_override is not 0, use it instead
-   of BFD_ARM_CP_OFF_IMM, unless the initial relocation is a group one
-   (in which case it is preserved).  */
+/* OR the eight immediate bits abcdefgh of IMMBITS into inst.instruction:
+
+  |28/24|23     19|18 16|15                    4|3     0|
+  |  a  |x x x x x|b c d|x x x x x x x x x x x x|e f g h|
+
+  Bit a is at 28 if thumb_mode, else 24.  Used by VMOV/VMVN/VORR/VBIC.  */
+
+static void
+neon_write_immbits (unsigned immbits)
+{
+  inst.instruction |= ((immbits & 0xf)
+                       | (((immbits >> 4) & 0x7) << 16)
+                       | (((immbits >> 7) & 0x1) << (thumb_mode ? 28 : 24)));
+}
+
+/* Invert the low-order SIZE bits of XHI:XLO.  The stored low word is
+   masked to min (SIZE, 32) bits; the high word is inverted only when
+   SIZE is 64.  A NULL pointer is read as zero and never written.  */
+
+static void
+neon_invert_size (unsigned *xlo, unsigned *xhi, int size)
+{
+  unsigned lo = 0;
+  unsigned hi = 0;
+  unsigned lo_mask = 0;
+
+  if (xlo != NULL)
+    lo = *xlo;
+  if (xhi != NULL)
+    hi = *xhi;
+
+  if (size == 8)
+    lo_mask = 0xff;
+  else if (size == 16)
+    lo_mask = 0xffff;
+  else if (size == 32 || size == 64)
+    lo_mask = 0xffffffff;
+  else
+    abort ();
+
+  /* Only the 64-bit case touches the high word.  */
+  if (size == 64)
+    hi = ~hi & 0xffffffff;
+
+  lo = ~lo & lo_mask;
+
+  if (xlo != NULL)
+    *xlo = lo;
+  if (xhi != NULL)
+    *xhi = hi;
+}
+
+/* True if IMM has form 0bAAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD for bits
+   A, B, C, D.  */
  
  static int
-encode_arm_cp_address (int i, int wb_ok, int unind_ok, int reloc_override)
+neon_bits_same_in_bytes (unsigned imm)
  {
-  inst.instruction |= inst.operands[i].reg << 16;
+  return ((imm & 0x000000ff) == 0 || (imm & 0x000000ff) == 0x000000ff)
+        && ((imm & 0x0000ff00) == 0 || (imm & 0x0000ff00) == 0x0000ff00)
+        && ((imm & 0x00ff0000) == 0 || (imm & 0x00ff0000) == 0x00ff0000)
+        && ((imm & 0xff000000) == 0 || (imm & 0xff000000) == 0xff000000);
+}
  
-  gas_assert (!(inst.operands[i].preind && inst.operands[i].postind));
+/* For immediate of above form, return 0bABCD.  */
  
-  if (!inst.operands[i].preind && !inst.operands[i].postind) /* unindexed */
+static unsigned
+neon_squash_bits (unsigned imm)
+{
+  return (((imm >> 21) & 0x8) | ((imm >> 14) & 0x4) | ((imm >> 7) & 0x2)
+         | (imm & 0x1));
+}
+
+/* Pack a quarter-float IMM into 8 bits: bit 31 -> 7, bits 25:19 -> 6:0.  */
+
+static unsigned
+neon_qfloat_bits (unsigned imm)
+{
+  return ((imm & 0x80000000) >> 24) | ((imm & 0x03f80000) >> 19);
+}
+
+/* Returns CMODE. IMMBITS [7:0] is set to bits suitable for inserting into
+   the instruction. *OP is passed as the initial value of the op field, and
+   may be set to a different value depending on the constant (i.e.
+   "MOV I64, 0bAAAAAAAABBBB..." which uses OP = 1 despite being MOV not
+   MVN).  If the immediate looks like a repeated pattern then also
+   try smaller element sizes.  */
+
+static int
+neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, int float_p,
+                        unsigned *immbits, int *op, int size,
+                        enum neon_el_type type)
+{
+  /* Only permit float immediates (including 0.0/-0.0) if the operand type is
+     float.  */
+  if (type == NT_float && !float_p)
+    return FAIL;
+
+  if (type == NT_float && is_quarter_float (immlo) && immhi == 0)
      {
-      gas_assert (!inst.operands[i].writeback);
-      if (!unind_ok)
+      if (size != 32 || *op == 1)
+       return FAIL;
+      *immbits = neon_qfloat_bits (immlo);
+      return 0xf;
+    }
+
+  if (size == 64)
+    {
+      if (neon_bits_same_in_bytes (immhi)
+         && neon_bits_same_in_bytes (immlo))
         {
-         inst.error = _("instruction does not support unindexed addressing");
-         return FAIL;
+         if (*op == 1)
+           return FAIL;
+         *immbits = (neon_squash_bits (immhi) << 4)
+                    | neon_squash_bits (immlo);
+         *op = 1;
+         return 0xe;
         }
-      inst.instruction |= inst.operands[i].imm;
-      inst.instruction |= INDEX_UP;
-      return SUCCESS;
-    }
  
-  if (inst.operands[i].preind)
-    inst.instruction |= PRE_INDEX;
+      if (immhi != immlo)
+       return FAIL;
+    }
  
-  if (inst.operands[i].writeback)
+  if (size >= 32)
      {
-      if (inst.operands[i].reg == REG_PC)
+      if (immlo == (immlo & 0x000000ff))
         {
-         inst.error = _("pc may not be used with write-back");
-         return FAIL;
+         *immbits = immlo;
+         return 0x0;
         }
-      if (!wb_ok)
+      else if (immlo == (immlo & 0x0000ff00))
         {
-         inst.error = _("instruction does not support writeback");
-         return FAIL;
+         *immbits = immlo >> 8;
+         return 0x2;
         }
-      inst.instruction |= WRITE_BACK;
+      else if (immlo == (immlo & 0x00ff0000))
+       {
+         *immbits = immlo >> 16;
+         return 0x4;
+       }
+      else if (immlo == (immlo & 0xff000000))
+       {
+         *immbits = immlo >> 24;
+         return 0x6;
+       }
+      else if (immlo == ((immlo & 0x0000ff00) | 0x000000ff))
+       {
+         *immbits = (immlo >> 8) & 0xff;
+         return 0xc;
+       }
+      else if (immlo == ((immlo & 0x00ff0000) | 0x0000ffff))
+       {
+         *immbits = (immlo >> 16) & 0xff;
+         return 0xd;
+       }
+
+      if ((immlo & 0xffff) != (immlo >> 16))
+       return FAIL;
+      immlo &= 0xffff;
      }
  
-  if (reloc_override)
-    inst.reloc.type = (bfd_reloc_code_real_type) reloc_override;
-  else if ((inst.reloc.type < BFD_RELOC_ARM_ALU_PC_G0_NC
-           || inst.reloc.type > BFD_RELOC_ARM_LDC_SB_G2)
-          && inst.reloc.type != BFD_RELOC_ARM_LDR_PC_G0)
+  if (size >= 16)
      {
-      if (thumb_mode)
-       inst.reloc.type = BFD_RELOC_ARM_T32_CP_OFF_IMM;
-      else
-       inst.reloc.type = BFD_RELOC_ARM_CP_OFF_IMM;
+      if (immlo == (immlo & 0x000000ff))
+       {
+         *immbits = immlo;
+         return 0x8;
+       }
+      else if (immlo == (immlo & 0x0000ff00))
+       {
+         *immbits = immlo >> 8;
+         return 0xa;
+       }
+
+      if ((immlo & 0xff) != (immlo >> 8))
+       return FAIL;
+      immlo &= 0xff;
      }
  
-  /* Prefer + for zero encoded value.  */
-  if (!inst.operands[i].negative)
-    inst.instruction |= INDEX_UP;
+  if (immlo == (immlo & 0x000000ff))
+    {
+      /* Don't allow MVN with 8-bit immediate.  */
+      if (*op == 1)
+       return FAIL;
+      *immbits = immlo;
+      return 0xe;
+    }
  
-  return SUCCESS;
+  return FAIL;
  }
  
+enum lit_type  /* Kind of "=expr" load given to move_or_literal_pool.  */
+{
+  CONST_THUMB,  /* Sets thumb_p there: Thumb load bit and Thumb reloc.  */
+  CONST_ARM,    /* Sets arm_p there; passed by do_ldst and do_ldstv4.  */
+  CONST_VEC     /* Vector load; passed by encode_arm_cp_address.  */
+};
+
  /* inst.reloc.exp describes an "=expr" load pseudo-operation.
     Determine whether it can be performed with a move instruction; if
     it can, convert inst.instruction to that move instruction and
@@ -7453,9 +7751,12 @@ encode_arm_cp_address (int i, int wb_ok, int unind_ok, int reloc_override)
     inst.operands[i] describes the destination register.         */
  
  static bfd_boolean
-move_or_literal_pool (int i, bfd_boolean thumb_p, bfd_boolean mode_3)
+move_or_literal_pool (int i, enum lit_type t, bfd_boolean mode_3)
  {
    unsigned long tbit;
+  bfd_boolean thumb_p = (t == CONST_THUMB);
+  bfd_boolean arm_p   = (t == CONST_ARM);
+  bfd_boolean vec64_p = (t == CONST_VEC) && !inst.operands[i].issingle;
  
    if (thumb_p)
      tbit = (inst.instruction > 0xffff) ? THUMB2_LOAD_BIT : THUMB_LOAD_BIT;
@@ -7467,14 +7768,18 @@ move_or_literal_pool (int i, bfd_boolean thumb_p, bfd_boolean mode_3)
        inst.error = _("invalid pseudo operation");
        return TRUE;
      }
-  if (inst.reloc.exp.X_op != O_constant && inst.reloc.exp.X_op != O_symbol)
+  if (inst.reloc.exp.X_op != O_constant
+      && inst.reloc.exp.X_op != O_symbol
+      && inst.reloc.exp.X_op != O_big)
      {
        inst.error = _("constant expression expected");
        return TRUE;
      }
-  if (inst.reloc.exp.X_op == O_constant)
+  if ((inst.reloc.exp.X_op == O_constant
+       || inst.reloc.exp.X_op == O_big)
+      && !inst.operands[i].issingle)
      {
-      if (thumb_p)
+      if (thumb_p && inst.reloc.exp.X_op == O_constant)
         {
           if (!unified_syntax && (inst.reloc.exp.X_add_number & ~0xFF) == 0)
             {
@@ -7484,7 +7789,7 @@ move_or_literal_pool (int i, bfd_boolean thumb_p, bfd_boolean mode_3)
               return TRUE;
             }
         }
-      else
+      else if (arm_p && inst.reloc.exp.X_op == O_constant)
         {
           int value = encode_arm_immediate (inst.reloc.exp.X_add_number);
           if (value != FAIL)
@@ -7506,23 +7811,130 @@ move_or_literal_pool (int i, bfd_boolean thumb_p, bfd_boolean mode_3)
               return TRUE;
             }
         }
+      else if (vec64_p)
+       {
+         int op = 0;
+         unsigned immbits = 0;
+         unsigned immlo = inst.operands[1].imm;
+         unsigned immhi = inst.operands[1].regisimm
+                          ? inst.operands[1].reg
+                          : inst.reloc.exp.X_unsigned
+                            ? 0
+                            : ((bfd_int64_t)((int) immlo)) >> 32;
+         int cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
+                                              &op, 64, NT_invtype);
+
+         if (cmode == FAIL)
+           {
+             neon_invert_size (&immlo, &immhi, 64);
+             op = !op;
+             cmode = neon_cmode_for_move_imm (immlo, immhi, FALSE, &immbits,
+                                              &op, 64, NT_invtype);
+           }
+         if (cmode != FAIL)
+           {
+             inst.instruction = (inst.instruction & VLDR_VMOV_SAME)
+                                 | (1 << 23)
+                                 | (cmode << 8)
+                                 | (op << 5)
+                                 | (1 << 4);
+             /* Fill other bits in vmov encoding for both thumb and arm.  */
+             if (thumb_mode)
+               inst.instruction |= (0x7 << 29) | (0xF << 24);
+             else
+               inst.instruction |= (0xF << 28) | (0x1 << 25);
+             neon_write_immbits (immbits);
+             return TRUE;
+           }
+       }
+    }
+
+  if (add_to_lit_pool ((!inst.operands[i].isvec
+                       || inst.operands[i].issingle) ? 4 : 8) == FAIL)
+    return TRUE;
+
+  inst.operands[1].reg = REG_PC;
+  inst.operands[1].isreg = 1;
+  inst.operands[1].preind = 1;
+  inst.reloc.pc_rel = 1;
+  inst.reloc.type = (thumb_p
+                    ? BFD_RELOC_ARM_THUMB_OFFSET
+                    : (mode_3
+                       ? BFD_RELOC_ARM_HWLITERAL
+                       : BFD_RELOC_ARM_LITERAL));
+  return FALSE;
+}
+
+/* inst.operands[i] was set up by parse_address.  Encode it into an
+   ARM-format instruction.  Reject all forms which cannot be encoded
+   into a coprocessor load/store instruction.  If wb_ok is false,
+   reject use of writeback; if unind_ok is false, reject use of
+   unindexed addressing.  If reloc_override is not 0, use it instead
+   of BFD_ARM_CP_OFF_IMM, unless the initial relocation is a group one
+   (in which case it is preserved).  */
+
+static int
+encode_arm_cp_address (int i, int wb_ok, int unind_ok, int reloc_override)
+{
+  if (!inst.operands[i].isreg)
+    {
+      gas_assert (inst.operands[0].isvec);
+      if (move_or_literal_pool (0, CONST_VEC, /*mode_3=*/FALSE))
+       return SUCCESS;
+    }
+
+  inst.instruction |= inst.operands[i].reg << 16;
+
+  gas_assert (!(inst.operands[i].preind && inst.operands[i].postind));
+
+  if (!inst.operands[i].preind && !inst.operands[i].postind) /* unindexed */
+    {
+      gas_assert (!inst.operands[i].writeback);
+      if (!unind_ok)
+       {
+         inst.error = _("instruction does not support unindexed addressing");
+         return FAIL;
+       }
+      inst.instruction |= inst.operands[i].imm;
+      inst.instruction |= INDEX_UP;
+      return SUCCESS;
+    }
+
+  if (inst.operands[i].preind)
+    inst.instruction |= PRE_INDEX;
+
+  if (inst.operands[i].writeback)
+    {
+      if (inst.operands[i].reg == REG_PC)
+       {
+         inst.error = _("pc may not be used with write-back");
+         return FAIL;
+       }
+      if (!wb_ok)
+       {
+         inst.error = _("instruction does not support writeback");
+         return FAIL;
+       }
+      inst.instruction |= WRITE_BACK;
      }
  
-  if (add_to_lit_pool () == FAIL)
+  if (reloc_override)
+    inst.reloc.type = (bfd_reloc_code_real_type) reloc_override;
+  else if ((inst.reloc.type < BFD_RELOC_ARM_ALU_PC_G0_NC
+           || inst.reloc.type > BFD_RELOC_ARM_LDC_SB_G2)
+          && inst.reloc.type != BFD_RELOC_ARM_LDR_PC_G0)
      {
-      inst.error = _("literal pool insertion failed");
-      return TRUE;
+      if (thumb_mode)
+       inst.reloc.type = BFD_RELOC_ARM_T32_CP_OFF_IMM;
+      else
+       inst.reloc.type = BFD_RELOC_ARM_CP_OFF_IMM;
      }
-  inst.operands[1].reg = REG_PC;
-  inst.operands[1].isreg = 1;
-  inst.operands[1].preind = 1;
-  inst.reloc.pc_rel = 1;
-  inst.reloc.type = (thumb_p
-                    ? BFD_RELOC_ARM_THUMB_OFFSET
-                    : (mode_3
-                       ? BFD_RELOC_ARM_HWLITERAL
-                       : BFD_RELOC_ARM_LITERAL));
-  return FALSE;
+
+  /* Prefer + for zero encoded value.  */
+  if (!inst.operands[i].negative)
+    inst.instruction |= INDEX_UP;
+
+  return SUCCESS;
  }
  
  /* Functions for instruction encoding, sorted by sub-architecture.
@@ -7573,7 +7985,7 @@ check_obsolete (const arm_feature_set *feature, const char *msg)
  {
    if (ARM_CPU_IS_ANY (cpu_variant))
      {
-      as_warn ("%s", msg);
+      as_tsktsk ("%s", msg);
        return TRUE;
      }
    else if (ARM_CPU_HAS_FEATURE (cpu_variant, *feature))
@@ -7601,7 +8013,7 @@ do_rd_rm_rn (void)
                            _("swp{b} use is obsoleted for ARMv8 and later"))
           && warn_on_deprecated
           && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v6))
-       as_warn (_("swp{b} use is deprecated for ARMv6 and ARMv7"));
+       as_tsktsk (_("swp{b} use is deprecated for ARMv6 and ARMv7"));
      }
  
    inst.instruction |= inst.operands[0].reg << 12;
@@ -7978,7 +8390,7 @@ do_co_reg (void)
             if (! ARM_CPU_IS_ANY (cpu_variant)
                 && warn_on_deprecated
                 && ARM_CPU_HAS_FEATURE (cpu_variant, r->deprecated))
-             as_warn ("%s", r->dep_msg);
+             as_tsktsk ("%s", r->dep_msg);
           }
        }
  
@@ -8255,7 +8667,7 @@ do_ldst (void)
  {
    inst.instruction |= inst.operands[0].reg << 12;
    if (!inst.operands[1].isreg)
-    if (move_or_literal_pool (0, /*thumb_p=*/FALSE, /*mode_3=*/FALSE))
+    if (move_or_literal_pool (0, CONST_ARM, /*mode_3=*/FALSE))
        return;
    encode_arm_addr_mode_2 (1, /*is_t=*/FALSE);
    check_ldr_r15_aligned ();
@@ -8288,7 +8700,7 @@ do_ldstv4 (void)
    constraint (inst.operands[0].reg == REG_PC, BAD_PC);
    inst.instruction |= inst.operands[0].reg << 12;
    if (!inst.operands[1].isreg)
-    if (move_or_literal_pool (0, /*thumb_p=*/FALSE, /*mode_3=*/TRUE))
+    if (move_or_literal_pool (0, CONST_ARM, /*mode_3=*/TRUE))
        return;
    encode_arm_addr_mode_3 (1, /*is_t=*/FALSE);
  }
@@ -8624,6 +9036,8 @@ do_pli (void)
  static void
  do_push_pop (void)
  {
+  constraint (inst.operands[0].writeback,
+             _("push/pop do not support {reglist}^"));
    inst.operands[1] = inst.operands[0];
    memset (&inst.operands[0], 0, sizeof inst.operands[0]);
    inst.operands[0].isreg = 1;
@@ -8701,7 +9115,7 @@ do_setend (void)
  {
    if (warn_on_deprecated
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8))
-      as_warn (_("setend use is deprecated for ARMv8"));
+      as_tsktsk (_("setend use is deprecated for ARMv8"));
  
    if (inst.operands[0].imm)
      inst.instruction |= 0x200;
@@ -10832,7 +11246,7 @@ do_t_ldst (void)
         {
           if (opcode <= 0xffff)
             inst.instruction = THUMB_OP32 (opcode);
-         if (move_or_literal_pool (0, /*thumb_p=*/TRUE, /*mode_3=*/FALSE))
+         if (move_or_literal_pool (0, CONST_THUMB, /*mode_3=*/FALSE))
             return;
         }
        if (inst.operands[1].isreg
@@ -10938,7 +11352,7 @@ do_t_ldst (void)
  
    inst.instruction = THUMB_OP16 (inst.instruction);
    if (!inst.operands[1].isreg)
-    if (move_or_literal_pool (0, /*thumb_p=*/TRUE, /*mode_3=*/FALSE))
+    if (move_or_literal_pool (0, CONST_THUMB, /*mode_3=*/FALSE))
        return;
  
    constraint (!inst.operands[1].preind
@@ -11144,7 +11558,7 @@ do_t_mov_cmp (void)
                   if ((Rn == REG_SP || Rn == REG_PC)
                       && (Rm == REG_SP || Rm == REG_PC))
                     {
-                     as_warn (_("Use of r%u as a source register is "
+                     as_tsktsk (_("Use of r%u as a source register is "
                                  "deprecated when r%u is the destination "
                                  "register."), Rm, Rn);
                     }
@@ -11968,7 +12382,7 @@ do_t_setend (void)
  {
    if (warn_on_deprecated
        && ARM_CPU_HAS_FEATURE (cpu_variant, arm_ext_v8))
-      as_warn (_("setend use is deprecated for ARMv8"));
+      as_tsktsk (_("setend use is deprecated for ARMv8"));
  
    set_it_insn_type (OUTSIDE_IT_INSN);
    if (inst.operands[0].imm)
@@ -13696,8 +14110,12 @@ do_neon_shl_imm (void)
      {
        enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
        struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+      int imm = inst.operands[2].imm;
+
+      constraint (imm < 0 || (unsigned)imm >= et.size,
+                 _("immediate out of range for shift"));
        NEON_ENCODE (IMMED, inst);
-      neon_imm_shift (FALSE, 0, neon_quad (rs), et, inst.operands[2].imm);
+      neon_imm_shift (FALSE, 0, neon_quad (rs), et, imm);
      }
    else
      {
@@ -13728,10 +14146,12 @@ do_neon_qshl_imm (void)
      {
        enum neon_shape rs = neon_select_shape (NS_DDI, NS_QQI, NS_NULL);
        struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+      int imm = inst.operands[2].imm;
  
+      constraint (imm < 0 || (unsigned)imm >= et.size,
+                 _("immediate out of range for shift"));
        NEON_ENCODE (IMMED, inst);
-      neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et,
-                     inst.operands[2].imm);
+      neon_imm_shift (TRUE, et.type == NT_unsigned, neon_quad (rs), et, imm);
      }
    else
      {
@@ -13819,197 +14239,6 @@ neon_cmode_for_logic_imm (unsigned immediate, unsigned *immbits, int size)
    return FAIL;
  }
  
-/* True if IMM has form 0bAAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD for bits
-   A, B, C, D.  */
-
-static int
-neon_bits_same_in_bytes (unsigned imm)
-{
-  return ((imm & 0x000000ff) == 0 || (imm & 0x000000ff) == 0x000000ff)
-        && ((imm & 0x0000ff00) == 0 || (imm & 0x0000ff00) == 0x0000ff00)
-        && ((imm & 0x00ff0000) == 0 || (imm & 0x00ff0000) == 0x00ff0000)
-        && ((imm & 0xff000000) == 0 || (imm & 0xff000000) == 0xff000000);
-}
-
-/* For immediate of above form, return 0bABCD.  */
-
-static unsigned
-neon_squash_bits (unsigned imm)
-{
-  return (imm & 0x01) | ((imm & 0x0100) >> 7) | ((imm & 0x010000) >> 14)
-        | ((imm & 0x01000000) >> 21);
-}
-
-/* Compress quarter-float representation to 0b...000 abcdefgh.  */
-
-static unsigned
-neon_qfloat_bits (unsigned imm)
-{
-  return ((imm >> 19) & 0x7f) | ((imm >> 24) & 0x80);
-}
-
-/* Returns CMODE. IMMBITS [7:0] is set to bits suitable for inserting into
-   the instruction. *OP is passed as the initial value of the op field, and
-   may be set to a different value depending on the constant (i.e.
-   "MOV I64, 0bAAAAAAAABBBB..." which uses OP = 1 despite being MOV not
-   MVN).  If the immediate looks like a repeated pattern then also
-   try smaller element sizes.  */
-
-static int
-neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, int float_p,
-                        unsigned *immbits, int *op, int size,
-                        enum neon_el_type type)
-{
-  /* Only permit float immediates (including 0.0/-0.0) if the operand type is
-     float.  */
-  if (type == NT_float && !float_p)
-    return FAIL;
-
-  if (type == NT_float && is_quarter_float (immlo) && immhi == 0)
-    {
-      if (size != 32 || *op == 1)
-       return FAIL;
-      *immbits = neon_qfloat_bits (immlo);
-      return 0xf;
-    }
-
-  if (size == 64)
-    {
-      if (neon_bits_same_in_bytes (immhi)
-         && neon_bits_same_in_bytes (immlo))
-       {
-         if (*op == 1)
-           return FAIL;
-         *immbits = (neon_squash_bits (immhi) << 4)
-                    | neon_squash_bits (immlo);
-         *op = 1;
-         return 0xe;
-       }
-
-      if (immhi != immlo)
-       return FAIL;
-    }
-
-  if (size >= 32)
-    {
-      if (immlo == (immlo & 0x000000ff))
-       {
-         *immbits = immlo;
-         return 0x0;
-       }
-      else if (immlo == (immlo & 0x0000ff00))
-       {
-         *immbits = immlo >> 8;
-         return 0x2;
-       }
-      else if (immlo == (immlo & 0x00ff0000))
-       {
-         *immbits = immlo >> 16;
-         return 0x4;
-       }
-      else if (immlo == (immlo & 0xff000000))
-       {
-         *immbits = immlo >> 24;
-         return 0x6;
-       }
-      else if (immlo == ((immlo & 0x0000ff00) | 0x000000ff))
-       {
-         *immbits = (immlo >> 8) & 0xff;
-         return 0xc;
-       }
-      else if (immlo == ((immlo & 0x00ff0000) | 0x0000ffff))
-       {
-         *immbits = (immlo >> 16) & 0xff;
-         return 0xd;
-       }
-
-      if ((immlo & 0xffff) != (immlo >> 16))
-       return FAIL;
-      immlo &= 0xffff;
-    }
-
-  if (size >= 16)
-    {
-      if (immlo == (immlo & 0x000000ff))
-       {
-         *immbits = immlo;
-         return 0x8;
-       }
-      else if (immlo == (immlo & 0x0000ff00))
-       {
-         *immbits = immlo >> 8;
-         return 0xa;
-       }
-
-      if ((immlo & 0xff) != (immlo >> 8))
-       return FAIL;
-      immlo &= 0xff;
-    }
-
-  if (immlo == (immlo & 0x000000ff))
-    {
-      /* Don't allow MVN with 8-bit immediate.  */
-      if (*op == 1)
-       return FAIL;
-      *immbits = immlo;
-      return 0xe;
-    }
-
-  return FAIL;
-}
-
-/* Write immediate bits [7:0] to the following locations:
-
-  |28/24|23     19|18 16|15                    4|3     0|
-  |  a  |x x x x x|b c d|x x x x x x x x x x x x|e f g h|
-
-  This function is used by VMOV/VMVN/VORR/VBIC.  */
-
-static void
-neon_write_immbits (unsigned immbits)
-{
-  inst.instruction |= immbits & 0xf;
-  inst.instruction |= ((immbits >> 4) & 0x7) << 16;
-  inst.instruction |= ((immbits >> 7) & 0x1) << 24;
-}
-
-/* Invert low-order SIZE bits of XHI:XLO.  */
-
-static void
-neon_invert_size (unsigned *xlo, unsigned *xhi, int size)
-{
-  unsigned immlo = xlo ? *xlo : 0;
-  unsigned immhi = xhi ? *xhi : 0;
-
-  switch (size)
-    {
-    case 8:
-      immlo = (~immlo) & 0xff;
-      break;
-
-    case 16:
-      immlo = (~immlo) & 0xffff;
-      break;
-
-    case 64:
-      immhi = (~immhi) & 0xffffffff;
-      /* fall through.  */
-
-    case 32:
-      immlo = (~immlo) & 0xffffffff;
-      break;
-
-    default:
-      abort ();
-    }
-
-  if (xlo)
-    *xlo = immlo;
-
-  if (xhi)
-    *xhi = immhi;
-}
-
  static void
  do_neon_logic (void)
  {
@@ -14819,6 +15048,13 @@ do_vfp_nsyn_cvt_fpv8 (enum neon_cvt_flavour flavour,
    int sz, op;
    int rm;
  
+  /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
+     D register operands.  */
+  if (flavour == neon_cvt_flavour_s32_f64
+      || flavour == neon_cvt_flavour_u32_f64)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+               _(BAD_FPU));
+
    set_it_insn_type (OUTSIDE_IT_INSN);
  
    switch (flavour)
@@ -15083,11 +15319,21 @@ do_neon_cvttb_1 (bfd_boolean t)
      }
    else if (neon_check_type (2, rs, N_F16, N_F64 | N_VFP).type != NT_invtype)
      {
+      /* The VCVTB and VCVTT instructions with D-register operands
+         don't work for SP only targets.  */
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+                 _(BAD_FPU));
+
        inst.error = NULL;
        do_neon_cvttb_2 (t, /*to=*/TRUE, /*is_double=*/TRUE);
      }
    else if (neon_check_type (2, rs, N_F64 | N_VFP, N_F16).type != NT_invtype)
      {
+      /* The VCVTB and VCVTT instructions with D-register operands
+         don't work for SP only targets.  */
+      constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+                 _(BAD_FPU));
+
        inst.error = NULL;
        do_neon_cvttb_2 (t, /*to=*/FALSE, /*is_double=*/TRUE);
      }
@@ -15856,7 +16102,7 @@ do_neon_ldr_str (void)
        if (thumb_mode)
         inst.error = _("Use of PC here is UNPREDICTABLE");
        else if (warn_on_deprecated)
-       as_warn (_("Use of PC here is deprecated"));
+       as_tsktsk (_("Use of PC here is deprecated"));
      }
  
    if (inst.operands[0].issingle)
@@ -16214,6 +16460,12 @@ do_neon_ldx_stx (void)
  static void
  do_vfp_nsyn_fpv8 (enum neon_shape rs)
  {
+  /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
+     D register operands.  */
+  if (neon_shape_class[rs] == SC_DOUBLE)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+               _(BAD_FPU));
+
    NEON_ENCODE (FPV8, inst);
  
    if (rs == NS_FFF)
@@ -16259,6 +16511,12 @@ do_vrint_1 (enum neon_cvt_mode mode)
    if (rs == NS_NULL)
      return;
  
+  /* Targets like FPv5-SP-D16 don't support FP v8 instructions with
+     D register operands.  */
+  if (neon_shape_class[rs] == SC_DOUBLE)
+    constraint (!ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_armv8),
+               _(BAD_FPU));
+
    et = neon_check_type (2, rs, N_EQK | N_VFP, N_F32 | N_F64 | N_KEY | N_VFP);
    if (et.type != NT_invtype)
      {
@@ -16871,7 +17129,7 @@ opcode_lookup (char **str)
         }
  
        if (warn_on_deprecated && unified_syntax)
-       as_warn (_("conditional infixes are deprecated in unified syntax"));
+       as_tsktsk (_("conditional infixes are deprecated in unified syntax"));
        affix = base + (opcode->tag - OT_odd_infix_0);
        cond = (const struct asm_cond *) hash_find_n (arm_cond_hsh, affix, 2);
        gas_assert (cond);
@@ -16957,7 +17215,7 @@ opcode_lookup (char **str)
        if (warn_on_deprecated && unified_syntax
           && (opcode->tag == OT_cinfix3
               || opcode->tag == OT_cinfix3_deprecated))
-       as_warn (_("conditional infixes are deprecated in unified syntax"));
+       as_tsktsk (_("conditional infixes are deprecated in unified syntax"));
  
        inst.cond = cond->value;
        return opcode;
@@ -17266,6 +17524,9 @@ static const struct depr_insn_mask depr_it_insns[] = {
    { 0x4800, 0xf800, N_("Literal loads") },
    { 0x4478, 0xf478, N_("Hi-register ADD, MOV, CMP, BX, BLX using pc") },
    { 0x4487, 0xfc87, N_("Hi-register ADD, MOV, CMP using pc") },
+  /* NOTE: 0x00dd is not the real encoding; instead, it is the 'tvalue'
+     field in asm_opcode. 'tvalue' is used at the stage this check happens.  */
+  { 0x00dd, 0x7fff, N_("ADD/SUB sp, sp #imm") },
    { 0, 0, NULL }
  };
  
@@ -17284,7 +17545,7 @@ it_fsm_post_encode (void)
      {
        if (inst.instruction >= 0x10000)
         {
-         as_warn (_("IT blocks containing 32-bit Thumb instructions are "
+         as_tsktsk (_("IT blocks containing 32-bit Thumb instructions are "
                      "deprecated in ARMv8"));
           now_it.warn_deprecated = TRUE;
         }
@@ -17296,7 +17557,7 @@ it_fsm_post_encode (void)
             {
               if ((inst.instruction & p->mask) == p->pattern)
                 {
-                 as_warn (_("IT blocks containing 16-bit Thumb instructions "
+                 as_tsktsk (_("IT blocks containing 16-bit Thumb instructions "
                              "of the following class are deprecated in ARMv8: "
                              "%s"), p->description);
                   now_it.warn_deprecated = TRUE;
@@ -17309,7 +17570,7 @@ it_fsm_post_encode (void)
  
        if (now_it.block_length > 1)
         {
-         as_warn (_("IT blocks containing more than one conditional "
+         as_tsktsk (_("IT blocks containing more than one conditional "
                      "instruction are deprecated in ARMv8"));
           now_it.warn_deprecated = TRUE;
         }
@@ -17373,7 +17634,7 @@ md_assemble (char *str)
      }
  
    if (warn_on_deprecated && opcode->tag == OT_cinfix3_deprecated)
-    as_warn (_("s suffix on comparison instruction is deprecated"));
+    as_tsktsk (_("s suffix on comparison instruction is deprecated"));
  
    /* The value which unconditional instructions should have in place of the
       condition field.  */
@@ -17456,9 +17717,9 @@ md_assemble (char *str)
        /* Many Thumb-2 instructions also have Thumb-1 variants, so explicitly
          set those bits when Thumb-2 32-bit instructions are seen.  ie.
          anything other than bl/blx and v6-M instructions.
-        This is overly pessimistic for relaxable instructions.  */
-      if (((inst.size == 4 && (inst.instruction & 0xf800e800) != 0xf000e800)
-          || inst.relax)
+        The impact of relaxable instructions will be considered later after we
+        finish all relaxation.  */
+      if ((inst.size == 4 && (inst.instruction & 0xf800e800) != 0xf000e800)
           && !(ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_msr)
                || ARM_CPU_HAS_FEATURE (*opcode->tvariant, arm_ext_barrier)))
         ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
@@ -18737,9 +18998,9 @@ static const struct asm_opcode insns[] =
  
    /* FP for ARMv8.  */
  #undef  ARM_VARIANT
-#define ARM_VARIANT   & fpu_vfp_ext_armv8
+#define ARM_VARIANT   & fpu_vfp_ext_armv8xd
  #undef  THUMB_VARIANT
-#define THUMB_VARIANT & fpu_vfp_ext_armv8
+#define THUMB_VARIANT & fpu_vfp_ext_armv8xd
  
    nUF(vseleq, _vseleq, 3, (RVSD, RVSD, RVSD),          vsel),
    nUF(vselvs, _vselvs, 3, (RVSD, RVSD, RVSD),          vsel),
@@ -19371,8 +19632,8 @@ static const struct asm_opcode insns[] =
   nCE(vnmul,     _vnmul,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmla,     _vnmla,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
   nCE(vnmls,     _vnmls,   3, (RVSD, RVSD, RVSD), vfp_nsyn_nmul),
- nCE(vcmp,      _vcmp,    2, (RVSD, RVSD_I0),    vfp_nsyn_cmp),
- nCE(vcmpe,     _vcmpe,   2, (RVSD, RVSD_I0),    vfp_nsyn_cmp),
+ nCE(vcmp,      _vcmp,    2, (RVSD, RSVD_FI0),    vfp_nsyn_cmp),
+ nCE(vcmpe,     _vcmpe,   2, (RVSD, RSVD_FI0),    vfp_nsyn_cmp),
   NCE(vpush,     0,       1, (VRSDLST),          vfp_nsyn_push),
   NCE(vpop,      0,       1, (VRSDLST),          vfp_nsyn_pop),
   NCE(vcvtz,     0,       2, (RVSD, RVSD),       vfp_nsyn_cvtz),
@@ -20292,6 +20553,11 @@ md_convert_frag (bfd *abfd, segT asec ATTRIBUTE_UNUSED, fragS *fragp)
    fixp->fx_file = fragp->fr_file;
    fixp->fx_line = fragp->fr_line;
    fragp->fr_fix += fragp->fr_var;
+
+  /* Set whether we use thumb-2 ISA based on final relaxation results.  */
+  if (thumb_mode && fragp->fr_var == 4 && no_cpu_selected ()
+      && !ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_arch_t2))
+    ARM_MERGE_FEATURE_SETS (arm_arch_used, thumb_arch_used, arm_ext_v6t2);
  }
  
  /* Return the size of a relaxable immediate operand instruction.
@@ -20632,7 +20898,8 @@ arm_handle_align (fragS * fragP)
  
    if (fragP->tc_frag_data.thumb_mode & (~ MODE_RECORDED))
      {
-      if (ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6t2))
+      if (ARM_CPU_HAS_FEATURE (selected_cpu_name[0]
+                              ? selected_cpu : arm_arch_none, arm_ext_v6t2))
         {
           narrow_noop = thumb_noop[1][target_big_endian];
           noop = wide_thumb_noop[target_big_endian];
@@ -20646,7 +20913,9 @@ arm_handle_align (fragS * fragP)
      }
    else
      {
-      noop = arm_noop[ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v6k) != 0]
+      noop = arm_noop[ARM_CPU_HAS_FEATURE (selected_cpu_name[0]
+                                          ? selected_cpu : arm_arch_none,
+                                          arm_ext_v6k) != 0]
                      [target_big_endian];
        noop_size = 4;
  #ifdef OBJ_ELF
@@ -23680,6 +23949,8 @@ md_begin (void)
        mcpu_cpu_opt = &cpu_default;
        selected_cpu = cpu_default;
      }
+  else if (no_cpu_selected ())
+    selected_cpu = cpu_default;
  #else
    if (mcpu_cpu_opt)
      selected_cpu = *mcpu_cpu_opt;
@@ -24173,10 +24444,14 @@ static const struct arm_cpu_option_table arm_cpus[] =
                                                                   "Cortex-A12"),
    ARM_CPU_OPT ("cortex-a15",   ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
                                                                   "Cortex-A15"),
+  ARM_CPU_OPT ("cortex-a17",   ARM_ARCH_V7VE,   FPU_ARCH_NEON_VFP_V4,
+                                                                 "Cortex-A17"),
    ARM_CPU_OPT ("cortex-a53",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Cortex-A53"),
    ARM_CPU_OPT ("cortex-a57",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
                                                                   "Cortex-A57"),
+  ARM_CPU_OPT ("cortex-a72",    ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "Cortex-A72"),
    ARM_CPU_OPT ("cortex-r4",    ARM_ARCH_V7R,    FPU_NONE,        "Cortex-R4"),
    ARM_CPU_OPT ("cortex-r4f",   ARM_ARCH_V7R,    FPU_ARCH_VFP_V3D16,
                                                                   "Cortex-R4F"),
@@ -24185,6 +24460,7 @@ static const struct arm_cpu_option_table arm_cpus[] =
    ARM_CPU_OPT ("cortex-r7",    ARM_ARCH_V7R_IDIV,
                                                  FPU_ARCH_VFP_V3D16,
                                                                   "Cortex-R7"),
+  ARM_CPU_OPT ("cortex-m7",    ARM_ARCH_V7EM,   FPU_NONE,        "Cortex-M7"),
    ARM_CPU_OPT ("cortex-m4",    ARM_ARCH_V7EM,   FPU_NONE,        "Cortex-M4"),
    ARM_CPU_OPT ("cortex-m3",    ARM_ARCH_V7M,    FPU_NONE,        "Cortex-M3"),
    ARM_CPU_OPT ("cortex-m1",    ARM_ARCH_V6SM,   FPU_NONE,        "Cortex-M1"),
@@ -24202,6 +24478,14 @@ static const struct arm_cpu_option_table arm_cpus[] =
    /* Marvell processors.  */
    ARM_CPU_OPT ("marvell-pj4",   ARM_FEATURE (ARM_AEXT_V7A | ARM_EXT_MP | ARM_EXT_SEC, 0),
                                                 FPU_ARCH_VFP_V3D16, NULL),
+  ARM_CPU_OPT ("marvell-whitney", ARM_FEATURE (ARM_AEXT_V7A | ARM_EXT_MP
+                                              | ARM_EXT_SEC, 0),
+                                              FPU_ARCH_NEON_VFP_V4, NULL),
+  /* APM X-Gene family.  */
+  ARM_CPU_OPT ("xgene1",        ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "APM X-Gene 1"),
+  ARM_CPU_OPT ("xgene2",        ARM_ARCH_V8A,    FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                                                                 "APM X-Gene 2"),
  
    { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, NULL }
  };
@@ -24272,40 +24556,51 @@ struct arm_option_extension_value_table
  {
    char *name;
    size_t name_len;
-  const arm_feature_set value;
+  const arm_feature_set merge_value;
+  const arm_feature_set clear_value;
    const arm_feature_set allowed_archs;
  };
  
  /* The following table must be in alphabetical order with a NULL last entry.
     */
-#define ARM_EXT_OPT(N, V, AA) { N, sizeof (N) - 1, V, AA }
+#define ARM_EXT_OPT(N, M, C, AA) { N, sizeof (N) - 1, M, C, AA }
  static const struct arm_option_extension_value_table arm_extensions[] =
  {
-  ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE (ARM_EXT_V8, 0)),
+  ARM_EXT_OPT ("crc",  ARCH_CRC_ARMV8, ARM_FEATURE (0, CRC_EXT_ARMV8),
+                        ARM_FEATURE (ARM_EXT_V8, 0)),
    ARM_EXT_OPT ("crypto", FPU_ARCH_CRYPTO_NEON_VFP_ARMV8,
+                        ARM_FEATURE (0, FPU_CRYPTO_ARMV8),
                                    ARM_FEATURE (ARM_EXT_V8, 0)),
-  ARM_EXT_OPT ("fp",     FPU_ARCH_VFP_ARMV8,
+  ARM_EXT_OPT ("fp",     FPU_ARCH_VFP_ARMV8, ARM_FEATURE (0, FPU_VFP_ARMV8),
                                    ARM_FEATURE (ARM_EXT_V8, 0)),
    ARM_EXT_OPT ("idiv", ARM_FEATURE (ARM_EXT_ADIV | ARM_EXT_DIV, 0),
+                       ARM_FEATURE (ARM_EXT_ADIV | ARM_EXT_DIV, 0),
                                    ARM_FEATURE (ARM_EXT_V7A | ARM_EXT_V7R, 0)),
-  ARM_EXT_OPT ("iwmmxt",ARM_FEATURE (0, ARM_CEXT_IWMMXT),      ARM_ANY),
-  ARM_EXT_OPT ("iwmmxt2",
-                       ARM_FEATURE (0, ARM_CEXT_IWMMXT2),      ARM_ANY),
-  ARM_EXT_OPT ("maverick",
-                       ARM_FEATURE (0, ARM_CEXT_MAVERICK),     ARM_ANY),
+  ARM_EXT_OPT ("iwmmxt",ARM_FEATURE (0, ARM_CEXT_IWMMXT),
+                       ARM_FEATURE (0, ARM_CEXT_IWMMXT), ARM_ANY),
+  ARM_EXT_OPT ("iwmmxt2", ARM_FEATURE (0, ARM_CEXT_IWMMXT2),
+                       ARM_FEATURE (0, ARM_CEXT_IWMMXT2), ARM_ANY),
+  ARM_EXT_OPT ("maverick", ARM_FEATURE (0, ARM_CEXT_MAVERICK),
+                       ARM_FEATURE (0, ARM_CEXT_MAVERICK), ARM_ANY),
    ARM_EXT_OPT ("mp",   ARM_FEATURE (ARM_EXT_MP, 0),
+                       ARM_FEATURE (ARM_EXT_MP, 0),
                                    ARM_FEATURE (ARM_EXT_V7A | ARM_EXT_V7R, 0)),
    ARM_EXT_OPT ("simd",   FPU_ARCH_NEON_VFP_ARMV8,
+                       ARM_FEATURE(0, FPU_NEON_ARMV8),
                                    ARM_FEATURE (ARM_EXT_V8, 0)),
    ARM_EXT_OPT ("os",   ARM_FEATURE (ARM_EXT_OS, 0),
+                       ARM_FEATURE (ARM_EXT_OS, 0),
                                    ARM_FEATURE (ARM_EXT_V6M, 0)),
    ARM_EXT_OPT ("sec",  ARM_FEATURE (ARM_EXT_SEC, 0),
+                       ARM_FEATURE (ARM_EXT_SEC, 0),
                                    ARM_FEATURE (ARM_EXT_V6K | ARM_EXT_V7A, 0)),
    ARM_EXT_OPT ("virt", ARM_FEATURE (ARM_EXT_VIRT | ARM_EXT_ADIV
                                      | ARM_EXT_DIV, 0),
+                       ARM_FEATURE (ARM_EXT_VIRT, 0),
                                    ARM_FEATURE (ARM_EXT_V7A, 0)),
-  ARM_EXT_OPT ("xscale",ARM_FEATURE (0, ARM_CEXT_XSCALE),      ARM_ANY),
-  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE }
+  ARM_EXT_OPT ("xscale",ARM_FEATURE (0, ARM_CEXT_XSCALE),
+                       ARM_FEATURE (0, ARM_CEXT_XSCALE), ARM_ANY),
+  { NULL, 0, ARM_ARCH_NONE, ARM_ARCH_NONE, ARM_ARCH_NONE }
  };
  #undef ARM_EXT_OPT
  
@@ -24353,6 +24648,8 @@ static const struct arm_option_fpu_value_table arm_fpus[] =
    {"vfpv4",            FPU_ARCH_VFP_V4},
    {"vfpv4-d16",                FPU_ARCH_VFP_V4D16},
    {"fpv4-sp-d16",      FPU_ARCH_VFP_V4_SP_D16},
+  {"fpv5-d16",         FPU_ARCH_VFP_V5D16},
+  {"fpv5-sp-d16",      FPU_ARCH_VFP_V5_SP_D16},
    {"neon-vfpv4",       FPU_ARCH_NEON_VFP_V4},
    {"fp-armv8",         FPU_ARCH_VFP_ARMV8},
    {"neon-fp-armv8",    FPU_ARCH_NEON_VFP_ARMV8},
@@ -24480,9 +24777,9 @@ arm_parse_extension (char *str, const arm_feature_set **opt_p)
  
             /* Add or remove the extension.  */
             if (adding_value)
-             ARM_MERGE_FEATURE_SETS (*ext_set, *ext_set, opt->value);
+             ARM_MERGE_FEATURE_SETS (*ext_set, *ext_set, opt->merge_value);
             else
-             ARM_CLEAR_FEATURE (*ext_set, *ext_set, opt->value);
+             ARM_CLEAR_FEATURE (*ext_set, *ext_set, opt->clear_value);
  
             break;
           }
@@ -24870,7 +25167,7 @@ aeabi_set_attribute_string (int tag, const char *value)
  }
  
  /* Set the public EABI object attributes.  */
-static void
+void
  aeabi_set_public_attributes (void)
  {
    int arch;
@@ -24893,6 +25190,8 @@ aeabi_set_public_attributes (void)
    if (ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_arch_any))
      ARM_MERGE_FEATURE_SETS (flags, flags, arm_ext_v4t);
  
+  selected_cpu = flags;
+
    /* Allow the user to override the reported architecture.  */
    if (object_arch)
      {
@@ -24976,8 +25275,10 @@ aeabi_set_public_attributes (void)
         ARM_CPU_HAS_FEATURE (flags, arm_arch_t2) ? 2 : 1);
  
    /* Tag_VFP_arch.  */
-  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_armv8))
-    aeabi_set_attribute_int (Tag_VFP_arch, 7);
+  if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_armv8xd))
+    aeabi_set_attribute_int (Tag_VFP_arch,
+                            ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32)
+                            ? 7 : 8);
    else if (ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_fma))
      aeabi_set_attribute_int (Tag_VFP_arch,
                              ARM_CPU_HAS_FEATURE (flags, fpu_vfp_ext_d32)
@@ -25213,9 +25514,10 @@ s_arm_arch_extension (int ignored ATTRIBUTE_UNUSED)
           }
  
         if (adding_value)
-         ARM_MERGE_FEATURE_SETS (selected_cpu, selected_cpu, opt->value);
+         ARM_MERGE_FEATURE_SETS (selected_cpu, selected_cpu,
+                                 opt->merge_value);
         else
-         ARM_CLEAR_FEATURE (selected_cpu, selected_cpu, opt->value);
+         ARM_CLEAR_FEATURE (selected_cpu, selected_cpu, opt->clear_value);
  
         mcpu_cpu_opt = &selected_cpu;
         ARM_MERGE_FEATURE_SETS (cpu_variant, *mcpu_cpu_opt, *mfpu_opt);
@@ -25345,14 +25647,18 @@ arm_convert_symbolic_attribute (const char *name)
  }
  
  
-/* Apply sym value for relocations only in the case that
-   they are for local symbols and you have the respective
-   architectural feature for blx and simple switches.  */
+/* Apply sym value for relocations only in the case that they are for
+   local symbols in the same segment as the fixup and you have the
+   respective architectural feature for blx and simple switches.  */
  int
-arm_apply_sym_value (struct fix * fixP)
+arm_apply_sym_value (struct fix * fixP, segT this_seg)
  {
    if (fixP->fx_addsy
        && ARM_CPU_HAS_FEATURE (selected_cpu, arm_ext_v5t)
+      /* PR 17444: If the local symbol is in a different section then a reloc
+        will always be generated for it, so applying the symbol value now
+        will result in a double offset being stored in the relocation.  */
+      && (S_GET_SEGMENT (fixP->fx_addsy) == this_seg)
        && !S_FORCE_RELOC (fixP->fx_addsy, TRUE))
      {
        switch (fixP->fx_r_type)
@@ -25366,7 +25672,7 @@ arm_apply_sym_value (struct fix * fixP)
         case BFD_RELOC_ARM_PCREL_CALL:
         case BFD_RELOC_THUMB_PCREL_BLX:
           if (THUMB_IS_FUNC (fixP->fx_addsy))
-             return 1;
+           return 1;
           break;
  
         default: