* binutils/readelf.c (arm_attr_tag_VFP_arch): Add VFPv3.

author Julian Brown <julian@codesourcery.com>

Mon, 3 Apr 2006 00:03:34 +0000 (00:03 +0000)

committer Julian Brown <julian@codesourcery.com>

Mon, 3 Apr 2006 00:03:34 +0000 (00:03 +0000)
author Julian Brown <julian@codesourcery.com>
Mon, 3 Apr 2006 00:03:34 +0000 (00:03 +0000)
committer Julian Brown <julian@codesourcery.com>
Mon, 3 Apr 2006 00:03:34 +0000 (00:03 +0000)
diff --git a/ChangeLog.csl b/ChangeLog.csl

index 75d7adb9ccf636c74192d0d6d49b111c0c8d2d2f..5ac69a86722c656e8c897ecf40ef763bf69a15b3 100644 (file)
--- a/ChangeLog.csl
+++ b/ChangeLog.csl
@@ -1,3 +1,208 @@
+2006-04-03  Julian Brown  <julian@codesourcery.com>
+           Nathan Sidwell  <nathan@codesourcery.com>
+
+       * binutils/readelf.c (arm_attr_tag_VFP_arch): Add VFPv3.
+    
+       * gas/config/tc-arm.c (limits.h): Include.
+       (fpu_arch_vfp_v3, fpu_arch_neon_v1, fpu_vfp_ext_v3)
+       (fpu_neon_ext_v1, fpu_vfp_v3_or_neon_ext): Declare constants.
+       (neon_el_type): New enumeration of types for Neon vector elements.
+       (neon_type_el): New struct. Define type and size of a vector element.
+       (NEON_MAX_TYPE_ELS): Define constant. The maximum number of types per
+       instruction.
+       (neon_type): Define struct. The type of an instruction.
+       (arm_it): Add 'vectype' for the current instruction.
+       (isscalar, immisalign, regisimm, isquad): New predicates for operands.
+       (vfp_sp_reg_pos): Rename to...
+       (vfp_reg_pos): ...this, and add VFP_REG_Dd, VFP_REG_Dm, VFP_REG_Dn
+       tags.
+       (arm_reg_type): Add REG_TYPE_NQ (Neon Q register) and REG_TYPE_NDQ
+       (Neon D or Q register).
+       (reg_expected_msgs): Sync with above. Allow VFD to mean VFP or Neon
+       D register.
+       (GE_OPT_PREFIX_BIG): Define constant, for use in...
+       (my_get_expression): Allow above constant as argument to accept
+       64-bit constants with optional prefix.
+       (arm_reg_parse): Add extra argument to return the specific type of
+       register in when either a D or Q register (REG_TYPE_NDQ) is requested.
+       Can be NULL.
+       (parse_scalar): New function. Parse Neon scalar (vector reg and index).
+       (parse_reg_list): Update for new arm_reg_parse args.
+       (parse_vfp_reg_list): Allow parsing of Neon D/Q register lists.
+       (parse_neon_el_struct_list): New function. Parse element/structure
+       register lists for VLD<n>/VST<n> instructions.
+       (s_arm_unwind_save_vfp): Update for new parse_vfp_reg_list args.
+       (s_arm_unwind_save_mmxwr): Update for new arm_reg_parse args.
+       (s_arm_unwind_save_mmxwcg): Likewise.
+       (s_arm_unwind_movsp): Likewise.
+       (s_arm_unwind_setfp): Likewise.
+       (parse_big_immediate): New function. Parse an immediate, which may
+       be 64 bits wide. Put results in inst.operands[i].
+       (parse_shift): Update for new arm_reg_parse args.
+       (parse_address): Likewise. Add parsing of alignment specifiers.
+       (parse_neon_mov): Parse the operands of a VMOV instruction.
+       (operand_parse_code): Add OP_RND, OP_RNQ, OP_RNDQ, OP_RNSC,
+       OP_NRDLST, OP_NSTRLST, OP_NILO, OP_RNDQ_I0, OP_RR_RNSC,
+       OP_RNDQ_RNSC, OP_RND_RNSC, OP_VMOV, OP_RNDQ_IMVNb, OP_RNDQ_I63b,
+       OP_I0, OP_I16z, OP_I32z, OP_I64, OP_I64z, OP_oI32b, OP_oRND,
+       OP_oRNQ, OP_oRNDQ.
+       (parse_operands): Handle new codes above.
+       (encode_arm_vfp_sp_reg): Rename to...
+       (encode_arm_vfp_reg): ...this. Handle D regs (0-31) too. Complain if
+       selected VFP version only supports D0-D15.
+       (do_vfp_sp_monadic, do_vfp_sp_dyadic, do_vfp_sp_compare_z)
+       (do_vfp_dp_sp_cvt, do_vfp_reg_from_sp, do_vfp_reg2_from_sp2)
+       (do_vfp_sp_from_reg, do_vfp_sp2_from_reg2, do_vfp_sp_ldst)
+       (do_vfp_dp_ldst, vfp_sp_ldstm, vfp_dp_ldstm): Update for new
+       encode_arm_vfp_reg name, and allow 32 D regs.
+       (do_vfp_dp_rd_rm, do_vfp_dp_rn_rd, do_vfp_dp_rd_rn)
+       (do_vfp_dp_rd_rn_rm, do_vfp_rm_rd_rn): New functions to encode VFP
+       insns allowing 32 D regs.
+       (do_vfp_sp_const, do_vfp_dp_const, vfp_conv, do_vfp_sp_conv_16)
+       (do_vfp_dp_conv_16, do_vfp_sp_conv_32, do_vfp_dp_conv_32): Handle
+       constant-load and conversion insns introduced with VFPv3.
+       (neon_tab_entry): New struct.
+       (NEON_ENC_TAB): Bit patterns for overloaded Neon instructions, and
+       those which are the targets of pseudo-instructions.
+       (neon_opc): Enumerate opcodes, use as indices into...
+       (neon_enc_tab): ...this. Hold data from NEON_ENC_TAB.
+       (NEON_ENC_INTEGER, NEON_ENC_ARMREG, NEON_ENC_POLY, NEON_ENC_FLOAT)
+       (NEON_ENC_SCALAR, NEON_ENC_IMMED, NEON_ENC_INTERLV, NEON_ENC_LANE)
+       (NEON_ENC_DUP): Define meaningful helper macros to look up values in
+       neon_enc_tab.
+       (neon_shape): Enumerate shapes (permitted register widths, etc.) for
+       Neon instructions.
+       (neon_type_mask): New. Compact type representation for type
+       checking.
+       (N_SU_ALL, N_SU_32, N_SU_16_64, N_SUF_32, N_I_ALL, N_IF_32): Common
+       permitted type combinations.
+       (N_IGNORE_TYPE): New macro.
+       (neon_check_shape): New function. Check an instruction shape for
+       multiple alternatives. Return the specific shape for the current
+       instruction.
+       (neon_modify_type_size): New function. Modify a vector type and
+       size, depending on the bit mask in argument 1.
+       (neon_type_promote): New function. Convert a given "key" type (of an
+       operand) into the correct type for a different operand, based on a bit
+       mask.
+       (type_chk_of_el_type): New function. Convert a type and size into the
+       compact representation used for type checking.
+       (el_type_of_type_chk): New function. Reverse of above (only when a
+       single bit is set in the bit mask).
+       (modify_types_allowed): New function. Alter a mask of allowed types
+       based on a bit mask of modifications.
+       (neon_check_type): New function. Check the type of the current
+       instruction against the variable argument list. The "key" type of the
+       instruction is returned.
+       (neon_dp_fixup): New function. Fill in and modify instruction bits for
+       a Neon data-processing instruction depending on whether we're in ARM
+       mode or Thumb-2 mode.
+       (neon_logbits): New function.
+       (neon_three_same, neon_two_same, do_neon_dyadic_i_su)
+       (do_neon_dyadic_i64_su, neon_imm_shift, do_neon_shl_imm)
+       (do_neon_qshl_imm, neon_cmode_for_logic_imm)
+       (neon_bits_same_in_bytes, neon_squash_bits, neon_is_quarter_float)
+       (neon_qfloat_bits, neon_cmode_for_move_imm, neon_write_immbits)
+       (neon_invert_size, do_neon_logic, do_neon_bitfield, neon_dyadic)
+       (do_neon_dyadic_if_su, do_neon_dyadic_if_su_d, do_neon_dyadic_if_i)
+       (do_neon_dyadic_if_i_d, do_neon_addsub_if_i, neon_exchange_operands)
+       (neon_compare, do_neon_cmp, do_neon_cmp_inv, do_neon_ceq)
+       (neon_scalar_for_mul, neon_mul_mac, do_neon_mac_maybe_scalar)
+       (do_neon_tst, do_neon_mul, do_neon_qdmulh, do_neon_fcmp_absolute)
+       (do_neon_fcmp_absolute_inv, do_neon_step, do_neon_abs_neg)
+       (do_neon_sli, do_neon_sri, do_neon_qshlu_imm, do_neon_qmovn)
+       (do_neon_qmovun, do_neon_rshift_sat_narrow)
+       (do_neon_rshift_sat_narrow_u, do_neon_movn, do_neon_rshift_narrow)
+       (do_neon_shll, neon_cvt_flavour, do_neon_cvt, neon_move_immediate)
+       (do_neon_mvn, neon_mixed_length, do_neon_dyadic_long, do_neon_abal)
+       (neon_mac_reg_scalar_long, do_neon_mac_maybe_scalar_long)
+       (do_neon_dyadic_wide, do_neon_vmull, do_neon_ext, do_neon_rev)
+       (do_neon_dup, do_neon_mov, do_neon_rshift_round_imm, do_neon_movl)
+       (do_neon_trn, do_neon_zip_uzp, do_neon_sat_abs_neg)
+       (do_neon_pair_long, do_neon_recip_est, do_neon_cls, do_neon_clz)
+       (do_neon_cnt, do_neon_swp, do_neon_tbl_tbx, do_neon_ldm_stm)
+       (do_neon_ldr_str, do_neon_ld_st_interleave, neon_alignment_bit)
+       (do_neon_ld_st_lane, do_neon_ld_dup, do_neon_ldx_stx): New
+       functions. Neon bit encoding and encoding helpers.
+       (parse_neon_type): New function. Parse Neon type specifier.
+       (opcode_lookup): Allow parsing of Neon type specifiers.
+       (REGNUM2, REGSETH, REGSET2): New macros.
+       (reg_names): Add new VFPv3 and Neon registers.
+       (NUF, nUF, NCE, nCE): New macros for opcode table.
+       (insns): More VFP registers allowed in fcpyd, fmdhr, fmdlr, fmrdh,
+       fmrdl, fabsd, fnegd, fsqrtd, faddd, fsubd, fmuld, fdivd, fmacd,
+       fmscd, fnmuld, fnmacd, fnmscd, fcmpd, fcmpzd, fcmped, fcmpezd,
+       fmdrr, fmrrd. Add Neon instructions vaba, vhadd, vrhadd, vhsub,
+       vqadd, vqsub, vrshl, vqrshl, vshl, vqshl{u}, vand, vbic, vorr, vorn,
+       veor, vbsl, vbit, vbif, vabd, vmax, vmin, vcge, vcgt, vclt, vcle,
+       vceq, vpmax, vpmin, vmla, vmls, vpadd, vadd, vsub, vtst, vmul,
+       vqdmulh, vqrdmulh, vacge, vacgt, vaclt, vacle, vrecps, vrsqrts,
+       vabs, vneg, v{r}shr,  v{r}sra, vsli, vsri, vqshrn, vq{r}shr{u}n,
+       v{r}shrn, vshll, vcvt, vmov, vmvn, vabal, vabdl, vaddl, vsubl,
+       vmlal, vmlsl, vaddw, vsubw, v{r}addhn, v{r}subhn, vqdmlal, vqdmlsl,
+       vqdmull, vmull, vext, vrev64, vrev32, vrev16, vdup, vmovl, v{q}movn,
+       vzip, vuzp, vqabs, vqneg, vpadal, vpaddl, vrecpe, vrsqrte, vcls,
+       vclz, vcnt, vswp, vtrn, vtbl, vtbx, vldm, vstm, vldr, vstr,
+       vld[1234], vst[1234], fconst[sd], f[us][lh]to[sd], fto[us][lh][sd].
+       (tc_arm_regname_to_dw2regnum): Update for arm_reg_parse args.
+       (arm_cpu_option_table): Add Neon and VFPv3 to Cortex-A8.
+       (arm_option_cpu_value): Add vfp3 and neon.
+       (aeabi_set_public_attributes): Support VFPv3 and NEON attributes.
+       Fix VFPv1 attribute.
+
+       * gas/testsuite/gas/arm/copro.s: Avoid ldcl which encodes as a bad Neon
+       instruction.
+       * gas/testsuite/gas/arm/copro.d: Update accordingly.
+       * gas/testsuite/gas/arm/neon-cond.s: New test. Conditional Neon opcodes
+       in ARM mode.
+       * gas/testsuite/gas/arm/neon-cond.d: Expected results of above.
+       * gas/testsuite/gas/arm/neon-cov.s: New test. Coverage of Neon
+       instructions.
+       * gas/testsuite/gas/arm/neon-cov.d: Expected results of above.
+       * gas/testsuite/gas/arm/neon-ldst-es.s: New test. Element and structure
+       loads and stores.
+       * gas/testsuite/gas/arm/neon-ldst-es.d: Expected results of above.
+       * gas/testsuite/gas/arm/neon-ldst-rm.s: New test. Single and multiple
+       register loads and stores.
+       * gas/testsuite/gas/arm/neon-ldst-rm.d: Expected results of above.
+       * gas/testsuite/gas/arm/neon-omit.s: New test. Omission of optional
+       operands.
+       * gas/testsuite/gas/arm/neon-omit.d: Expected results of above.
+       * gas/testsuite/gas/arm/vfp1.d: Expect Neon syntax for some VFP
+       instructions.
+       * gas/testsuite/gas/arm/vfp1_t2.d: Likewise.
+       * gas/testsuite/gas/arm/vfp1xD.d: Likewise.
+       * gas/testsuite/gas/arm/vfp1xD_t2.d: Likewise.
+       * gas/testsuite/gas/arm/vfp2.d: Likewise.
+       * gas/testsuite/gas/arm/vfp2_t2.d: Likewise.
+       * gas/testsuite/gas/arm/vfp3-32drs.s: New test. Extended D register
+       range for VFP instructions.
+       * gas/testsuite/gas/arm/vfp3-32drs.d: Expected results of above.
+       * gas/testsuite/gas/arm/vfp3-const-conv.s: New test. VFPv3
+       constant-load and conversion instructions.
+       * gas/testsuite/gas/arm/vfp3-const-conv.d: Expected results of above.
+
+       * include/opcode/arm.h (FPU_VFP_EXT_V3): Define constant.
+       (FPU_NEON_EXT_V1): Likewise.
+       (FPU_VFP_HARD): Update.
+       (FPU_VFP_V3): Define macro.
+       (FPU_ARCH_VFP_V3, FPU_ARCH_VFP_V3_PLUS_NEON_V1): Define macros.
+
+       * opcodes/arm-dis.c (coprocessor_opcodes): Add %A, %B, %k,
+       convert %<code>[zy] into %[zy]<code>.  Expand meaning of
+       %<bitfield>['`?].
+       Add unified load/store instruction names.
+       (neon_opcode_table): New.
+       (arm_opcodes): Expand meaning of %<bitfield>['`?].
+       (arm_decode_bitfield): New.
+       (print_insn_coprocessor): Add pc argument. Add %A & %B specifiers.
+       Use arm_decode_bitfield and adjust numeric specifiers.
+       Adjust %z & %y.
+       (print_insn_neon): New.
+       (print_insn_arm): Adjust print_insn_coprocessor call. Call
+       print_insn_neon. Use arm_decode_bitfield and adjust numeric specifiers.
+       (print_insn_thumb32): Likewise.
+
  2005-04-01  Paul Brook  <paul@codesourcery.com>
  
         * config/tc-arm.c (arm_fix_adjustable): Return 0 for function symbols.
diff --git a/binutils/readelf.c b/binutils/readelf.c

index d8bd54f440f316b67b6acc328d7ab4285f8faf5e..4a9bd1a87e0a3d55fde4a6347904e9c6e1ea51f1 100644 (file)
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -7731,7 +7731,8 @@ static const char *arm_attr_tag_CPU_arch[] =
  static const char *arm_attr_tag_ARM_ISA_use[] = {"No", "Yes"};
  static const char *arm_attr_tag_THUMB_ISA_use[] =
    {"No", "Thumb-1", "Thumb-2"};
-static const char *arm_attr_tag_VFP_arch[] = {"No", "VFPv1", "VFPv2"};
+/* FIXME: VFPv3 encoding was extrapolated!  */
+static const char *arm_attr_tag_VFP_arch[] = {"No", "VFPv1", "VFPv2", "VFPv3"};
  static const char *arm_attr_tag_WMMX_arch[] = {"No", "WMMXv1"};
  static const char *arm_attr_tag_NEON_arch[] = {"No", "NEONv1"};
  static const char *arm_attr_tag_ABI_PCS_config[] =
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c

index 05158eec024802928413d0297c26c53b84328ce4..b2541feb7ae1f98b5c64257dc1641cf0891c3c28 100644 (file)
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -26,6 +26,7 @@
     02110-1301, USA.  */
  
  #include <string.h>
+#include <limits.h>
  #define         NO_RELOC 0
  #include "as.h"
  #include "safe-ctype.h"
@@ -155,6 +156,8 @@ static const arm_feature_set *mfpu_opt = NULL;
  static const arm_feature_set fpu_default = FPU_DEFAULT;
  static const arm_feature_set fpu_arch_vfp_v1 = FPU_ARCH_VFP_V1;
  static const arm_feature_set fpu_arch_vfp_v2 = FPU_ARCH_VFP_V2;
+static const arm_feature_set fpu_arch_vfp_v3 = FPU_ARCH_VFP_V3;
+static const arm_feature_set fpu_arch_neon_v1 = FPU_ARCH_NEON_V1;
  static const arm_feature_set fpu_arch_fpa = FPU_ARCH_FPA;
  static const arm_feature_set fpu_any_hard = FPU_ANY_HARD;
  static const arm_feature_set fpu_arch_maverick = FPU_ARCH_MAVERICK;
@@ -206,6 +209,10 @@ static const arm_feature_set fpu_vfp_ext_v1xd =
    ARM_FEATURE (0, FPU_VFP_EXT_V1xD);
  static const arm_feature_set fpu_vfp_ext_v1 = ARM_FEATURE (0, FPU_VFP_EXT_V1);
  static const arm_feature_set fpu_vfp_ext_v2 = ARM_FEATURE (0, FPU_VFP_EXT_V2);
+static const arm_feature_set fpu_vfp_ext_v3 = ARM_FEATURE (0, FPU_VFP_EXT_V3);
+static const arm_feature_set fpu_neon_ext_v1 = ARM_FEATURE (0, FPU_NEON_EXT_V1);
+static const arm_feature_set fpu_vfp_v3_or_neon_ext =
+  ARM_FEATURE (0, FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
  
  static int mfloat_abi_opt = -1;
  /* Record user cpu selection for object attributes.  */
@@ -256,6 +263,31 @@ static int thumb_mode = 0;
  
  static bfd_boolean unified_syntax = FALSE;
  
+enum neon_el_type
+{
+  NT_untyped,
+  NT_integer,
+  NT_float,
+  NT_poly,
+  NT_signed,
+  NT_unsigned,
+  NT_invtype
+};
+
+struct neon_type_el
+{
+  enum neon_el_type type;
+  unsigned size;
+};
+
+#define NEON_MAX_TYPE_ELS 4
+
+struct neon_type
+{
+  struct neon_type_el el[NEON_MAX_TYPE_ELS];
+  unsigned elems;
+};
+
  struct arm_it
  {
    const char * error;
@@ -263,6 +295,7 @@ struct arm_it
    int          size;
    int          size_req;
    int          cond;
+  struct neon_type vectype;
    /* Set to the opcode if the instruction needs relaxation.
       Zero if the instruction is not relaxed.  */
    unsigned long        relax;
@@ -280,6 +313,12 @@ struct arm_it
      unsigned present   : 1;  /* Operand present.  */
      unsigned isreg     : 1;  /* Operand was a register.  */
      unsigned immisreg  : 1;  /* .imm field is a second register.  */
+    unsigned isscalar   : 1;  /* Operand is a (Neon) scalar.  */
+    unsigned immisalign : 1;  /* Immediate is an alignment specifier.  */
+    /* Note: we abuse "regisimm" to mean "is Neon register" in VMOV
+       instructions. This allows us to disambiguate ARM <-> vector insns.  */
+    unsigned regisimm   : 1;  /* 64-bit immediate, reg forms high 32 bits.  */
+    unsigned isquad     : 1;  /* Operand is Neon quad-precision register.  */
      unsigned hasreloc  : 1;  /* Operand has relocation suffix.  */
      unsigned writeback : 1;  /* Operand has trailing !  */
      unsigned preind    : 1;  /* Preindexed address.  */
@@ -355,9 +394,10 @@ struct reloc_entry
    bfd_reloc_code_real_type reloc;
  };
  
-enum vfp_sp_reg_pos
+enum vfp_reg_pos
  {
-  VFP_REG_Sd, VFP_REG_Sm, VFP_REG_Sn
+  VFP_REG_Sd, VFP_REG_Sm, VFP_REG_Sn,
+  VFP_REG_Dd, VFP_REG_Dm, VFP_REG_Dn
  };
  
  enum vfp_ldstm_type
@@ -375,6 +415,8 @@ enum arm_reg_type
    REG_TYPE_FN,
    REG_TYPE_VFS,
    REG_TYPE_VFD,
+  REG_TYPE_NQ,
+  REG_TYPE_NDQ,
    REG_TYPE_VFC,
    REG_TYPE_MVF,
    REG_TYPE_MVD,
@@ -405,7 +447,9 @@ const char *const reg_expected_msgs[] =
    N_("co-processor register expected"),
    N_("FPA register expected"),
    N_("VFP single precision register expected"),
-  N_("VFP double precision register expected"),
+  N_("VFP/Neon double precision register expected"),
+  N_("Neon quad precision register expected"),
+  N_("Neon double or quad precision register expected"),
    N_("VFP system register expected"),
    N_("Maverick MVF register expected"),
    N_("Maverick MVD register expected"),
@@ -691,6 +735,9 @@ static int in_my_get_expression = 0;
  #define GE_NO_PREFIX 0
  #define GE_IMM_PREFIX 1
  #define GE_OPT_PREFIX 2
+/* This is a bit of a hack. Use an optional prefix, and also allow big (64-bit)
+   immediates, as can be used in Neon VMVN and VMOV immediate instructions.  */
+#define GE_OPT_PREFIX_BIG 3
  
  static int
  my_get_expression (expressionS * ep, char ** str, int prefix_mode)
@@ -700,7 +747,8 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
  
    /* In unified syntax, all prefixes are optional.  */
    if (unified_syntax)
-    prefix_mode = GE_OPT_PREFIX;
+    prefix_mode = (prefix_mode == GE_OPT_PREFIX_BIG) ? prefix_mode
+                  : GE_OPT_PREFIX;
  
    switch (prefix_mode)
      {
@@ -714,6 +762,7 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
        (*str)++;
        break;
      case GE_OPT_PREFIX:
+    case GE_OPT_PREFIX_BIG:
        if (is_immediate_prefix (**str))
         (*str)++;
        break;
@@ -755,11 +804,12 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
    /* Get rid of any bignums now, so that we don't generate an error for which
       we can't establish a line number later on.         Big numbers are never valid
       in instructions, which is where this routine is always called.  */
-  if (ep->X_op == O_big
-      || (ep->X_add_symbol
-         && (walk_no_bignums (ep->X_add_symbol)
-             || (ep->X_op_symbol
-                 && walk_no_bignums (ep->X_op_symbol)))))
+  if (prefix_mode != GE_OPT_PREFIX_BIG
+      && (ep->X_op == O_big
+          || (ep->X_add_symbol
+             && (walk_no_bignums (ep->X_add_symbol)
+                 || (ep->X_op_symbol
+                     && walk_no_bignums (ep->X_op_symbol))))))
      {
        inst.error = _("invalid constant");
        *str = input_line_pointer;
@@ -940,14 +990,24 @@ arm_reg_parse_multi (char **ccp)
  }
  
  /* As above, but the register must be of type TYPE, and the return
-   value is the register number or FAIL.  */
+   value is the register number or FAIL.
+   If RTYPE is non-zero, return the (possibly restricted) type of the
+   register (e.g. Neon double or quad reg when either has been requested).  */
  
  static int
-arm_reg_parse (char **ccp, enum arm_reg_type type)
+arm_reg_parse (char **ccp, enum arm_reg_type type, enum arm_reg_type *rtype)
  {
    char *start = *ccp;
    struct reg_entry *reg = arm_reg_parse_multi (ccp);
  
+  /* Undo polymorphism for Neon D and Q registers.  */
+  if (reg && type == REG_TYPE_NDQ
+      && (reg->type == REG_TYPE_NQ || reg->type == REG_TYPE_VFD))
+    type = reg->type;
+
+  if (rtype)
+    *rtype = type;
+
    if (reg && reg->type == type)
      return reg->number;
  
@@ -986,6 +1046,46 @@ arm_reg_parse (char **ccp, enum arm_reg_type type)
    return FAIL;
  }
  
+/* Parse a Neon scalar. Most of the time when we're parsing a scalar, we don't
+   have enough information to be able to do a good job bounds-checking. So, we
+   just do easy checks here, and do further checks later.  */
+
+static int
+parse_scalar (char **ccp, int elsize)
+{
+  int regno, elno;
+  char *str = *ccp;
+  expressionS exp;
+  
+  if ((regno = arm_reg_parse (&str, REG_TYPE_VFD, NULL)) == FAIL)
+    return FAIL;
+  
+  if (skip_past_char (&str, '[') == FAIL)
+    return FAIL;
+  
+  my_get_expression (&exp, &str, GE_NO_PREFIX);
+  if (exp.X_op != O_constant)
+    {
+      inst.error = _("constant expression required");
+      return FAIL;
+    }
+  elno = exp.X_add_number;
+  
+  if (elno >= 64 / elsize)
+    {
+      inst.error = _("scalar index out of range");
+      return FAIL;
+    }
+  
+  if (skip_past_char (&str, ']') == FAIL)
+    return FAIL;
+  
+  /* Parsed scalar successfully. Skip over it.  */
+  *ccp = str;
+  
+  return (regno * 8) + elno;
+}
+
  /* Parse an ARM register list.  Returns the bitmask, or FAIL.  */
  static long
  parse_reg_list (char ** strp)
@@ -1009,7 +1109,7 @@ parse_reg_list (char ** strp)
             {
               int reg;
  
-             if ((reg = arm_reg_parse (&str, REG_TYPE_RN)) == FAIL)
+             if ((reg = arm_reg_parse (&str, REG_TYPE_RN, NULL)) == FAIL)
                 {
                   inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
                   return FAIL;
@@ -1111,53 +1211,101 @@ parse_reg_list (char ** strp)
    return range;
  }
  
+/* Types of registers in a list.  */
+
+enum reg_list_els
+{
+  REGLIST_VFP_S,
+  REGLIST_VFP_D,
+  REGLIST_NEON_D
+};
+
  /* Parse a VFP register list.  If the string is invalid return FAIL.
     Otherwise return the number of registers, and set PBASE to the first
-   register.  Double precision registers are matched if DP is nonzero. */
+   register.  Parses registers of type ETYPE.
+   If REGLIST_NEON_D is used, several syntax enhancements are enabled:
+     - Q registers can be used to specify pairs of D registers
+     - { } can be omitted from around a singleton register list
+         FIXME: This is not implemented, as it would require backtracking in
+         some cases, e.g.:
+           vtbl.8 d3,d4,d5
+         This could be done (the meaning isn't really ambiguous), but doesn't
+         fit in well with the current parsing framework.
+     - 32 D registers may be used (also true for VFPv3).  */
  
  static int
-parse_vfp_reg_list (char **str, unsigned int *pbase, int dp)
+parse_vfp_reg_list (char **str, unsigned int *pbase, enum reg_list_els etype)
  {
    int base_reg;
    int new_base;
-  int regtype;
-  int max_regs;
+  enum arm_reg_type regtype = 0;
+  int max_regs = 0;
    int count = 0;
    int warned = 0;
    unsigned long mask = 0;
    int i;
  
    if (**str != '{')
-    return FAIL;
+    {
+      inst.error = _("expecting {");
+      return FAIL;
+    }
  
    (*str)++;
  
-  if (dp)
-    {
-      regtype = REG_TYPE_VFD;
-      max_regs = 16;
-    }
-  else
+  switch (etype)
      {
+    case REGLIST_VFP_S:
        regtype = REG_TYPE_VFS;
        max_regs = 32;
+      break;
+    
+    case REGLIST_VFP_D:
+      regtype = REG_TYPE_VFD;
+      /* VFPv3 allows 32 D registers.  */
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3))
+        {
+          max_regs = 32;
+          if (thumb_mode)
+            ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
+                                    fpu_vfp_ext_v3);
+          else
+            ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used,
+                                    fpu_vfp_ext_v3);
+        }
+      else
+        max_regs = 16;
+      break;
+    
+    case REGLIST_NEON_D:
+      regtype = REG_TYPE_NDQ;
+      max_regs = 32;
+      break;
      }
  
    base_reg = max_regs;
  
    do
      {
-      new_base = arm_reg_parse (str, regtype);
+      int setmask = 1, addregs = 1;
+      new_base = arm_reg_parse (str, regtype, &regtype);
        if (new_base == FAIL)
         {
           inst.error = gettext (reg_expected_msgs[regtype]);
           return FAIL;
         }
  
+      /* Note: a value of 2 * n is returned for the register Q<n>.  */
+      if (regtype == REG_TYPE_NQ)
+        {
+          setmask = 3;
+          addregs = 2;
+        }
+
        if (new_base < base_reg)
         base_reg = new_base;
  
-      if (mask & (1 << new_base))
+      if (mask & (setmask << new_base))
         {
           inst.error = _("invalid register list");
           return FAIL;
@@ -1169,8 +1317,8 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp)
           warned = 1;
         }
  
-      mask |= 1 << new_base;
-      count++;
+      mask |= setmask << new_base;
+      count += addregs;
  
        if (**str == '-') /* We have the start of a range expression */
         {
@@ -1178,28 +1326,31 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp)
  
           (*str)++;
  
-         if ((high_range = arm_reg_parse (str, regtype)) == FAIL)
+         if ((high_range = arm_reg_parse (str, regtype, NULL)) == FAIL)
             {
               inst.error = gettext (reg_expected_msgs[regtype]);
               return FAIL;
             }
  
+          if (regtype == REG_TYPE_NQ)
+            high_range = high_range + 1;
+
           if (high_range <= new_base)
             {
               inst.error = _("register range not in ascending order");
               return FAIL;
             }
  
-         for (new_base++; new_base <= high_range; new_base++)
+         for (new_base += addregs; new_base <= high_range; new_base += addregs)
             {
-             if (mask & (1 << new_base))
+             if (mask & (setmask << new_base))
                 {
                   inst.error = _("invalid register list");
                   return FAIL;
                 }
  
-             mask |= 1 << new_base;
-             count++;
+             mask |= setmask << new_base;
+             count += addregs;
             }
         }
      }
@@ -1227,6 +1378,180 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp)
    return count;
  }
  
+/* Parse element/structure lists for Neon VLD<n> and VST<n> instructions.
+   The base register is put in *PBASE.
+   The lane (or one of the #defined constants below) is placed in bits [3:0] of
+   the return value.
+   The register stride (minus one) is put in bit 4 of the return value.
+   Bits [6:5] encode the list length (minus one).  */
+
+#define NEON_ALL_LANES         15
+#define NEON_INTERLEAVE_LANES  14
+#define NEON_LANE(X)           ((X) & 0xf)
+#define NEON_REG_STRIDE(X)     (((X) & (1 << 4)) ? 2 : 1)
+#define NEON_REGLIST_LENGTH(X) ((((X) >> 5) & 3) + 1)
+
+static int
+parse_neon_el_struct_list (char **str, unsigned *pbase)
+{
+  char *ptr = *str;
+  int base_reg = -1;
+  int reg_incr = -1;
+  int count = 0;
+  int lane = -1;
+  int leading_brace = 0;
+  enum arm_reg_type rtype = REG_TYPE_NDQ;
+  int addregs = 1;
+  const char *const incr_error = "register stride must be 1 or 2";
+  const char *const type_error = "mismatched element/structure types in list";
+  
+  if (skip_past_char (&ptr, '{') == SUCCESS)
+    leading_brace = 1;
+  
+  do
+    {
+      int getreg = arm_reg_parse (&ptr, rtype, &rtype);
+      if (getreg == FAIL)
+        {
+          inst.error = _(reg_expected_msgs[rtype]);
+          return FAIL;
+        }
+      
+      if (base_reg == -1)
+        {
+          base_reg = getreg;
+          if (rtype == REG_TYPE_NQ)
+            {
+              reg_incr = 1;
+              addregs = 2;
+            }
+        }
+      else if (reg_incr == -1)
+        {
+          reg_incr = getreg - base_reg;
+          if (reg_incr < 1 || reg_incr > 2)
+            {
+              inst.error = _(incr_error);
+              return FAIL;
+            }
+        }
+      else if (getreg != base_reg + reg_incr * count)
+        {
+          inst.error = _(incr_error);
+          return FAIL;
+        }
+      
+      /* Handle Dn-Dm or Qn-Qm syntax. Can only be used with non-indexed list
+         modes.  */
+      if (ptr[0] == '-')
+        {
+          int hireg, dregs = (rtype == REG_TYPE_NQ) ? 2 : 1;
+          if (lane == -1)
+            lane = NEON_INTERLEAVE_LANES;
+          else if (lane != NEON_INTERLEAVE_LANES)
+            {
+              inst.error = _(type_error);
+              return FAIL;
+            }
+          if (reg_incr == -1)
+            reg_incr = 1;
+          else if (reg_incr != 1)
+            {
+              inst.error = _("don't use Rn-Rm syntax with non-unit stride");
+              return FAIL;
+            }
+          ptr++;
+          hireg = arm_reg_parse (&ptr, rtype, NULL);
+          if (hireg == FAIL)
+            {
+              inst.error = _(reg_expected_msgs[rtype]);
+              return FAIL;
+            }
+          count += hireg + dregs - getreg;
+          continue;
+        }
+      
+      /* If we're using Q registers, we can't use [] or [n] syntax.  */
+      if (rtype == REG_TYPE_NQ)
+        {
+          count += 2;
+          continue;
+        }
+      
+      if (skip_past_char (&ptr, '[') == SUCCESS)
+        {
+          if (skip_past_char (&ptr, ']') == SUCCESS)
+            {
+              if (lane == -1)
+                lane = NEON_ALL_LANES;
+              else if (lane != NEON_ALL_LANES)
+                {
+                  inst.error = _(type_error);
+                  return FAIL;
+                }
+            }
+          else
+            {
+              expressionS exp;
+              my_get_expression (&exp, &ptr, GE_NO_PREFIX);
+              if (exp.X_op != O_constant)
+                {
+                  inst.error = _("constant expression required");
+                  return FAIL;
+                }
+              if (lane == -1)
+                lane = exp.X_add_number;
+              else if (lane != exp.X_add_number)
+                {
+                  inst.error = _(type_error);
+                  return FAIL;
+                }
+              
+              if (skip_past_char (&ptr, ']') == FAIL)
+                {
+                  inst.error = _("expected ]");
+                  return FAIL;
+                }
+            }
+        }
+      else if (lane == -1)
+        lane = NEON_INTERLEAVE_LANES;
+      else if (lane != NEON_INTERLEAVE_LANES)
+        {
+          inst.error = _(type_error);
+          return FAIL;
+        }
+      count++;
+    }
+  while ((count != 1 || leading_brace) && skip_past_comma (&ptr) != FAIL);
+  
+  /* No lane set by [x]. We must be interleaving structures.  */
+  if (lane == -1)
+    lane = NEON_INTERLEAVE_LANES;
+  
+  /* Sanity check.  */
+  if (lane == -1 || base_reg == -1 || count < 1 || count > 4
+      || (count > 1 && reg_incr == -1))
+    {
+      inst.error = _("error parsing element/structure list");
+      return FAIL;
+    }
+
+  if ((count > 1 || leading_brace) && skip_past_char (&ptr, '}') == FAIL)
+    {
+      inst.error = _("expected }");
+      return FAIL;
+    }
+  
+  if (reg_incr == -1)
+    reg_incr = 1;
+
+  *pbase = base_reg;
+  *str = ptr;
+  
+  return lane | ((reg_incr - 1) << 4) | ((count - 1) << 5);
+}
+
  /* Parse an explicit relocation suffix on an expression.  This is
     either nothing, or a word in parentheses.  Note that if !OBJ_ELF,
     arm_reloc_hsh contains no entries, so this function can only
@@ -2423,7 +2748,7 @@ s_arm_unwind_save_vfp (void)
    unsigned int reg;
    valueT op;
  
-  count = parse_vfp_reg_list (&input_line_pointer, &reg, 1);
+  count = parse_vfp_reg_list (&input_line_pointer, &reg, REGLIST_VFP_D);
    if (count == FAIL)
      {
        as_bad (_("expected register list"));
@@ -2465,7 +2790,7 @@ s_arm_unwind_save_mmxwr (void)
  
    do
      {
-      reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR);
+      reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR, NULL);
  
        if (reg == FAIL)
         {
@@ -2480,7 +2805,7 @@ s_arm_unwind_save_mmxwr (void)
        if (*input_line_pointer == '-')
         {
           input_line_pointer++;
-         hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR);
+         hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR, NULL);
           if (hi_reg == FAIL)
             {
               as_bad (_(reg_expected_msgs[REG_TYPE_MMXWR]));
@@ -2597,7 +2922,7 @@ s_arm_unwind_save_mmxwcg (void)
  
    do
      {
-      reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG);
+      reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG, NULL);
  
        if (reg == FAIL)
         {
@@ -2613,7 +2938,7 @@ s_arm_unwind_save_mmxwcg (void)
        if (*input_line_pointer == '-')
         {
           input_line_pointer++;
-         hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG);
+         hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG, NULL);
           if (hi_reg == FAIL)
             {
               as_bad (_(reg_expected_msgs[REG_TYPE_MMXWCG]));
@@ -2711,7 +3036,7 @@ s_arm_unwind_movsp (int ignored ATTRIBUTE_UNUSED)
    int reg;
    valueT op;
  
-  reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN);
+  reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL);
    if (reg == FAIL)
      {
        as_bad (_(reg_expected_msgs[REG_TYPE_RN]));
@@ -2772,11 +3097,11 @@ s_arm_unwind_setfp (int ignored ATTRIBUTE_UNUSED)
    int fp_reg;
    int offset;
  
-  fp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN);
+  fp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL);
    if (skip_past_comma (&input_line_pointer) == FAIL)
      sp_reg = FAIL;
    else
-    sp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN);
+    sp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL);
  
    if (fp_reg == FAIL || sp_reg == FAIL)
      {
@@ -3043,6 +3368,47 @@ parse_immediate (char **str, int *val, int min, int max,
    return SUCCESS;
  }
  
+/* Less-generic immediate-value read function with the possibility of loading a
+   big (64-bit) immediate, as required by Neon VMOV and VMVN immediate
+   instructions.  Parses an expression from *STR in GE_OPT_PREFIX_BIG mode and
+   puts the result directly in inst.operands[I]:
+     - an O_constant is stored in .imm;
+     - an O_big whose size (LITTLENUM_NUMBER_OF_BITS * X_add_number) is more
+       than 32 and at most 64 bits has its low 32 bits stored in .imm and the
+       next 32 bits in .reg, and .regisimm is set.
+   Returns SUCCESS and advances *STR past the expression; any other kind of
+   expression yields FAIL with *STR left unchanged.  */
+
+static int
+parse_big_immediate (char **str, int i)
+{
+  expressionS exp;
+  char *ptr = *str;
+
+  my_get_expression (&exp, &ptr, GE_OPT_PREFIX_BIG);
+
+  if (exp.X_op == O_constant)
+    inst.operands[i].imm = exp.X_add_number;
+  else if (exp.X_op == O_big
+           && LITTLENUM_NUMBER_OF_BITS * exp.X_add_number > 32
+           && LITTLENUM_NUMBER_OF_BITS * exp.X_add_number <= 64)
+    {
+      unsigned parts = 32 / LITTLENUM_NUMBER_OF_BITS, j, idx = 0;
+      /* Bignums have their least significant bits in
+         generic_bignum[0]. Make sure we put 32 bits in imm and
+         32 bits in reg,  in a (hopefully) portable way.
+         Each littlenum is cast to unsigned before it is shifted: if the
+         littlenum type is narrower than int it would otherwise be promoted
+         to signed int, and shifting a set top bit into the sign bit is
+         undefined behaviour.  */
+      assert (parts != 0);
+      inst.operands[i].imm = 0;
+      for (j = 0; j < parts; j++, idx++)
+        inst.operands[i].imm |= ((unsigned) generic_bignum[idx]
+                                 << (LITTLENUM_NUMBER_OF_BITS * j));
+      inst.operands[i].reg = 0;
+      for (j = 0; j < parts; j++, idx++)
+        inst.operands[i].reg |= ((unsigned) generic_bignum[idx]
+                                 << (LITTLENUM_NUMBER_OF_BITS * j));
+      inst.operands[i].regisimm = 1;
+    }
+  else
+    return FAIL;
+
+  *str = ptr;
+
+  return SUCCESS;
+}
+
  /* Returns the pseudo-register number of an FPA immediate constant,
     or FAIL if there isn't a valid constant here.  */
  
@@ -3232,7 +3598,7 @@ parse_shift (char **str, int i, enum parse_shift_mode mode)
        skip_whitespace (p);
  
        if (mode == NO_SHIFT_RESTRICT
-         && (reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL)
+         && (reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL)
         {
           inst.operands[i].imm = reg;
           inst.operands[i].immisreg = 1;
@@ -3263,7 +3629,7 @@ parse_shifter_operand (char **str, int i)
    int value;
    expressionS expr;
  
-  if ((value = arm_reg_parse (str, REG_TYPE_RN)) != FAIL)
+  if ((value = arm_reg_parse (str, REG_TYPE_RN, NULL)) != FAIL)
      {
        inst.operands[i].reg = value;
        inst.operands[i].isreg = 1;
@@ -3374,7 +3740,7 @@ parse_address (char **str, int i)
        return SUCCESS;
      }
  
-  if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL)
+  if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL)
      {
        inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
        return FAIL;
@@ -3389,7 +3755,7 @@ parse_address (char **str, int i)
        if (*p == '+') p++;
        else if (*p == '-') p++, inst.operands[i].negative = 1;
  
-      if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL)
+      if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL)
         {
           inst.operands[i].imm = reg;
           inst.operands[i].immisreg = 1;
@@ -3398,6 +3764,23 @@ parse_address (char **str, int i)
             if (parse_shift (&p, i, SHIFT_IMMEDIATE) == FAIL)
               return FAIL;
         }
+      else if (skip_past_char (&p, ':') == SUCCESS)
+        {
+          /* FIXME: '@' should be used here, but it's filtered out by generic
+             code before we get to see it here. This may be subject to
+             change.  */
+          expressionS exp;
+          my_get_expression (&exp, &p, GE_NO_PREFIX);
+          if (exp.X_op != O_constant)
+            {
+              inst.error = _("alignment must be constant");
+              return FAIL;
+            }
+          inst.operands[i].imm = exp.X_add_number << 8;
+          inst.operands[i].immisalign = 1;
+          /* Alignments are not pre-indexes.  */
+          inst.operands[i].preind = 0;
+        }
        else
         {
           if (inst.operands[i].negative)
@@ -3455,9 +3838,14 @@ parse_address (char **str, int i)
           if (*p == '+') p++;
           else if (*p == '-') p++, inst.operands[i].negative = 1;
  
-         if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL)
+         if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL)
             {
-             inst.operands[i].imm = reg;
+              /* We might be using the immediate for alignment already. If we
+                 are, OR the register number into the low-order bits.  */
+              if (inst.operands[i].immisalign)
+               inst.operands[i].imm |= reg;
+              else
+                inst.operands[i].imm = reg;
               inst.operands[i].immisreg = 1;
  
               if (skip_past_comma (&p) == SUCCESS)
@@ -3711,7 +4099,7 @@ parse_tb (char **str)
        return FAIL;
      }
  
-  if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL)
+  if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL)
      {
        inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
        return FAIL;
@@ -3724,7 +4112,7 @@ parse_tb (char **str)
        return FAIL;
      }
    
-  if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL)
+  if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL)
      {
        inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
        return FAIL;
@@ -3752,6 +4140,158 @@ parse_tb (char **str)
    return SUCCESS;
  }
  
+/* Parse the operands of a Neon VMOV instruction. See do_neon_mov for more
+   information on the types the operands can take and how they are encoded.
+   Note particularly the abuse of ".regisimm" to signify a Neon register.
+   Up to three operands may be read, starting at inst.operands[*WHICH_OPERAND];
+   this function handles setting the ".present" field for each operand itself,
+   except that an immediate second operand (cases 2 and 3) is filled in by
+   parse_big_immediate, which does not set ".present".
+   Updates STR and WHICH_OPERAND (to the index of the last operand read) if
+   parsing is successful and returns SUCCESS, else sets inst.error and returns
+   FAIL.  On failure STR and WHICH_OPERAND are left unchanged, but some of the
+   operand slots may already have been written.  */
+
+static int
+parse_neon_mov (char **str, int *which_operand)
+{
+  int i = *which_operand, val;
+  enum arm_reg_type rtype;
+  char *ptr = *str;
+
+  if ((val = parse_scalar (&ptr, 8)) != FAIL)
+    {
+      /* Case 4: VMOV<c><q>.<size> <Dn[x]>, <Rd>.  */
+      inst.operands[i].reg = val;
+      inst.operands[i].isscalar = 1;
+      inst.operands[i++].present = 1;
+
+      if (skip_past_comma (&ptr) == FAIL)
+        goto wanted_comma;
+
+      if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) == FAIL)
+        goto wanted_arm;
+
+      inst.operands[i].reg = val;
+      inst.operands[i].isreg = 1;
+      inst.operands[i].present = 1;
+    }
+  else if ((val = arm_reg_parse (&ptr, REG_TYPE_NDQ, &rtype)) != FAIL)
+    {
+      /* Cases 0, 1, 2, 3, 5 (D only).  */
+      if (skip_past_comma (&ptr) == FAIL)
+        goto wanted_comma;
+
+      inst.operands[i].reg = val;
+      inst.operands[i].isreg = 1;
+      inst.operands[i].isquad = (rtype == REG_TYPE_NQ);
+      inst.operands[i++].present = 1;
+
+      if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL)
+        {
+          /* Case 5: VMOV<c><q> <Dm>, <Rd>, <Rn>.  */
+          inst.operands[i-1].regisimm = 1;
+          inst.operands[i].reg = val;
+          inst.operands[i].isreg = 1;
+          inst.operands[i++].present = 1;
+
+          if (rtype == REG_TYPE_NQ)
+            {
+              inst.error = _("can't use Neon quad register here");
+              return FAIL;
+            }
+          if (skip_past_comma (&ptr) == FAIL)
+            goto wanted_comma;
+          if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) == FAIL)
+            goto wanted_arm;
+          inst.operands[i].reg = val;
+          inst.operands[i].isreg = 1;
+          inst.operands[i].present = 1;
+        }
+      else if (parse_big_immediate (&ptr, i) == SUCCESS)
+        {
+          /* Case 2: VMOV<c><q>.<dt> <Qd>, #<imm>
+             Case 3: VMOV<c><q>.<dt> <Dd>, #<imm>  */
+          if (!thumb_mode && (inst.instruction & 0xf0000000) != 0xe0000000)
+            goto bad_cond;
+        }
+      else if ((val = arm_reg_parse (&ptr, REG_TYPE_NDQ, &rtype)) != FAIL)
+        {
+          /* Case 0: VMOV<c><q> <Qd>, <Qm>
+             Case 1: VMOV<c><q> <Dd>, <Dm>  */
+          if (!thumb_mode && (inst.instruction & 0xf0000000) != 0xe0000000)
+            goto bad_cond;
+
+          inst.operands[i].reg = val;
+          inst.operands[i].isreg = 1;
+          inst.operands[i].isquad = (rtype == REG_TYPE_NQ);
+          inst.operands[i].present = 1;
+        }
+      else
+        {
+          inst.error = _("expected <Rm> or <Dm> or <Qm> operand");
+          return FAIL;
+        }
+    }
+  else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL)
+    {
+      /* Cases 6, 7.  */
+      inst.operands[i].reg = val;
+      inst.operands[i].isreg = 1;
+      inst.operands[i++].present = 1;
+
+      if (skip_past_comma (&ptr) == FAIL)
+        goto wanted_comma;
+
+      if ((val = parse_scalar (&ptr, 8)) != FAIL)
+        {
+          /* Case 6: VMOV<c><q>.<dt> <Rd>, <Dn[x]>  */
+          inst.operands[i].reg = val;
+          inst.operands[i].isscalar = 1;
+          inst.operands[i].present = 1;
+        }
+      else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL)
+        {
+          /* Case 7: VMOV<c><q> <Rd>, <Rn>, <Dm>  */
+          inst.operands[i].reg = val;
+          inst.operands[i].isreg = 1;
+          inst.operands[i++].present = 1;
+
+          if (skip_past_comma (&ptr) == FAIL)
+            goto wanted_comma;
+
+          if ((val = arm_reg_parse (&ptr, REG_TYPE_VFD, NULL)) == FAIL)
+            {
+              inst.error = _(reg_expected_msgs[REG_TYPE_VFD]);
+              return FAIL;
+            }
+
+          inst.operands[i].reg = val;
+          inst.operands[i].isreg = 1;
+          inst.operands[i].regisimm = 1;
+          inst.operands[i].present = 1;
+        }
+      else
+        {
+          /* Neither a scalar nor an ARM register follows the comma, so this
+             is not a valid form of case 6 or 7.  Fail here instead of
+             reporting SUCCESS with the second operand left unparsed.  */
+          inst.error = _("expected <Dn[x]> or <Rn> operand");
+          return FAIL;
+        }
+    }
+  else
+    {
+      inst.error = _("parse error");
+      return FAIL;
+    }
+
+  /* Successfully parsed the operands. Update args.  */
+  *which_operand = i;
+  *str = ptr;
+  return SUCCESS;
+
+  wanted_comma:
+  inst.error = _("expected comma");
+  return FAIL;
+
+  wanted_arm:
+  inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
+  return FAIL;
+
+  bad_cond:
+  inst.error = _("instruction cannot be conditionalized");
+  return FAIL;
+}
+
  /* Matcher codes for parse_operands.  */
  enum operand_parse_code
  {
@@ -3765,7 +4305,11 @@ enum operand_parse_code
    OP_RCN,      /* Coprocessor register */
    OP_RF,       /* FPA register */
    OP_RVS,      /* VFP single precision register */
-  OP_RVD,      /* VFP double precision register */
+  OP_RVD,      /* VFP double precision register (0..15) */
+  OP_RND,       /* Neon double precision register (0..31) */
+  OP_RNQ,      /* Neon quad precision register */
+  OP_RNDQ,      /* Neon double or quad precision register */
+  OP_RNSC,      /* Neon scalar D[X] */
    OP_RVC,      /* VFP control register */
    OP_RMF,      /* Maverick F register */
    OP_RMD,      /* Maverick D register */
@@ -3781,14 +4325,31 @@ enum operand_parse_code
    OP_REGLST,   /* ARM register list */
    OP_VRSLST,   /* VFP single-precision register list */
    OP_VRDLST,   /* VFP double-precision register list */
-
+  OP_NRDLST,    /* Neon double-precision register list (d0-d31, qN aliases) */
+  OP_NSTRLST,   /* Neon element/structure list */
+
+  OP_NILO,      /* Neon immediate/logic operands 2 or 2+3. (VBIC, VORR...)  */
+  OP_RNDQ_I0,   /* Neon D or Q reg, or immediate zero.  */
+  OP_RR_RNSC,   /* ARM reg or Neon scalar.  */
+  OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar.  */
+  OP_RND_RNSC,  /* Neon D reg, or Neon scalar.  */
+  OP_VMOV,      /* Neon VMOV operands.  */
+  OP_RNDQ_IMVNb,/* Neon D or Q reg, or immediate good for VMVN.  */
+  OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift.  */
+
+  OP_I0,        /* immediate zero */
    OP_I7,       /* immediate value 0 .. 7 */
    OP_I15,      /*                 0 .. 15 */
    OP_I16,      /*                 1 .. 16 */
+  OP_I16z,      /*                 0 .. 16 */
    OP_I31,      /*                 0 .. 31 */
    OP_I31w,     /*                 0 .. 31, optional trailing ! */
    OP_I32,      /*                 1 .. 32 */
+  OP_I32z,     /*                 0 .. 32 */
+  OP_I63,      /*                 0 .. 63 */
    OP_I63s,     /*               -64 .. 63 */
+  OP_I64,      /*                 1 .. 64 */
+  OP_I64z,     /*                 0 .. 64 */
    OP_I255,     /*                 0 .. 255 */
    OP_Iffff,    /*                 0 .. 65535 */
  
@@ -3818,11 +4379,15 @@ enum operand_parse_code
    /* Optional operands.         */
    OP_oI7b,      /* immediate, prefix optional, 0 .. 7 */
    OP_oI31b,     /*                             0 .. 31 */
+  OP_oI32b,      /*                             1 .. 32 */
    OP_oIffffb,   /*                             0 .. 65535 */
    OP_oI255c,    /*       curly-brace enclosed, 0 .. 255 */
  
    OP_oRR,       /* ARM register */
    OP_oRRnpc,    /* ARM register, not the PC */
+  OP_oRND,       /* Optional Neon double precision register */
+  OP_oRNQ,       /* Optional Neon quad precision register */
+  OP_oRNDQ,      /* Optional Neon double or quad precision register */
    OP_oSHll,     /* LSL immediate */
    OP_oSHar,     /* ASR immediate */
    OP_oSHllar,   /* LSL or ASR immediate */
@@ -3843,6 +4408,7 @@ parse_operands (char *str, const unsigned char *pattern)
    char *backtrack_pos = 0;
    const char *backtrack_error = 0;
    int i, val, backtrack_index = 0;
+  enum arm_reg_type rtype;
  
  #define po_char_or_fail(chr) do {              \
    if (skip_past_char (&str, chr) == FAIL)      \
@@ -3850,7 +4416,7 @@ parse_operands (char *str, const unsigned char *pattern)
  } while (0)
  
  #define po_reg_or_fail(regtype) do {                   \
-  val = arm_reg_parse (&str, regtype);                 \
+  val = arm_reg_parse (&str, regtype, &rtype);         \
    if (val == FAIL)                                     \
      {                                                  \
        inst.error = _(reg_expected_msgs[regtype]);      \
@@ -3858,15 +4424,17 @@ parse_operands (char *str, const unsigned char *pattern)
      }                                                  \
    inst.operands[i].reg = val;                          \
    inst.operands[i].isreg = 1;                          \
+  inst.operands[i].isquad = (rtype == REG_TYPE_NQ);    \
  } while (0)
  
-#define po_reg_or_goto(regtype, label) do {    \
-  val = arm_reg_parse (&str, regtype);         \
-  if (val == FAIL)                             \
-    goto label;                                        \
-                                               \
-  inst.operands[i].reg = val;                  \
-  inst.operands[i].isreg = 1;                  \
+#define po_reg_or_goto(regtype, label) do {            \
+  val = arm_reg_parse (&str, regtype, &rtype);         \
+  if (val == FAIL)                                     \
+    goto label;                                                \
+                                                       \
+  inst.operands[i].reg = val;                          \
+  inst.operands[i].isreg = 1;                          \
+  inst.operands[i].isquad = (rtype == REG_TYPE_NQ);    \
  } while (0)
  
  #define po_imm_or_fail(min, max, popt) do {                    \
@@ -3875,6 +4443,14 @@ parse_operands (char *str, const unsigned char *pattern)
    inst.operands[i].imm = val;                                  \
  } while (0)
  
+#define po_scalar_or_goto(elsz, label) do {    \
+  val = parse_scalar (&str, elsz);             \
+  if (val == FAIL)                             \
+    goto label;                                        \
+  inst.operands[i].reg = val;                  \
+  inst.operands[i].isscalar = 1;               \
+} while (0)
+
  #define po_misc_or_fail(expr) do {             \
    if (expr)                                    \
      goto failure;                              \
@@ -3908,6 +4484,8 @@ parse_operands (char *str, const unsigned char *pattern)
         case OP_RF:    po_reg_or_fail (REG_TYPE_FN);      break;
         case OP_RVS:   po_reg_or_fail (REG_TYPE_VFS);     break;
         case OP_RVD:   po_reg_or_fail (REG_TYPE_VFD);     break;
+        case OP_oRND:
+       case OP_RND:   po_reg_or_fail (REG_TYPE_VFD);     break;
         case OP_RVC:   po_reg_or_fail (REG_TYPE_VFC);     break;
         case OP_RMF:   po_reg_or_fail (REG_TYPE_MVF);     break;
         case OP_RMD:   po_reg_or_fail (REG_TYPE_MVD);     break;
@@ -3919,6 +4497,104 @@ parse_operands (char *str, const unsigned char *pattern)
         case OP_RIWC:  po_reg_or_fail (REG_TYPE_MMXWC);   break;
         case OP_RIWG:  po_reg_or_fail (REG_TYPE_MMXWCG);  break;
         case OP_RXA:   po_reg_or_fail (REG_TYPE_XSCALE);  break;
+        case OP_oRNQ:
+       case OP_RNQ:   po_reg_or_fail (REG_TYPE_NQ);      break;
+        case OP_oRNDQ:
+       case OP_RNDQ:  po_reg_or_fail (REG_TYPE_NDQ);     break;
+
+        /* Neon scalar. Using an element size of 8 means that some invalid
+           scalars are accepted here, so deal with those in later code.  */
+        case OP_RNSC:  po_scalar_or_goto (8, failure);    break;
+
+        /* WARNING: We can expand to two operands here. This has the potential
+           to totally confuse the backtracking mechanism! It will be OK at
+           least as long as we don't try to use optional args as well,
+           though.  */
+        case OP_NILO:
+          {
+            po_reg_or_goto (REG_TYPE_NDQ, try_imm);
+            i++;
+            skip_past_comma (&str);
+            po_reg_or_goto (REG_TYPE_NDQ, one_reg_only);
+            break;
+            one_reg_only:
+            /* Optional register operand was omitted. Unfortunately, it's in
+               operands[i-1] and we need it to be in inst.operands[i]. Fix that
+               here (this is a bit grotty).  */
+            inst.operands[i] = inst.operands[i-1];
+            inst.operands[i-1].present = 0;
+            break;
+            try_imm:
+            /* Immediate gets verified properly later, so accept any now.  */
+            po_imm_or_fail (INT_MIN, INT_MAX, TRUE);
+          }
+          break;
+
+        case OP_RNDQ_I0:
+          {
+            po_reg_or_goto (REG_TYPE_NDQ, try_imm0);
+            break;
+            try_imm0:
+            po_imm_or_fail (0, 0, TRUE);
+          }
+          break;
+
+        case OP_RR_RNSC:
+          {
+            po_scalar_or_goto (8, try_rr);
+            break;
+            try_rr:
+            po_reg_or_fail (REG_TYPE_RN);
+          }
+          break;
+
+        case OP_RNDQ_RNSC:
+          {
+            po_scalar_or_goto (8, try_ndq);
+            break;
+            try_ndq:
+            po_reg_or_fail (REG_TYPE_NDQ);
+          }
+          break;
+
+        case OP_RND_RNSC:
+          {
+            po_scalar_or_goto (8, try_vfd);
+            break;
+            try_vfd:
+            po_reg_or_fail (REG_TYPE_VFD);
+          }
+          break;
+
+        case OP_VMOV:
+          /* WARNING: parse_neon_mov can move the operand counter, i. If we're
+             not careful then bad things might happen.  */
+          po_misc_or_fail (parse_neon_mov (&str, &i) == FAIL);
+          break;
+
+        case OP_RNDQ_IMVNb:
+          {
+            po_reg_or_goto (REG_TYPE_NDQ, try_mvnimm);
+            break;
+            try_mvnimm:
+            /* There's a possibility of getting a 64-bit immediate here, so
+               we need special handling.  */
+            if (parse_big_immediate (&str, i) == FAIL)
+              {
+                inst.error = _("immediate value is out of range");
+                goto failure;
+              }
+          }
+          break;
+
+        case OP_RNDQ_I63b:
+          {
+            po_reg_or_goto (REG_TYPE_NDQ, try_shimm);
+            break;
+            try_shimm:
+            po_imm_or_fail (0, 63, TRUE);
+          }
+          break;
  
         case OP_RRnpcb:
           po_char_or_fail ('[');
@@ -3936,9 +4612,14 @@ parse_operands (char *str, const unsigned char *pattern)
         case OP_I7:      po_imm_or_fail (  0,      7, FALSE);   break;
         case OP_I15:     po_imm_or_fail (  0,     15, FALSE);   break;
         case OP_I16:     po_imm_or_fail (  1,     16, FALSE);   break;
+        case OP_I16z:   po_imm_or_fail (  0,     16, FALSE);   break;
         case OP_I31:     po_imm_or_fail (  0,     31, FALSE);   break;
         case OP_I32:     po_imm_or_fail (  1,     32, FALSE);   break;
+        case OP_I32z:   po_imm_or_fail (  0,     32, FALSE);   break;
         case OP_I63s:    po_imm_or_fail (-64,     63, FALSE);   break;
+        case OP_I63:    po_imm_or_fail (  0,     63, FALSE);   break;
+        case OP_I64:    po_imm_or_fail (  1,     64, FALSE);   break;
+        case OP_I64z:   po_imm_or_fail (  0,     64, FALSE);   break;
         case OP_I255:    po_imm_or_fail (  0,    255, FALSE);   break;
         case OP_Iffff:   po_imm_or_fail (  0, 0xffff, FALSE);   break;
  
@@ -3948,6 +4629,7 @@ parse_operands (char *str, const unsigned char *pattern)
         case OP_I15b:    po_imm_or_fail (  0,     15, TRUE);    break;
         case OP_oI31b:
         case OP_I31b:    po_imm_or_fail (  0,     31, TRUE);    break;
+        case OP_oI32b:   po_imm_or_fail (  1,     32, TRUE);    break;
         case OP_oIffffb: po_imm_or_fail (  0, 0xffff, TRUE);    break;
  
           /* Immediate variants */
@@ -4066,13 +4748,22 @@ parse_operands (char *str, const unsigned char *pattern)
           break;
  
         case OP_VRSLST:
-         val = parse_vfp_reg_list (&str, &inst.operands[i].reg, 0);
+         val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_S);
           break;
  
         case OP_VRDLST:
-         val = parse_vfp_reg_list (&str, &inst.operands[i].reg, 1);
+         val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_D);
           break;
  
+        case OP_NRDLST:
+          val = parse_vfp_reg_list (&str, &inst.operands[i].reg,
+                                    REGLIST_NEON_D);
+          break;
+
+       case OP_NSTRLST:
+          val = parse_neon_el_struct_list (&str, &inst.operands[i].reg);
+          break;
+
           /* Addressing modes */
         case OP_ADDR:
           po_misc_or_fail (parse_address (&str, i));
@@ -4121,6 +4812,8 @@ parse_operands (char *str, const unsigned char *pattern)
         case OP_REGLST:
         case OP_VRSLST:
         case OP_VRDLST:
+        case OP_NRDLST:
+        case OP_NSTRLST:
           if (val == FAIL)
             goto failure;
           inst.operands[i].imm = val;
@@ -4178,6 +4871,7 @@ parse_operands (char *str, const unsigned char *pattern)
  #undef po_reg_or_fail
  #undef po_reg_or_goto
  #undef po_imm_or_fail
+#undef po_scalar_or_goto
  \f
  /* Shorthand macro for instruction encoding functions issuing errors.  */
  #define constraint(expr, err) do {             \
@@ -4236,11 +4930,30 @@ encode_thumb32_immediate (unsigned int val)
  
    return FAIL;
  }
-/* Encode a VFP SP register number into inst.instruction.  */
+/* Encode a VFP SP or DP register number into inst.instruction.  */
  
  static void
-encode_arm_vfp_sp_reg (int reg, enum vfp_sp_reg_pos pos)
-{
+encode_arm_vfp_reg (int reg, enum vfp_reg_pos pos)
+{
+  if ((pos == VFP_REG_Dd || pos == VFP_REG_Dn || pos == VFP_REG_Dm)
+      && reg > 15)
+    {
+      if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3))
+        {
+          if (thumb_mode)
+            ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
+                                    fpu_vfp_ext_v3);
+          else
+            ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used,
+                                    fpu_vfp_ext_v3);
+        }
+      else
+        {
+          inst.error = _("D register out of range for selected VFP version");
+          return;
+        }
+    }
+
    switch (pos)
      {
      case VFP_REG_Sd:
@@ -4255,6 +4968,18 @@ encode_arm_vfp_sp_reg (int reg, enum vfp_sp_reg_pos pos)
        inst.instruction |= ((reg >> 1) << 0) | ((reg & 1) << 5);
        break;
  
+    case VFP_REG_Dd:
+      inst.instruction |= ((reg & 15) << 12) | ((reg >> 4) << 22);
+      break;
+    
+    case VFP_REG_Dn:
+      inst.instruction |= ((reg & 15) << 16) | ((reg >> 4) << 7);
+      break;
+    
+    case VFP_REG_Dm:
+      inst.instruction |= (reg & 15) | ((reg >> 4) << 5);
+      break;
+
      default:
        abort ();
      }
@@ -5542,43 +6267,43 @@ do_sxth (void)
  static void
  do_vfp_sp_monadic (void)
  {
-  encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
-  encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sm);
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm);
  }
  
  static void
  do_vfp_sp_dyadic (void)
  {
-  encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
-  encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sn);
-  encode_arm_vfp_sp_reg (inst.operands[2].reg, VFP_REG_Sm);
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sn);
+  encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Sm);
  }
  
  static void
  do_vfp_sp_compare_z (void)
  {
-  encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
  }
  
  static void
  do_vfp_dp_sp_cvt (void)
  {
-  inst.instruction |= inst.operands[0].reg << 12;
-  encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sm);
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm);
  }
  
  static void
  do_vfp_sp_dp_cvt (void)
  {
-  encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
-  inst.instruction |= inst.operands[1].reg;
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dm);
  }
  
  static void
  do_vfp_reg_from_sp (void)
  {
    inst.instruction |= inst.operands[0].reg << 12;
-  encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sn);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sn);
  }
  
  static void
@@ -5588,13 +6313,13 @@ do_vfp_reg2_from_sp2 (void)
               _("only two consecutive VFP SP registers allowed here"));
    inst.instruction |= inst.operands[0].reg << 12;
    inst.instruction |= inst.operands[1].reg << 16;
-  encode_arm_vfp_sp_reg (inst.operands[2].reg, VFP_REG_Sm);
+  encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Sm);
  }
  
  static void
  do_vfp_sp_from_reg (void)
  {
-  encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sn);
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sn);
    inst.instruction |= inst.operands[1].reg << 12;
  }
  
@@ -5603,7 +6328,7 @@ do_vfp_sp2_from_reg2 (void)
  {
    constraint (inst.operands[0].imm != 2,
               _("only two consecutive VFP SP registers allowed here"));
-  encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sm);
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sm);
    inst.instruction |= inst.operands[1].reg << 12;
    inst.instruction |= inst.operands[2].reg << 16;
  }
@@ -5611,14 +6336,14 @@ do_vfp_sp2_from_reg2 (void)
  static void
  do_vfp_sp_ldst (void)
  {
-  encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
    encode_arm_cp_address (1, FALSE, TRUE, 0);
  }
  
  static void
  do_vfp_dp_ldst (void)
  {
-  inst.instruction |= inst.operands[0].reg << 12;
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
    encode_arm_cp_address (1, FALSE, TRUE, 0);
  }
  
@@ -5632,7 +6357,7 @@ vfp_sp_ldstm (enum vfp_ldstm_type ldstm_type)
      constraint (ldstm_type != VFP_LDSTMIA,
                 _("this addressing mode requires base-register writeback"));
    inst.instruction |= inst.operands[0].reg << 16;
-  encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sd);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sd);
    inst.instruction |= inst.operands[1].imm;
  }
  
@@ -5648,7 +6373,7 @@ vfp_dp_ldstm (enum vfp_ldstm_type ldstm_type)
                 _("this addressing mode requires base-register writeback"));
  
    inst.instruction |= inst.operands[0].reg << 16;
-  inst.instruction |= inst.operands[1].reg << 12;
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd);
  
    count = inst.operands[1].imm << 1;
    if (ldstm_type == VFP_LDSTMIAX || ldstm_type == VFP_LDSTMDBX)
@@ -5692,6 +6417,103 @@ do_vfp_xp_ldstmdb (void)
  {
    vfp_dp_ldstm (VFP_LDSTMDBX);
  }
+
+/* Encoder for a two-operand form: operand 0 is encoded at the VFP_REG_Dd
+   position and operand 1 at the VFP_REG_Dm position.  */
+static void
+do_vfp_dp_rd_rm (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dm);
+}
+
+/* Encoder for a two-operand form: operand 0 is encoded at the VFP_REG_Dn
+   position and operand 1 at the VFP_REG_Dd position.  */
+static void
+do_vfp_dp_rn_rd (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dn);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd);
+}
+
+/* Encoder for a two-operand form: operand 0 is encoded at the VFP_REG_Dd
+   position and operand 1 at the VFP_REG_Dn position.  */
+static void
+do_vfp_dp_rd_rn (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dn);
+}
+
+/* Encoder for a three-operand form: operands 0, 1 and 2 are encoded at the
+   VFP_REG_Dd, VFP_REG_Dn and VFP_REG_Dm positions respectively.  */
+static void
+do_vfp_dp_rd_rn_rm (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dn);
+  encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dm);
+}
+
+/* Encoder for a single-operand form: operand 0 is encoded at the
+   VFP_REG_Dd position.  */
+static void
+do_vfp_dp_rd (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+}
+
+/* Encoder for a three-operand form whose written operand order is Dm, Dd,
+   Dn: operands 0, 1 and 2 are encoded at the VFP_REG_Dm, VFP_REG_Dd and
+   VFP_REG_Dn positions respectively.  */
+static void
+do_vfp_dp_rm_rd_rn (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dm);
+  encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd);
+  encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dn);
+}
+
+/* VFPv3 instructions.  */
+/* Encode a register plus immediate constant: operand 0 goes to the
+   VFP_REG_Sd position.  The immediate in operand 1 is split in two: its
+   low four bits are placed at instruction bits [19:16], and the remaining
+   upper bits (imm >> 4) are ORed in starting at bit 0.  */
+static void
+do_vfp_sp_const (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+  inst.instruction |= (inst.operands[1].imm & 15) << 16;
+  inst.instruction |= (inst.operands[1].imm >> 4);
+}
+
+/* Same as do_vfp_sp_const, except that operand 0 goes to the VFP_REG_Dd
+   position: the low four bits of operand 1's immediate land at bits
+   [19:16] and the remaining upper bits are ORed in starting at bit 0.  */
+static void
+do_vfp_dp_const (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+  inst.instruction |= (inst.operands[1].imm & 15) << 16;
+  inst.instruction |= (inst.operands[1].imm >> 4);
+}
+
+/* Shared helper for the conversion encoders below.  The encoded value is
+   SRCSIZE minus the immediate in operand 1; its bit 0 is placed at
+   instruction bit 5 and the remaining bits (value >> 1) are ORed in
+   starting at bit 0.  No range check is done here.  */
+static void
+vfp_conv (int srcsize)
+{
+  unsigned immbits = srcsize - inst.operands[1].imm;
+  inst.instruction |= (immbits & 1) << 5;
+  inst.instruction |= (immbits >> 1);
+}
+
+/* Conversion encoder: operand 0 at the VFP_REG_Sd position, immediate
+   encoded by vfp_conv relative to a source size of 16.  */
+static void
+do_vfp_sp_conv_16 (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+  vfp_conv (16);
+}
+
+/* Conversion encoder: operand 0 at the VFP_REG_Dd position, immediate
+   encoded by vfp_conv relative to a source size of 16.  */
+static void
+do_vfp_dp_conv_16 (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+  vfp_conv (16);
+}
+
+/* Conversion encoder: operand 0 at the VFP_REG_Sd position, immediate
+   encoded by vfp_conv relative to a source size of 32.  */
+static void
+do_vfp_sp_conv_32 (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+  vfp_conv (32);
+}
+
+/* Conversion encoder: operand 0 at the VFP_REG_Dd position, immediate
+   encoded by vfp_conv relative to a source size of 32.  */
+static void
+do_vfp_dp_conv_32 (void)
+{
+  encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+  vfp_conv (32);
+}
+
  \f
  /* FPA instructions.  Also in a logical order. */
  
@@ -7998,52 +8820,2595 @@ do_t_usat16 (void)
    inst.instruction |= inst.operands[1].imm;
    inst.instruction |= inst.operands[2].reg << 16;
  }
-\f
-/* Overall per-instruction processing. */
  
-/* We need to be able to fix up arbitrary expressions in some statements.
-   This is so that we can handle symbols that are an arbitrary distance from
-   the pc.  The most common cases are of the form ((+/-sym -/+ . - 8) & mask),
-   which returns part of an address in a form which will be valid for
-   a data instruction. We do this by pushing the expression into a symbol
-   in the expr_section, and creating a fix for that.  */
+/* Neon instruction encoder helpers.  */
+  
+/* Encodings for the different types for various Neon opcodes.  */
  
-static void
-fix_new_arm (fragS *      frag,
-            int           where,
-            short int     size,
-            expressionS * exp,
-            int           pc_rel,
-            int           reloc)
+/* An "invalid" code for the following tables.  */
+#define N_INV -1u
+
+struct neon_tab_entry
  {
-  fixS *          new_fix;
+  unsigned integer;
+  unsigned float_or_poly;
+  unsigned scalar_or_imm;
+};
+  
+/* Map overloaded Neon opcodes to their respective encodings.  Each row is
+   X(mnemonic, integer, float_or_poly, scalar_or_imm); N_INV marks a column
+   that has no encoding for that mnemonic.  The list is expanded twice with
+   different definitions of X, once for enum neon_opc and once for
+   neon_enc_tab, so row order defines the enumerator values.  */
+#define NEON_ENC_TAB                                   \
+  X(vabd,      0x0000700, 0x1200d00, N_INV),           \
+  X(vmax,      0x0000600, 0x0000f00, N_INV),           \
+  X(vmin,      0x0000610, 0x0200f00, N_INV),           \
+  X(vpadd,     0x0000b10, 0x1000d00, N_INV),           \
+  X(vpmax,     0x0000a00, 0x1000f00, N_INV),           \
+  X(vpmin,     0x0000a10, 0x1200f00, N_INV),           \
+  X(vadd,      0x0000800, 0x0000d00, N_INV),           \
+  X(vsub,      0x1000800, 0x0200d00, N_INV),           \
+  X(vceq,      0x1000810, 0x0000e00, 0x1b10100),       \
+  X(vcge,      0x0000310, 0x1000e00, 0x1b10080),       \
+  X(vcgt,      0x0000300, 0x1200e00, 0x1b10000),       \
+  /* Register variants of the following two instructions are encoded with
+     the operands reversed: a < b is b > a, so vclt shares the vcgt
+     encoding, and a <= b is b >= a, so vcle shares the vcge encoding.  */ \
+  X(vclt,      0x0000300, 0x1200e00, 0x1b10200),       \
+  X(vcle,      0x0000310, 0x1000e00, 0x1b10180),       \
+  X(vmla,      0x0000900, 0x0000d10, 0x0800040),       \
+  X(vmls,      0x1000900, 0x0200d10, 0x0800440),       \
+  X(vmul,      0x0000910, 0x1000d10, 0x0800840),       \
+  X(vmull,     0x0800c00, 0x0800e00, 0x0800a40), /* polynomial not float.  */ \
+  X(vmlal,     0x0800800, N_INV,     0x0800240),       \
+  X(vmlsl,     0x0800a00, N_INV,     0x0800640),       \
+  X(vqdmlal,   0x0800900, N_INV,     0x0800340),       \
+  X(vqdmlsl,   0x0800b00, N_INV,     0x0800740),       \
+  X(vqdmull,   0x0800d00, N_INV,     0x0800b40),       \
+  X(vqdmulh,    0x0000b00, N_INV,     0x0800c40),      \
+  X(vqrdmulh,   0x1000b00, N_INV,     0x0800d40),      \
+  X(vshl,      0x0000400, N_INV,     0x0800510),       \
+  X(vqshl,     0x0000410, N_INV,     0x0800710),       \
+  X(vand,      0x0000110, N_INV,     0x0800030),       \
+  X(vbic,      0x0100110, N_INV,     0x0800030),       \
+  X(veor,      0x1000110, N_INV,     N_INV),           \
+  X(vorn,      0x0300110, N_INV,     0x0800010),       \
+  X(vorr,      0x0200110, N_INV,     0x0800010),       \
+  X(vmvn,      0x1b00580, N_INV,     0x0800030),       \
+  X(vshll,     0x1b20300, N_INV,     0x0800a10), /* max shift, immediate.  */ \
+  X(vcvt,       0x1b30600, N_INV,     0x0800e10), /* integer, fixed-point.  */ \
+  X(vdup,       0xe800b10, N_INV,     0x1b00c00), /* arm, scalar.  */ \
+  X(vld1,       0x0200000, 0x0a00000, 0x0a00c00), /* interlv, lane, dup.  */ \
+  X(vst1,      0x0000000, 0x0800000, N_INV),           \
+  X(vld2,      0x0200100, 0x0a00100, 0x0a00d00),       \
+  X(vst2,      0x0000100, 0x0800100, N_INV),           \
+  X(vld3,      0x0200200, 0x0a00200, 0x0a00e00),       \
+  X(vst3,      0x0000200, 0x0800200, N_INV),           \
+  X(vld4,      0x0200300, 0x0a00300, 0x0a00f00),       \
+  X(vst4,      0x0000300, 0x0800300, N_INV),           \
+  X(vmovn,     0x1b20200, N_INV,     N_INV),           \
+  X(vtrn,      0x1b20080, N_INV,     N_INV),           \
+  X(vqmovn,    0x1b20200, N_INV,     N_INV),           \
+  X(vqmovun,   0x1b20240, N_INV,     N_INV)
+
+/* One N_MNEM_<mnemonic> enumerator per NEON_ENC_TAB row, in row order and
+   starting from zero, so each enumerator's value is the index of the
+   matching row in a table expanded from the same list.  */
+enum neon_opc
+{
+#define X(OPC,I,F,S) N_MNEM_##OPC
+NEON_ENC_TAB
+#undef X
+};
  
-  switch (exp->X_op)
-    {
-    case O_constant:
-    case O_symbol:
-    case O_add:
-    case O_subtract:
-      new_fix = fix_new_exp (frag, where, size, exp, pc_rel, reloc);
-      break;
+/* Encoding table expanded from NEON_ENC_TAB: one entry per row, holding the
+   integer, float_or_poly and scalar_or_imm columns in that order.  */
+static const struct neon_tab_entry neon_enc_tab[] =
+{
+#define X(OPC,I,F,S) { (I), (F), (S) }
+NEON_ENC_TAB
+#undef X
+};
  
-    default:
-      new_fix = fix_new (frag, where, size, make_expr_symbol (exp), 0,
-                        pc_rel, reloc);
-      break;
-    }
+/* Accessors for neon_enc_tab.  X is masked with 0x0fffffff before being used
+   as the table index.  There are only three columns, so several names alias
+   the same one: INTEGER/ARMREG/INTERLV read `integer', POLY/FLOAT/LANE read
+   `float_or_poly', and SCALAR/IMMED/DUP read `scalar_or_imm'.  The index is
+   not range-checked here.  */
+#define NEON_ENC_INTEGER(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_ARMREG(X)  (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_POLY(X)    (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_FLOAT(X)   (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_SCALAR(X)  (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_IMMED(X)   (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_INTERLV(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_LANE(X)    (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_DUP(X)     (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+
+/* Shapes for instruction operands. Some (e.g. NS_DDD_QQQ) represent multiple
+   shapes which an instruction can accept. The following mnemonic characters
+   are used in the tag names for this enumeration:
+
+     D - Neon D<n> register
+     Q - Neon Q<n> register
+     I - Immediate
+     S - Scalar
+     R - ARM register
+     L - D<n> register list
+*/
  
-  /* Mark whether the fix is to a THUMB instruction, or an ARM
-     instruction.  */
-  new_fix->tc_fix_data = thumb_mode;
-}
+enum neon_shape
+{
+  /* Each polymorphic tag of the form NS_x_y is followed by the two specific
+     shapes (NS_x, NS_y) that neon_check_shape can resolve it to.  */
+  NS_DDD_QQQ,
+  NS_DDD,
+  NS_QQQ,
+  NS_DDI_QQI,
+  NS_DDI,
+  NS_QQI,
+  NS_DDS_QQS,
+  NS_DDS,
+  NS_QQS,
+  NS_DD_QQ,
+  NS_DD,
+  NS_QQ,
+  NS_DS_QS,
+  NS_DS,
+  NS_QS,
+  NS_DR_QR,
+  NS_DR,
+  NS_QR,
+  NS_DI_QI,
+  NS_DI,
+  NS_QI,
+  /* Single (non-polymorphic) shapes.  */
+  NS_DLD,
+  NS_DQ,
+  NS_QD,
+  NS_DQI,
+  NS_QDI,
+  NS_QDD,
+  NS_QDS,
+  NS_QQD,
+  NS_DQQ,
+  /* Four-operand polymorphic shape and its two specific forms.  */
+  NS_DDDI_QQQI,
+  NS_DDDI,
+  NS_QQQI,
+  /* NOTE(review): presumably "no shape check wanted"; confirm against the
+     callers, which are outside this section.  */
+  NS_IGNORE
+};
  
-/* Create a frg for an instruction requiring relaxation.  */
-static void
-output_relax_insn (void)
-{
-  char * to;
-  symbolS *sym;
+/* Bit masks used in type checking given instructions.
+  'N_EQK' means the type must be the same as (or based on in some way) the key
+   type, which itself is marked with the 'N_KEY' bit. If the 'N_EQK' bit is
+   set, various other bits can be set as well in order to modify the meaning of
+   the type constraint.  */
+
+enum neon_type_mask
+{
+  /* Concrete element types: one bit each, N_S8 (lowest) to N_F32.  */
+  N_S8   = 0x000001,
+  N_S16  = 0x000002,
+  N_S32  = 0x000004,
+  N_S64  = 0x000008,
+  N_U8   = 0x000010,
+  N_U16  = 0x000020,
+  N_U32  = 0x000040,
+  N_U64  = 0x000080,
+  N_I8   = 0x000100,
+  N_I16  = 0x000200,
+  N_I32  = 0x000400,
+  N_I64  = 0x000800,
+  N_8    = 0x001000,
+  N_16   = 0x002000,
+  N_32   = 0x004000,
+  N_64   = 0x008000,
+  N_P8   = 0x010000,
+  N_P16  = 0x020000,
+  N_F32  = 0x040000,
+  N_KEY  = 0x080000, /* key element (main type specifier).  */
+  N_EQK  = 0x100000, /* given operand has the same type & size as the key.  */
+  /* The modifiers below deliberately reuse the numeric values of N_S8 ..
+     N_U16.  They are only interpreted as modifiers when N_EQK is also set
+     (see neon_modify_type_size).  */
+  N_DBL  = 0x000001, /* if N_EQK, this operand is twice the size.  */
+  N_HLF  = 0x000002, /* if N_EQK, this operand is half the size.  */
+  N_SGN  = 0x000004, /* if N_EQK, this operand is forced to be signed.  */
+  N_UNS  = 0x000008, /* if N_EQK, this operand is forced to be unsigned.  */
+  N_INT  = 0x000010, /* if N_EQK, this operand is forced to be integer.  */
+  N_FLT  = 0x000020, /* if N_EQK, this operand is forced to be float.  */
+  N_UTYP = 0,        /* no type bit; type_chk_of_el_type's "no match".  */
+  N_MAX_NONSPECIAL = N_F32 /* highest concrete type bit (loop bound).  */
+};
+
+/* Convenience unions of neon_type_mask bits.  In these names S = signed,
+   U = unsigned, I = integer, F = float; a numeric suffix names the sizes
+   covered (_32: 8 to 32 bits, _16_64: 16 to 64 bits, _ALL: 8 to 64).  */
+#define N_SU_ALL   (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64)
+#define N_SU_32    (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32)
+#define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64)
+#define N_SUF_32   (N_SU_32 | N_F32)
+#define N_I_ALL    (N_I8 | N_I16 | N_I32 | N_I64)
+#define N_IF_32    (N_I8 | N_I16 | N_I32 | N_F32)
+
+/* Pass this as the first type argument to neon_check_type to ignore types
+   altogether.  */
+#define N_IGNORE_TYPE (N_KEY | N_EQK)
+
+/* Check the shape of a Neon instruction (sizes of registers). Returns the more
+   specific shape when there are two alternatives. For non-polymorphic shapes,
+   checking is done during operand parsing, so is not implemented here.
+
+   REQ must be one of the polymorphic NS_x_y tags handled below; any other
+   value aborts.  If the operands match neither alternative, inst.error is
+   set and REQ itself is returned.
+
+   Side effect: when operand 1 is not present, operand 0 is copied into it
+   before the check.  */
+
+static enum neon_shape
+neon_check_shape (enum neon_shape req)
+{
+/* Operand classification: any register, D register, Q register, immediate,
+   scalar.  */
+#define RR(X) (inst.operands[(X)].isreg)
+#define RD(X) (inst.operands[(X)].isreg && !inst.operands[(X)].isquad)
+#define RQ(X) (inst.operands[(X)].isreg && inst.operands[(X)].isquad)
+#define IM(X) (!inst.operands[(X)].isreg && !inst.operands[(X)].isscalar)
+#define SC(X) (!inst.operands[(X)].isreg && inst.operands[(X)].isscalar)
+
+  /* Fix missing optional operands. FIXME: we don't know at this point how
+     many arguments we should have, so this makes the assumption that we have
+     > 1. This is true of all current Neon opcodes, I think, but may not be
+     true in the future.  */
+  if (!inst.operands[1].present)
+    inst.operands[1] = inst.operands[0];
+
+  switch (req)
+    {
+    case NS_DDD_QQQ:
+      {
+        if (RD(0) && RD(1) && RD(2))
+          return NS_DDD;
+        /* All three operands must be Q registers, including operand 2.  */
+        else if (RQ(0) && RQ(1) && RQ(2))
+          return NS_QQQ;
+        else
+          inst.error = _("expected <Qd>, <Qn>, <Qm> or <Dd>, <Dn>, <Dm> "
+                         "operands");
+      }
+      break;
+
+    case NS_DDI_QQI:
+      {
+        if (RD(0) && RD(1) && IM(2))
+          return NS_DDI;
+        else if (RQ(0) && RQ(1) && IM(2))
+          return NS_QQI;
+        else
+          inst.error = _("expected <Qd>, <Qn>, #<imm> or <Dd>, <Dn>, #<imm> "
+                         "operands");
+      }
+      break;
+
+    case NS_DDDI_QQQI:
+      {
+        if (RD(0) && RD(1) && RD(2) && IM(3))
+          return NS_DDDI;
+        else if (RQ(0) && RQ(1) && RQ(2) && IM(3))
+          return NS_QQQI;
+        else
+          inst.error = _("expected <Qd>, <Qn>, <Qm>, #<imm> or "
+                         "<Dd>, <Dn>, <Dm>, #<imm> operands");
+      }
+      break;
+
+    case NS_DDS_QQS:
+      {
+        if (RD(0) && RD(1) && SC(2))
+          return NS_DDS;
+        else if (RQ(0) && RQ(1) && SC(2))
+          return NS_QQS;
+        else
+          inst.error = _("expected <Qd>, <Qn>, <Dm[x]> or <Dd>, <Dn>, <Dm[x]> "
+                         "operands");
+      }
+      break;
+
+    case NS_DD_QQ:
+      {
+        if (RD(0) && RD(1))
+          return NS_DD;
+        else if (RQ(0) && RQ(1))
+          return NS_QQ;
+        else
+          inst.error = _("expected <Qd>, <Qm> or <Dd>, <Dm> operands");
+      }
+      break;
+
+    case NS_DS_QS:
+      {
+        if (RD(0) && SC(1))
+          return NS_DS;
+        else if (RQ(0) && SC(1))
+          return NS_QS;
+        else
+          inst.error = _("expected <Qd>, <Dm[x]> or <Dd>, <Dm[x]> operands");
+      }
+      break;
+
+    case NS_DR_QR:
+      {
+        if (RD(0) && RR(1))
+          return NS_DR;
+        else if (RQ(0) && RR(1))
+          return NS_QR;
+        else
+          inst.error = _("expected <Qd>, <Rm> or <Dd>, <Rm> operands");
+      }
+      break;
+
+    case NS_DI_QI:
+      {
+        if (RD(0) && IM(1))
+          return NS_DI;
+        else if (RQ(0) && IM(1))
+          return NS_QI;
+        else
+          inst.error = _("expected <Qd>, #<imm> or <Dd>, #<imm> operands");
+      }
+      break;
+
+    default:
+      /* Non-polymorphic shapes must not be passed in.  */
+      abort ();
+    }
+
+  /* Only reached after a mismatch: inst.error has been set above.  */
+  return req;
+#undef RR
+#undef RD
+#undef RQ
+#undef IM
+#undef SC
+}
+  
+/* Apply the modifier bits in TYPEBITS to *G_TYPE and *G_SIZE in place.
+   Nothing is changed unless N_EQK is set, because the modifier bits share
+   their values with ordinary type bits and only mean "modifier" for operands
+   whose type is derived from the key element.  */
+static void
+neon_modify_type_size (unsigned typebits, enum neon_el_type *g_type,
+                       unsigned *g_size)
+{
+  if ((typebits & N_EQK) == 0)
+    return;
+
+  /* Size adjustment: halving is tested first, so it wins over doubling.  */
+  if (typebits & N_HLF)
+    *g_size /= 2;
+  else if (typebits & N_DBL)
+    *g_size *= 2;
+
+  /* Type override: the first of signed, unsigned, integer, float that is
+     requested is applied.  */
+  if (typebits & N_SGN)
+    *g_type = NT_signed;
+  else if (typebits & N_UNS)
+    *g_type = NT_unsigned;
+  else if (typebits & N_INT)
+    *g_type = NT_integer;
+  else if (typebits & N_FLT)
+    *g_type = NT_float;
+}
+  
+/* Derive an operand's element type from the key element type *KEY.  THISARG
+   is that operand's type constraint; it must have N_EQK set (asserted), and
+   any modifier bits in it are applied to a copy of *KEY, which is returned.
+   *KEY itself is not modified.  */
+
+static struct neon_type_el
+neon_type_promote (struct neon_type_el *key, unsigned thisarg)
+{
+  struct neon_type_el promoted = *key;
+
+  assert ((thisarg & N_EQK) != 0);
+
+  neon_modify_type_size (thisarg, &promoted.type, &promoted.size);
+  return promoted;
+}
+
+/* Convert a Neon element TYPE and SIZE (in bits) into the single matching
+   neon_type_mask bit.  Returns N_UTYP when the combination has no
+   representation (unknown type, or a size that type does not support).  */
+
+static enum neon_type_mask
+type_chk_of_el_type (enum neon_el_type type, unsigned size)
+{
+  /* Per-type lookup rows, indexed by size slot: 8, 16, 32, 64, other.  */
+  static const enum neon_type_mask untyped_row[5]
+    = { N_8, N_16, N_32, N_64, N_UTYP };
+  static const enum neon_type_mask integer_row[5]
+    = { N_I8, N_I16, N_I32, N_I64, N_UTYP };
+  static const enum neon_type_mask float_row[5]
+    = { N_UTYP, N_UTYP, N_F32, N_UTYP, N_UTYP };
+  static const enum neon_type_mask poly_row[5]
+    = { N_P8, N_P16, N_UTYP, N_UTYP, N_UTYP };
+  static const enum neon_type_mask signed_row[5]
+    = { N_S8, N_S16, N_S32, N_S64, N_UTYP };
+  static const enum neon_type_mask unsigned_row[5]
+    = { N_U8, N_U16, N_U32, N_U64, N_UTYP };
+  unsigned slot;
+
+  if (size == 8)
+    slot = 0;
+  else if (size == 16)
+    slot = 1;
+  else if (size == 32)
+    slot = 2;
+  else if (size == 64)
+    slot = 3;
+  else
+    slot = 4;
+
+  switch (type)
+    {
+    case NT_untyped:  return untyped_row[slot];
+    case NT_integer:  return integer_row[slot];
+    case NT_float:    return float_row[slot];
+    case NT_poly:     return poly_row[slot];
+    case NT_signed:   return signed_row[slot];
+    case NT_unsigned: return unsigned_row[slot];
+    default:          return N_UTYP;
+    }
+}
+
+/* Convert compact Neon bitmask type representation to a type and size. Only
+   intended for masks with a single bit set.  If several bits are set, the
+   largest size and the last type in the order signed, unsigned, integer,
+   untyped, poly, float are the ones stored.  If no recognised bit is set,
+   *TYPE and *SIZE are left untouched.  */
+
+static void
+el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
+                     enum neon_type_mask mask)
+{
+  /* Size: test the widest group first so that it takes priority.  */
+  if (mask & (N_S64 | N_U64 | N_I64 | N_64))
+    *size = 64;
+  else if (mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32))
+    *size = 32;
+  else if (mask & (N_S16 | N_U16 | N_I16 | N_16 | N_P16))
+    *size = 16;
+  else if (mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8))
+    *size = 8;
+
+  /* Type: highest-priority class first.  */
+  if (mask & N_F32)
+    *type = NT_float;
+  else if (mask & (N_P8 | N_P16))
+    *type = NT_poly;
+  else if (mask & (N_8 | N_16 | N_32 | N_64))
+    *type = NT_untyped;
+  else if (mask & (N_I8 | N_I16 | N_I32 | N_I64))
+    *type = NT_integer;
+  else if (mask & (N_U8 | N_U16 | N_U32 | N_U64))
+    *type = NT_unsigned;
+  else if (mask & (N_S8 | N_S16 | N_S32 | N_S64))
+    *type = NT_signed;
+}
+
+/* Modify a bitmask of allowed types. This is only needed for type
+   relaxation.
+
+   ALLOWED is a set of concrete neon_type_mask bits and MODS is an operand
+   constraint whose modifier bits (interpreted by neon_modify_type_size) are
+   applied to each allowed type in turn.  Returns the union of the resulting
+   type bits; a modified type that has no mask representation contributes
+   nothing.  */
+
+static unsigned
+modify_types_allowed (unsigned allowed, unsigned mods)
+{
+  unsigned size;
+  enum neon_el_type type;
+  unsigned destmask = 0;
+  int i;
+
+  for (i = 1; i <= N_MAX_NONSPECIAL; i <<= 1)
+    {
+      /* Only convert bits that are actually allowed.  el_type_of_type_chk
+         leaves TYPE and SIZE untouched for an empty mask, so without this
+         check they would be uninitialised or left over from an earlier bit,
+         and types that were never allowed would leak into the result.  */
+      if ((allowed & i) == 0)
+        continue;
+
+      el_type_of_type_chk (&type, &size, allowed & i);
+      neon_modify_type_size (mods, &type, &size);
+      destmask |= type_chk_of_el_type (type, size);
+    }
+
+  return destmask;
+}
+
+/* Check type and return type classification.
+   The manual states (paraphrase): If one datatype is given, it indicates the
+   type given in:
+    - the second operand, if there is one
+    - the operand, if there is no second operand
+    - the result, if there are no operands.
+   This isn't quite good enough though, so we use a concept of a "key" datatype
+   which is set on a per-instruction basis, which is the one which matters when
+   only one data type is written.
+   Note: this function has side-effects (e.g. filling in missing operands). All
+   Neon instructions should call it before performing bit encoding.
+
+   ELS is the number of operands to check, and is followed in the variable
+   arguments by ELS `unsigned' type-constraint masks, one per operand.  NS is
+   only used as the anchor for va_start.  Returns the element type of the key
+   operand, or {NT_invtype, -1} if checking fails (inst.error is set) or if
+   any constraint equals N_IGNORE_TYPE (inst.error is left alone).
+*/
+
+static struct neon_type_el
+neon_check_type (unsigned els, enum neon_shape ns, ...)
+{
+  va_list ap;
+  unsigned i, pass, key_el = 0;
+  unsigned types[NEON_MAX_TYPE_ELS];
+  enum neon_el_type k_type = NT_invtype;
+  unsigned k_size = -1u;
+  struct neon_type_el badtype = {NT_invtype, -1};
+  unsigned key_allowed = 0;
+
+  /* Optional registers in Neon instructions are always (not) in operand 1.
+     Fill in the missing operand here, if it was omitted.  */
+  if (els > 1 && !inst.operands[1].present)
+    inst.operands[1] = inst.operands[0];
+
+  /* Suck up all the varargs.  The last argument carrying N_KEY becomes the
+     key element; if none does, element 0 is used.  */
+  va_start (ap, ns);
+  for (i = 0; i < els; i++)
+    {
+      unsigned thisarg = va_arg (ap, unsigned);
+      if (thisarg == N_IGNORE_TYPE)
+        {
+          va_end (ap);
+          return badtype;
+        }
+      types[i] = thisarg;
+      if ((thisarg & N_KEY) != 0)
+        key_el = i;
+    }
+  va_end (ap);
+
+  /* Duplicate inst.vectype elements here as necessary.
+     FIXME: No idea if this is exactly the same as the ARM assembler,
+     particularly when an insn takes one register and one non-register
+     operand. */
+  if (inst.vectype.elems == 1 && els > 1)
+    {
+      unsigned j;
+      inst.vectype.elems = els;
+      /* The single written type belongs to the key element; every other
+         element is derived from it using that element's own constraint.  */
+      inst.vectype.el[key_el] = inst.vectype.el[0];
+      for (j = 0; j < els; j++)
+        {
+          if (j != key_el)
+            inst.vectype.el[j] = neon_type_promote (&inst.vectype.el[key_el],
+                                                    types[j]);
+        }
+    }
+  else if (inst.vectype.elems != els)
+    {
+      inst.error = _("type specifier has the wrong number of parts");
+      return badtype;
+    }
+
+  /* Pass 0 records the key element's (possibly decayed) type, size and
+     allowed-type mask.  Pass 1 checks every element against them.  */
+  for (pass = 0; pass < 2; pass++)
+    {
+      for (i = 0; i < els; i++)
+        {
+          unsigned thisarg = types[i];
+          /* In pass 1, an N_EQK operand is checked against the key's allowed
+             types after this operand's modifiers have been applied.  */
+          unsigned types_allowed = ((thisarg & N_EQK) != 0 && pass != 0)
+            ? modify_types_allowed (key_allowed, thisarg) : thisarg;
+          enum neon_el_type g_type = inst.vectype.el[i].type;
+          unsigned g_size = inst.vectype.el[i].size;
+
+          /* Decay more-specific signed & unsigned types to sign-insensitive
+            integer types if sign-specific variants are unavailable.  */
+          if ((g_type == NT_signed || g_type == NT_unsigned)
+             && (types_allowed & N_SU_ALL) == 0)
+           g_type = NT_integer;
+
+          /* If only untyped args are allowed, decay any more specific types to
+            them. Some instructions only care about signs for some element
+            sizes, so handle that properly.  */
+          if ((g_size == 8 && (types_allowed & N_8) != 0)
+             || (g_size == 16 && (types_allowed & N_16) != 0)
+             || (g_size == 32 && (types_allowed & N_32) != 0)
+             || (g_size == 64 && (types_allowed & N_64) != 0))
+           g_type = NT_untyped;
+
+          if (pass == 0)
+            {
+              if ((thisarg & N_KEY) != 0)
+                {
+                  k_type = g_type;
+                  k_size = g_size;
+                  key_allowed = thisarg & ~N_KEY;
+                }
+            }
+          else
+            {
+              if ((thisarg & N_EQK) == 0)
+                {
+                  /* Independently-typed operand: the written type must be
+                     one of the explicitly allowed types.  */
+                  unsigned given_type = type_chk_of_el_type (g_type, g_size);
+
+                  if ((given_type & types_allowed) == 0)
+                    {
+                     inst.error = _("bad type in Neon instruction");
+                     return badtype;
+                    }
+                }
+              else
+                {
+                  /* Key-derived operand: the written type must equal the
+                     key type after this operand's modifiers are applied.  */
+                  enum neon_el_type mod_k_type = k_type;
+                  unsigned mod_k_size = k_size;
+                  neon_modify_type_size (thisarg, &mod_k_type, &mod_k_size);
+                  if (g_type != mod_k_type || g_size != mod_k_size)
+                    {
+                      inst.error = _("inconsistent types in Neon instruction");
+                      return badtype;
+                    }
+                }
+            }
+        }
+    }
+
+  return inst.vectype.el[key_el];
+}
+
+/* Finish the encoding of a Neon data-processing instruction word I for the
+   current instruction set.  In ARM mode the word is ORed with 0xf2000000.
+   In Thumb mode the U bit, which the encoders place at bit 24, is moved to
+   bit 28 and the word is then ORed with 0xef000000.  */
+
+static unsigned
+neon_dp_fixup (unsigned i)
+{
+  if (!thumb_mode)
+    return i | 0xf2000000;
+
+  /* Thumb: relocate the U bit from bit 24 to bit 28.  */
+  if ((i & (1 << 24)) != 0)
+    i |= 1 << 28;
+  i &= ~(1 << 24);
+
+  return i | 0xef000000;
+}
+
+/* Map an element size in bits (8, 16, 32, 64) to its two-bit size code
+   (0, 1, 2, 3), i.e. log2 of the size minus 3.  */
+
+static unsigned
+neon_logbits (unsigned x)
+{
+  /* ffs gives the 1-based position of the lowest set bit.  */
+  const int lowest_set_bit = ffs (x);
+
+  return lowest_set_bit - 4;
+}
+
+/* Split a register number for encoding: LOW4 is bits [3:0] and HI1 is
+   bit 4, which the encoders below place in separate instruction fields.  */
+#define LOW4(R) ((R) & 0xf)
+#define HI1(R) (((R) >> 4) & 1)
+
+/* Encode insns with bit pattern:
+
+  |28/24|23|22 |21 20|19 16|15 12|11    8|7|6|5|4|3  0|
+  |  U  |x |D  |size | Rn  | Rd  |x x x x|N|Q|M|x| Rm |
+  
+  SIZE is passed in bits. -1 means size field isn't changed, in case it has a
+  different meaning for some instruction.
+
+  Operands 0, 1 and 2 supply Rd:D, Rn:N and Rm:M respectively.  ISQUAD and
+  UBIT are treated as booleans for the Q and U bits.  If FIRST_OPTIONAL is
+  nonzero and operand 0 is absent, operand 1's register is used for it.
+  The result is passed through neon_dp_fixup.  */
+
+static void
+neon_three_same (int first_optional, int isquad, int ubit, int size)
+{
+  /* FIXME optional argument handling.  */
+  if (first_optional && !inst.operands[0].present)
+    inst.operands[0].reg = inst.operands[1].reg;
+
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  inst.instruction |= LOW4 (inst.operands[2].reg);
+  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+  inst.instruction |= (isquad != 0) << 6;
+  /* U is always written at bit 24 here; neon_dp_fixup moves it for Thumb.  */
+  inst.instruction |= (ubit != 0) << 24;
+  if (size != -1)
+    inst.instruction |= neon_logbits (size) << 20;
+  
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode instructions of the form:
+
+  |28/24|23|22|21 20|19 18|17 16|15 12|11      7|6|5|4|3  0|
+  |  U  |x |D |x  x |size |x  x | Rd  |x x x x x|Q|M|x| Rm |
+
+  Don't write size if SIZE == -1.
+
+  Operand 0 supplies Rd:D and operand 1 supplies Rm:M.  QBIT and UBIT are
+  treated as booleans.  SIZE is in bits and is written as its two-bit code
+  at bits [19:18].  The result is passed through neon_dp_fixup.  */
+
+static void
+neon_two_same (int qbit, int ubit, int size)
+{
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[1].reg);
+  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+  inst.instruction |= (qbit != 0) << 6;
+  inst.instruction |= (ubit != 0) << 24;
+
+  if (size != -1)
+    inst.instruction |= neon_logbits (size) << 18;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Neon instruction encoders, in approximate order of appearance.  */
+
+/* Three-register (DDD or QQQ) encoder for signed/unsigned element types of
+   8, 16 or 32 bits.  The third operand carries the key type and the other
+   two must match it.  U is set when the key type is unsigned.  */
+static void
+do_neon_dyadic_i_su (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+  struct neon_type_el et = neon_check_type (3, rs,
+    N_EQK, N_EQK, N_SU_32 | N_KEY);
+  neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size);
+}
+
+/* As do_neon_dyadic_i_su, but the key type may also be 64 bits wide
+   (N_SU_ALL instead of N_SU_32).  */
+static void
+do_neon_dyadic_i64_su (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+  struct neon_type_el et = neon_check_type (3, rs,
+    N_EQK, N_EQK, N_SU_ALL | N_KEY);
+  neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size);
+}
+
+/* Encode a two-register shift-by-immediate instruction.  Operand 0 supplies
+   Rd (bits [15:12] plus bit 22) and operand 1 supplies Rm (bits [3:0] plus
+   bit 5).  ISQUAD sets bit 6.  IMMBITS is ORed in at bit 16 without range
+   checking.  ET.size selects the element-size marker (see below).  The U
+   bit (bit 24) is written from UVAL only when WRITE_UBIT is nonzero.  The
+   result is passed through neon_dp_fixup.  */
+static void
+neon_imm_shift (int write_ubit, int uval, int isquad, struct neon_type_el et,
+                unsigned immbits)
+{
+  /* Element size in bytes: 1, 2, 4 or 8 for the supported types.  */
+  unsigned size = et.size >> 3;
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[1].reg);
+  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+  inst.instruction |= (isquad != 0) << 6;
+  inst.instruction |= immbits << 16;
+  /* A byte count of 8 (64-bit elements) sets bit 7; smaller sizes set one
+     of bits 19, 20 or 21.  */
+  inst.instruction |= (size >> 3) << 7;
+  inst.instruction |= (size & 0x7) << 19;
+  if (write_ubit)
+    inst.instruction |= (uval != 0) << 24;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encoder taking either an immediate or a register as operand 2.
+   Immediate form: shape DDI/QQI, key type any integer size, the IMMED
+   encoding is selected and the U bit is not written.
+   Register form: shape DDD/QQQ, key is operand 1 (signed or unsigned),
+   operand 2 must be the signed type of the same size, the INTEGER encoding
+   is selected and U follows the key's signedness.  */
+static void
+do_neon_shl_imm (void)
+{
+  if (!inst.operands[2].isreg)
+    {
+      enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+      struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      neon_imm_shift (FALSE, 0, rs == NS_QQI, et, inst.operands[2].imm);
+    }
+  else
+    {
+      enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+      struct neon_type_el et = neon_check_type (3, rs,
+        N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size);
+    }
+}
+
+/* As do_neon_shl_imm, except that the immediate form requires a signed or
+   unsigned key type (N_SU_ALL) and writes the U bit from its signedness.  */
+static void
+do_neon_qshl_imm (void)
+{
+  if (!inst.operands[2].isreg)
+    {
+      enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+      struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      neon_imm_shift (TRUE, et.type == NT_unsigned, rs == NS_QQI, et,
+                      inst.operands[2].imm);
+    }
+  else
+    {
+      enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+      struct neon_type_el et = neon_check_type (3, rs,
+        N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size);
+    }
+}
+
+/* Pick the cmode value for a logic-immediate instruction.  IMMEDIATE must
+   have at most one non-zero byte; that byte is stored in *IMMBITS and the
+   cmode for its position is returned (0x1/0x3/0x5/0x7 for bytes 0 to 3, or
+   0x9/0xb for bytes 0 to 1 when the effective size is 16).  On failure
+   inst.error is set and FAIL is returned.  */
+static int
+neon_cmode_for_logic_imm (unsigned immediate, unsigned *immbits, int size)
+{
+  /* Handle .I8 and .I64 as pseudo-instructions.  */
+  switch (size)
+    {
+    case 8:
+      /* Unfortunately, this will make everything apart from zero out-of-range.
+         FIXME is this the intended semantics? There doesn't seem much point in
+         accepting .I8 if so.  */
+      immediate |= immediate << 8;
+      size = 16;
+      break;
+    case 64:
+      /* Similarly, anything other than zero will be replicated in bits [63:32],
+         which probably isn't what we want if we specified .I64.  */
+      if (immediate != 0)
+        goto bad_immediate;
+      size = 32;
+      break;
+    default: ;
+    }
+
+  /* Find which single byte holds the value.  Zero matches the first test.  */
+  if (immediate == (immediate & 0x000000ff))
+    {
+      *immbits = immediate;
+      return (size == 16) ? 0x9 : 0x1;
+    }
+  else if (immediate == (immediate & 0x0000ff00))
+    {
+      *immbits = immediate >> 8;
+      return (size == 16) ? 0xb : 0x3;
+    }
+  else if (immediate == (immediate & 0x00ff0000))
+    {
+      *immbits = immediate >> 16;
+      return 0x5;
+    }
+  else if (immediate == (immediate & 0xff000000))
+    {
+      *immbits = immediate >> 24;
+      return 0x7;
+    }
+
+  bad_immediate:
+  inst.error = _("immediate value out of range");
+  return FAIL;
+}
+
+/* Return 1 if each of the four low-order bytes of IMM is either 0x00 or
+   0xff (form 0bAAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD for bits A, B, C, D),
+   otherwise 0.  */
+
+static int
+neon_bits_same_in_bytes (unsigned imm)
+{
+  unsigned shift;
+
+  for (shift = 0; shift < 32; shift += 8)
+    {
+      unsigned byte = (imm >> shift) & 0xff;
+
+      if (byte != 0 && byte != 0xff)
+        return 0;
+    }
+
+  return 1;
+}
+
+/* For an immediate whose bytes are each all-zeros or all-ones, return the
+   four-bit value 0bABCD made from the lowest bit of each byte, with the
+   most significant byte first.  */
+
+static unsigned
+neon_squash_bits (unsigned imm)
+{
+  unsigned squashed = 0;
+  unsigned byte;
+
+  for (byte = 0; byte < 4; byte++)
+    squashed |= ((imm >> (8 * byte)) & 1) << byte;
+
+  return squashed;
+}
+
+/* Returns 1 if IMM has the "quarter-precision" float format
+   0baBbbbbbc defgh000 00000000 00000000, where B is the inverse of b:
+   the low 19 bits are zero, bits [29:25] are all equal, and bit 30 is
+   their complement.  Returns 0 otherwise.  */
+
+static int
+neon_is_quarter_float (unsigned imm)
+{
+  /* Required value of bits [30:25], chosen from bit 29.  */
+  const unsigned exp_pattern = (imm & 0x20000000) ? 0x3e000000 : 0x40000000;
+
+  if ((imm & 0x0007ffff) != 0)
+    return 0;
+
+  return (imm & 0x7e000000) == exp_pattern;
+}
+
+/* Compress a value in the quarter-precision float layout
+   0baBbbbbbc defgh000 00000000 00000000 to 0b...000 abcdefgh.
+   Bits [25:19] of IMM give bcdefgh; bit 31 gives the sign a, which becomes
+   bit 7 of the result.  Only the sign bit is taken from the top byte:
+   ORing in the whole of IMM >> 24 would smear the exponent bits over
+   b..g.  */
+
+static unsigned
+neon_qfloat_bits (unsigned imm)
+{
+  return ((imm >> 19) & 0x7f) | ((imm >> 24) & 0x80);
+}
+
+/* Returns CMODE. IMMBITS [7:0] is set to bits suitable for inserting into
+   the instruction. *OP is passed as the initial value of the op field, and
+   may be set to a different value depending on the constant (i.e.
+   "MOV I64, 0bAAAAAAAABBBB..." which uses OP = 1 despite being MOV not
+   MVN).
+
+   IMMLO and IMMHI are the low and high 32 bits of the constant and SIZE is
+   the element size in bits.  Returns FAIL, without setting inst.error, when
+   the constant cannot be represented for that size and *OP.  */
+
+static int
+neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, unsigned *immbits,
+                         int *op, int size)
+{
+  if (size == 64 && neon_bits_same_in_bytes (immhi)
+      && neon_bits_same_in_bytes (immlo))
+    {
+      /* Check this one first so we don't have to bother with immhi in later
+         tests.  */
+      if (*op == 1)
+        return FAIL;
+      *immbits = (neon_squash_bits (immhi) << 4) | neon_squash_bits (immlo);
+      *op = 1;
+      return 0xe;
+    }
+  else if (immhi != 0)
+    return FAIL;
+  /* From here on the constant fits in IMMLO.  Value in byte 0 only.  */
+  else if (immlo == (immlo & 0x000000ff))
+    {
+      /* 64-bit case was already handled. Don't allow MVN with 8-bit
+         immediate.  */
+      if ((size != 8 && size != 16 && size != 32)
+          || (size == 8 && *op == 1))
+        return FAIL;
+      *immbits = immlo;
+      return (size == 8) ? 0xe : (size == 16) ? 0x8 : 0x0;
+    }
+  /* Value in byte 1 only.  */
+  else if (immlo == (immlo & 0x0000ff00))
+    {
+      if (size != 16 && size != 32)
+        return FAIL;
+      *immbits = immlo >> 8;
+      return (size == 16) ? 0xa : 0x2;
+    }
+  /* Value in byte 2 only.  */
+  else if (immlo == (immlo & 0x00ff0000))
+    {
+      if (size != 32)
+        return FAIL;
+      *immbits = immlo >> 16;
+      return 0x4;
+    }
+  /* Value in byte 3 only.  */
+  else if (immlo == (immlo & 0xff000000))
+    {
+      if (size != 32)
+        return FAIL;
+      *immbits = immlo >> 24;
+      return 0x6;
+    }
+  /* 0x0000XXff: value in byte 1 with byte 0 all ones.  */
+  else if (immlo == ((immlo & 0x0000ff00) | 0x000000ff))
+    {
+      if (size != 32)
+        return FAIL;
+      *immbits = (immlo >> 8) & 0xff;
+      return 0xc;
+    }
+  /* 0x00XXffff: value in byte 2 with bytes 0 and 1 all ones.  */
+  else if (immlo == ((immlo & 0x00ff0000) | 0x0000ffff))
+    {
+      if (size != 32)
+        return FAIL;
+      *immbits = (immlo >> 16) & 0xff;
+      return 0xd;
+    }
+  /* Quarter-precision float constant; not accepted when *OP is 1.  */
+  else if (neon_is_quarter_float (immlo))
+    {
+      if (size != 32 || *op == 1)
+        return FAIL;
+      *immbits = neon_qfloat_bits (immlo);
+      return 0xf;
+    }
+
+  return FAIL;
+}
+
+/* Scatter the 8-bit immediate IMMBITS (abcdefgh) into the current
+   instruction at the following locations:
+
+  |28/24|23     19|18 16|15                    4|3     0|
+  |  a  |x x x x x|b c d|x x x x x x x x x x x x|e f g h|
+
+  Bit a is written at bit 24 here.  This function is used by
+  VMOV/VMVN/VORR/VBIC.  */
+
+static void
+neon_write_immbits (unsigned immbits)
+{
+  unsigned efgh = immbits & 0xf;
+  unsigned bcd = (immbits >> 4) & 0x7;
+  unsigned a = (immbits >> 7) & 0x1;
+
+  inst.instruction |= efgh | (bcd << 16) | (a << 24);
+}
+
+/* Invert low-order SIZE bits of XHI:XLO.  Either pointer may be null, in
+   which case that word is treated as zero and nothing is stored through
+   it.  SIZE must be 8, 16, 32 or 64; any other value aborts.  */
+
+static void
+neon_invert_size (unsigned *xlo, unsigned *xhi, int size)
+{
+  unsigned lo = xlo ? *xlo : 0;
+  unsigned hi = xhi ? *xhi : 0;
+
+  if (size == 8)
+    lo = ~lo & 0xff;
+  else if (size == 16)
+    lo = ~lo & 0xffff;
+  else if (size == 32 || size == 64)
+    {
+      /* Only the 64-bit case touches the high word.  */
+      if (size == 64)
+        hi = ~hi & 0xffffffff;
+      lo = ~lo & 0xffffffff;
+    }
+  else
+    abort ();
+
+  if (xlo)
+    *xlo = lo;
+
+  if (xhi)
+    *xhi = hi;
+}
+
+/* Encode the logic operations handled here (VBIC, VORR, VAND, VORN).  With
+   a register third operand the three-register form is used.  Otherwise the
+   register/immediate form is encoded; there VAND and VORN are
+   pseudo-instructions for VBIC and VORR with the immediate inverted over
+   the element size.  */
+
+static void
+do_neon_logic (void)
+{
+  if (inst.operands[2].present && inst.operands[2].isreg)
+    {
+      enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+      neon_check_type (3, rs, N_IGNORE_TYPE);
+      /* U bit and size field were set as part of the bitmask.  */
+      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      neon_three_same (TRUE, rs == NS_QQQ, 0, -1);
+    }
+  else
+    {
+      enum neon_shape rs = neon_check_shape (NS_DI_QI);
+      struct neon_type_el et = neon_check_type (1, rs, N_I8 | N_I16 | N_I32
+                                                | N_I64 | N_F32);
+      enum neon_opc opcode = inst.instruction & 0x0fffffff;
+      unsigned immbits;
+      int cmode;
+
+      if (et.type == NT_invtype)
+        return;
+
+      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      immbits = inst.operands[1].imm;
+
+      switch (opcode)
+        {
+        case N_MNEM_vand:
+        case N_MNEM_vorn:
+          /* Pseudo-instructions for VBIC and VORR respectively: encode the
+             inverted immediate.  */
+          neon_invert_size (&immbits, 0, et.size);
+          /* fall through.  */
+
+        case N_MNEM_vbic:
+        case N_MNEM_vorr:
+          cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size);
+          break;
+
+        default:
+          abort ();
+        }
+
+      if (cmode == FAIL)
+        return;
+
+      inst.instruction |= (rs == NS_QI) << 6;
+      inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+      inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+      inst.instruction |= cmode << 8;
+      neon_write_immbits (immbits);
+
+      inst.instruction = neon_dp_fixup (inst.instruction);
+    }
+}
+
+/* Encode a three-register bitfield operation.  No type check is made
+   here; the instruction is encoded with a zero U argument and no size.  */
+
+static void
+do_neon_bitfield (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+  int quad = rs == NS_QQQ;
+
+  /* FIXME: Check that no type was given.  */
+  neon_three_same (FALSE, quad, 0, -1);
+}
+
+/* Encode a three-register operation whose operands all share one element
+   type drawn from TYPES.  Float elements select the float encoding, with
+   no size passed on.  Anything else selects the integer encoding, passing
+   the element size and a flag that is set when the element type equals
+   UBIT_MEANING.  */
+
+static void
+neon_dyadic (enum neon_el_type ubit_meaning, unsigned types)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+  struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, types | N_KEY);
+
+  if (et.type != NT_float)
+    {
+      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      neon_three_same (TRUE, rs == NS_QQQ, et.type == ubit_meaning, et.size);
+      return;
+    }
+
+  inst.instruction = NEON_ENC_FLOAT (inst.instruction);
+  neon_three_same (TRUE, rs == NS_QQQ, 0, -1);
+}
+
+/* Three-register operation on N_SUF_32 types; an unsigned element type
+   sets the flag that neon_dyadic derives from its first argument.  */
+
+static void
+do_neon_dyadic_if_su (void)
+{
+  neon_dyadic (NT_unsigned, N_SUF_32);
+}
+
+/* As do_neon_dyadic_if_su, for instructions that only allow D
+   registers.  */
+
+static void
+do_neon_dyadic_if_su_d (void)
+{
+  /* The D-register-only constraint is enforced during operand parsing, so
+     nothing extra is needed here.  */
+  neon_dyadic (NT_unsigned, N_SUF_32);
+}
+
+/* Three-register operation on sign-agnostic integer or float elements
+   (N_IF_32).  */
+
+static void
+do_neon_dyadic_if_i (void)
+{
+  /* The "untyped" case can't happen.  Passing it as the U-bit meaning stops
+     the "U" bit being affected if unsigned args are specified: signedness
+     is not part of an N_IF_32 type, so writing an unsigned element type
+     must not alter the encoding (same approach as do_neon_addsub_if_i).  */
+  neon_dyadic (NT_untyped, N_IF_32);
+}
+
+/* Three-register operation on sign-agnostic integer or float elements
+   (N_IF_32); the "_d" variant by name, presumably restricted to D
+   registers by operand parsing as for do_neon_dyadic_if_su_d.  */
+
+static void
+do_neon_dyadic_if_i_d (void)
+{
+  /* The "untyped" case can't happen.  Passing it as the U-bit meaning stops
+     the "U" bit being affected if unsigned args are specified (same
+     approach as do_neon_addsub_if_i).  */
+  neon_dyadic (NT_untyped, N_IF_32);
+}
+
+/* Three-register add/subtract on N_IF_32 or I64 elements.  */
+
+static void
+do_neon_addsub_if_i (void)
+{
+  /* NT_untyped never matches a checked element type, so using it as the
+     U-bit meaning keeps the "U" bit from being affected when unsigned
+     args are specified.  */
+  neon_dyadic (NT_untyped, N_IF_32 | N_I64);
+}
+
+/* Exchange operands 1 and 2.  When operand 1 (the optional argument) was
+   omitted, the two-operand spelling
+     V<op> A,B     (A is operand 0, B is operand 2)
+   must come out as
+     V<op> A,B,A
+   rather than
+     V<op> A,B,B
+   so in that case operand 2 moves to slot 1 and operand 0 is copied into
+   slot 2.  */
+
+static void
+neon_exchange_operands (void)
+{
+  void *scratch = alloca (sizeof (inst.operands[0]));
+
+  if (!inst.operands[1].present)
+    {
+      inst.operands[1] = inst.operands[2];
+      inst.operands[2] = inst.operands[0];
+      return;
+    }
+
+  /* Plain swap of operands[1] and operands[2] through SCRATCH.  */
+  memcpy (scratch, &inst.operands[1], sizeof (inst.operands[0]));
+  inst.operands[1] = inst.operands[2];
+  memcpy (&inst.operands[2], scratch, sizeof (inst.operands[0]));
+}
+
+/* Encode a comparison.  With a register third operand this is a dyadic
+   operation on REGTYPES, with operands 1 and 2 exchanged first when INVERT
+   is set.  Otherwise the register/immediate form is encoded for
+   IMMTYPES.  */
+
+static void
+neon_compare (unsigned regtypes, unsigned immtypes, int invert)
+{
+  enum neon_shape rs;
+  struct neon_type_el et;
+
+  if (inst.operands[2].isreg)
+    {
+      if (invert)
+        neon_exchange_operands ();
+      neon_dyadic (NT_unsigned, regtypes);
+      return;
+    }
+
+  rs = neon_check_shape (NS_DDI_QQI);
+  et = neon_check_type (2, rs, N_EQK, immtypes | N_KEY);
+
+  inst.instruction = NEON_ENC_IMMED (inst.instruction);
+  /* Destination register.  */
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  /* Source register.  */
+  inst.instruction |= LOW4 (inst.operands[1].reg);
+  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+  /* Quad flag, float flag and element size.  */
+  inst.instruction |= (rs == NS_QQI) << 6;
+  inst.instruction |= (et.type == NT_float) << 10;
+  inst.instruction |= neon_logbits (et.size) << 18;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Comparison on N_SUF_32 register types, or signed/float types against an
+   immediate; operands are used in the order written.  */
+
+static void
+do_neon_cmp (void)
+{
+  neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE);
+}
+
+/* As do_neon_cmp, but asks neon_compare to exchange operands 1 and 2 in
+   the register form.  */
+
+static void
+do_neon_cmp_inv (void)
+{
+  neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE);
+}
+
+/* Equality comparison: N_IF_32 types for both the register and the
+   immediate forms, operands not exchanged.  */
+
+static void
+do_neon_ceq (void)
+{
+  neon_compare (N_IF_32, N_IF_32, FALSE);
+}
+
+/* For multiply instructions, we have the possibility of 16-bit or 32-bit
+   scalars, which are encoded in 5 bits, M : Rm.
+   For 16-bit scalars, the register is encoded in Rm[2:0] and the index in
+   M:Rm[3], and for 32-bit scalars, the register is encoded in Rm[3:0] and the
+   index in M.
+
+   SCALAR holds the register number in its bits [..:3] and the element
+   index in bits [2:0].  Returns the 5-bit encoding, or reports an error
+   and returns 0 when ELSIZE is neither 16 nor 32 or the register/index is
+   out of range for that size.  */
+
+static unsigned
+neon_scalar_for_mul (unsigned scalar, unsigned elsize)
+{
+  unsigned regno = scalar >> 3;
+  unsigned elno = scalar & 7;
+
+  if (elsize == 16)
+    {
+      if (regno <= 7 && elno <= 3)
+        return regno | (elno << 3);
+    }
+  else if (elsize == 32)
+    {
+      if (regno <= 15 && elno <= 1)
+        return regno | (elno << 4);
+    }
+
+  as_bad (_("Scalar out of range for multiply instruction"));
+
+  return 0;
+}
+
+/* Encode multiply / multiply-accumulate scalar instructions.  ET gives the
+   element type and size; a non-zero UBIT sets bit 24.  The scalar in
+   operand 2 is converted with neon_scalar_for_mul.  */
+
+static void
+neon_mul_mac (struct neon_type_el et, int ubit)
+{
+  unsigned scalar = neon_scalar_for_mul (inst.operands[2].reg, et.size);
+
+  /* Destination register.  */
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  /* First source register.  */
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  /* Encoded scalar.  */
+  inst.instruction |= LOW4 (scalar);
+  inst.instruction |= HI1 (scalar) << 5;
+  /* Float flag, element size and bit 24.  */
+  inst.instruction |= (et.type == NT_float) << 8;
+  inst.instruction |= neon_logbits (et.size) << 20;
+  inst.instruction |= (ubit != 0) << 24;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Multiply-accumulate: by-scalar form when operand 2 is a scalar, otherwise
+   the ordinary three-register form.  */
+
+static void
+do_neon_mac_maybe_scalar (void)
+{
+  enum neon_shape rs;
+  struct neon_type_el et;
+
+  if (!inst.operands[2].isscalar)
+    {
+      do_neon_dyadic_if_i ();
+      return;
+    }
+
+  rs = neon_check_shape (NS_DDS_QQS);
+  et = neon_check_type (3, rs,
+    N_EQK, N_EQK, N_I16 | N_I32 | N_F32 | N_KEY);
+  inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+  /* For this form the last argument carries the quad flag.  */
+  neon_mul_mac (et, rs == NS_QQS);
+}
+
+/* Three-register operation on 8-, 16- or 32-bit elements of any type; only
+   the element size is passed on to the encoder.  */
+
+static void
+do_neon_tst (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+  struct neon_type_el et = neon_check_type (3, rs,
+    N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+  int quad = rs == NS_QQQ;
+
+  neon_three_same (TRUE, quad, 0, et.size);
+}
+
+/* VMUL with 3 registers allows the P8 type. The scalar version supports the
+   same types as the MAC equivalents, so it is handed to
+   do_neon_mac_maybe_scalar. The polynomial type for this instruction is
+   encoded the same as the integer type.  */
+
+static void
+do_neon_mul (void)
+{
+  if (inst.operands[2].isscalar)
+    {
+      do_neon_mac_maybe_scalar ();
+      return;
+    }
+
+  neon_dyadic (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8);
+}
+
+/* Encode a saturating doubling multiply-high (per the function name), on
+   S16 or S32 elements, in either by-scalar or three-register form.  */
+
+static void
+do_neon_qdmulh (void)
+{
+  enum neon_shape rs;
+  struct neon_type_el et;
+
+  if (inst.operands[2].isscalar)
+    {
+      rs = neon_check_shape (NS_DDS_QQS);
+      et = neon_check_type (3, rs,
+        N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+      inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+      neon_mul_mac (et, rs == NS_QQS);
+      return;
+    }
+
+  rs = neon_check_shape (NS_DDD_QQQ);
+  et = neon_check_type (3, rs,
+    N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  /* The U bit (rounding) comes from bit mask.  */
+  neon_three_same (TRUE, rs == NS_QQQ, 0, et.size);
+}
+
+/* Three-register F32-only absolute comparison (per the function name),
+   encoded with the U argument set.  */
+
+static void
+do_neon_fcmp_absolute (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+  int quad = rs == NS_QQQ;
+
+  neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+  /* Size field comes from bit mask.  */
+  neon_three_same (TRUE, quad, 1, -1);
+}
+
+/* As do_neon_fcmp_absolute, with operands 1 and 2 exchanged first.  */
+
+static void
+do_neon_fcmp_absolute_inv (void)
+{
+  neon_exchange_operands ();
+  do_neon_fcmp_absolute ();
+}
+
+/* Three-register F32-only operation, encoded with the U argument clear and
+   no size passed on.  */
+
+static void
+do_neon_step (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+  int quad = rs == NS_QQQ;
+
+  neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+  neon_three_same (TRUE, quad, 0, -1);
+}
+
+/* Encode a two-register operation (absolute value / negate, per the
+   function name) on signed 8/16/32-bit or F32 elements.  */
+
+static void
+do_neon_abs_neg (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+  /* The shape has only two operands, so check two type entries with the
+     key on the source operand, as the other two-operand encoders in this
+     file do (e.g. do_neon_rev), rather than keying on a third operand that
+     does not exist.  */
+  struct neon_type_el et = neon_check_type (2, rs,
+    N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY);
+
+  /* Destination register.  */
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  /* Source register.  */
+  inst.instruction |= LOW4 (inst.operands[1].reg);
+  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+  /* Quad flag, float flag and element size.  */
+  inst.instruction |= (rs == NS_QQ) << 6;
+  inst.instruction |= (et.type == NT_float) << 10;
+  inst.instruction |= neon_logbits (et.size) << 18;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Shift-left-and-insert (per the function name) by immediate; the shift
+   must lie in [0, element size - 1].  */
+
+static void
+do_neon_sli (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+  struct neon_type_el et = neon_check_type (2, rs,
+    N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+  int shift = inst.operands[2].imm;
+
+  constraint (shift < 0 || (unsigned)shift >= et.size,
+              _("immediate out of range for insert"));
+  neon_imm_shift (FALSE, 0, rs == NS_QQI, et, shift);
+}
+
+/* Shift-right-and-insert (per the function name) by immediate; the shift
+   must lie in [1, element size] and is passed on as size - shift.  */
+
+static void
+do_neon_sri (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+  struct neon_type_el et = neon_check_type (2, rs,
+    N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+  int shift = inst.operands[2].imm;
+
+  constraint (shift < 1 || (unsigned)shift > et.size,
+              _("immediate out of range for insert"));
+  neon_imm_shift (FALSE, 0, rs == NS_QQI, et, et.size - shift);
+}
+
+/* Saturating shift left by immediate with unsigned result (per the
+   function name and type masks); the shift must lie in
+   [0, element size - 1].  */
+
+static void
+do_neon_qshlu_imm (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+  struct neon_type_el et = neon_check_type (2, rs,
+    N_EQK | N_UNS, N_S8 | N_S16 | N_S32 | N_S64 | N_KEY);
+  int shift = inst.operands[2].imm;
+
+  constraint (shift < 0 || (unsigned)shift >= et.size,
+              _("immediate out of range for shift"));
+  /* Only the 'U present' variant of the instruction is encoded here.
+     Signed types leave OP (bit 8) at 0; unsigned types set OP to 1.  */
+  inst.instruction |= (et.type == NT_unsigned) << 8;
+  /* Everything else matches the other immediate shifts.  */
+  neon_imm_shift (FALSE, 0, rs == NS_QQI, et, shift);
+}
+
+/* Saturating narrowing move where operands can be signed or unsigned, and
+   the destination has the same signedness.  */
+
+static void
+do_neon_qmovn (void)
+{
+  struct neon_type_el et = neon_check_type (2, NS_DQ,
+    N_EQK | N_HLF, N_SU_16_64 | N_KEY);
+
+  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  /* 0xc0 for unsigned sources, 0x80 for everything else.  */
+  inst.instruction |= (et.type == NT_unsigned) ? 0xc0 : 0x80;
+  /* The size passed on is that of the narrowed elements.  */
+  neon_two_same (0, 1, et.size / 2);
+}
+
+/* Saturating narrowing move with unsigned results. Operands must be
+   signed.  */
+
+static void
+do_neon_qmovun (void)
+{
+  struct neon_type_el et = neon_check_type (2, NS_DQ,
+    N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY);
+
+  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  /* The size passed on is that of the narrowed elements.  */
+  neon_two_same (0, 1, et.size / 2);
+}
+
+/* Saturating narrowing right shift by immediate.  A zero shift is
+   re-dispatched as the equivalent saturating narrowing move; otherwise the
+   shift must lie in [1, narrowed element size].  */
+
+static void
+do_neon_rshift_sat_narrow (void)
+{
+  /* FIXME: Types for narrowing. If operands are signed, results can be signed
+     or unsigned. If operands are unsigned, results must also be unsigned.  */
+  struct neon_type_el et = neon_check_type (2, NS_DQI,
+    N_EQK | N_HLF, N_SU_16_64 | N_KEY);
+  int shift = inst.operands[2].imm;
+
+  /* Working with the narrowed size gets the bounds check, size encoding and
+     immediate bits calculation right.  */
+  et.size /= 2;
+
+  /* VQ{R}SHRN.I<size> <Dd>, <Qm>, #0 is a synonym for
+     VQMOVN.I<size> <Dd>, <Qm>.  */
+  if (shift == 0)
+    {
+      inst.operands[2].present = 0;
+      inst.instruction = N_MNEM_vqmovn;
+      do_neon_qmovn ();
+      return;
+    }
+
+  constraint (shift < 1 || (unsigned)shift > et.size,
+              _("immediate out of range"));
+  neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, et.size - shift);
+}
+
+/* Saturating narrowing right shift by immediate, signed operands with
+   unsigned results.  A zero shift is re-dispatched as the equivalent
+   narrowing move; otherwise the shift must lie in
+   [1, narrowed element size].  */
+
+static void
+do_neon_rshift_sat_narrow_u (void)
+{
+  /* FIXME: Types for narrowing. If operands are signed, results can be signed
+     or unsigned. If operands are unsigned, results must also be unsigned.  */
+  struct neon_type_el et = neon_check_type (2, NS_DQI,
+    N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY);
+  int shift = inst.operands[2].imm;
+
+  /* Working with the narrowed size gets the bounds check, size encoding and
+     immediate bits calculation right.  */
+  et.size /= 2;
+
+  /* VQSHRUN.I<size> <Dd>, <Qm>, #0 is a synonym for
+     VQMOVUN.I<size> <Dd>, <Qm>.  */
+  if (shift == 0)
+    {
+      inst.operands[2].present = 0;
+      inst.instruction = N_MNEM_vqmovun;
+      do_neon_qmovun ();
+      return;
+    }
+
+  constraint (shift < 1 || (unsigned)shift > et.size,
+              _("immediate out of range"));
+  /* FIXME: The manual is kind of unclear about what value U should have in
+     VQ{R}SHRUN instructions, but U=0, op=0 definitely encodes VRSHR, so it
+     must be 1.  */
+  neon_imm_shift (TRUE, 1, 0, et, et.size - shift);
+}
+
+/* Narrowing move on I16/I32/I64 source elements; the size passed on is
+   that of the narrowed elements.  */
+
+static void
+do_neon_movn (void)
+{
+  struct neon_type_el et = neon_check_type (2, NS_DQ,
+    N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY);
+  unsigned narrowed = et.size / 2;
+
+  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  neon_two_same (0, 1, narrowed);
+}
+
+/* Narrowing right shift by immediate.  A zero shift is re-dispatched as a
+   narrowing move; otherwise the shift must lie in
+   [1, narrowed element size].  */
+
+static void
+do_neon_rshift_narrow (void)
+{
+  struct neon_type_el et = neon_check_type (2, NS_DQI,
+    N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY);
+  int shift = inst.operands[2].imm;
+
+  /* Working with the narrowed size gets the bounds check, size encoding and
+     immediate bits calculation right.  */
+  et.size /= 2;
+
+  /* If immediate is zero then we are a pseudo-instruction for
+     VMOVN.I<size> <Dd>, <Qm>  */
+  if (shift == 0)
+    {
+      inst.operands[2].present = 0;
+      inst.instruction = N_MNEM_vmovn;
+      do_neon_movn ();
+      return;
+    }
+
+  constraint (shift < 1 || (unsigned)shift > et.size,
+              _("immediate out of range for narrowing operation"));
+  neon_imm_shift (FALSE, 0, 0, et, et.size - shift);
+}
+
+/* Lengthening shift left by immediate.  A shift equal to the element size
+   uses the separate "maximum shift" encoding; any other shift uses the
+   immediate-shift encoding and is restricted to N_SU_32 types.  */
+
+static void
+do_neon_shll (void)
+{
+  /* FIXME: Type checking when lengthening.  */
+  struct neon_type_el et = neon_check_type (2, NS_QDI,
+    N_EQK | N_DBL, N_I8 | N_I16 | N_I32 | N_KEY);
+  unsigned shift = inst.operands[2].imm;
+
+  if (shift != et.size)
+    {
+      /* A more-specific type check for non-max versions.  */
+      et = neon_check_type (2, NS_QDI,
+        N_EQK | N_DBL, N_SU_32 | N_KEY);
+      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, shift);
+      return;
+    }
+
+  /* Maximum shift variant.  */
+  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[1].reg);
+  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+  inst.instruction |= neon_logbits (et.size) << 18;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Check the various types for the VCVT instruction, and return the one that
+   the current instruction is.
+
+   The candidate (destination, source) type pairs are tried in order and
+   the index of the first one that neon_check_type accepts is returned:
+     0: S32 <- F32,  1: U32 <- F32,  2: F32 <- S32,  3: F32 <- U32.
+   On a match inst.error is reset to NULL, discarding any error recorded
+   while trying earlier candidates.  Returns -1 if nothing matches, in
+   which case inst.error is left as the checks set it.  Note that CVT_VAR
+   expands to code containing a return statement.  */
+
+static int
+neon_cvt_flavour (enum neon_shape rs)
+{
+#define CVT_VAR(C,X,Y)                         \
+  et = neon_check_type (2, rs, (X), (Y));      \
+  if (et.type != NT_invtype)                   \
+    {                                          \
+      inst.error = NULL;                       \
+      return (C);                              \
+    }
+  struct neon_type_el et;
+  
+  CVT_VAR (0, N_S32, N_F32);
+  CVT_VAR (1, N_U32, N_F32);
+  CVT_VAR (2, N_F32, N_S32);
+  CVT_VAR (3, N_F32, N_U32);
+  
+  return -1;
+#undef CVT_VAR
+}
+
+/* Encode VCVT.  A present, non-zero third operand selects the fixed-point
+   form; otherwise (including an explicit #0) the integer conversion form
+   is used.  The per-flavour opcode bits are only added when
+   neon_cvt_flavour recognised the type pair.  */
+
+static void
+do_neon_cvt (void)
+{
+  /* Opcode bits indexed by the value neon_cvt_flavour returns.  */
+  static const unsigned fixed_bits[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 };
+  static const unsigned int_bits[] = { 0x100, 0x180, 0x0, 0x080 };
+  /* Fixed-point conversion with #0 immediate is encoded as an integer
+     conversion.  */
+  int fixed = inst.operands[2].present && inst.operands[2].imm != 0;
+  enum neon_shape rs = neon_check_shape (fixed ? NS_DDI_QQI : NS_DD_QQ);
+  int flavour = neon_cvt_flavour (rs);
+  int quad = fixed ? (rs == NS_QQI) : (rs == NS_QQ);
+
+  if (fixed)
+    inst.instruction = NEON_ENC_IMMED (inst.instruction);
+  else
+    inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+
+  if (flavour != -1)
+    inst.instruction |= fixed ? fixed_bits[flavour] : int_bits[flavour];
+
+  /* Register fields and quad flag are common to both forms.  */
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[1].reg);
+  inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+  inst.instruction |= quad << 6;
+
+  if (fixed)
+    {
+      /* The immediate is stored as 32 minus the written value.  */
+      unsigned immbits = 32 - inst.operands[2].imm;
+      inst.instruction |= 1 << 21;
+      inst.instruction |= immbits << 16;
+    }
+  else
+    inst.instruction |= 2 << 18;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode the immediate form of VMOV/VMVN.  The constant is first tried as
+   written; if it has no encoding, the relevant bits are inverted and the
+   opposite instruction (VMVN for VMOV and vice versa) is tried.  Sets
+   inst.error when neither works.  The caller is expected to apply
+   neon_dp_fixup afterwards (do_neon_mvn does).  */
+
+static void
+neon_move_immediate (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DI_QI);
+  struct neon_type_el et = neon_check_type (1, rs,
+    N_I8 | N_I16 | N_I32 | N_I64 | N_F32);
+  unsigned immlo, immhi = 0, immbits;
+  int op, cmode;
+
+  /* Bail out if the type check failed, as do_neon_logic does.  Carrying on
+     would use an element size that was never validated, and
+     neon_invert_size aborts on a size it does not recognise.  */
+  if (et.type == NT_invtype)
+    return;
+
+  /* We start out as an MVN instruction if OP = 1, MOV otherwise.  */
+  op = (inst.instruction & (1 << 5)) != 0;
+
+  immlo = inst.operands[1].imm;
+  if (inst.operands[1].regisimm)
+    immhi = inst.operands[1].reg;
+
+  constraint (et.size < 32 && (immlo & ~((1 << et.size) - 1)) != 0,
+              _("immediate has bits set outside the operand size"));
+
+  if ((cmode = neon_cmode_for_move_imm (immlo, immhi, &immbits, &op,
+                                        et.size)) == FAIL)
+    {
+      /* Invert relevant bits only.  */
+      neon_invert_size (&immlo, &immhi, et.size);
+      /* Flip from VMOV/VMVN to VMVN/VMOV. Some immediate types are unavailable
+         with one or the other; those cases are caught by
+         neon_cmode_for_move_imm.  */
+      op = !op;
+      if ((cmode = neon_cmode_for_move_imm (immlo, immhi, &immbits, &op,
+                                            et.size)) == FAIL)
+        {
+          inst.error = _("immediate out of range");
+          return;
+        }
+    }
+
+  /* Replace the op bit with the one finally chosen.  */
+  inst.instruction &= ~(1 << 5);
+  inst.instruction |= op << 5;
+
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= (rs == NS_QI) << 6;
+  inst.instruction |= cmode << 8;
+
+  neon_write_immbits (immbits);
+}
+
+/* Encode VMVN: the register form when operand 1 is a register, otherwise
+   the immediate form via neon_move_immediate.  */
+
+static void
+do_neon_mvn (void)
+{
+  if (!inst.operands[1].isreg)
+    {
+      inst.instruction = NEON_ENC_IMMED (inst.instruction);
+      neon_move_immediate ();
+    }
+  else
+    {
+      enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+
+      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      /* Destination register.  */
+      inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+      inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+      /* Source register.  */
+      inst.instruction |= LOW4 (inst.operands[1].reg);
+      inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+      inst.instruction |= (rs == NS_QQ) << 6;
+    }
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode instructions of form:
+
+  |28/24|23|22|21 20|19 16|15 12|11    8|7|6|5|4|3  0|
+  |  U  |x |D |size | Rn  | Rd  |x x x x|N|x|M|x| Rm |
+
+  Operands 0, 1 and 2 supply Rd, Rn and Rm.  U (written at bit 24 here) is
+  set when ET is an unsigned type, and SIZE is the element size to encode,
+  which callers may choose to differ from ET.size.  */
+
+static void
+neon_mixed_length (struct neon_type_el et, unsigned size)
+{
+  /* Rd.  */
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  /* Rn.  */
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  /* Rm.  */
+  inst.instruction |= LOW4 (inst.operands[2].reg);
+  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+  /* U and size.  */
+  inst.instruction |= (et.type == NT_unsigned) << 24;
+  inst.instruction |= neon_logbits (size) << 20;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Lengthening three-operand operation (Q <- D, D) on N_SU_32 element
+   types.  */
+
+static void
+do_neon_dyadic_long (void)
+{
+  /* FIXME: Type checking for lengthening op.  */
+  struct neon_type_el et = neon_check_type (3, NS_QDD,
+    N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY);
+
+  neon_mixed_length (et, et.size);
+}
+
+/* Lengthening accumulate (Q <- D, D) on N_SU_32 element types; the
+   destination type is additionally checked with N_INT.  */
+
+static void
+do_neon_abal (void)
+{
+  struct neon_type_el et = neon_check_type (3, NS_QDD,
+    N_EQK | N_INT | N_DBL, N_EQK, N_SU_32 | N_KEY);
+
+  neon_mixed_length (et, et.size);
+}
+
+/* Encode a lengthening multiply(-accumulate) in either by-scalar or
+   three-register form.
+
+   NOTE(review): the parameter names read the wrong way round relative to
+   their use.  REGTYPES is the type mask checked in the *scalar* form and
+   SCALARTYPES the mask checked in the *register* form; the callers visible
+   in this file pass their arguments to match that usage.  */
+
+static void
+neon_mac_reg_scalar_long (unsigned regtypes, unsigned scalartypes)
+{
+  struct neon_type_el et;
+
+  if (!inst.operands[2].isscalar)
+    {
+      et = neon_check_type (3, NS_QDD,
+        N_EQK | N_DBL, N_EQK, scalartypes | N_KEY);
+      inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+      neon_mixed_length (et, et.size);
+      return;
+    }
+
+  et = neon_check_type (2, NS_QDS,
+    N_EQK | N_DBL, regtypes | N_KEY);
+  inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+  neon_mul_mac (et, et.type == NT_unsigned);
+}
+
+/* Lengthening multiply-accumulate: S16/S32/U16/U32 in the by-scalar form,
+   N_SU_32 in the three-register form.  */
+
+static void
+do_neon_mac_maybe_scalar_long (void)
+{
+  neon_mac_reg_scalar_long (N_S16 | N_S32 | N_U16 | N_U32, N_SU_32);
+}
+
+/* Widening three-operand operation (Q <- Q, D) on N_SU_32 element types;
+   the first two operands are checked as double the key type's width.  */
+
+static void
+do_neon_dyadic_wide (void)
+{
+  struct neon_type_el et = neon_check_type (3, NS_QQD,
+    N_EQK | N_DBL, N_EQK | N_DBL, N_SU_32 | N_KEY);
+
+  neon_mixed_length (et, et.size);
+}
+
+/* Narrowing three-operand operation on I16/I32/I64 element types; the size
+   encoded is half the checked element size.  */
+
+static void
+do_neon_dyadic_narrow (void)
+{
+  struct neon_type_el et = neon_check_type (3, NS_QDD,
+    N_EQK | N_DBL, N_EQK, N_I16 | N_I32 | N_I64 | N_KEY);
+
+  /* Operand sign is unimportant here, but neon_mixed_length sets bit 24
+     whenever the element type is unsigned.  Force a type that can never
+     compare equal to NT_unsigned so that writing an unsigned element type
+     does not alter the encoding (same trick as do_neon_addsub_if_i).  */
+  et.type = NT_untyped;
+  neon_mixed_length (et, et.size / 2);
+}
+
+/* Saturating lengthening multiply (per the function name): S16/S32 only,
+   in both the by-scalar and three-register forms.  */
+
+static void
+do_neon_mul_sat_scalar_long (void)
+{
+  neon_mac_reg_scalar_long (N_S16 | N_S32, N_S16 | N_S32);
+}
+
+/* Encode VMULL.  The by-scalar form shares the lengthening MAC handling;
+   the three-register form additionally allows the P8 type, which selects
+   the polynomial encoding.  */
+
+static void
+do_neon_vmull (void)
+{
+  struct neon_type_el et;
+
+  if (inst.operands[2].isscalar)
+    {
+      do_neon_mac_maybe_scalar_long ();
+      return;
+    }
+
+  et = neon_check_type (3, NS_QDD,
+    N_EQK | N_DBL, N_EQK, N_SU_32 | N_P8 | N_KEY);
+  if (et.type != NT_poly)
+    inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  else
+    inst.instruction = NEON_ENC_POLY (inst.instruction);
+  /* For polynomial encoding, size field must be 0b00 and the U bit must be
+     zero. Should be OK as-is.  */
+  neon_mixed_length (et, et.size);
+}
+
+/* Encode a four-operand extract (VEXT, per the function name): three
+   registers plus an element index.  The index is scaled by the element
+   size into a byte offset, which is stored in the field starting at
+   bit 8.  */
+
+static void
+do_neon_ext (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DDDI_QQQI);
+  struct neon_type_el et = neon_check_type (3, rs,
+    N_EQK, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+  unsigned imm = (inst.operands[3].imm * et.size) / 8;
+
+  /* The byte offset must lie within one vector (8 bytes for D registers,
+     16 for Q).  Anything larger would spill out of the immediate field
+     into the Rd bits written at bit 12.  */
+  constraint (imm >= (unsigned) ((rs == NS_QQQI) ? 16 : 8),
+              _("shift out of range"));
+
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  inst.instruction |= LOW4 (inst.operands[2].reg);
+  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+  inst.instruction |= (rs == NS_QQQI) << 6;
+  inst.instruction |= imm << 8;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode an element-reversal instruction.  The width of the reversed
+   region is already part of the opcode bitmask; it is decoded here only to
+   reject element sizes that are not smaller than the region.  */
+
+static void
+do_neon_rev (void)
+{
+  enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el et = neon_check_type (2, rs,
+    N_EQK, N_8 | N_16 | N_32 | N_KEY);
+  unsigned op = (inst.instruction >> 7) & 3;
+  unsigned region;
+
+  /* N (width of reversed regions) is encoded as part of the bitmask. We
+     extract it here to check the elements to be reversed are smaller.
+     Otherwise we'd get a reserved instruction.  */
+  switch (op)
+    {
+    case 0:  region = 64; break;
+    case 1:  region = 32; break;
+    case 2:  region = 16; break;
+    default: region = 0;  break;
+    }
+
+  assert (region != 0);
+  constraint (et.size >= region,
+              _("elements must be smaller than reversal region"));
+  neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+/* Encode VDUP, duplicating either a scalar (operand 1 is a scalar) or an
+   ARM register into every lane of the destination vector.  */
+
+static void
+do_neon_dup (void)
+{
+  if (!inst.operands[1].isscalar)
+    {
+      enum neon_shape rs = neon_check_shape (NS_DR_QR);
+      struct neon_type_el et = neon_check_type (1, rs, N_8 | N_16 | N_32);
+      /* Remember the condition field before the opcode is replaced.  */
+      unsigned save_cond = inst.instruction & 0xf0000000;
+
+      /* Duplicate ARM register to lanes of vector.  */
+      inst.instruction = NEON_ENC_ARMREG (inst.instruction);
+      /* Element size selector; 32-bit elements need no extra bits.  */
+      if (et.size == 8)
+        inst.instruction |= 0x400000;
+      else if (et.size == 16)
+        inst.instruction |= 0x000020;
+
+      inst.instruction |= LOW4 (inst.operands[1].reg) << 12;
+      inst.instruction |= LOW4 (inst.operands[0].reg) << 16;
+      inst.instruction |= HI1 (inst.operands[0].reg) << 7;
+      inst.instruction |= (rs == NS_QR) << 21;
+      /* The encoding for this instruction is identical for the ARM and Thumb
+         variants, except for the condition field.  */
+      inst.instruction |= thumb_mode ? 0xe0000000 : save_cond;
+    }
+  else
+    {
+      enum neon_shape rs = neon_check_shape (NS_DS_QS);
+      struct neon_type_el et = neon_check_type (1, rs, N_8 | N_16 | N_32);
+      /* Element size in bytes.  */
+      unsigned sizebits = et.size >> 3;
+      /* Operand 1 holds the register in its upper bits and the lane index
+         in bits [2:0].  */
+      unsigned dm = inst.operands[1].reg >> 3;
+      int logsize = neon_logbits (et.size);
+      unsigned x = (inst.operands[1].reg & 7) << logsize;
+
+      inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+      inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+      inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+      inst.instruction |= LOW4 (dm);
+      inst.instruction |= HI1 (dm) << 5;
+      inst.instruction |= (rs == NS_QS) << 6;
+      inst.instruction |= x << 17;
+      inst.instruction |= sizebits << 16;
+
+      inst.instruction = neon_dp_fixup (inst.instruction);
+    }
+}
+
+/* VMOV has particularly many variations. It can be one of:
+     0. VMOV<c><q> <Qd>, <Qm>
+     1. VMOV<c><q> <Dd>, <Dm>
+   (Register operations, which are VORR with Rm = Rn.)
+     2. VMOV<c><q>.<dt> <Qd>, #<imm>
+     3. VMOV<c><q>.<dt> <Dd>, #<imm>
+   (Immediate loads.)
+     4. VMOV<c><q>.<size> <Dn[x]>, <Rd>
+   (ARM register to scalar.)
+     5. VMOV<c><q> <Dm>, <Rd>, <Rn>
+   (Two ARM registers to vector.)
+     6. VMOV<c><q>.<dt> <Rd>, <Dn[x]>
+   (Scalar to ARM register.)
+     7. VMOV<c><q> <Rd>, <Rn>, <Dm>
+   (Vector to two ARM registers.)
+  
+   We should have just enough information to be able to disambiguate most of
+   these, apart from "Two ARM registers to vector" and "Vector to two ARM
+   registers" cases. For these, abuse the .regisimm operand field to signify a
+   Neon register.
+   
+   All the encoded bits are hardcoded by this function.
+   
+   FIXME: Some of the checking may be a bit sloppy (in a couple of cases you
+   can specify a type where it doesn't make sense to, and is ignored).
+*/
+
+static void
+do_neon_mov (void)
+{
+  /* Sum of the .present flags of the three operand slots; selects between
+     the two-operand and three-operand forms below.  */
+  int nargs = inst.operands[0].present + inst.operands[1].present
+              + inst.operands[2].present;
+  /* Top nibble used by the encodings that are rebuilt from scratch below:
+     0xe in Thumb mode, otherwise whatever is already in bits [31:28] of the
+     opcode.  */
+  unsigned save_cond = thumb_mode ? 0xe0000000 : inst.instruction & 0xf0000000;
+
+  switch (nargs)
+    {
+    case 2:
+      /* Cases 0, 1, 2, 3, 4, 6.  */
+      if (inst.operands[1].isscalar)
+        {
+          /* Case 6.  The scalar is operand 1: its .reg field carries the
+             register number in the bits above bit 2 and the element index
+             in the low three bits.  */
+          struct neon_type_el et = neon_check_type (1, NS_IGNORE,
+            N_S8 | N_S16 | N_U8 | N_U16 | N_32);
+          unsigned logsize = neon_logbits (et.size);
+          unsigned dn = inst.operands[1].reg >> 3;
+          unsigned x = inst.operands[1].reg & 7;
+          unsigned abcdebits = 0;
+
+          /* The index must address one of the 64 / size elements.  */
+          constraint (x >= 64 / et.size, _("scalar index out of range"));
+
+          /* Base pattern depends on element size and, for 8 and 16 bits,
+             on signedness.  */
+          switch (et.size)
+            {
+            case 8:  abcdebits = (et.type == NT_signed) ? 0x08 : 0x18; break;
+            case 16: abcdebits = (et.type == NT_signed) ? 0x01 : 0x11; break;
+            case 32: abcdebits = 0x00; break;
+            default: ;
+            }
+
+          /* Merge the element index in above the size marker, then split
+             the result: the low two bits go to instruction bits [6:5], the
+             remaining bits start at bit 21.  */
+          abcdebits |= x << logsize;
+          inst.instruction = save_cond;
+          inst.instruction |= 0xe100b10;
+          inst.instruction |= LOW4 (dn) << 16;
+          inst.instruction |= HI1 (dn) << 7;
+          inst.instruction |= inst.operands[0].reg << 12;
+          inst.instruction |= (abcdebits & 3) << 5;
+          inst.instruction |= (abcdebits >> 2) << 21;
+        }
+      else if (inst.operands[1].isreg)
+        {
+          /* Cases 0, 1, 4.  */
+          if (inst.operands[0].isscalar)
+            {
+              /* Case 4.  Mirror image of case 6: the scalar is operand 0 and
+                 operand 1 supplies the register placed at bit 12.  No
+                 signedness is encoded here.  */
+              unsigned bcdebits = 0;
+              struct neon_type_el et = neon_check_type (1, NS_IGNORE,
+                                                        N_8 | N_16 | N_32);
+              int logsize = neon_logbits (et.size);
+              unsigned dn = inst.operands[0].reg >> 3;
+              unsigned x = inst.operands[0].reg & 7;
+
+              constraint (x >= 64 / et.size, _("scalar index out of range"));
+
+              switch (et.size)
+                {
+                case 8:  bcdebits = 0x8; break;
+                case 16: bcdebits = 0x1; break;
+                case 32: bcdebits = 0x0; break;
+                default: ;
+                }
+
+              /* Same split as in case 6: two low bits at [6:5], the rest
+                 from bit 21 upwards.  */
+              bcdebits |= x << logsize;
+              inst.instruction = save_cond;
+              inst.instruction |= 0xe000b10;
+              inst.instruction |= LOW4 (dn) << 16;
+              inst.instruction |= HI1 (dn) << 7;
+              inst.instruction |= inst.operands[1].reg << 12;
+              inst.instruction |= (bcdebits & 3) << 5;
+              inst.instruction |= (bcdebits >> 2) << 21;
+            }
+          else
+            {
+              /* Cases 0, 1.  */
+              enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+              /* The architecture manual I have doesn't explicitly state which
+                 value the U bit should have for register->register moves, but
+                 the equivalent VORR instruction has U = 0, so do that.  */
+              inst.instruction = 0x0200110;
+              inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+              inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+              /* The source register (operand 1) is written into both source
+                 fields, the one at bits [3:0]/5 and the one at bits
+                 [19:16]/7.  */
+              inst.instruction |= LOW4 (inst.operands[1].reg);
+              inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+              inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+              inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+              /* Bit 6 is set for the Q-register shape.  */
+              inst.instruction |= (rs == NS_QQ) << 6;
+              
+              inst.instruction = neon_dp_fixup (inst.instruction);
+            }
+        }
+      else
+        {
+          /* Cases 2, 3.  Operand 1 is neither a scalar nor a register.
+             NOTE(review): neon_move_immediate is expected to fill in the
+             remaining fields for the immediate forms -- confirm there.  */
+          inst.instruction = 0x0800010;
+          neon_move_immediate ();
+          inst.instruction = neon_dp_fixup (inst.instruction);
+        }
+      break;
+    
+    case 3:
+      /* Cases 5, 7.  */
+      if (inst.operands[0].regisimm)
+        {
+          /* Case 5.  .regisimm on operand 0 marks it as the Neon register;
+             operands 1 and 2 are placed at bits 12 and 16.  */
+          inst.instruction = save_cond;
+          inst.instruction |= 0xc400b10;
+          inst.instruction |= LOW4 (inst.operands[0].reg);
+          inst.instruction |= HI1 (inst.operands[0].reg) << 5;
+          inst.instruction |= inst.operands[1].reg << 12;
+          inst.instruction |= inst.operands[2].reg << 16;
+        }
+      else
+        {
+          /* Case 7.  Operands 0 and 1 are placed at bits 12 and 16; the
+             Neon register is operand 2.  */
+          inst.instruction = save_cond;
+          inst.instruction |= 0xc500b10;
+          inst.instruction |= inst.operands[0].reg << 12;
+          inst.instruction |= inst.operands[1].reg << 16;
+          inst.instruction |= LOW4 (inst.operands[2].reg);
+          inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+        }
+      break;
+    
+    default:
+      /* Only two- and three-operand forms are handled by this function.  */
+      abort ();
+    }
+}
+
+/* Encode a right shift by immediate (V{R}SHR).  A shift count of zero is
+   handed over to do_neon_mov instead.  */
+
+static void
+do_neon_rshift_round_imm (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DDI_QQI);
+  struct neon_type_el el = neon_check_type (2, shape, N_EQK,
+                                            N_SU_ALL | N_KEY);
+  int shift = inst.operands[2].imm;
+
+  if (shift != 0)
+    {
+      /* Valid counts run from 1 up to the element size inclusive.  */
+      constraint (shift < 1 || (unsigned)shift > el.size,
+                  _("immediate out of range for shift"));
+      neon_imm_shift (TRUE, el.type == NT_unsigned, shape == NS_QQI, el,
+                      el.size - shift);
+      return;
+    }
+
+  /* imm == 0 case is encoded as VMOV for V{R}SHR: drop the immediate operand
+     so that do_neon_mov sees a two-operand instruction.  */
+  inst.operands[2].present = 0;
+  do_neon_mov ();
+}
+
+/* Encode a lengthening move (Q destination, D source).  The element size in
+   bytes is placed at bit 19 before the common two-register encoder runs.  */
+
+static void
+do_neon_movl (void)
+{
+  struct neon_type_el el = neon_check_type (2, NS_QD,
+    N_EQK | N_DBL, N_SU_32 | N_KEY);
+  unsigned size_in_bytes = el.size >> 3;
+  int is_unsigned = (el.type == NT_unsigned);
+
+  inst.instruction |= size_in_bytes << 19;
+  neon_two_same (0, is_unsigned, -1);
+}
+
+/* Encode VTRN.  Both operands share one shape (D,D or Q,Q) and the element
+   size must be 8, 16 or 32 bits.  */
+
+static void
+do_neon_trn (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el el = neon_check_type (2, shape,
+    N_EQK, N_8 | N_16 | N_32 | N_KEY);
+  int is_quad = (shape == NS_QQ);
+
+  /* Pick the integer variant of the overloaded opcode before encoding.  */
+  inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+  neon_two_same (is_quad, 1, el.size);
+}
+
+/* Encode VZIP / VUZP.  The D-register form with 32-bit elements has no
+   encoding of its own and is emitted as VTRN.32 instead.  */
+
+static void
+do_neon_zip_uzp (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el el = neon_check_type (2, shape,
+    N_EQK, N_8 | N_16 | N_32 | N_KEY);
+
+  if (shape != NS_DD || el.size != 32)
+    {
+      /* Ordinary case.  */
+      neon_two_same (shape == NS_QQ, 1, el.size);
+      return;
+    }
+
+  /* Special case: encode as VTRN.32 <Dd>, <Dm>.  */
+  inst.instruction = N_MNEM_vtrn;
+  do_neon_trn ();
+}
+
+/* Encode a two-register operation restricted to signed 8, 16 or 32-bit
+   elements, in either the D,D or the Q,Q shape.  */
+
+static void
+do_neon_sat_abs_neg (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el el = neon_check_type (2, shape,
+    N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+  int is_quad = (shape == NS_QQ);
+
+  neon_two_same (is_quad, 1, el.size);
+}
+
+/* Encode a pairwise long operation on signed or unsigned elements.  */
+
+static void
+do_neon_pair_long (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el el = neon_check_type (2, shape, N_EQK,
+                                            N_SU_32 | N_KEY);
+
+  /* Unsigned is encoded in the OP field (bit 7) for these instructions.  */
+  if (el.type == NT_unsigned)
+    inst.instruction |= 1 << 7;
+
+  neon_two_same (shape == NS_QQ, 1, el.size);
+}
+
+/* Encode a reciprocal-estimate style operation, which accepts either F32 or
+   U32 elements.  Bit 8 distinguishes the floating-point form.  */
+
+static void
+do_neon_recip_est (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el el = neon_check_type (2, shape,
+    N_EQK | N_FLT, N_F32 | N_U32 | N_KEY);
+
+  if (el.type == NT_float)
+    inst.instruction |= 1 << 8;
+
+  neon_two_same (shape == NS_QQ, 1, el.size);
+}
+
+/* Encode VCLS-style operations: signed 8, 16 or 32-bit elements, D,D or Q,Q
+   shape.  */
+
+static void
+do_neon_cls (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el el = neon_check_type (2, shape,
+    N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+  int is_quad = (shape == NS_QQ);
+
+  neon_two_same (is_quad, 1, el.size);
+}
+
+/* Encode VCLZ-style operations: integer 8, 16 or 32-bit elements, D,D or Q,Q
+   shape.  */
+
+static void
+do_neon_clz (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el el = neon_check_type (2, shape,
+    N_EQK, N_I8 | N_I16 | N_I32 | N_KEY);
+  int is_quad = (shape == NS_QQ);
+
+  neon_two_same (is_quad, 1, el.size);
+}
+
+/* Encode VCNT-style operations: only 8-bit elements are accepted, in the
+   D,D or Q,Q shape.  */
+
+static void
+do_neon_cnt (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  struct neon_type_el el = neon_check_type (2, shape,
+    N_EQK | N_INT, N_8 | N_KEY);
+  int is_quad = (shape == NS_QQ);
+
+  neon_two_same (is_quad, 1, el.size);
+}
+
+/* Encode a register swap.  No element type is checked; only the shape
+   (D,D or Q,Q) matters, and -1 is passed as the size.  */
+
+static void
+do_neon_swp (void)
+{
+  enum neon_shape shape = neon_check_shape (NS_DD_QQ);
+  int is_quad = (shape == NS_QQ);
+
+  neon_two_same (is_quad, 1, -1);
+}
+
+/* Encode a table lookup (VTBL / VTBX).  Operand 1 is the table register
+   list: its .reg is the first register and its .imm the list length, which
+   must be between 1 and 4.  */
+
+static void
+do_neon_tbl_tbx (void)
+{
+  unsigned len_field;
+
+  neon_check_type (1, NS_DLD, N_8);
+
+  if (inst.operands[1].imm >= 1 && inst.operands[1].imm <= 4)
+    /* The length is stored minus one.  */
+    len_field = inst.operands[1].imm - 1;
+  else
+    {
+      inst.error = _("bad list length for table lookup");
+      return;
+    }
+
+  /* Destination register.  */
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+  /* First register of the table list.  */
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+  /* Index register.  */
+  inst.instruction |= LOW4 (inst.operands[2].reg);
+  inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+  inst.instruction |= len_field << 8;
+
+  inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode VLDM / VSTM.  Operand 0 is the base register (with optional
+   writeback); operand 1 is the register list, given as first register
+   (.reg) and register count (.imm).  */
+
+static void
+do_neon_ldm_stm (void)
+{
+  /* P, U and L bits are part of bitmask; bit 24 set means the
+     decrement-before form.  */
+  int decrement_before = (inst.instruction & (1 << 24)) != 0;
+  /* The low field holds twice the register count.  */
+  unsigned count_field = inst.operands[1].imm * 2;
+
+  constraint (decrement_before && !inst.operands[0].writeback,
+              _("writeback (!) must be used for VLDMDB and VSTMDB"));
+
+  constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+              _("register list must contain at least 1 and at most 16 "
+                "registers"));
+
+  /* Register list: first register and count.  */
+  inst.instruction |= LOW4 (inst.operands[1].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[1].reg) << 22;
+  inst.instruction |= count_field;
+
+  /* Base register and writeback flag.  */
+  inst.instruction |= inst.operands[0].reg << 16;
+  inst.instruction |= inst.operands[0].writeback << 21;
+
+  if (thumb_mode)
+    inst.instruction |= 0xe0000000;
+}
+
+/* Encode VLDR / VSTR.  Operand 0 is the transfer register, operand 1 the
+   address, which must be a plain pre-indexed base plus immediate offset
+   without writeback.  The offset is stored as a word count in the low eight
+   bits, with bit 23 giving its direction.  */
+
+static void
+do_neon_ldr_str (void)
+{
+  int is_load = (inst.instruction & (1 << 20)) != 0;
+  int add_offset = (inst.reloc.exp.X_add_number >= 0);
+  unsigned word_offset;
+
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+
+  constraint (inst.reloc.pc_rel && !is_load,
+              _("PC-relative addressing unavailable with VSTR"));
+
+  constraint (!inst.reloc.pc_rel && inst.reloc.exp.X_op != O_constant,
+              _("Immediate value must be a constant"));
+
+  /* Store the magnitude of the offset, in words.  */
+  if (add_offset)
+    word_offset = inst.reloc.exp.X_add_number / 4;
+  else
+    word_offset = -inst.reloc.exp.X_add_number / 4;
+
+  /* FIXME: Does this catch everything?  */
+  constraint (!(inst.operands[1].isreg && inst.operands[1].preind
+                && !inst.operands[1].postind && !inst.operands[1].writeback
+                && !inst.operands[1].immisreg && !inst.operands[1].shifted),
+              BAD_ADDR_MODE);
+  constraint ((inst.operands[1].imm & 3) != 0,
+              _("Offset must be a multiple of 4"));
+  constraint (word_offset != (word_offset & 0xff),
+              _("Immediate offset out of range"));
+
+  inst.instruction |= inst.operands[1].reg << 16;
+  inst.instruction |= word_offset & 0xff;
+  inst.instruction |= add_offset << 23;
+
+  if (thumb_mode)
+    inst.instruction |= 0xe0000000;
+
+  /* Only PC-relative references need a relocation.  */
+  if (!inst.reloc.pc_rel)
+    inst.reloc.type = BFD_RELOC_UNUSED;
+  else if (thumb_mode)
+    inst.reloc.type = BFD_RELOC_ARM_T32_CP_OFF_IMM;
+  else
+    inst.reloc.type = BFD_RELOC_ARM_CP_OFF_IMM;
+}
+
+/* Encode the multiple-structure forms of VLD<n> / VST<n>.  The "interleave"
+   version also handles non-interleaving register VLD1/VST1 instructions.  */
+
+static void
+do_neon_ld_st_interleave (void)
+{
+  struct neon_type_el el = neon_check_type (1, NS_IGNORE,
+                                            N_8 | N_16 | N_32 | N_64);
+  /* Value for the "type" field, indexed by a five-bit key:
+       bit 0:     register stride of one (0) or two (1)
+       bits 1,2:  register list length, minus one (1, 2, 3, 4)
+       bits 3,4:  <n> in instruction type, minus one (VLD<n> / VST<n>).
+     -1 marks combinations that do not exist.  */
+  const int type_for_list[] =
+    {
+      0x7,  -1, 0xa,  -1, 0x6,  -1, 0x2,  -1, /* VLD1 / VST1.  */
+       -1,  -1, 0x8, 0x9,  -1,  -1, 0x3,  -1, /* VLD2 / VST2.  */
+       -1,  -1,  -1,  -1, 0x4, 0x5,  -1,  -1, /* VLD3 / VST3.  */
+       -1,  -1,  -1,  -1,  -1,  -1, 0x0, 0x1  /* VLD4 / VST4.  */
+    };
+  unsigned align_field = 0;
+  unsigned key;
+  int type_field;
+
+  if (inst.operands[1].immisalign)
+    {
+      /* 128 and 256-bit alignment are rejected for three-register lists.  */
+      int three_regs = NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3;
+
+      if ((inst.operands[1].imm >> 8) == 64)
+        align_field = 1;
+      else if ((inst.operands[1].imm >> 8) == 128 && !three_regs)
+        align_field = 2;
+      else if ((inst.operands[1].imm >> 8) == 256 && !three_regs)
+        align_field = 3;
+      else
+        {
+          inst.error = _("bad alignment");
+          return;
+        }
+    }
+
+  inst.instruction |= align_field << 4;
+  inst.instruction |= neon_logbits (el.size) << 6;
+
+  /* Bits [4:6] of the immediate in a list specifier encode register stride
+     (minus 1) in bit 4, and list length in bits [5:6].  The <n> of
+     VLD<n>/VST<n> sits in bits [9:8] of the initial bitmask.  Combine the
+     two into the table key, look up the "type" value, and write it back
+     over bits [11:8].  */
+  key = ((inst.operands[0].imm >> 4) & 7)
+        | (((inst.instruction >> 8) & 3) << 3);
+
+  type_field = type_for_list[key];
+
+  constraint (type_field == -1, _("bad list type for instruction"));
+
+  inst.instruction &= ~0xf00;
+  inst.instruction |= type_field << 8;
+}
+
+/* Validate the alignment given to do_neon_ld_st_lane and do_neon_ld_dup.
+   The variable arguments are pairs of legal (size, align) values, the list
+   being terminated by a single -1.  When operand 1 carries no alignment,
+   *DO_ALIGN is cleared and SUCCESS returned.  Otherwise, if (SIZE, ALIGN)
+   matches one of the pairs, *DO_ALIGN is set to 1 and SUCCESS returned; if
+   not, inst.error is set and FAIL returned (leaving *DO_ALIGN alone).  */
+
+static int
+neon_alignment_bit (int size, int align, int *do_align, ...)
+{
+  va_list pairs;
+  int status = FAIL;
+
+  if (!inst.operands[1].immisalign)
+    {
+      *do_align = 0;
+      return SUCCESS;
+    }
+
+  va_start (pairs, do_align);
+
+  for (;;)
+    {
+      int legal_size, legal_align;
+
+      legal_size = va_arg (pairs, int);
+      if (legal_size == -1)
+        break;
+      legal_align = va_arg (pairs, int);
+
+      if (size == legal_size && align == legal_align)
+        {
+          status = SUCCESS;
+          break;
+        }
+    }
+
+  va_end (pairs);
+
+  if (status != SUCCESS)
+    inst.error = _("unsupported alignment for instruction");
+  else
+    *do_align = 1;
+
+  return status;
+}
+
+/* Encode the single-lane forms of VLD<n> / VST<n> (reached from
+   do_neon_ldx_stx when the list names one specific lane).  Operand 0 is the
+   register list, whose .imm packs list length, stride and lane; operand 1
+   is the address, whose .imm carries the alignment above bit 7.  */
+
+static void
+do_neon_ld_st_lane (void)
+{
+  struct neon_type_el et = neon_check_type (1, NS_IGNORE, N_8 | N_16 | N_32);
+  int align_good, do_align = 0;
+  int logsize = neon_logbits (et.size);
+  int align = inst.operands[1].imm >> 8;
+  /* <n> - 1, taken from bits [9:8] of the opcode.  */
+  int n = (inst.instruction >> 8) & 3;
+  /* Number of elements of this size in a 64-bit register.  */
+  int max_el = 64 / et.size;
+  
+  constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != n + 1,
+              _("bad list length"));
+  constraint (NEON_LANE (inst.operands[0].imm) >= max_el,
+              _("scalar index out of range"));
+  constraint (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2
+              && et.size == 8,
+              _("stride of 2 unavailable when element size is 8"));
+  
+  /* Each <n> has its own set of legal (size, alignment) pairs and its own
+     way of encoding the alignment starting at bit 4.  */
+  switch (n)
+    {
+    case 0:  /* VLD1 / VST1.  */
+      align_good = neon_alignment_bit (et.size, align, &do_align, 16, 16,
+                                       32, 32, -1);
+      if (align_good == FAIL)
+        return;
+      if (do_align)
+        {
+          unsigned alignbits = 0;
+          switch (et.size)
+            {
+            case 16: alignbits = 0x1; break;
+            case 32: alignbits = 0x3; break;
+            default: ;
+            }
+          inst.instruction |= alignbits << 4;
+        }
+      break;
+
+    case 1:  /* VLD2 / VST2.  */
+      align_good = neon_alignment_bit (et.size, align, &do_align, 8, 16, 16, 32,
+                                       32, 64, -1);
+      if (align_good == FAIL)
+        return;
+      if (do_align)
+        inst.instruction |= 1 << 4;
+      break;
+
+    case 2:  /* VLD3 / VST3.  */
+      constraint (inst.operands[1].immisalign,
+                  _("can't use alignment with this instruction"));
+      break;
+
+    case 3:  /* VLD4 / VST4.  */
+      align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32,
+                                       16, 64, 32, 64, 32, 128, -1);
+      if (align_good == FAIL)
+        return;
+      if (do_align)
+        {
+          unsigned alignbits = 0;
+          /* 32-bit elements accept two alignments, told apart by the
+             value written here.  */
+          switch (et.size)
+            {
+            case 8:  alignbits = 0x1; break;
+            case 16: alignbits = 0x1; break;
+            case 32: alignbits = (align == 64) ? 0x1 : 0x2; break;
+            default: ;
+            }
+          inst.instruction |= alignbits << 4;
+        }
+      break;
+
+    default: ;
+    }
+
+  /* Reg stride of 2 is encoded in bit 5 when size==16, bit 6 when size==32.  */
+  if (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+    inst.instruction |= 1 << (4 + logsize);
+      
+  /* The lane index sits above the size-dependent bits; the log2 element
+     size goes in at bit 10.  */
+  inst.instruction |= NEON_LANE (inst.operands[0].imm) << (logsize + 5);
+  inst.instruction |= logsize << 10;
+}
+
+/* Encode single n-element structure to all lanes VLD<n> instructions.
+   <n> - 1 is read from bits [9:8] of the opcode.  Operand 0 is the register
+   list (length and stride packed in .imm); operand 1 is the address, with
+   the requested alignment held above bit 7 of its .imm.  */
+
+static void
+do_neon_ld_dup (void)
+{
+  struct neon_type_el et = neon_check_type (1, NS_IGNORE, N_8 | N_16 | N_32);
+  int align_good, do_align = 0;
+
+  switch ((inst.instruction >> 8) & 3)
+    {
+    case 0:  /* VLD1.  */
+      /* NOTE(review): a stride of 2 trips an assertion here rather than a
+         diagnostic; presumably the operand parser never produces one for
+         this form -- confirm.  */
+      assert (NEON_REG_STRIDE (inst.operands[0].imm) != 2);
+      align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8,
+                                       &do_align, 16, 16, 32, 32, -1);
+      if (align_good == FAIL)
+        return;
+      /* One or two registers are allowed; bit 5 marks the two-register
+         list.  */
+      switch (NEON_REGLIST_LENGTH (inst.operands[0].imm))
+        {
+        case 1: break;
+        case 2: inst.instruction |= 1 << 5; break;
+        default: inst.error = _("bad list length"); return;
+        }
+      inst.instruction |= neon_logbits (et.size) << 6;
+      break;
+
+    case 1:  /* VLD2.  */
+      align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8,
+                                       &do_align, 8, 16, 16, 32, 32, 64, -1);
+      if (align_good == FAIL)
+        return;
+      constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2,
+                  _("bad list length"));
+      /* For VLD2 to VLD4, bit 5 marks a register stride of two.  */
+      if (NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+        inst.instruction |= 1 << 5;
+      inst.instruction |= neon_logbits (et.size) << 6;
+      break;
+
+    case 2:  /* VLD3.  */
+      constraint (inst.operands[1].immisalign,
+                  _("can't use alignment with this instruction"));
+      constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 3,
+                  _("bad list length"));
+      if (NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+        inst.instruction |= 1 << 5;
+      inst.instruction |= neon_logbits (et.size) << 6;
+      break;
+
+    case 3:  /* VLD4.  */
+      {
+        int align = inst.operands[1].imm >> 8;
+        align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32,
+                                         16, 64, 32, 64, 32, 128, -1);
+        if (align_good == FAIL)
+          return;
+        constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4,
+                    _("bad list length"));
+        if (NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+          inst.instruction |= 1 << 5;
+        /* 32-bit elements with 128-bit alignment use the value 3 in the
+           size field instead of the log2 element size.  */
+        if (et.size == 32 && align == 128)
+          inst.instruction |= 0x3 << 6;
+        else
+          inst.instruction |= neon_logbits (et.size) << 6;
+      }
+      break;
+
+    default: ;
+    }
+
+  /* Bit 4 records whether an (accepted) alignment was given.  */
+  inst.instruction |= do_align << 4;
+}
+
+/* Disambiguate VLD<n> and VST<n> instructions, and fill in the common bits
+   (those apart from bits [11:4]).  The lane field of the register list
+   selects between the interleave, all-lanes and single-lane encoders.  */
+
+static void
+do_neon_ldx_stx (void)
+{
+  switch (NEON_LANE (inst.operands[0].imm))
+    {
+    default:
+      /* One specific lane.  */
+      inst.instruction = NEON_ENC_LANE (inst.instruction);
+      do_neon_ld_st_lane ();
+      break;
+
+    case NEON_ALL_LANES:
+      inst.instruction = NEON_ENC_DUP (inst.instruction);
+      do_neon_ld_dup ();
+      break;
+
+    case NEON_INTERLEAVE_LANES:
+      inst.instruction = NEON_ENC_INTERLV (inst.instruction);
+      do_neon_ld_st_interleave ();
+      break;
+    }
+
+  /* L bit comes from bit mask.  First list register and base register.  */
+  inst.instruction |= inst.operands[1].reg << 16;
+  inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+  inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+
+  /* The low four bits hold the post-index register, or 0xd for plain
+     writeback, or 0xf for no writeback at all.  */
+  if (!inst.operands[1].postind)
+    inst.instruction |= inst.operands[1].writeback ? 0xd : 0xf;
+  else
+    {
+      int index_reg = inst.operands[1].imm & 0xf;
+
+      constraint (!inst.operands[1].immisreg,
+                  _("post-index must be a register"));
+      /* 0xd and 0xf are taken by the two special meanings above.  */
+      constraint (index_reg == 0xd || index_reg == 0xf,
+                  _("bad register for post-index"));
+      inst.instruction |= index_reg;
+    }
+
+  inst.instruction |= thumb_mode ? 0xf9000000 : 0xf4000000;
+}
+
+\f
+/* Overall per-instruction processing. */
+
+/* Create a fixup for EXP at offset WHERE in FRAG.  We need to be able to fix
+   up arbitrary expressions in some statements, so that we can handle
+   symbols that are an arbitrary distance from the pc.  The most common
+   cases are of the form ((+/-sym -/+ . - 8) & mask), which returns part of
+   an address in a form which will be valid for a data instruction.
+   Constants, symbols, sums and differences are passed to fix_new_exp
+   unchanged; anything else is first pushed into a symbol in the
+   expr_section and a fix is created against that symbol.  */
+
+static void
+fix_new_arm (fragS *      frag,
+            int           where,
+            short int     size,
+            expressionS * exp,
+            int           pc_rel,
+            int           reloc)
+{
+  fixS *fixp;
+  int is_simple = (exp->X_op == O_constant
+                   || exp->X_op == O_symbol
+                   || exp->X_op == O_add
+                   || exp->X_op == O_subtract);
+
+  if (is_simple)
+    fixp = fix_new_exp (frag, where, size, exp, pc_rel, reloc);
+  else
+    fixp = fix_new (frag, where, size, make_expr_symbol (exp), 0,
+                    pc_rel, reloc);
+
+  /* Record whether the fix applies to a THUMB instruction or an ARM
+     instruction.  */
+  fixp->tc_fix_data = thumb_mode;
+}
+
+/* Create a frag for an instruction requiring relaxation.  */
+static void
+output_relax_insn (void)
+{
+  char * to;
+  symbolS *sym;
    int offset;
  
  #ifdef OBJ_ELF
@@ -8123,6 +11488,85 @@ output_inst (const char * str)
  #endif
  }
  
+/* Parse a Neon type specifier. *STR should point at the leading '.'
+   character. Does no verification at this stage that the type fits the opcode
+   properly. E.g.,
+
+     .i32.i32.s16
+     .s32.f32
+     .u16
+
+   Can all be legally parsed by this function.
+
+   At most NEON_MAX_TYPE_ELS elements are consumed; parsing stops at the
+   first character that does not start another ".<type><size>" element.
+
+   If TYPE is non-null it is filled in with the parsed information; a null
+   TYPE only checks the syntax.  On success STR is updated to point after
+   the parsed type specifier and TRUE is returned.  On a malformed specifier
+   an error is reported with as_bad, STR is left untouched and FALSE is
+   returned.  */
+
+static bfd_boolean
+parse_neon_type (struct neon_type *type, char **str)
+{
+  char *ptr = *str;
+  /* Elements parsed so far.  Counted locally so that the loop never has to
+     dereference TYPE, which may be null.  */
+  unsigned nelems = 0;
+
+  if (type)
+    type->elems = 0;
+
+  while (nelems < NEON_MAX_TYPE_ELS)
+    {
+      enum neon_el_type thistype = NT_untyped;
+      /* Kept as unsigned long so that an absurdly large number cannot be
+         truncated into one of the legal sizes before it is checked.  */
+      unsigned long thissize;
+
+      if (*ptr != '.')
+        break;
+
+      ptr++;
+
+      /* A leading digit means just a size without an explicit type;
+         otherwise a single type letter comes first.  */
+      if (!ISDIGIT (*ptr))
+        {
+          switch (*ptr)
+            {
+            case 'i': thistype = NT_integer; break;
+            case 'f': thistype = NT_float; break;
+            case 'p': thistype = NT_poly; break;
+            case 's': thistype = NT_signed; break;
+            case 'u': thistype = NT_unsigned; break;
+            default:
+              as_bad (_("Unexpected character `%c' in type specifier"), *ptr);
+              return FALSE;
+            }
+
+          ptr++;
+        }
+
+      /* .f is an abbreviation for .f32.  */
+      if (thistype == NT_float && !ISDIGIT (*ptr))
+        thissize = 32;
+      else
+        {
+          thissize = strtoul (ptr, &ptr, 10);
+
+          if (thissize != 8 && thissize != 16 && thissize != 32
+              && thissize != 64)
+            {
+              as_bad (_("Bad size %lu in type specifier"), thissize);
+              return FALSE;
+            }
+        }
+
+      if (type)
+        {
+          type->el[nelems].type = thistype;
+          type->el[nelems].size = thissize;
+          type->elems = nelems + 1;
+        }
+
+      nelems++;
+    }
+
+  *str = ptr;
+
+  return TRUE;
+}
+
  /* Tag values used in struct asm_opcode's tag field.  */
  enum opcode_tag
  {
@@ -8222,17 +11666,30 @@ opcode_lookup (char **str)
    if (end == base)
      return 0;
  
-  /* Handle a possible width suffix.  */
+  /* Handle a possible width suffix and/or Neon type suffix.  */
    if (end[0] == '.')
      {
-      if (end[1] == 'w' && (end[2] == ' ' || end[2] == '\0'))
+      int offset = 2;
+      
+      if (end[1] == 'w')
         inst.size_req = 4;
-      else if (end[1] == 'n' && (end[2] == ' ' || end[2] == '\0'))
+      else if (end[1] == 'n')
         inst.size_req = 2;
        else
-       return 0;
+        offset = 0;
+
+      inst.vectype.elems = 0;
+
+      *str = end + offset;
  
-      *str = end + 2;
+      if (end[offset] == '.')      
+       {
+         /* See if we have a Neon type suffix.  */
+          if (!parse_neon_type (&inst.vectype, str))
+           return 0;
+        }
+      else if (end[offset] != '\0' && end[offset] != ' ')
+        return 0;
      }
    else
      *str = end;
@@ -8576,11 +12033,22 @@ arm_canonicalize_symbol_name (char * name)
  
  #define REGDEF(s,n,t) { #s, n, REG_TYPE_##t, TRUE }
  #define REGNUM(p,n,t) REGDEF(p##n, n, t)
+/* Like REGNUM, but the register number recorded is twice the numeric
+   suffix of the name.  */
+#define REGNUM2(p,n,t) REGDEF(p##n, 2 * n, t)
  #define REGSET(p,t) \
    REGNUM(p, 0,t), REGNUM(p, 1,t), REGNUM(p, 2,t), REGNUM(p, 3,t), \
    REGNUM(p, 4,t), REGNUM(p, 5,t), REGNUM(p, 6,t), REGNUM(p, 7,t), \
    REGNUM(p, 8,t), REGNUM(p, 9,t), REGNUM(p,10,t), REGNUM(p,11,t), \
    REGNUM(p,12,t), REGNUM(p,13,t), REGNUM(p,14,t), REGNUM(p,15,t)
+/* High half of a 32-register bank: as REGSET, for suffixes 16 to 31.  */
+#define REGSETH(p,t) \
+  REGNUM(p,16,t), REGNUM(p,17,t), REGNUM(p,18,t), REGNUM(p,19,t), \
+  REGNUM(p,20,t), REGNUM(p,21,t), REGNUM(p,22,t), REGNUM(p,23,t), \
+  REGNUM(p,24,t), REGNUM(p,25,t), REGNUM(p,26,t), REGNUM(p,27,t), \
+  REGNUM(p,28,t), REGNUM(p,29,t), REGNUM(p,30,t), REGNUM(p,31,t)
+/* As REGSET, but built from REGNUM2: sixteen names, suffixes 0 to 15, whose
+   register numbers are the even values 0 to 30.  */
+#define REGSET2(p,t) \
+  REGNUM2(p, 0,t), REGNUM2(p, 1,t), REGNUM2(p, 2,t), REGNUM2(p, 3,t), \
+  REGNUM2(p, 4,t), REGNUM2(p, 5,t), REGNUM2(p, 6,t), REGNUM2(p, 7,t), \
+  REGNUM2(p, 8,t), REGNUM2(p, 9,t), REGNUM2(p,10,t), REGNUM2(p,11,t), \
+  REGNUM2(p,12,t), REGNUM2(p,13,t), REGNUM2(p,14,t), REGNUM2(p,15,t)
  
  static const struct reg_entry reg_names[] =
  {
@@ -8619,20 +12087,16 @@ static const struct reg_entry reg_names[] =
    REGNUM(F,4,FN), REGNUM(F,5,FN), REGNUM(F,6,FN), REGNUM(F,7, FN),
  
    /* VFP SP registers. */
-  REGSET(s,VFS),
-  REGNUM(s,16,VFS), REGNUM(s,17,VFS), REGNUM(s,18,VFS), REGNUM(s,19,VFS),
-  REGNUM(s,20,VFS), REGNUM(s,21,VFS), REGNUM(s,22,VFS), REGNUM(s,23,VFS),
-  REGNUM(s,24,VFS), REGNUM(s,25,VFS), REGNUM(s,26,VFS), REGNUM(s,27,VFS),
-  REGNUM(s,28,VFS), REGNUM(s,29,VFS), REGNUM(s,30,VFS), REGNUM(s,31,VFS),
-
-  REGSET(S,VFS),
-  REGNUM(S,16,VFS), REGNUM(S,17,VFS), REGNUM(S,18,VFS), REGNUM(S,19,VFS),
-  REGNUM(S,20,VFS), REGNUM(S,21,VFS), REGNUM(S,22,VFS), REGNUM(S,23,VFS),
-  REGNUM(S,24,VFS), REGNUM(S,25,VFS), REGNUM(S,26,VFS), REGNUM(S,27,VFS),
-  REGNUM(S,28,VFS), REGNUM(S,29,VFS), REGNUM(S,30,VFS), REGNUM(S,31,VFS),
+  REGSET(s,VFS),  REGSET(S,VFS),
+  REGSETH(s,VFS), REGSETH(S,VFS),
  
    /* VFP DP Registers. */
-  REGSET(d,VFD), REGSET(D,VFS),
+  REGSET(d,VFD),  REGSET(D,VFD),
+  /* Extra Neon DP registers.  */
+  REGSETH(d,VFD), REGSETH(D,VFD),
+
+  /* Neon QP registers.  */
+  REGSET2(q,NQ),  REGSET2(Q,NQ),
  
    /* VFP control registers.  */
    REGDEF(fpsid,0,VFC), REGDEF(fpscr,1,VFC), REGDEF(fpexc,8,VFC),
@@ -8971,6 +12435,30 @@ static struct asm_barrier_opt barrier_opt_names[] =
  #define UF(mnem, op, nops, ops, ae)    \
    { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0, ARM_VARIANT, 0, do_##ae, NULL }
  
+/* Neon data-processing. ARM versions are unconditional with cond=0xf.
+   The Thumb and ARM variants are mostly the same (bits 0-23 and 24/28), so we
+   use the same encoding function for each.  */
+#define NUF(mnem, op, nops, ops, enc)                                  \
+  { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0x##op,           \
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+
+/* Neon data processing, version which indirects through neon_enc_tab for
+   the various overloaded versions of opcodes.  */
+#define nUF(mnem, op, nops, ops, enc)                                  \
+  { #mnem, OPS##nops ops, OT_unconditionalF, N_MNEM_##op, N_MNEM_##op, \
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+
+/* Neon insn with conditional suffix for the ARM version, non-overloaded
+   version.  */
+#define NCE(mnem, op, nops, ops, enc)                                  \
+  { #mnem, OPS##nops ops, OT_csuffix, 0x##op, 0x##op, ARM_VARIANT,     \
+    THUMB_VARIANT, do_##enc, do_##enc }
+
+/* Neon insn with conditional suffix for the ARM version, overloaded types.  */
+#define nCE(mnem, op, nops, ops, enc)                                  \
+  { #mnem, OPS##nops ops, OT_csuffix, N_MNEM_##op, N_MNEM_##op,                \
+    ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+
  #define do_0 0
  
  /* Thumb-only, unconditional.  */
@@ -9911,13 +13399,13 @@ static const struct asm_opcode insns[] =
  #undef ARM_VARIANT
  #define ARM_VARIANT &fpu_vfp_ext_v1 /* VFP V1 (Double precision).  */
    /* Moves and type conversions.  */
- cCE(fcpyd,    eb00b40, 2, (RVD, RVD),       rd_rm),
+ cCE(fcpyd,    eb00b40, 2, (RVD, RVD),       vfp_dp_rd_rm),
   cCE(fcvtds,   eb70ac0, 2, (RVD, RVS),       vfp_dp_sp_cvt),
   cCE(fcvtsd,   eb70bc0, 2, (RVS, RVD),       vfp_sp_dp_cvt),
- cCE(fmdhr,    e200b10, 2, (RVD, RR),        rn_rd),
- cCE(fmdlr,    e000b10, 2, (RVD, RR),        rn_rd),
- cCE(fmrdh,    e300b10, 2, (RR, RVD),        rd_rn),
- cCE(fmrdl,    e100b10, 2, (RR, RVD),        rd_rn),
+ cCE(fmdhr,    e200b10, 2, (RVD, RR),        vfp_dp_rn_rd),
+ cCE(fmdlr,    e000b10, 2, (RVD, RR),        vfp_dp_rn_rd),
+ cCE(fmrdh,    e300b10, 2, (RR, RVD),        vfp_dp_rd_rn),
+ cCE(fmrdl,    e100b10, 2, (RR, RVD),        vfp_dp_rd_rn),
   cCE(fsitod,   eb80bc0, 2, (RVD, RVS),       vfp_dp_sp_cvt),
   cCE(fuitod,   eb80b40, 2, (RVD, RVS),       vfp_dp_sp_cvt),
   cCE(ftosid,   ebd0b40, 2, (RVS, RVD),       vfp_sp_dp_cvt),
@@ -9938,34 +13426,327 @@ static const struct asm_opcode insns[] =
   cCE(fstmfdd,  d200b00, 2, (RRw, VRDLST),    vfp_dp_ldstmdb),
  
    /* Monadic operations.  */
- cCE(fabsd,    eb00bc0, 2, (RVD, RVD),       rd_rm),
- cCE(fnegd,    eb10b40, 2, (RVD, RVD),       rd_rm),
- cCE(fsqrtd,   eb10bc0, 2, (RVD, RVD),       rd_rm),
+ cCE(fabsd,    eb00bc0, 2, (RVD, RVD),       vfp_dp_rd_rm),
+ cCE(fnegd,    eb10b40, 2, (RVD, RVD),       vfp_dp_rd_rm),
+ cCE(fsqrtd,   eb10bc0, 2, (RVD, RVD),       vfp_dp_rd_rm),
  
    /* Dyadic operations.         */
- cCE(faddd,    e300b00, 3, (RVD, RVD, RVD),  rd_rn_rm),
- cCE(fsubd,    e300b40, 3, (RVD, RVD, RVD),  rd_rn_rm),
- cCE(fmuld,    e200b00, 3, (RVD, RVD, RVD),  rd_rn_rm),
- cCE(fdivd,    e800b00, 3, (RVD, RVD, RVD),  rd_rn_rm),
- cCE(fmacd,    e000b00, 3, (RVD, RVD, RVD),  rd_rn_rm),
- cCE(fmscd,    e100b00, 3, (RVD, RVD, RVD),  rd_rn_rm),
- cCE(fnmuld,   e200b40, 3, (RVD, RVD, RVD),  rd_rn_rm),
- cCE(fnmacd,   e000b40, 3, (RVD, RVD, RVD),  rd_rn_rm),
- cCE(fnmscd,   e100b40, 3, (RVD, RVD, RVD),  rd_rn_rm),
+ cCE(faddd,    e300b00, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE(fsubd,    e300b40, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE(fmuld,    e200b00, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE(fdivd,    e800b00, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE(fmacd,    e000b00, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE(fmscd,    e100b00, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE(fnmuld,   e200b40, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE(fnmacd,   e000b40, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
+ cCE(fnmscd,   e100b40, 3, (RVD, RVD, RVD),  vfp_dp_rd_rn_rm),
  
    /* Comparisons.  */
- cCE(fcmpd,    eb40b40, 2, (RVD, RVD),       rd_rm),
- cCE(fcmpzd,   eb50b40, 1, (RVD),            rd),
- cCE(fcmped,   eb40bc0, 2, (RVD, RVD),       rd_rm),
- cCE(fcmpezd,  eb50bc0, 1, (RVD),            rd),
+ cCE(fcmpd,    eb40b40, 2, (RVD, RVD),       vfp_dp_rd_rm),
+ cCE(fcmpzd,   eb50b40, 1, (RVD),            vfp_dp_rd),
+ cCE(fcmped,   eb40bc0, 2, (RVD, RVD),       vfp_dp_rd_rm),
+ cCE(fcmpezd,  eb50bc0, 1, (RVD),            vfp_dp_rd),
  
  #undef ARM_VARIANT
  #define ARM_VARIANT &fpu_vfp_ext_v2
   cCE(fmsrr,    c400a10, 3, (VRSLST, RR, RR), vfp_sp2_from_reg2),
   cCE(fmrrs,    c500a10, 3, (RR, RR, VRSLST), vfp_reg2_from_sp2),
- cCE(fmdrr,    c400b10, 3, (RVD, RR, RR),    rm_rd_rn),
- cCE(fmrrd,    c500b10, 3, (RR, RR, RVD),    rd_rn_rm),
+ cCE(fmdrr,    c400b10, 3, (RVD, RR, RR),    vfp_dp_rm_rd_rn),
+ cCE(fmrrd,    c500b10, 3, (RR, RR, RVD),    vfp_dp_rd_rn_rm),
+
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_neon_ext_v1
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_neon_ext_v1
+  /* Data processing with three registers of the same length.  */
+  /* integer ops, valid types S8 S16 S32 U8 U16 U32.  */
+ NUF(vaba,      0000710, 3, (RNDQ, RNDQ,  RNDQ), neon_dyadic_i_su),
+ NUF(vabaq,     0000710, 3, (RNQ,  RNQ,   RNQ),  neon_dyadic_i_su),
+ NUF(vhadd,     0000000, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
+ NUF(vhaddq,    0000000, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
+ NUF(vrhadd,    0000100, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
+ NUF(vrhaddq,   0000100, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
+ NUF(vhsub,     0000200, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
+ NUF(vhsubq,    0000200, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i_su),
+  /* integer ops, valid types S8 S16 S32 S64 U8 U16 U32 U64.  */
+ NUF(vqadd,     0000010, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
+ NUF(vqaddq,    0000010, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i64_su),
+ NUF(vqsub,     0000210, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
+ NUF(vqsubq,    0000210, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i64_su),
+ NUF(vrshl,     0000500, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
+ NUF(vrshlq,    0000500, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i64_su),
+ NUF(vqrshl,    0000510, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
+ NUF(vqrshlq,   0000510, 3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_i64_su),
+  /* If not immediate, fall back to neon_dyadic_i64_su.
+     shl_imm should accept I8 I16 I32 I64,
+     qshl_imm should accept S8 S16 S32 S64 U8 U16 U32 U64.  */
+ nUF(vshl,      vshl,    3, (RNDQ, oRNDQ, RNDQ_I63b), neon_shl_imm),
+ nUF(vshlq,     vshl,    3, (RNQ,  oRNQ,  RNDQ_I63b), neon_shl_imm),
+ nUF(vqshl,     vqshl,   3, (RNDQ, oRNDQ, RNDQ_I63b), neon_qshl_imm),
+ nUF(vqshlq,    vqshl,   3, (RNQ,  oRNQ,  RNDQ_I63b), neon_qshl_imm),
+  /* Logic ops, types optional & ignored.  */
+ nUF(vand,      vand,    2, (RNDQ, NILO),        neon_logic),
+ nUF(vandq,     vand,    2, (RNQ,  NILO),        neon_logic),
+ nUF(vbic,      vbic,    2, (RNDQ, NILO),        neon_logic),
+ nUF(vbicq,     vbic,    2, (RNQ,  NILO),        neon_logic),
+ nUF(vorr,      vorr,    2, (RNDQ, NILO),        neon_logic),
+ nUF(vorrq,     vorr,    2, (RNQ,  NILO),        neon_logic),
+ nUF(vorn,      vorn,    2, (RNDQ, NILO),        neon_logic),
+ nUF(vornq,     vorn,    2, (RNQ,  NILO),        neon_logic),
+ nUF(veor,      veor,    3, (RNDQ, oRNDQ, RNDQ), neon_logic),
+ nUF(veorq,     veor,    3, (RNQ,  oRNQ,  RNQ),  neon_logic),
+  /* Bitfield ops, untyped.  */
+ NUF(vbsl,      1100110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
+ NUF(vbslq,     1100110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
+ NUF(vbit,      1200110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
+ NUF(vbitq,     1200110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
+ NUF(vbif,      1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
+ NUF(vbifq,     1300110, 3, (RNQ,  RNQ,  RNQ),  neon_bitfield),
+  /* Int and float variants, types S8 S16 S32 U8 U16 U32 F32.  */
+ nUF(vabd,      vabd,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
+ nUF(vabdq,     vabd,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
+ nUF(vmax,      vmax,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
+ nUF(vmaxq,     vmax,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
+ nUF(vmin,      vmin,    3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
+ nUF(vminq,     vmin,    3, (RNQ,  oRNQ,  RNQ),  neon_dyadic_if_su),
+  /* Comparisons. Types S8 S16 S32 U8 U16 U32 F32. Non-immediate versions fall
+     back to neon_dyadic_if_su.  */
+ nUF(vcge,      vcge,    3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp),
+ nUF(vcgeq,     vcge,    3, (RNQ,  oRNQ,  RNDQ_I0), neon_cmp),
+ nUF(vcgt,      vcgt,    3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp),
+ nUF(vcgtq,     vcgt,    3, (RNQ,  oRNQ,  RNDQ_I0), neon_cmp),
+ nUF(vclt,      vclt,    3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp_inv),
+ nUF(vcltq,     vclt,    3, (RNQ,  oRNQ,  RNDQ_I0), neon_cmp_inv),
+ nUF(vcle,      vcle,    3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp_inv),
+ nUF(vcleq,     vcle,    3, (RNQ,  oRNQ,  RNDQ_I0), neon_cmp_inv),
+  /* Comparison. Type I8 I16 I32 F32. Non-immediate -> neon_dyadic_if_i.  */
+ nUF(vceq,      vceq,    3, (RNDQ, oRNDQ, RNDQ_I0), neon_ceq),
+ nUF(vceqq,     vceq,    3, (RNQ,  oRNQ,  RNDQ_I0), neon_ceq),
+  /* Types S8 S16 S32 U8 U16 U32 F32 (as for vmax/vmin), D registers only.  */
+ nUF(vpmax,     vpmax,   3, (RND, oRND, RND), neon_dyadic_if_su_d),
+ nUF(vpmin,     vpmin,   3, (RND, oRND, RND), neon_dyadic_if_su_d),
+  /* Int and float variants, signedness unimportant.  */
+  /* If not scalar, fall back to neon_dyadic_if_i.  */
+ nUF(vmla,      vmla,    3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mac_maybe_scalar),
+ nUF(vmlaq,     vmla,    3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_mac_maybe_scalar),
+ nUF(vmls,      vmls,    3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mac_maybe_scalar),
+ nUF(vmlsq,     vmls,    3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_mac_maybe_scalar),
+ nUF(vpadd,     vpadd,   3, (RND,  oRND,  RND),       neon_dyadic_if_i_d),
+  /* Add/sub take types I8 I16 I32 I64 F32.  */
+ nUF(vadd,      vadd,    3, (RNDQ, oRNDQ, RNDQ), neon_addsub_if_i),
+ nUF(vaddq,     vadd,    3, (RNQ,  oRNQ,  RNQ),  neon_addsub_if_i),
+ nUF(vsub,      vsub,    3, (RNDQ, oRNDQ, RNDQ), neon_addsub_if_i),
+ nUF(vsubq,     vsub,    3, (RNQ,  oRNQ,  RNQ),  neon_addsub_if_i),
+  /* vtst takes sizes 8, 16, 32.  */
+ NUF(vtst,      0000810, 3, (RNDQ, oRNDQ, RNDQ), neon_tst),
+ NUF(vtstq,     0000810, 3, (RNQ,  oRNQ,  RNQ),  neon_tst),
+  /* VMUL takes I8 I16 I32 F32 P8.  */
+ nUF(vmul,      vmul,    3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mul),
+ nUF(vmulq,     vmul,    3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_mul),
+  /* VQD{R}MULH takes S16 S32.  */
+ nUF(vqdmulh,   vqdmulh,  3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
+ nUF(vqdmulhq,  vqdmulh,  3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qdmulh),
+ nUF(vqrdmulh,  vqrdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
+ nUF(vqrdmulhq, vqrdmulh, 3, (RNQ,  oRNQ,  RNDQ_RNSC), neon_qdmulh),
+ NUF(vacge,     0000e10,  3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute),
+ NUF(vacgeq,    0000e10,  3, (RNQ,  oRNQ,  RNQ),  neon_fcmp_absolute),
+ NUF(vacgt,     0200e10,  3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute),
+ NUF(vacgtq,    0200e10,  3, (RNQ,  oRNQ,  RNQ),  neon_fcmp_absolute),
+ NUF(vaclt,     0200e10,  3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute_inv),
+ NUF(vacltq,    0200e10,  3, (RNQ,  oRNQ,  RNQ),  neon_fcmp_absolute_inv),
+ NUF(vacle,     0000e10,  3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute_inv),
+ NUF(vacleq,    0000e10,  3, (RNQ,  oRNQ,  RNQ),  neon_fcmp_absolute_inv),
+ NUF(vrecps,    0000f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
+ NUF(vrecpsq,   0000f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
+ NUF(vrsqrts,   0200f10,  3, (RNDQ, oRNDQ, RNDQ), neon_step),
+ NUF(vrsqrtsq,  0200f10,  3, (RNQ,  oRNQ,  RNQ),  neon_step),
+
+  /* Two address, int/float. Types S8 S16 S32 F32.  */
+ NUF(vabs,      1b10300, 2, (RNDQ, RNDQ),     neon_abs_neg),
+ NUF(vabsq,     1b10300, 2, (RNQ,  RNQ),      neon_abs_neg),
+ NUF(vneg,      1b10380, 2, (RNDQ, RNDQ),     neon_abs_neg),
+ NUF(vnegq,     1b10380, 2, (RNQ,  RNQ),      neon_abs_neg),
+
+  /* Data processing with two registers and a shift amount.  */
+  /* Right shifts, and variants with rounding.
+     Types accepted S8 S16 S32 S64 U8 U16 U32 U64.  */
+ NUF(vshr,      0800010, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm),
+ NUF(vshrq,     0800010, 3, (RNQ,  oRNQ,  I64z), neon_rshift_round_imm),
+ NUF(vrshr,     0800210, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm),
+ NUF(vrshrq,    0800210, 3, (RNQ,  oRNQ,  I64z), neon_rshift_round_imm),
+ NUF(vsra,      0800110, 3, (RNDQ, oRNDQ, I64),  neon_rshift_round_imm),
+ NUF(vsraq,     0800110, 3, (RNQ,  oRNQ,  I64),  neon_rshift_round_imm),
+ NUF(vrsra,     0800310, 3, (RNDQ, oRNDQ, I64),  neon_rshift_round_imm),
+ NUF(vrsraq,    0800310, 3, (RNQ,  oRNQ,  I64),  neon_rshift_round_imm),
+  /* Shift and insert. Sizes accepted 8 16 32 64.  */
+ NUF(vsli,      1800510, 3, (RNDQ, oRNDQ, I63), neon_sli),
+ NUF(vsliq,     1800510, 3, (RNQ,  oRNQ,  I63), neon_sli),
+ NUF(vsri,      1800410, 3, (RNDQ, oRNDQ, I64), neon_sri),
+ NUF(vsriq,     1800410, 3, (RNQ,  oRNQ,  I64), neon_sri),
+  /* QSHL{U} immediate accepts S8 S16 S32 S64 U8 U16 U32 U64.  */
+ NUF(vqshlu,    1800610, 3, (RNDQ, oRNDQ, I63), neon_qshlu_imm),
+ NUF(vqshluq,   1800610, 3, (RNQ,  oRNQ,  I63), neon_qshlu_imm),
+  /* Right shift immediate, saturating & narrowing, with rounding variants.
+     Types accepted S16 S32 S64 U16 U32 U64.  */
+ NUF(vqshrn,    0800910, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow),
+ NUF(vqrshrn,   0800950, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow),
+  /* As above, unsigned. Types accepted S16 S32 S64.  */
+ NUF(vqshrun,   0800810, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow_u),
+ NUF(vqrshrun,  0800850, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow_u),
+  /* Right shift narrowing. Types accepted I16 I32 I64.  */
+ NUF(vshrn,     0800810, 3, (RND, RNQ, I32z), neon_rshift_narrow),
+ NUF(vrshrn,    0800850, 3, (RND, RNQ, I32z), neon_rshift_narrow),
+  /* Special case. Types S8 S16 S32 U8 U16 U32. Handles max shift variant.  */
+ nUF(vshll,     vshll,   3, (RNQ, RND, I32),  neon_shll),
+  /* CVT with optional immediate for fixed-point variant.  */
+ nUF(vcvt,      vcvt,    3, (RNDQ, RNDQ, oI32b), neon_cvt),
+ nUF(vcvtq,     vcvt,    3, (RNQ,  RNQ,  oI32b), neon_cvt),
+
+  /* One register and an immediate value. All encoding special-cased!  */
+ NCE(vmov,      0,       1, (VMOV),             neon_mov),
+ NCE(vmovq,     0,       1, (VMOV),             neon_mov),
+ nUF(vmvn,      vmvn,    2, (RNDQ, RNDQ_IMVNb), neon_mvn),
+ nUF(vmvnq,     vmvn,    2, (RNQ,  RNDQ_IMVNb), neon_mvn),
+
+  /* Data processing, three registers of different lengths.  */
+  /* Dyadic, long insns. Types S8 S16 S32 U8 U16 U32.  */
+ NUF(vabal,     0800500, 3, (RNQ, RND, RND),  neon_abal),
+ NUF(vabdl,     0800700, 3, (RNQ, RND, RND),  neon_dyadic_long),
+ NUF(vaddl,     0800000, 3, (RNQ, RND, RND),  neon_dyadic_long),
+ NUF(vsubl,     0800200, 3, (RNQ, RND, RND),  neon_dyadic_long),
+  /* If not scalar, fall back to neon_dyadic_long.
+     Vector types as above, scalar types S16 S32 U16 U32.  */
+ nUF(vmlal,     vmlal,   3, (RNQ, RND, RND_RNSC), neon_mac_maybe_scalar_long),
+ nUF(vmlsl,     vmlsl,   3, (RNQ, RND, RND_RNSC), neon_mac_maybe_scalar_long),
+  /* Dyadic, widening insns. Types S8 S16 S32 U8 U16 U32.  */
+ NUF(vaddw,     0800100, 3, (RNQ, oRNQ, RND), neon_dyadic_wide),
+ NUF(vsubw,     0800300, 3, (RNQ, oRNQ, RND), neon_dyadic_wide),
+  /* Dyadic, narrowing insns. Types I16 I32 I64.  */
+ NUF(vaddhn,    0800400, 3, (RND, RNQ, RNQ),  neon_dyadic_narrow),
+ NUF(vraddhn,   1800400, 3, (RND, RNQ, RNQ),  neon_dyadic_narrow),
+ NUF(vsubhn,    0800600, 3, (RND, RNQ, RNQ),  neon_dyadic_narrow),
+ NUF(vrsubhn,   1800600, 3, (RND, RNQ, RNQ),  neon_dyadic_narrow),
+  /* Saturating doubling multiplies. Types S16 S32.  */
+ nUF(vqdmlal,   vqdmlal, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long),
+ nUF(vqdmlsl,   vqdmlsl, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long),
+ nUF(vqdmull,   vqdmull, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long),
+  /* VMULL. Vector types S8 S16 S32 U8 U16 U32 P8, scalar types
+     S16 S32 U16 U32.  */
+ nUF(vmull,     vmull,   3, (RNQ, RND, RND_RNSC), neon_vmull),
+
+  /* Extract. Size 8.  */
+ NUF(vext,      0b00000, 4, (RNDQ, oRNDQ, RNDQ, I7), neon_ext),
+ NUF(vextq,     0b00000, 4, (RNQ,  oRNQ,  RNQ,  I7), neon_ext),
+
+  /* Two registers, miscellaneous.  */
+  /* Reverse. Sizes 8 16 32 (must be < size in opcode).  */
+ NUF(vrev64,    1b00000, 2, (RNDQ, RNDQ),     neon_rev),
+ NUF(vrev64q,   1b00000, 2, (RNQ,  RNQ),      neon_rev),
+ NUF(vrev32,    1b00080, 2, (RNDQ, RNDQ),     neon_rev),
+ NUF(vrev32q,   1b00080, 2, (RNQ,  RNQ),      neon_rev),
+ NUF(vrev16,    1b00100, 2, (RNDQ, RNDQ),     neon_rev),
+ NUF(vrev16q,   1b00100, 2, (RNQ,  RNQ),      neon_rev),
+  /* Vector replicate. Sizes 8 16 32.  */
+ nCE(vdup,      vdup,    2, (RNDQ, RR_RNSC),  neon_dup),
+ nCE(vdupq,     vdup,    2, (RNQ,  RR_RNSC),  neon_dup),
+  /* VMOVL. Types S8 S16 S32 U8 U16 U32.  */
+ NUF(vmovl,     0800a10, 2, (RNQ, RND),       neon_movl),
+  /* VMOVN. Types I16 I32 I64.  */
+ nUF(vmovn,     vmovn,   2, (RND, RNQ),       neon_movn),
+  /* VQMOVN. Types S16 S32 S64 U16 U32 U64.  */
+ nUF(vqmovn,    vqmovn,  2, (RND, RNQ),       neon_qmovn),
+  /* VQMOVUN. Types S16 S32 S64.  */
+ nUF(vqmovun,   vqmovun, 2, (RND, RNQ),       neon_qmovun),
+  /* VZIP / VUZP. Sizes 8 16 32.  */
+ NUF(vzip,      1b20180, 2, (RNDQ, RNDQ),     neon_zip_uzp),
+ NUF(vzipq,     1b20180, 2, (RNQ,  RNQ),      neon_zip_uzp),
+ NUF(vuzp,      1b20100, 2, (RNDQ, RNDQ),     neon_zip_uzp),
+ NUF(vuzpq,     1b20100, 2, (RNQ,  RNQ),      neon_zip_uzp),
+  /* VQABS / VQNEG. Types S8 S16 S32.  */
+ NUF(vqabs,     1b00700, 2, (RNDQ, RNDQ),     neon_sat_abs_neg),
+ NUF(vqabsq,    1b00700, 2, (RNQ,  RNQ),      neon_sat_abs_neg),
+ NUF(vqneg,     1b00780, 2, (RNDQ, RNDQ),     neon_sat_abs_neg),
+ NUF(vqnegq,    1b00780, 2, (RNQ,  RNQ),      neon_sat_abs_neg),
+  /* Pairwise, lengthening. Types S8 S16 S32 U8 U16 U32.  */
+ NUF(vpadal,    1b00600, 2, (RNDQ, RNDQ),     neon_pair_long),
+ NUF(vpadalq,   1b00600, 2, (RNQ,  RNQ),      neon_pair_long),
+ NUF(vpaddl,    1b00200, 2, (RNDQ, RNDQ),     neon_pair_long),
+ NUF(vpaddlq,   1b00200, 2, (RNQ,  RNQ),      neon_pair_long),
+  /* Reciprocal estimates. Types U32 F32.  */
+ NUF(vrecpe,    1b30400, 2, (RNDQ, RNDQ),     neon_recip_est),
+ NUF(vrecpeq,   1b30400, 2, (RNQ,  RNQ),      neon_recip_est),
+ NUF(vrsqrte,   1b30480, 2, (RNDQ, RNDQ),     neon_recip_est),
+ NUF(vrsqrteq,  1b30480, 2, (RNQ,  RNQ),      neon_recip_est),
+  /* VCLS. Types S8 S16 S32.  */
+ NUF(vcls,      1b00400, 2, (RNDQ, RNDQ),     neon_cls),
+ NUF(vclsq,     1b00400, 2, (RNQ,  RNQ),      neon_cls),
+  /* VCLZ. Types I8 I16 I32.  */
+ NUF(vclz,      1b00480, 2, (RNDQ, RNDQ),     neon_clz),
+ NUF(vclzq,     1b00480, 2, (RNQ,  RNQ),      neon_clz),
+  /* VCNT. Size 8.  */
+ NUF(vcnt,      1b00500, 2, (RNDQ, RNDQ),     neon_cnt),
+ NUF(vcntq,     1b00500, 2, (RNQ,  RNQ),      neon_cnt),
+  /* Two address, untyped.  */
+ NUF(vswp,      1b20000, 2, (RNDQ, RNDQ),     neon_swp),
+ NUF(vswpq,     1b20000, 2, (RNQ,  RNQ),      neon_swp),
+  /* VTRN. Sizes 8 16 32.  */
+ nUF(vtrn,      vtrn,    2, (RNDQ, RNDQ),     neon_trn),
+ nUF(vtrnq,     vtrn,    2, (RNQ,  RNQ),      neon_trn),
+
+  /* Table lookup. Size 8.  */
+ NUF(vtbl,      1b00800, 3, (RND, NRDLST, RND), neon_tbl_tbx),
+ NUF(vtbx,      1b00840, 3, (RND, NRDLST, RND), neon_tbl_tbx),
+
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_v3_or_neon_ext
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_v3_or_neon_ext
+
+  /* Load/store instructions. Available in Neon or VFPv3.  */
+ NCE(vldm,      c900b00, 2, (RRw, NRDLST),    neon_ldm_stm),
+ NCE(vldmia,    c900b00, 2, (RRw, NRDLST),    neon_ldm_stm),
+ NCE(vldmdb,    d100b00, 2, (RRw, NRDLST),    neon_ldm_stm),
+ NCE(vstm,      c800b00, 2, (RRw, NRDLST),    neon_ldm_stm),
+ NCE(vstmia,    c800b00, 2, (RRw, NRDLST),    neon_ldm_stm),
+ NCE(vstmdb,    d000b00, 2, (RRw, NRDLST),    neon_ldm_stm),
+ NCE(vldr,      d100b00, 2, (RND, ADDR),      neon_ldr_str),
+ NCE(vstr,      d000b00, 2, (RND, ADDR),      neon_ldr_str),
+
+  /* Neon element/structure load/store.  */
+ nUF(vld1,      vld1,    2, (NSTRLST, ADDR),  neon_ldx_stx),
+ nUF(vst1,      vst1,    2, (NSTRLST, ADDR),  neon_ldx_stx),
+ nUF(vld2,      vld2,    2, (NSTRLST, ADDR),  neon_ldx_stx),
+ nUF(vst2,      vst2,    2, (NSTRLST, ADDR),  neon_ldx_stx),
+ nUF(vld3,      vld3,    2, (NSTRLST, ADDR),  neon_ldx_stx),
+ nUF(vst3,      vst3,    2, (NSTRLST, ADDR),  neon_ldx_stx),
+ nUF(vld4,      vld4,    2, (NSTRLST, ADDR),  neon_ldx_stx),
+ nUF(vst4,      vst4,    2, (NSTRLST, ADDR),  neon_ldx_stx),
+
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_ext_v3
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_ext_v3
+
+ cCE(fconsts,   eb00a00, 2, (RVS, I255),      vfp_sp_const),
+ cCE(fconstd,   eb00b00, 2, (RVD, I255),      vfp_dp_const),
+ cCE(fshtos,    eba0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE(fshtod,    eba0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
+ cCE(fsltos,    eba0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE(fsltod,    eba0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
+ cCE(fuhtos,    ebb0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE(fuhtod,    ebb0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
+ cCE(fultos,    ebb0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE(fultod,    ebb0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
+ cCE(ftoshs,    ebe0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE(ftoshd,    ebe0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
+ cCE(ftosls,    ebe0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE(ftosld,    ebe0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
+ cCE(ftouhs,    ebf0a40, 2, (RVS, I16z),      vfp_sp_conv_16),
+ cCE(ftouhd,    ebf0b40, 2, (RVD, I16z),      vfp_dp_conv_16),
+ cCE(ftouls,    ebf0ac0, 2, (RVS, I32),       vfp_sp_conv_32),
+ cCE(ftould,    ebf0bc0, 2, (RVD, I32),       vfp_dp_conv_32),
  
+#undef THUMB_VARIANT
  #undef ARM_VARIANT
  #define ARM_VARIANT &arm_cext_xscale /* Intel XScale extensions.        */
   cCE(mia,      e200010, 3, (RXA, RRnpc, RRnpc), xsc_mia),
@@ -10236,6 +14017,10 @@ static const struct asm_opcode insns[] =
  #undef UE
  #undef UF
  #undef UT
+#undef NUF
+#undef nUF
+#undef NCE
+#undef nCE
  #undef OPS0
  #undef OPS1
  #undef OPS2
@@ -11147,7 +14932,7 @@ create_unwind_entry (int have_data)
  int
  tc_arm_regname_to_dw2regnum (const char *regname)
  {
-  int reg = arm_reg_parse ((char **) &regname, REG_TYPE_RN);
+  int reg = arm_reg_parse ((char **) &regname, REG_TYPE_RN, NULL);
  
    if (reg == FAIL)
      return -1;
@@ -13343,7 +17128,9 @@ static const struct arm_cpu_option_table arm_cpus[] =
    {"arm1156t2f-s",     ARM_ARCH_V6T2,   FPU_ARCH_VFP_V2, NULL},
    {"arm1176jz-s",      ARM_ARCH_V6ZK,   FPU_NONE,        NULL},
    {"arm1176jzf-s",     ARM_ARCH_V6ZK,   FPU_ARCH_VFP_V2, NULL},
-  {"cortex-a8",                ARM_ARCH_V7A,    FPU_ARCH_VFP_V2, NULL},
+  {"cortex-a8",                ARM_ARCH_V7A,    ARM_FEATURE(0, FPU_VFP_V3
+                                                        | FPU_NEON_EXT_V1),
+                                                          NULL},
    {"cortex-r4",                ARM_ARCH_V7R,    FPU_NONE,        NULL},
    {"cortex-m3",                ARM_ARCH_V7M,    FPU_NONE,        NULL},
    /* ??? XSCALE is really an architecture.  */
@@ -13433,6 +17220,7 @@ static const struct arm_option_cpu_value_table arm_fpus[] =
    {"softvfp+vfp",      FPU_ARCH_VFP_V2},
    {"vfp",              FPU_ARCH_VFP_V2},
    {"vfp9",             FPU_ARCH_VFP_V2},
+  {"vfp3",              FPU_ARCH_VFP_V3},
    {"vfp10",            FPU_ARCH_VFP_V2},
    {"vfp10-r0",         FPU_ARCH_VFP_V1},
    {"vfpxd",            FPU_ARCH_VFP_V1xD},
@@ -13441,6 +17229,7 @@ static const struct arm_option_cpu_value_table arm_fpus[] =
    {"arm1136jfs",       FPU_ARCH_VFP_V2},
    {"arm1136jf-s",      FPU_ARCH_VFP_V2},
    {"maverick",         FPU_ARCH_MAVERICK},
+  {"neon",              FPU_ARCH_VFP_V3_PLUS_NEON_V1},
    {NULL,               ARM_ARCH_NONE}
  };
  
@@ -13836,7 +17625,7 @@ aeabi_set_public_attributes (void)
    ARM_MERGE_FEATURE_SETS (flags, arm_arch_used, thumb_arch_used);
    ARM_MERGE_FEATURE_SETS (flags, flags, *mfpu_opt);
    ARM_MERGE_FEATURE_SETS (flags, flags, selected_cpu);
-  
+    
    tmp = flags;
    arch = 0;
    for (p = cpu_arch_ver; p->val; p++)
@@ -13881,16 +17670,25 @@ aeabi_set_public_attributes (void)
      elf32_arm_add_eabi_attr_int (stdoutput, 9,
         ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_arch_t2) ? 2 : 1);
    /* Tag_VFP_arch.  */
-  if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_arch_vfp_v2)
-      || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_arch_vfp_v2))
+  if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v3)
+      || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v3))
+    elf32_arm_add_eabi_attr_int (stdoutput, 10, 3);
+  else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v2)
+           || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v2))
      elf32_arm_add_eabi_attr_int (stdoutput, 10, 2);
-  else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_arch_vfp_v1)
-          || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_arch_vfp_v1))
+  else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v1)
+           || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v1)
+           || ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v1xd)
+           || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v1xd))
      elf32_arm_add_eabi_attr_int (stdoutput, 10, 1);
    /* Tag_WMMX_arch.  */
    if (ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_cext_iwmmxt)
        || ARM_CPU_HAS_FEATURE (arm_arch_used, arm_cext_iwmmxt))
      elf32_arm_add_eabi_attr_int (stdoutput, 11, 1);
+  /* Tag_NEON_arch.  */
+  if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_neon_ext_v1)
+      || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_neon_ext_v1))
+    elf32_arm_add_eabi_attr_int (stdoutput, 12, 1);
  }
  
  /* Add the .ARM.attributes section.  */
diff --git a/gas/testsuite/gas/arm/copro.d b/gas/testsuite/gas/arm/copro.d

index 5f5dd110e99a1f32aee5522e7fabf1829bd79b97..8fb657b9348d742e5ee8d4b4daef2061cb7d3a1a 100644 (file)
--- a/gas/testsuite/gas/arm/copro.d
+++ b/gas/testsuite/gas/arm/copro.d
@@ -31,7 +31,7 @@ Disassembly of section .text:
  0+054 <[^>]*> ecc52805         stcl    8, cr2, \[r5\], \{5\}
  0+058 <[^>]*> fcd61906         ldc2l   9, cr1, \[r6\], \{6\}
  0+05c <[^>]*> fcc70a07         stc2l   10, cr0, \[r7\], \{7\}
-0+060 <[^>]*> ecd88bff         ldcl    11, cr8, \[r8\], \{255\}
+0+060 <[^>]*> ecd88cff         ldcl    12, cr8, \[r8\], \{255\}
  0+064 <[^>]*> ecc99cfe         stcl    12, cr9, \[r9\], \{254\}
  0+068 <[^>]*> ec507d04         mrrc    13, 0, r7, r0, cr4
  0+06c <[^>]*> ec407e05         mcrr    14, 0, r7, r0, cr5
diff --git a/gas/testsuite/gas/arm/copro.s b/gas/testsuite/gas/arm/copro.s

index 334b000f4409a60c36f8fdd4e8cb616f82a712e0..e6976329c7561ef9cbe21682eac5645587e71bcf 100644 (file)
--- a/gas/testsuite/gas/arm/copro.s
+++ b/gas/testsuite/gas/arm/copro.s
@@ -33,7 +33,8 @@ bar:
          stcl    p8,  c2, [r5], {5}
          ldc2l   9,   c1, [r6], {6}
          stc2l   p10, c0, [r7], {7}
-        ldcl    11,  c8, [r8], {255}
+        @ using '11' below results in an (invalid) Neon vldmia instruction.
+        ldcl    12,  c8, [r8], {255}
          stcl    p12, c9, [r9], {254}
          mrrc    13,   0, r7, r0, cr4
          mcrr    p14,  0, r7, r0, cr5
diff --git a/gas/testsuite/gas/arm/neon-cond.d b/gas/testsuite/gas/arm/neon-cond.d

new file mode 100644 (file)

index 0000000..0b7d8ed
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-cond.d
@@ -0,0 +1,14 @@
+# name: Conditional Neon instructions
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> 0d943b00    vldreq  d3, \[r4\]
+0[0-9a-f]+ <[^>]+> be035b70    vmovlt\.16      d3\[1\], r5
+0[0-9a-f]+ <[^>]+> ac474b13    vmovge  d3, r4, r7
+0[0-9a-f]+ <[^>]+> 3c543b3e    vmovcc  r3, r4, d30
+0[0-9a-f]+ <[^>]+> 1e223b10    vmovne\.32      d2\[1\], r3
+0[0-9a-f]+ <[^>]+> 2c521b13    vmovcs  r1, r2, d3
+0[0-9a-f]+ <[^>]+> 3c421b14    vmovcc  d4, r1, r2
diff --git a/gas/testsuite/gas/arm/neon-cond.s b/gas/testsuite/gas/arm/neon-cond.s

new file mode 100644 (file)

index 0000000..8f62575
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-cond.s
@@ -0,0 +1,13 @@
+@ Test conditionally executed Neon instructions.
+
+       .arm
+       .text
+       .syntax unified
+
+       vldreq.32 d3,[r4]
+       vmovlt.16 d3[1], r5
+       vmovge d3, r4, r7
+       vmovcc r3, r4, d30
+       vmovne.32 d2[1],r3
+       vmovcs r1,r2,d3
+       vmovcc d4,r1,r2
diff --git a/gas/testsuite/gas/arm/neon-cov.d b/gas/testsuite/gas/arm/neon-cov.d

new file mode 100644 (file)

index 0000000..c2ef0eb
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-cov.d
@@ -0,0 +1,1263 @@
+# name: Neon instruction coverage
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> f2000750    vaba\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000750    vaba\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000710    vaba\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100750    vaba\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100750    vaba\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100710    vaba\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200750    vaba\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200750    vaba\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200710    vaba\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000750    vaba\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000750    vaba\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000710    vaba\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100750    vaba\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100750    vaba\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100710    vaba\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200750    vaba\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200750    vaba\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200710    vaba\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000040    vhadd\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000040    vhadd\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000000    vhadd\.s8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100040    vhadd\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100040    vhadd\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100000    vhadd\.s16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200040    vhadd\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200040    vhadd\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200000    vhadd\.s32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000040    vhadd\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000040    vhadd\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000000    vhadd\.u8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100040    vhadd\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100040    vhadd\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100000    vhadd\.u16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200040    vhadd\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200040    vhadd\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200000    vhadd\.u32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000140    vrhadd\.s8      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000140    vrhadd\.s8      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000100    vrhadd\.s8      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100140    vrhadd\.s16     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100140    vrhadd\.s16     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100100    vrhadd\.s16     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200140    vrhadd\.s32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200140    vrhadd\.s32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200100    vrhadd\.s32     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000140    vrhadd\.u8      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000140    vrhadd\.u8      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000100    vrhadd\.u8      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100140    vrhadd\.u16     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100140    vrhadd\.u16     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100100    vrhadd\.u16     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200140    vrhadd\.u32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200140    vrhadd\.u32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200100    vrhadd\.u32     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000240    vhsub\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000240    vhsub\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000200    vhsub\.s8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100240    vhsub\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100240    vhsub\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100200    vhsub\.s16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200240    vhsub\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200240    vhsub\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200200    vhsub\.s32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000240    vhsub\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000240    vhsub\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000200    vhsub\.u8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100240    vhsub\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100240    vhsub\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100200    vhsub\.u16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200240    vhsub\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200240    vhsub\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200200    vhsub\.u32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000050    vqadd\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000050    vqadd\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000010    vqadd\.s8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100050    vqadd\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100050    vqadd\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100010    vqadd\.s16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200050    vqadd\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200050    vqadd\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200010    vqadd\.s32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300050    vqadd\.s64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300050    vqadd\.s64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300010    vqadd\.s64      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000050    vqadd\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000050    vqadd\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000010    vqadd\.u8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100050    vqadd\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100050    vqadd\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100010    vqadd\.u16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200050    vqadd\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200050    vqadd\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200010    vqadd\.u32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300050    vqadd\.u64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300050    vqadd\.u64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300010    vqadd\.u64      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000250    vqsub\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000250    vqsub\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000210    vqsub\.s8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100250    vqsub\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100250    vqsub\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100210    vqsub\.s16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200250    vqsub\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200250    vqsub\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200210    vqsub\.s32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300250    vqsub\.s64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300250    vqsub\.s64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300210    vqsub\.s64      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000250    vqsub\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000250    vqsub\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000210    vqsub\.u8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100250    vqsub\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100250    vqsub\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100210    vqsub\.u16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200250    vqsub\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200250    vqsub\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200210    vqsub\.u32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300250    vqsub\.u64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300250    vqsub\.u64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300210    vqsub\.u64      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000540    vrshl\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000540    vrshl\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000500    vrshl\.s8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100540    vrshl\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100540    vrshl\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100500    vrshl\.s16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200540    vrshl\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200540    vrshl\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200500    vrshl\.s32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300540    vrshl\.s64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300540    vrshl\.s64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300500    vrshl\.s64      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000540    vrshl\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000540    vrshl\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000500    vrshl\.u8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100540    vrshl\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100540    vrshl\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100500    vrshl\.u16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200540    vrshl\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200540    vrshl\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200500    vrshl\.u32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300540    vrshl\.u64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300540    vrshl\.u64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300500    vrshl\.u64      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000550    vqrshl\.s8      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000550    vqrshl\.s8      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000510    vqrshl\.s8      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100550    vqrshl\.s16     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100550    vqrshl\.s16     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100510    vqrshl\.s16     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200550    vqrshl\.s32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200550    vqrshl\.s32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200510    vqrshl\.s32     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300550    vqrshl\.s64     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300550    vqrshl\.s64     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300510    vqrshl\.s64     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000550    vqrshl\.u8      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000550    vqrshl\.u8      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000510    vqrshl\.u8      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100550    vqrshl\.u16     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100550    vqrshl\.u16     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100510    vqrshl\.u16     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200550    vqrshl\.u32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200550    vqrshl\.u32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200510    vqrshl\.u32     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300550    vqrshl\.u64     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300550    vqrshl\.u64     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300510    vqrshl\.u64     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000440    vshl\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000440    vshl\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000400    vshl\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100440    vshl\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100440    vshl\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100400    vshl\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200440    vshl\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200440    vshl\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200400    vshl\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300440    vshl\.s64       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300440    vshl\.s64       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300400    vshl\.s64       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000440    vshl\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000440    vshl\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000400    vshl\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100440    vshl\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100440    vshl\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100400    vshl\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200440    vshl\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200440    vshl\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200400    vshl\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300440    vshl\.u64       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300440    vshl\.u64       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300400    vshl\.u64       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000450    vqshl\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000450    vqshl\.s8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000410    vqshl\.s8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100450    vqshl\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100450    vqshl\.s16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100410    vqshl\.s16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200450    vqshl\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200450    vqshl\.s32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200410    vqshl\.s32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300450    vqshl\.s64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300450    vqshl\.s64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300410    vqshl\.s64      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000450    vqshl\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000450    vqshl\.u8       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000410    vqshl\.u8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100450    vqshl\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100450    vqshl\.u16      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100410    vqshl\.u16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200450    vqshl\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200450    vqshl\.u32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200410    vqshl\.u32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300450    vqshl\.u64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300450    vqshl\.u64      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300410    vqshl\.u64      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2880550    vshl\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2880550    vshl\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2880510    vshl\.s8        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2900550    vshl\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2900550    vshl\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2900510    vshl\.s16       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2a00550    vshl\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2a00550    vshl\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2a00510    vshl\.s32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f28005d0    vshl\.s64       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f28005d0    vshl\.s64       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2800590    vshl\.s64       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2880750    vqshl\.s8       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2880750    vqshl\.s8       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2880710    vqshl\.s8       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2900750    vqshl\.s16      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2900750    vqshl\.s16      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2900710    vqshl\.s16      d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2a00750    vqshl\.s32      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2a00750    vqshl\.s32      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2a00710    vqshl\.s32      d0, d0, #0
+0[0-9a-f]+ <[^>]+> f28007d0    vqshl\.s64      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f28007d0    vqshl\.s64      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2800790    vqshl\.s64      d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3880750    vqshl\.u8       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880750    vqshl\.u8       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880710    vqshl\.u8       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3900750    vqshl\.u16      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900750    vqshl\.u16      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900710    vqshl\.u16      d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3a00750    vqshl\.u32      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00750    vqshl\.u32      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00710    vqshl\.u32      d0, d0, #0
+0[0-9a-f]+ <[^>]+> f38007d0    vqshl\.u64      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f38007d0    vqshl\.u64      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3800790    vqshl\.u64      d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2000150    vand    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000150    vand    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000110    vand    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100150    vbic    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100150    vbic    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100110    vbic    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200150    vorr    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200150    vorr    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200110    vorr    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300150    vorn    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300150    vorn    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300110    vorn    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000150    veor    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000150    veor    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000110    veor    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f387017f    vbic\.i32       q0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387017f    vbic\.i32       q0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387013f    vbic\.i32       d0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387037f    vbic\.i32       q0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387037f    vbic\.i32       q0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387033f    vbic\.i32       d0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387057f    vbic\.i32       q0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387057f    vbic\.i32       q0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387053f    vbic\.i32       d0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387077f    vbic\.i32       q0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387077f    vbic\.i32       q0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387073f    vbic\.i32       d0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387097f    vbic\.i16       q0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387097f    vbic\.i16       q0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387093f    vbic\.i16       d0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f3870b7f    vbic\.i16       q0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b7f    vbic\.i16       q0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b3f    vbic\.i16       d0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f387015f    vorr\.i32       q0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387015f    vorr\.i32       q0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387011f    vorr\.i32       d0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387035f    vorr\.i32       q0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387035f    vorr\.i32       q0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387031f    vorr\.i32       d0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387055f    vorr\.i32       q0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387055f    vorr\.i32       q0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387051f    vorr\.i32       d0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387075f    vorr\.i32       q0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387075f    vorr\.i32       q0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387071f    vorr\.i32       d0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387095f    vorr\.i16       q0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387095f    vorr\.i16       q0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387091f    vorr\.i16       d0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f3870b5f    vorr\.i16       q0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b5f    vorr\.i16       q0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b1f    vorr\.i16       d0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f387017f    vbic\.i32       q0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387017f    vbic\.i32       q0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387013f    vbic\.i32       d0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387037f    vbic\.i32       q0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387037f    vbic\.i32       q0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387033f    vbic\.i32       d0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387057f    vbic\.i32       q0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387057f    vbic\.i32       q0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387053f    vbic\.i32       d0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387077f    vbic\.i32       q0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387077f    vbic\.i32       q0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387073f    vbic\.i32       d0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387097f    vbic\.i16       q0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387097f    vbic\.i16       q0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387093f    vbic\.i16       d0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f3870b7f    vbic\.i16       q0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b7f    vbic\.i16       q0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b3f    vbic\.i16       d0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f387015f    vorr\.i32       q0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387015f    vorr\.i32       q0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387011f    vorr\.i32       d0, #255        ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387035f    vorr\.i32       q0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387035f    vorr\.i32       q0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387031f    vorr\.i32       d0, #65280      ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387055f    vorr\.i32       q0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387055f    vorr\.i32       q0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387051f    vorr\.i32       d0, #16711680   ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387075f    vorr\.i32       q0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387075f    vorr\.i32       q0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387071f    vorr\.i32       d0, #-16777216  ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387095f    vorr\.i16       q0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387095f    vorr\.i16       q0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387091f    vorr\.i16       d0, #255        ; 0x00ff
+0[0-9a-f]+ <[^>]+> f3870b5f    vorr\.i16       q0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b5f    vorr\.i16       q0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b1f    vorr\.i16       d0, #65280      ; 0xff00
+0[0-9a-f]+ <[^>]+> f3100150    vbsl    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100150    vbsl    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100110    vbsl    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200150    vbit    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200150    vbit    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200110    vbit    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300150    vbif    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300150    vbif    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300110    vbif    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000740    vabd\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000740    vabd\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000700    vabd\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100740    vabd\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100740    vabd\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100700    vabd\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200740    vabd\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200740    vabd\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200700    vabd\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000740    vabd\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000740    vabd\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000700    vabd\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100740    vabd\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100740    vabd\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100700    vabd\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200740    vabd\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200740    vabd\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200700    vabd\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200d40    vabd\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200d40    vabd\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200d00    vabd\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000640    vmax\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000640    vmax\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000600    vmax\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100640    vmax\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100640    vmax\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100600    vmax\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200640    vmax\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200640    vmax\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200600    vmax\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000640    vmax\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000640    vmax\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000600    vmax\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100640    vmax\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100640    vmax\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100600    vmax\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200640    vmax\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200640    vmax\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200600    vmax\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000f40    vmax\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000f40    vmax\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000f00    vmax\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000650    vmin\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000650    vmin\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000610    vmin\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100650    vmin\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100650    vmin\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100610    vmin\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200650    vmin\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200650    vmin\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200610    vmin\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000650    vmin\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000650    vmin\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000610    vmin\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100650    vmin\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100650    vmin\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100610    vmin\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200650    vmin\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200650    vmin\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200610    vmin\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200f40    vmin\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200f40    vmin\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200f00    vmin\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000350    vcge\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000350    vcge\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000310    vcge\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100350    vcge\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100350    vcge\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100310    vcge\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200350    vcge\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200350    vcge\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200310    vcge\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000350    vcge\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000350    vcge\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000310    vcge\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100350    vcge\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100350    vcge\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100310    vcge\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200350    vcge\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200350    vcge\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200310    vcge\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000e40    vcge\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e40    vcge\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e00    vcge\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000340    vcgt\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000340    vcgt\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000300    vcgt\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100340    vcgt\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100340    vcgt\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100300    vcgt\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200340    vcgt\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200340    vcgt\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200300    vcgt\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000340    vcgt\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000340    vcgt\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000300    vcgt\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100340    vcgt\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100340    vcgt\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100300    vcgt\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200340    vcgt\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200340    vcgt\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200300    vcgt\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200e40    vcgt\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e40    vcgt\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e00    vcgt\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000340    vcgt\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000340    vcgt\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000300    vcgt\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100340    vcgt\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100340    vcgt\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100300    vcgt\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200340    vcgt\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200340    vcgt\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200300    vcgt\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000340    vcgt\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000340    vcgt\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000300    vcgt\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100340    vcgt\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100340    vcgt\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100300    vcgt\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200340    vcgt\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200340    vcgt\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200300    vcgt\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200e40    vcgt\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e40    vcgt\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e00    vcgt\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000350    vcge\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000350    vcge\.s8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000310    vcge\.s8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100350    vcge\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100350    vcge\.s16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100310    vcge\.s16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200350    vcge\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200350    vcge\.s32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200310    vcge\.s32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000350    vcge\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000350    vcge\.u8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000310    vcge\.u8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100350    vcge\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100350    vcge\.u16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100310    vcge\.u16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200350    vcge\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200350    vcge\.u32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200310    vcge\.u32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000e40    vcge\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e40    vcge\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e00    vcge\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000850    vceq\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000850    vceq\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000810    vceq\.i8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100850    vceq\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100850    vceq\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100810    vceq\.i16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200850    vceq\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200850    vceq\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200810    vceq\.i32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000e40    vceq\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000e40    vceq\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000e00    vceq\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3b100c0    vcge\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b100c0    vcge\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10080    vcge\.s8        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b500c0    vcge\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b500c0    vcge\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50080    vcge\.s16       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b900c0    vcge\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b900c0    vcge\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90080    vcge\.s32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b904c0    vcge\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b904c0    vcge\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90480    vcge\.f32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b10040    vcgt\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10040    vcgt\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10000    vcgt\.s8        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b50040    vcgt\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50040    vcgt\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50000    vcgt\.s16       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90040    vcgt\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90040    vcgt\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90000    vcgt\.s32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90440    vcgt\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90440    vcgt\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90400    vcgt\.f32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b101c0    vcle\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b101c0    vcle\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10180    vcle\.s8        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b501c0    vcle\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b501c0    vcle\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50180    vcle\.s16       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b901c0    vcle\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b901c0    vcle\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90180    vcle\.s32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b905c0    vcle\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b905c0    vcle\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90580    vcle\.f32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b10240    vclt\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10240    vclt\.s8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10200    vclt\.s8        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b50240    vclt\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50240    vclt\.s16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50200    vclt\.s16       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90240    vclt\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90240    vclt\.s32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90200    vclt\.s32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90640    vclt\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90640    vclt\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90600    vclt\.f32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b10140    vceq\.i8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10140    vceq\.i8        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10100    vceq\.i8        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b50140    vceq\.i16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50140    vceq\.i16       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50100    vceq\.i16       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90140    vceq\.i32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90140    vceq\.i32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90100    vceq\.i32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90540    vceq\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90540    vceq\.f32       q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90500    vceq\.f32       d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2000a00    vpmax\.s8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100a00    vpmax\.s16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200a00    vpmax\.s32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000a00    vpmax\.u8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100a00    vpmax\.u16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200a00    vpmax\.u32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000f00    vpmax\.f32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000a10    vpmin\.s8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100a10    vpmin\.s16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200a10    vpmin\.s32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000a10    vpmin\.u8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100a10    vpmin\.u16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200a10    vpmin\.u32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200f00    vpmin\.f32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000940    vmla\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000940    vmla\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000900    vmla\.i8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100940    vmla\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100940    vmla\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100900    vmla\.i16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200940    vmla\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200940    vmla\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200900    vmla\.i32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000d50    vmla\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000d50    vmla\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000d10    vmla\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900040    vmla\.i16       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900040    vmla\.i16       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900040    vmla\.i16       d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00040    vmla\.i32       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00040    vmla\.i32       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00040    vmla\.i32       d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00140    vmla\.f32       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00140    vmla\.f32       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00140    vmla\.f32       d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3000940    vmls\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000940    vmls\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000900    vmls\.i8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100940    vmls\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100940    vmls\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100900    vmls\.i16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200940    vmls\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200940    vmls\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200900    vmls\.i32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200d50    vmls\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200d50    vmls\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200d10    vmls\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900440    vmls\.i16       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900440    vmls\.i16       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900440    vmls\.i16       d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00440    vmls\.i32       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00440    vmls\.i32       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00440    vmls\.i32       d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00540    vmls\.f32       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00540    vmls\.f32       q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00540    vmls\.f32       d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2000b10    vpadd\.i8       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100b10    vpadd\.i16      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200b10    vpadd\.i32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000d00    vpadd\.f32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000840    vadd\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000840    vadd\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000800    vadd\.i8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100840    vadd\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100840    vadd\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100800    vadd\.i16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200840    vadd\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200840    vadd\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200800    vadd\.i32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300840    vadd\.i64       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300840    vadd\.i64       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300800    vadd\.i64       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000d40    vadd\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000d40    vadd\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000d00    vadd\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000840    vsub\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000840    vsub\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000800    vsub\.i8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100840    vsub\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100840    vsub\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100800    vsub\.i16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200840    vsub\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200840    vsub\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200800    vsub\.i32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300840    vsub\.i64       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300840    vsub\.i64       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300800    vsub\.i64       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200d40    vsub\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200d40    vsub\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200d00    vsub\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000850    vtst\.8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000850    vtst\.8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000810    vtst\.8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100850    vtst\.16        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100850    vtst\.16        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100810    vtst\.16        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200850    vtst\.32        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200850    vtst\.32        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200810    vtst\.32        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000950    vmul\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000950    vmul\.i8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000910    vmul\.i8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100950    vmul\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100950    vmul\.i16       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100910    vmul\.i16       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200950    vmul\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200950    vmul\.i32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200910    vmul\.i32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000d50    vmul\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000d50    vmul\.f32       q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000d10    vmul\.f32       d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000950    vmul\.p8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000950    vmul\.p8        q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000910    vmul\.p8        d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100b40    vqdmulh\.s16    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100b40    vqdmulh\.s16    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100b00    vqdmulh\.s16    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200b40    vqdmulh\.s32    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200b40    vqdmulh\.s32    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200b00    vqdmulh\.s32    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900c40    vqdmulh\.s16    q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900c40    vqdmulh\.s16    q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900c40    vqdmulh\.s16    d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00c40    vqdmulh\.s32    q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00c40    vqdmulh\.s32    q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00c40    vqdmulh\.s32    d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3100b40    vqrdmulh\.s16   q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100b40    vqrdmulh\.s16   q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100b00    vqrdmulh\.s16   d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200b40    vqrdmulh\.s32   q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200b40    vqrdmulh\.s32   q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200b00    vqrdmulh\.s32   d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900d40    vqrdmulh\.s16   q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900d40    vqrdmulh\.s16   q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900d40    vqrdmulh\.s16   d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00d40    vqrdmulh\.s32   q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00d40    vqrdmulh\.s32   q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00d40    vqrdmulh\.s32   d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3000e50    vacge\.f32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e50    vacge\.f32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e10    vacge\.f32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200e50    vacgt\.f32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e50    vacgt\.f32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e10    vacgt\.f32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200e50    vacgt\.f32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e50    vacgt\.f32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e10    vacgt\.f32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000e50    vacge\.f32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e50    vacge\.f32      q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e10    vacge\.f32      d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000f50    vrecps\.f32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000f50    vrecps\.f32     q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000f10    vrecps\.f32     d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200f50    vrsqrts\.f32    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200f50    vrsqrts\.f32    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200f10    vrsqrts\.f32    d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3b10340    vabs\.s8        q0, q0
+0[0-9a-f]+ <[^>]+> f3b10340    vabs\.s8        q0, q0
+0[0-9a-f]+ <[^>]+> f3b10300    vabs\.s8        d0, d0
+0[0-9a-f]+ <[^>]+> f3b50340    vabs\.s16       q0, q0
+0[0-9a-f]+ <[^>]+> f3b50340    vabs\.s16       q0, q0
+0[0-9a-f]+ <[^>]+> f3b50300    vabs\.s16       d0, d0
+0[0-9a-f]+ <[^>]+> f3b90340    vabs\.s32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b90340    vabs\.s32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b90300    vabs\.s32       d0, d0
+0[0-9a-f]+ <[^>]+> f3b90740    vabs\.f32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b90740    vabs\.f32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b90700    vabs\.f32       d0, d0
+0[0-9a-f]+ <[^>]+> f3b103c0    vneg\.s8        q0, q0
+0[0-9a-f]+ <[^>]+> f3b103c0    vneg\.s8        q0, q0
+0[0-9a-f]+ <[^>]+> f3b10380    vneg\.s8        d0, d0
+0[0-9a-f]+ <[^>]+> f3b503c0    vneg\.s16       q0, q0
+0[0-9a-f]+ <[^>]+> f3b503c0    vneg\.s16       q0, q0
+0[0-9a-f]+ <[^>]+> f3b50380    vneg\.s16       d0, d0
+0[0-9a-f]+ <[^>]+> f3b903c0    vneg\.s32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b903c0    vneg\.s32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b90380    vneg\.s32       d0, d0
+0[0-9a-f]+ <[^>]+> f3b907c0    vneg\.f32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b907c0    vneg\.f32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b90780    vneg\.f32       d0, d0
+0[0-9a-f]+ <[^>]+> f2890050    vshr\.s8        q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890050    vshr\.s8        q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890010    vshr\.s8        d0, d0, #7
+0[0-9a-f]+ <[^>]+> f2910050    vshr\.s16       q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910050    vshr\.s16       q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910010    vshr\.s16       d0, d0, #15
+0[0-9a-f]+ <[^>]+> f2a10050    vshr\.s32       q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10050    vshr\.s32       q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10010    vshr\.s32       d0, d0, #31
+0[0-9a-f]+ <[^>]+> f28100d0    vshr\.s64       q0, q0, #63
+0[0-9a-f]+ <[^>]+> f28100d0    vshr\.s64       q0, q0, #63
+0[0-9a-f]+ <[^>]+> f2810090    vshr\.s64       d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3890050    vshr\.u8        q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890050    vshr\.u8        q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890010    vshr\.u8        d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910050    vshr\.u16       q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910050    vshr\.u16       q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910010    vshr\.u16       d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10050    vshr\.u32       q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10050    vshr\.u32       q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10010    vshr\.u32       d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38100d0    vshr\.u64       q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38100d0    vshr\.u64       q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810090    vshr\.u64       d0, d0, #63
+0[0-9a-f]+ <[^>]+> f2890250    vrshr\.s8       q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890250    vrshr\.s8       q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890210    vrshr\.s8       d0, d0, #7
+0[0-9a-f]+ <[^>]+> f2910250    vrshr\.s16      q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910250    vrshr\.s16      q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910210    vrshr\.s16      d0, d0, #15
+0[0-9a-f]+ <[^>]+> f2a10250    vrshr\.s32      q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10250    vrshr\.s32      q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10210    vrshr\.s32      d0, d0, #31
+0[0-9a-f]+ <[^>]+> f28102d0    vrshr\.s64      q0, q0, #63
+0[0-9a-f]+ <[^>]+> f28102d0    vrshr\.s64      q0, q0, #63
+0[0-9a-f]+ <[^>]+> f2810290    vrshr\.s64      d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3890250    vrshr\.u8       q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890250    vrshr\.u8       q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890210    vrshr\.u8       d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910250    vrshr\.u16      q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910250    vrshr\.u16      q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910210    vrshr\.u16      d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10250    vrshr\.u32      q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10250    vrshr\.u32      q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10210    vrshr\.u32      d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38102d0    vrshr\.u64      q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38102d0    vrshr\.u64      q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810290    vrshr\.u64      d0, d0, #63
+0[0-9a-f]+ <[^>]+> f2890150    vsra\.s8        q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890150    vsra\.s8        q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890110    vsra\.s8        d0, d0, #7
+0[0-9a-f]+ <[^>]+> f2910150    vsra\.s16       q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910150    vsra\.s16       q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910110    vsra\.s16       d0, d0, #15
+0[0-9a-f]+ <[^>]+> f2a10150    vsra\.s32       q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10150    vsra\.s32       q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10110    vsra\.s32       d0, d0, #31
+0[0-9a-f]+ <[^>]+> f28101d0    vsra\.s64       q0, q0, #63
+0[0-9a-f]+ <[^>]+> f28101d0    vsra\.s64       q0, q0, #63
+0[0-9a-f]+ <[^>]+> f2810190    vsra\.s64       d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3890150    vsra\.u8        q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890150    vsra\.u8        q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890110    vsra\.u8        d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910150    vsra\.u16       q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910150    vsra\.u16       q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910110    vsra\.u16       d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10150    vsra\.u32       q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10150    vsra\.u32       q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10110    vsra\.u32       d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38101d0    vsra\.u64       q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38101d0    vsra\.u64       q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810190    vsra\.u64       d0, d0, #63
+0[0-9a-f]+ <[^>]+> f2890350    vrsra\.s8       q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890350    vrsra\.s8       q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890310    vrsra\.s8       d0, d0, #7
+0[0-9a-f]+ <[^>]+> f2910350    vrsra\.s16      q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910350    vrsra\.s16      q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910310    vrsra\.s16      d0, d0, #15
+0[0-9a-f]+ <[^>]+> f2a10350    vrsra\.s32      q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10350    vrsra\.s32      q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10310    vrsra\.s32      d0, d0, #31
+0[0-9a-f]+ <[^>]+> f28103d0    vrsra\.s64      q0, q0, #63
+0[0-9a-f]+ <[^>]+> f28103d0    vrsra\.s64      q0, q0, #63
+0[0-9a-f]+ <[^>]+> f2810390    vrsra\.s64      d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3890350    vrsra\.u8       q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890350    vrsra\.u8       q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890310    vrsra\.u8       d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910350    vrsra\.u16      q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910350    vrsra\.u16      q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910310    vrsra\.u16      d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10350    vrsra\.u32      q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10350    vrsra\.u32      q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10310    vrsra\.u32      d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38103d0    vrsra\.u64      q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38103d0    vrsra\.u64      q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810390    vrsra\.u64      d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3880550    vsli\.8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880550    vsli\.8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880510    vsli\.8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3900550    vsli\.16        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900550    vsli\.16        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900510    vsli\.16        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3a00550    vsli\.32        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00550    vsli\.32        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00510    vsli\.32        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f38005d0    vsli\.64        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f38005d0    vsli\.64        q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3800590    vsli\.64        d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3890450    vsri\.8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890450    vsri\.8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890410    vsri\.8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910450    vsri\.16        q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910450    vsri\.16        q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910410    vsri\.16        d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10450    vsri\.32        q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10450    vsri\.32        q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10410    vsri\.32        d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38104d0    vsri\.64        q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38104d0    vsri\.64        q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810490    vsri\.64        d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3880650    vqshlu\.s8      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880650    vqshlu\.s8      q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880610    vqshlu\.s8      d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3900650    vqshlu\.s16     q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900650    vqshlu\.s16     q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900610    vqshlu\.s16     d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3a00650    vqshlu\.s32     q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00650    vqshlu\.s32     q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00610    vqshlu\.s32     d0, d0, #0
+0[0-9a-f]+ <[^>]+> f38006d0    vqshlu\.s64     q0, q0, #0
+0[0-9a-f]+ <[^>]+> f38006d0    vqshlu\.s64     q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3800690    vqshlu\.s64     d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2890910    vqshrn\.s16     d0, q0, #7
+0[0-9a-f]+ <[^>]+> f2910910    vqshrn\.s32     d0, q0, #15
+0[0-9a-f]+ <[^>]+> f2a10910    vqshrn\.s64     d0, q0, #31
+0[0-9a-f]+ <[^>]+> f3890910    vqshrn\.u16     d0, q0, #7
+0[0-9a-f]+ <[^>]+> f3910910    vqshrn\.u32     d0, q0, #15
+0[0-9a-f]+ <[^>]+> f3a10910    vqshrn\.u64     d0, q0, #31
+0[0-9a-f]+ <[^>]+> f2890950    vqrshrn\.s16    d0, q0, #7
+0[0-9a-f]+ <[^>]+> f2910950    vqrshrn\.s32    d0, q0, #15
+0[0-9a-f]+ <[^>]+> f2a10950    vqrshrn\.s64    d0, q0, #31
+0[0-9a-f]+ <[^>]+> f3890950    vqrshrn\.u16    d0, q0, #7
+0[0-9a-f]+ <[^>]+> f3910950    vqrshrn\.u32    d0, q0, #15
+0[0-9a-f]+ <[^>]+> f3a10950    vqrshrn\.u64    d0, q0, #31
+0[0-9a-f]+ <[^>]+> f3890810    vqshrun\.s16    d0, q0, #7
+0[0-9a-f]+ <[^>]+> f3910810    vqshrun\.s32    d0, q0, #15
+0[0-9a-f]+ <[^>]+> f3a10810    vqshrun\.s64    d0, q0, #31
+0[0-9a-f]+ <[^>]+> f3890850    vqrshrun\.s16   d0, q0, #7
+0[0-9a-f]+ <[^>]+> f3910850    vqrshrun\.s32   d0, q0, #15
+0[0-9a-f]+ <[^>]+> f3a10850    vqrshrun\.s64   d0, q0, #31
+0[0-9a-f]+ <[^>]+> f2890810    vshrn\.i16      d0, q0, #7
+0[0-9a-f]+ <[^>]+> f2910810    vshrn\.i32      d0, q0, #15
+0[0-9a-f]+ <[^>]+> f2a10810    vshrn\.i64      d0, q0, #31
+0[0-9a-f]+ <[^>]+> f2890850    vrshrn\.i16     d0, q0, #7
+0[0-9a-f]+ <[^>]+> f2910850    vrshrn\.i32     d0, q0, #15
+0[0-9a-f]+ <[^>]+> f2a10850    vrshrn\.i64     d0, q0, #31
+0[0-9a-f]+ <[^>]+> f2890a10    vshll\.s8       d0, q0, #1
+0[0-9a-f]+ <[^>]+> f2910a10    vshll\.s16      d0, q0, #1
+0[0-9a-f]+ <[^>]+> f2a10a10    vshll\.s32      d0, q0, #1
+0[0-9a-f]+ <[^>]+> f3890a10    vshll\.u8       d0, q0, #1
+0[0-9a-f]+ <[^>]+> f3910a10    vshll\.u16      d0, q0, #1
+0[0-9a-f]+ <[^>]+> f3a10a10    vshll\.u32      d0, q0, #1
+0[0-9a-f]+ <[^>]+> f3b20300    vshll\.i8       q0, d0, #8
+0[0-9a-f]+ <[^>]+> f3b60300    vshll\.i16      q0, d0, #16
+0[0-9a-f]+ <[^>]+> f3ba0300    vshll\.i32      q0, d0, #32
+0[0-9a-f]+ <[^>]+> f3bb0740    vcvt\.s32\.f32  q0, q0
+0[0-9a-f]+ <[^>]+> f3bb07c0    vcvt\.u32\.f32  q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0640    vcvt\.f32\.s32  q0, q0
+0[0-9a-f]+ <[^>]+> f3bb06c0    vcvt\.f32\.u32  q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0740    vcvt\.s32\.f32  q0, q0
+0[0-9a-f]+ <[^>]+> f3bb07c0    vcvt\.u32\.f32  q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0640    vcvt\.f32\.s32  q0, q0
+0[0-9a-f]+ <[^>]+> f3bb06c0    vcvt\.f32\.u32  q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0700    vcvt\.s32\.f32  d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0780    vcvt\.u32\.f32  d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0600    vcvt\.f32\.s32  d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0680    vcvt\.f32\.u32  d0, d0
+0[0-9a-f]+ <[^>]+> f2bf0f50    vcvt\.s32\.f32  q0, q0, #1
+0[0-9a-f]+ <[^>]+> f3bf0f50    vcvt\.u32\.f32  q0, q0, #1
+0[0-9a-f]+ <[^>]+> f2bf0e50    vcvt\.f32\.s32  q0, q0, #1
+0[0-9a-f]+ <[^>]+> f3bf0e50    vcvt\.f32\.u32  q0, q0, #1
+0[0-9a-f]+ <[^>]+> f2bf0f50    vcvt\.s32\.f32  q0, q0, #1
+0[0-9a-f]+ <[^>]+> f3bf0f50    vcvt\.u32\.f32  q0, q0, #1
+0[0-9a-f]+ <[^>]+> f2bf0e50    vcvt\.f32\.s32  q0, q0, #1
+0[0-9a-f]+ <[^>]+> f3bf0e50    vcvt\.f32\.u32  q0, q0, #1
+0[0-9a-f]+ <[^>]+> f2bf0f10    vcvt\.s32\.f32  d0, d0, #1
+0[0-9a-f]+ <[^>]+> f3bf0f10    vcvt\.u32\.f32  d0, d0, #1
+0[0-9a-f]+ <[^>]+> f2bf0e10    vcvt\.f32\.s32  d0, d0, #1
+0[0-9a-f]+ <[^>]+> f3bf0e10    vcvt\.f32\.u32  d0, d0, #1
+0[0-9a-f]+ <[^>]+> f2200150    vorr    q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200110    vorr    d0, d0, d0
+0[0-9a-f]+ <[^>]+> ee400b10    vmov\.8 d0\[0\], r0
+0[0-9a-f]+ <[^>]+> ee000b30    vmov\.16        d0\[0\], r0
+0[0-9a-f]+ <[^>]+> ee000b10    vmov\.32        d0\[0\], r0
+0[0-9a-f]+ <[^>]+> ec400b10    vmov    d0, r0, r0
+0[0-9a-f]+ <[^>]+> ee500b10    vmov\.s8        r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> ee100b30    vmov\.s16       r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> eed00b10    vmov\.u8        r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> ee900b30    vmov\.u16       r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> ee100b10    vmov\.32        r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> ec510b10    vmov    r0, r1, d0
+0[0-9a-f]+ <[^>]+> f2870057    vmov\.i32       q0, #119        ; 0x00000077
+0[0-9a-f]+ <[^>]+> f2870017    vmov\.i32       d0, #119        ; 0x00000077
+0[0-9a-f]+ <[^>]+> f2870077    vmvn\.i32       q0, #119        ; 0x00000077
+0[0-9a-f]+ <[^>]+> f2870037    vmvn\.i32       d0, #119        ; 0x00000077
+0[0-9a-f]+ <[^>]+> f2870257    vmov\.i32       q0, #30464      ; 0x00007700
+0[0-9a-f]+ <[^>]+> f2870217    vmov\.i32       d0, #30464      ; 0x00007700
+0[0-9a-f]+ <[^>]+> f2870277    vmvn\.i32       q0, #30464      ; 0x00007700
+0[0-9a-f]+ <[^>]+> f2870237    vmvn\.i32       d0, #30464      ; 0x00007700
+0[0-9a-f]+ <[^>]+> f2870457    vmov\.i32       q0, #7798784    ; 0x00770000
+0[0-9a-f]+ <[^>]+> f2870417    vmov\.i32       d0, #7798784    ; 0x00770000
+0[0-9a-f]+ <[^>]+> f2870477    vmvn\.i32       q0, #7798784    ; 0x00770000
+0[0-9a-f]+ <[^>]+> f2870437    vmvn\.i32       d0, #7798784    ; 0x00770000
+0[0-9a-f]+ <[^>]+> f2870657    vmov\.i32       q0, #1996488704 ; 0x77000000
+0[0-9a-f]+ <[^>]+> f2870617    vmov\.i32       d0, #1996488704 ; 0x77000000
+0[0-9a-f]+ <[^>]+> f2870677    vmvn\.i32       q0, #1996488704 ; 0x77000000
+0[0-9a-f]+ <[^>]+> f2870637    vmvn\.i32       d0, #1996488704 ; 0x77000000
+0[0-9a-f]+ <[^>]+> f2870857    vmov\.i16       q0, #119        ; 0x0077
+0[0-9a-f]+ <[^>]+> f2870817    vmov\.i16       d0, #119        ; 0x0077
+0[0-9a-f]+ <[^>]+> f2870877    vmvn\.i16       q0, #119        ; 0x0077
+0[0-9a-f]+ <[^>]+> f2870837    vmvn\.i16       d0, #119        ; 0x0077
+0[0-9a-f]+ <[^>]+> f2870a57    vmov\.i16       q0, #30464      ; 0x7700
+0[0-9a-f]+ <[^>]+> f2870a17    vmov\.i16       d0, #30464      ; 0x7700
+0[0-9a-f]+ <[^>]+> f2870a77    vmvn\.i16       q0, #30464      ; 0x7700
+0[0-9a-f]+ <[^>]+> f2870a37    vmvn\.i16       d0, #30464      ; 0x7700
+0[0-9a-f]+ <[^>]+> f2870c57    vmov\.i32       q0, #30719      ; 0x000077ff
+0[0-9a-f]+ <[^>]+> f2870c17    vmov\.i32       d0, #30719      ; 0x000077ff
+0[0-9a-f]+ <[^>]+> f2870c77    vmvn\.i32       q0, #30719      ; 0x000077ff
+0[0-9a-f]+ <[^>]+> f2870c37    vmvn\.i32       d0, #30719      ; 0x000077ff
+0[0-9a-f]+ <[^>]+> f2870d57    vmov\.i32       q0, #7864319    ; 0x0077ffff
+0[0-9a-f]+ <[^>]+> f2870d17    vmov\.i32       d0, #7864319    ; 0x0077ffff
+0[0-9a-f]+ <[^>]+> f2870d77    vmvn\.i32       q0, #7864319    ; 0x0077ffff
+0[0-9a-f]+ <[^>]+> f2870d37    vmvn\.i32       d0, #7864319    ; 0x0077ffff
+0[0-9a-f]+ <[^>]+> f2870e57    vmov\.i8        q0, #119        ; 0x77
+0[0-9a-f]+ <[^>]+> f2870e17    vmov\.i8        d0, #119        ; 0x77
+0[0-9a-f]+ <[^>]+> f3810e71    vmov\.i64       q0, #0xff0000ff000000ff
+0[0-9a-f]+ <[^>]+> f3810e31    vmov\.i64       d0, #0xff0000ff000000ff
+0[0-9a-f]+ <[^>]+> f2850f51    vmov\.f32       q0, #1027866624 ; 0x3d440000
+0[0-9a-f]+ <[^>]+> f2850f11    vmov\.f32       d0, #1027866624 ; 0x3d440000
+0[0-9a-f]+ <[^>]+> f3b005c0    vmvn    q0, q0
+0[0-9a-f]+ <[^>]+> f3b005c0    vmvn    q0, q0
+0[0-9a-f]+ <[^>]+> f3b00580    vmvn    d0, d0
+0[0-9a-f]+ <[^>]+> f2800500    vabal\.s8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900500    vabal\.s16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00500    vabal\.s32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800500    vabal\.u8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900500    vabal\.u16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00500    vabal\.u32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800700    vabdl\.s8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900700    vabdl\.s16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00700    vabdl\.s32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800700    vabdl\.u8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900700    vabdl\.u16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00700    vabdl\.u32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800000    vaddl\.s8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900000    vaddl\.s16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00000    vaddl\.s32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800000    vaddl\.u8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900000    vaddl\.u16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00000    vaddl\.u32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800200    vsubl\.s8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900200    vsubl\.s16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00200    vsubl\.s32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800200    vsubl\.u8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900200    vsubl\.u16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00200    vsubl\.u32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800800    vmlal\.s8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900800    vmlal\.s16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00800    vmlal\.s32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800800    vmlal\.u8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900800    vmlal\.u16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00800    vmlal\.u32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900240    vmlal\.s16      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00240    vmlal\.s32      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900240    vmlal\.u16      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00240    vmlal\.u32      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2800a00    vmlsl\.s8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900a00    vmlsl\.s16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00a00    vmlsl\.s32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800a00    vmlsl\.u8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900a00    vmlsl\.u16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00a00    vmlsl\.u32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900640    vmlsl\.s16      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00640    vmlsl\.s32      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900640    vmlsl\.u16      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00640    vmlsl\.u32      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2800100    vaddw\.s8       q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2900100    vaddw\.s16      q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2a00100    vaddw\.s32      q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3800100    vaddw\.u8       q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3900100    vaddw\.u16      q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3a00100    vaddw\.u32      q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2800300    vsubw\.s8       q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2900300    vsubw\.s16      q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2a00300    vsubw\.s32      q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3800300    vsubw\.u8       q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3900300    vsubw\.u16      q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3a00300    vsubw\.u32      q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2800400    vaddhn\.i16     d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2900400    vaddhn\.i32     d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2a00400    vaddhn\.i64     d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3800400    vraddhn\.i16    d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3900400    vraddhn\.i32    d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3a00400    vraddhn\.i64    d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2800600    vsubhn\.i16     d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2900600    vsubhn\.i32     d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2a00600    vsubhn\.i64     d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3800600    vrsubhn\.i16    d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3900600    vrsubhn\.i32    d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3a00600    vrsubhn\.i64    d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2900900    vqdmlal\.s16    q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00900    vqdmlal\.s32    q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900340    vqdmlal\.s16    q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00340    vqdmlal\.s32    q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900b00    vqdmlsl\.s16    q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00b00    vqdmlsl\.s32    q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900740    vqdmlsl\.s16    q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00740    vqdmlsl\.s32    q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900d00    vqdmull\.s16    q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00d00    vqdmull\.s32    q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900b40    vqdmull\.s16    q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00b40    vqdmull\.s32    q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2800c00    vmull\.s8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900c00    vmull\.s16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00c00    vmull\.s32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800c00    vmull\.u8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900c00    vmull\.u16      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00c00    vmull\.u32      q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800e00    vmull\.p8       q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900a40    vmull\.s16      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00a40    vmull\.s32      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900a40    vmull\.u16      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00a40    vmull\.u32      q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2b00040    vext\.8 q0, q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2b00040    vext\.8 q0, q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2b00000    vext\.8 d0, d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b00040    vrev64\.8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b00040    vrev64\.8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b00000    vrev64\.8       d0, d0
+0[0-9a-f]+ <[^>]+> f3b40040    vrev64\.16      q0, q0
+0[0-9a-f]+ <[^>]+> f3b40040    vrev64\.16      q0, q0
+0[0-9a-f]+ <[^>]+> f3b40000    vrev64\.16      d0, d0
+0[0-9a-f]+ <[^>]+> f3b80040    vrev64\.32      q0, q0
+0[0-9a-f]+ <[^>]+> f3b80040    vrev64\.32      q0, q0
+0[0-9a-f]+ <[^>]+> f3b80000    vrev64\.32      d0, d0
+0[0-9a-f]+ <[^>]+> f3b000c0    vrev32\.8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b000c0    vrev32\.8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b00080    vrev32\.8       d0, d0
+0[0-9a-f]+ <[^>]+> f3b400c0    vrev32\.16      q0, q0
+0[0-9a-f]+ <[^>]+> f3b400c0    vrev32\.16      q0, q0
+0[0-9a-f]+ <[^>]+> f3b40080    vrev32\.16      d0, d0
+0[0-9a-f]+ <[^>]+> f3b00140    vrev16\.8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b00140    vrev16\.8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b00100    vrev16\.8       d0, d0
+0[0-9a-f]+ <[^>]+> eee00b10    vdup\.8 q0, r0
+0[0-9a-f]+ <[^>]+> eee00b10    vdup\.8 q0, r0
+0[0-9a-f]+ <[^>]+> eec00b10    vdup\.8 d0, r0
+0[0-9a-f]+ <[^>]+> f3b10c40    vdup\.8 q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b10c40    vdup\.8 q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b10c00    vdup\.8 d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> eea00b30    vdup\.16        q0, r0
+0[0-9a-f]+ <[^>]+> eea00b30    vdup\.16        q0, r0
+0[0-9a-f]+ <[^>]+> ee800b30    vdup\.16        d0, r0
+0[0-9a-f]+ <[^>]+> f3b20c40    vdup\.16        q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b20c40    vdup\.16        q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b20c00    vdup\.16        d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> eea00b10    vdup\.32        q0, r0
+0[0-9a-f]+ <[^>]+> eea00b10    vdup\.32        q0, r0
+0[0-9a-f]+ <[^>]+> ee800b10    vdup\.32        d0, r0
+0[0-9a-f]+ <[^>]+> f3b40c40    vdup\.32        q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b40c40    vdup\.32        q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b40c00    vdup\.32        d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2880a10    vmovl\.s8       q0, d0
+0[0-9a-f]+ <[^>]+> f2900a10    vmovl\.s16      q0, d0
+0[0-9a-f]+ <[^>]+> f2a00a10    vmovl\.s32      q0, d0
+0[0-9a-f]+ <[^>]+> f3880a10    vmovl\.u8       q0, d0
+0[0-9a-f]+ <[^>]+> f3900a10    vmovl\.u16      q0, d0
+0[0-9a-f]+ <[^>]+> f3a00a10    vmovl\.u32      q0, d0
+0[0-9a-f]+ <[^>]+> f3b20200    vmovn\.i8       d0, q0
+0[0-9a-f]+ <[^>]+> f3b60200    vmovn\.i16      d0, q0
+0[0-9a-f]+ <[^>]+> f3ba0200    vmovn\.i32      d0, q0
+0[0-9a-f]+ <[^>]+> f3b20280    vqmovn\.s16     d0, q0
+0[0-9a-f]+ <[^>]+> f3b60280    vqmovn\.s32     d0, q0
+0[0-9a-f]+ <[^>]+> f3ba0280    vqmovn\.s64     d0, q0
+0[0-9a-f]+ <[^>]+> f3b202c0    vqmovn\.u16     d0, q0
+0[0-9a-f]+ <[^>]+> f3b602c0    vqmovn\.u32     d0, q0
+0[0-9a-f]+ <[^>]+> f3ba02c0    vqmovn\.u64     d0, q0
+0[0-9a-f]+ <[^>]+> f3b20240    vqmovun\.s16    d0, q0
+0[0-9a-f]+ <[^>]+> f3b60240    vqmovun\.s32    d0, q0
+0[0-9a-f]+ <[^>]+> f3ba0240    vqmovun\.s64    d0, q0
+0[0-9a-f]+ <[^>]+> f3b201c2    vzip\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b201c2    vzip\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b20181    vzip\.8 d0, d1
+0[0-9a-f]+ <[^>]+> f3b601c2    vzip\.16        q0, q1
+0[0-9a-f]+ <[^>]+> f3b601c2    vzip\.16        q0, q1
+0[0-9a-f]+ <[^>]+> f3b60181    vzip\.16        d0, d1
+0[0-9a-f]+ <[^>]+> f3ba01c2    vzip\.32        q0, q1
+0[0-9a-f]+ <[^>]+> f3ba01c2    vzip\.32        q0, q1
+0[0-9a-f]+ <[^>]+> f3ba0081    vtrn\.32        d0, d1
+0[0-9a-f]+ <[^>]+> f3b20142    vuzp\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b20142    vuzp\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b20101    vuzp\.8 d0, d1
+0[0-9a-f]+ <[^>]+> f3b60142    vuzp\.16        q0, q1
+0[0-9a-f]+ <[^>]+> f3b60142    vuzp\.16        q0, q1
+0[0-9a-f]+ <[^>]+> f3b60101    vuzp\.16        d0, d1
+0[0-9a-f]+ <[^>]+> f3ba0142    vuzp\.32        q0, q1
+0[0-9a-f]+ <[^>]+> f3ba0142    vuzp\.32        q0, q1
+0[0-9a-f]+ <[^>]+> f3ba0081    vtrn\.32        d0, d1
+0[0-9a-f]+ <[^>]+> f3b00740    vqabs\.s8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b00740    vqabs\.s8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b00700    vqabs\.s8       d0, d0
+0[0-9a-f]+ <[^>]+> f3b40740    vqabs\.s16      q0, q0
+0[0-9a-f]+ <[^>]+> f3b40740    vqabs\.s16      q0, q0
+0[0-9a-f]+ <[^>]+> f3b40700    vqabs\.s16      d0, d0
+0[0-9a-f]+ <[^>]+> f3b80740    vqabs\.s32      q0, q0
+0[0-9a-f]+ <[^>]+> f3b80740    vqabs\.s32      q0, q0
+0[0-9a-f]+ <[^>]+> f3b80700    vqabs\.s32      d0, d0
+0[0-9a-f]+ <[^>]+> f3b007c0    vqneg\.s8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b007c0    vqneg\.s8       q0, q0
+0[0-9a-f]+ <[^>]+> f3b00780    vqneg\.s8       d0, d0
+0[0-9a-f]+ <[^>]+> f3b407c0    vqneg\.s16      q0, q0
+0[0-9a-f]+ <[^>]+> f3b407c0    vqneg\.s16      q0, q0
+0[0-9a-f]+ <[^>]+> f3b40780    vqneg\.s16      d0, d0
+0[0-9a-f]+ <[^>]+> f3b807c0    vqneg\.s32      q0, q0
+0[0-9a-f]+ <[^>]+> f3b807c0    vqneg\.s32      q0, q0
+0[0-9a-f]+ <[^>]+> f3b80780    vqneg\.s32      d0, d0
+0[0-9a-f]+ <[^>]+> f3b00640    vpadal\.s8      q0, q0
+0[0-9a-f]+ <[^>]+> f3b00640    vpadal\.s8      q0, q0
+0[0-9a-f]+ <[^>]+> f3b00600    vpadal\.s8      d0, d0
+0[0-9a-f]+ <[^>]+> f3b40640    vpadal\.s16     q0, q0
+0[0-9a-f]+ <[^>]+> f3b40640    vpadal\.s16     q0, q0
+0[0-9a-f]+ <[^>]+> f3b40600    vpadal\.s16     d0, d0
+0[0-9a-f]+ <[^>]+> f3b80640    vpadal\.s32     q0, q0
+0[0-9a-f]+ <[^>]+> f3b80640    vpadal\.s32     q0, q0
+0[0-9a-f]+ <[^>]+> f3b80600    vpadal\.s32     d0, d0
+0[0-9a-f]+ <[^>]+> f3b006c0    vpadal\.u8      q0, q0
+0[0-9a-f]+ <[^>]+> f3b006c0    vpadal\.u8      q0, q0
+0[0-9a-f]+ <[^>]+> f3b00680    vpadal\.u8      d0, d0
+0[0-9a-f]+ <[^>]+> f3b406c0    vpadal\.u16     q0, q0
+0[0-9a-f]+ <[^>]+> f3b406c0    vpadal\.u16     q0, q0
+0[0-9a-f]+ <[^>]+> f3b40680    vpadal\.u16     d0, d0
+0[0-9a-f]+ <[^>]+> f3b806c0    vpadal\.u32     q0, q0
+0[0-9a-f]+ <[^>]+> f3b806c0    vpadal\.u32     q0, q0
+0[0-9a-f]+ <[^>]+> f3b80680    vpadal\.u32     d0, d0
+0[0-9a-f]+ <[^>]+> f3b00240    vpaddl\.s8      q0, q0
+0[0-9a-f]+ <[^>]+> f3b00240    vpaddl\.s8      q0, q0
+0[0-9a-f]+ <[^>]+> f3b00200    vpaddl\.s8      d0, d0
+0[0-9a-f]+ <[^>]+> f3b40240    vpaddl\.s16     q0, q0
+0[0-9a-f]+ <[^>]+> f3b40240    vpaddl\.s16     q0, q0
+0[0-9a-f]+ <[^>]+> f3b40200    vpaddl\.s16     d0, d0
+0[0-9a-f]+ <[^>]+> f3b80240    vpaddl\.s32     q0, q0
+0[0-9a-f]+ <[^>]+> f3b80240    vpaddl\.s32     q0, q0
+0[0-9a-f]+ <[^>]+> f3b80200    vpaddl\.s32     d0, d0
+0[0-9a-f]+ <[^>]+> f3b002c0    vpaddl\.u8      q0, q0
+0[0-9a-f]+ <[^>]+> f3b002c0    vpaddl\.u8      q0, q0
+0[0-9a-f]+ <[^>]+> f3b00280    vpaddl\.u8      d0, d0
+0[0-9a-f]+ <[^>]+> f3b402c0    vpaddl\.u16     q0, q0
+0[0-9a-f]+ <[^>]+> f3b402c0    vpaddl\.u16     q0, q0
+0[0-9a-f]+ <[^>]+> f3b40280    vpaddl\.u16     d0, d0
+0[0-9a-f]+ <[^>]+> f3b802c0    vpaddl\.u32     q0, q0
+0[0-9a-f]+ <[^>]+> f3b802c0    vpaddl\.u32     q0, q0
+0[0-9a-f]+ <[^>]+> f3b80280    vpaddl\.u32     d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0440    vrecpe\.u32     q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0440    vrecpe\.u32     q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0400    vrecpe\.u32     d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0540    vrecpe\.f32     q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0540    vrecpe\.f32     q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0500    vrecpe\.f32     d0, d0
+0[0-9a-f]+ <[^>]+> f3bb04c0    vrsqrte\.u32    q0, q0
+0[0-9a-f]+ <[^>]+> f3bb04c0    vrsqrte\.u32    q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0480    vrsqrte\.u32    d0, d0
+0[0-9a-f]+ <[^>]+> f3bb05c0    vrsqrte\.f32    q0, q0
+0[0-9a-f]+ <[^>]+> f3bb05c0    vrsqrte\.f32    q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0580    vrsqrte\.f32    d0, d0
+0[0-9a-f]+ <[^>]+> f3b00440    vcls\.s8        q0, q0
+0[0-9a-f]+ <[^>]+> f3b00440    vcls\.s8        q0, q0
+0[0-9a-f]+ <[^>]+> f3b00400    vcls\.s8        d0, d0
+0[0-9a-f]+ <[^>]+> f3b40440    vcls\.s16       q0, q0
+0[0-9a-f]+ <[^>]+> f3b40440    vcls\.s16       q0, q0
+0[0-9a-f]+ <[^>]+> f3b40400    vcls\.s16       d0, d0
+0[0-9a-f]+ <[^>]+> f3b80440    vcls\.s32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b80440    vcls\.s32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b80400    vcls\.s32       d0, d0
+0[0-9a-f]+ <[^>]+> f3b004c0    vclz\.i8        q0, q0
+0[0-9a-f]+ <[^>]+> f3b004c0    vclz\.i8        q0, q0
+0[0-9a-f]+ <[^>]+> f3b00480    vclz\.i8        d0, d0
+0[0-9a-f]+ <[^>]+> f3b404c0    vclz\.i16       q0, q0
+0[0-9a-f]+ <[^>]+> f3b404c0    vclz\.i16       q0, q0
+0[0-9a-f]+ <[^>]+> f3b40480    vclz\.i16       d0, d0
+0[0-9a-f]+ <[^>]+> f3b804c0    vclz\.i32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b804c0    vclz\.i32       q0, q0
+0[0-9a-f]+ <[^>]+> f3b80480    vclz\.i32       d0, d0
+0[0-9a-f]+ <[^>]+> f3b00540    vcnt\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00540    vcnt\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00500    vcnt\.8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b20042    vswp    q0, q1
+0[0-9a-f]+ <[^>]+> f3b20042    vswp    q0, q1
+0[0-9a-f]+ <[^>]+> f3b20001    vswp    d0, d1
+0[0-9a-f]+ <[^>]+> f3b200c2    vtrn\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b200c2    vtrn\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b20081    vtrn\.8 d0, d1
+0[0-9a-f]+ <[^>]+> f3b600c2    vtrn\.16        q0, q1
+0[0-9a-f]+ <[^>]+> f3b600c2    vtrn\.16        q0, q1
+0[0-9a-f]+ <[^>]+> f3b60081    vtrn\.16        d0, d1
+0[0-9a-f]+ <[^>]+> f3ba00c2    vtrn\.32        q0, q1
+0[0-9a-f]+ <[^>]+> f3ba00c2    vtrn\.32        q0, q1
+0[0-9a-f]+ <[^>]+> f3ba0081    vtrn\.32        d0, d1
+0[0-9a-f]+ <[^>]+> f3b00800    vtbl\.8 d0, {d0}, d0
+0[0-9a-f]+ <[^>]+> f3b00840    vtbx\.8 d0, {d0}, d0
diff --git a/gas/testsuite/gas/arm/neon-cov.s b/gas/testsuite/gas/arm/neon-cov.s

new file mode 100644 (file)

index 0000000..14bc618
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-cov.s
@@ -0,0 +1,595 @@
+@ Neon tests. Basic bitfield tests, using zero for as many registers/fields as
+@ possible, but without causing instructions to be badly-formed.
+
+       .arm
+       .syntax unified
+       .text
+
+       .macro regs3_1 op opq vtype
+       \op\vtype q0,q0,q0
+       \opq\vtype q0,q0,q0
+       \op\vtype d0,d0,d0
+       .endm
+
+       .macro dregs3_1 op vtype
+       \op\vtype d0,d0,d0
+       .endm
+
+       .macro regn3_1 op operand2 vtype
+       \op\vtype d0,q0,\operand2
+       .endm
+
+       .macro regl3_1 op operand2 vtype
+       \op\vtype q0,d0,\operand2
+       .endm
+
+       .macro regw3_1 op operand2 vtype
+       \op\vtype q0,q0,\operand2
+       .endm
+
+       .macro regs2_1 op opq vtype
+       \op\vtype q0,q0
+       \opq\vtype q0,q0
+       \op\vtype d0,d0
+       .endm
+
+       .macro regs3_su_32 op opq
+       regs3_1 \op \opq .s8
+       regs3_1 \op \opq .s16
+       regs3_1 \op \opq .s32
+       regs3_1 \op \opq .u8
+       regs3_1 \op \opq .u16
+       regs3_1 \op \opq .u32
+       .endm
+
+       regs3_su_32 vaba vabaq
+       regs3_su_32 vhadd vhaddq
+       regs3_su_32 vrhadd vrhaddq
+       regs3_su_32 vhsub vhsubq
+
+       .macro regs3_su_64 op opq
+       regs3_1 \op \opq .s8
+       regs3_1 \op \opq .s16
+       regs3_1 \op \opq .s32
+       regs3_1 \op \opq .s64
+       regs3_1 \op \opq .u8
+       regs3_1 \op \opq .u16
+       regs3_1 \op \opq .u32
+       regs3_1 \op \opq .u64
+       .endm
+
+       regs3_su_64 vqadd vqaddq
+       regs3_su_64 vqsub vqsubq
+       regs3_su_64 vrshl vrshlq
+       regs3_su_64 vqrshl vqrshlq
+
+       regs3_su_64 vshl vshlq
+       regs3_su_64 vqshl vqshlq
+
+       .macro regs2i_1 op opq imm vtype
+       \op\vtype q0,q0,\imm
+       \opq\vtype q0,q0,\imm
+       \op\vtype d0,d0,\imm
+       .endm
+
+       .macro regs2i_su_64 op opq imm
+       regs2i_1 \op \opq \imm .s8
+       regs2i_1 \op \opq \imm .s16
+       regs2i_1 \op \opq \imm .s32
+       regs2i_1 \op \opq \imm .s64
+       regs2i_1 \op \opq \imm .u8
+       regs2i_1 \op \opq \imm .u16
+       regs2i_1 \op \opq \imm .u32
+       regs2i_1 \op \opq \imm .u64
+       .endm
+
+       .macro regs2i_i_64 op opq imm
+       regs2i_1 \op \opq \imm .i8
+       regs2i_1 \op \opq \imm .i16
+       regs2i_1 \op \opq \imm .i32
+       regs2i_1 \op \opq \imm .i64
+       .endm
+
+       regs2i_i_64 vshl vshlq 0
+       regs2i_su_64 vqshl vqshlq 0
+
+       .macro regs3_ntyp op opq
+       regs3_1 \op \opq .8
+       .endm
+
+       regs3_ntyp vand vandq
+       regs3_ntyp vbic vbicq
+       regs3_ntyp vorr vorrq
+       regs3_ntyp vorn vornq
+       regs3_ntyp veor veorq
+
+       .macro logic_imm_1 op opq imm vtype
+       \op\vtype q0,\imm
+       \opq\vtype q0,\imm
+       \op\vtype d0,\imm
+       .endm
+
+       .macro logic_imm op opq
+       logic_imm_1 \op \opq 0x000000ff .i32
+       logic_imm_1 \op \opq 0x0000ff00 .i32
+       logic_imm_1 \op \opq 0x00ff0000 .i32
+       logic_imm_1 \op \opq 0xff000000 .i32
+       logic_imm_1 \op \opq 0x00ff .i16
+       logic_imm_1 \op \opq 0xff00 .i16
+       .endm
+
+       logic_imm vbic vbicq
+       logic_imm vorr vorrq
+
+       .macro logic_inv_imm op opq
+       logic_imm_1 \op \opq 0xffffff00 .i32
+       logic_imm_1 \op \opq 0xffff00ff .i32
+       logic_imm_1 \op \opq 0xff00ffff .i32
+       logic_imm_1 \op \opq 0x00ffffff .i32
+       logic_imm_1 \op \opq 0xff00 .i16
+       logic_imm_1 \op \opq 0x00ff .i16
+       .endm
+
+       logic_inv_imm vand vandq
+       logic_inv_imm vorn vornq
+
+       regs3_ntyp vbsl vbslq
+       regs3_ntyp vbit vbitq
+       regs3_ntyp vbif vbifq
+
+       .macro regs3_suf_32 op opq
+       regs3_1 \op \opq .s8
+       regs3_1 \op \opq .s16
+       regs3_1 \op \opq .s32
+       regs3_1 \op \opq .u8
+       regs3_1 \op \opq .u16
+       regs3_1 \op \opq .u32
+       regs3_1 \op \opq .f32
+       .endm
+
+       .macro regs3_if_32 op opq
+       regs3_1 \op \opq .i8
+       regs3_1 \op \opq .i16
+       regs3_1 \op \opq .i32
+       regs3_1 \op \opq .f32
+       .endm
+
+       regs3_suf_32 vabd vabdq
+       regs3_suf_32 vmax vmaxq
+       regs3_suf_32 vmin vminq
+
+       regs3_suf_32 vcge vcgeq
+       regs3_suf_32 vcgt vcgtq
+       regs3_suf_32 vcle vcleq
+       regs3_suf_32 vclt vcltq
+
+       regs3_if_32 vceq vceqq
+
+       .macro regs2i_sf_0 op opq
+       regs2i_1 \op \opq 0 .s8
+       regs2i_1 \op \opq 0 .s16
+       regs2i_1 \op \opq 0 .s32
+       regs2i_1 \op \opq 0 .f32
+       .endm
+
+       regs2i_sf_0 vcge vcgeq
+       regs2i_sf_0 vcgt vcgtq
+       regs2i_sf_0 vcle vcleq
+       regs2i_sf_0 vclt vcltq
+
+       .macro regs2i_if_0 op opq
+       regs2i_1 \op \opq 0 .i8
+       regs2i_1 \op \opq 0 .i16
+       regs2i_1 \op \opq 0 .i32
+       regs2i_1 \op \opq 0 .f32
+       .endm
+
+       regs2i_if_0 vceq vceqq
+
+       .macro dregs3_suf_32 op
+       dregs3_1 \op .s8
+       dregs3_1 \op .s16
+       dregs3_1 \op .s32
+       dregs3_1 \op .u8
+       dregs3_1 \op .u16
+       dregs3_1 \op .u32
+       dregs3_1 \op .f32
+       .endm
+
+       dregs3_suf_32 vpmax
+       dregs3_suf_32 vpmin
+
+       .macro sregs3_1 op opq vtype
+       \op\vtype q0,q0,q0
+       \opq\vtype q0,q0,q0
+       \op\vtype d0,d0,d0
+       .endm
+
+       .macro sclr21_1 op opq vtype
+       \op\vtype q0,q0,d0[0]
+       \opq\vtype q0,q0,d0[0]
+       \op\vtype d0,d0,d0[0]
+       .endm
+
+       .macro mul_incl_scalar op opq
+       regs3_1 \op \opq .i8
+       regs3_1 \op \opq .i16
+       regs3_1 \op \opq .i32
+       regs3_1 \op \opq .f32
+       sclr21_1 \op \opq .i16
+       sclr21_1 \op \opq .i32
+       sclr21_1 \op \opq .f32
+       .endm
+
+       mul_incl_scalar vmla vmlaq
+       mul_incl_scalar vmls vmlsq
+
+       .macro dregs3_if_32 op
+       dregs3_1 \op .i8
+       dregs3_1 \op .i16
+       dregs3_1 \op .i32
+       dregs3_1 \op .f32
+       .endm
+
+       dregs3_if_32 vpadd
+
+       .macro regs3_if_64 op opq
+       regs3_1 \op \opq .i8
+       regs3_1 \op \opq .i16
+       regs3_1 \op \opq .i32
+       regs3_1 \op \opq .i64
+       regs3_1 \op \opq .f32
+       .endm
+
+       regs3_if_64 vadd vaddq
+       regs3_if_64 vsub vsubq
+
+       .macro regs3_sz_32 op opq
+       regs3_1 \op \opq .8
+       regs3_1 \op \opq .16
+       regs3_1 \op \opq .32
+       .endm
+
+       regs3_sz_32 vtst vtstq
+
+       .macro regs3_ifp_32 op opq
+        regs3_1 \op \opq .i8
+       regs3_1 \op \opq .i16
+       regs3_1 \op \opq .i32
+       regs3_1 \op \opq .f32
+       regs3_1 \op \opq .p8
+       .endm
+
+       regs3_ifp_32 vmul vmulq
+
+       .macro dqmulhs op opq
+       regs3_1 \op \opq .s16
+       regs3_1 \op \opq .s32
+       sclr21_1 \op \opq .s16
+       sclr21_1 \op \opq .s32
+       .endm
+
+       dqmulhs vqdmulh vqdmulhq
+       dqmulhs vqrdmulh vqrdmulhq
+
+       regs3_1 vacge vacgeq .f32
+       regs3_1 vacgt vacgtq .f32
+       regs3_1 vacle vacleq .f32
+       regs3_1 vaclt vacltq .f32
+       regs3_1 vrecps vrecpsq .f32
+       regs3_1 vrsqrts vrsqrtsq .f32
+
+       .macro regs2_sf_32 op opq
+       regs2_1 \op \opq .s8
+       regs2_1 \op \opq .s16
+       regs2_1 \op \opq .s32
+       regs2_1 \op \opq .f32
+       .endm
+
+       regs2_sf_32 vabs vabsq
+       regs2_sf_32 vneg vnegq
+
+       .macro rshift_imm op opq
+       regs2i_1 \op \opq 7 .s8
+       regs2i_1 \op \opq 15 .s16
+       regs2i_1 \op \opq 31 .s32
+       regs2i_1 \op \opq 63 .s64
+       regs2i_1 \op \opq 7 .u8
+       regs2i_1 \op \opq 15 .u16
+       regs2i_1 \op \opq 31 .u32
+       regs2i_1 \op \opq 63 .u64
+       .endm
+
+       rshift_imm vshr vshrq
+       rshift_imm vrshr vrshrq
+       rshift_imm vsra vsraq
+       rshift_imm vrsra vrsraq
+
+       regs2i_1 vsli vsliq 0 .8
+       regs2i_1 vsli vsliq 0 .16
+       regs2i_1 vsli vsliq 0 .32
+       regs2i_1 vsli vsliq 0 .64
+
+       regs2i_1 vsri vsriq 7 .8
+       regs2i_1 vsri vsriq 15 .16
+       regs2i_1 vsri vsriq 31 .32
+       regs2i_1 vsri vsriq 63 .64
+
+       regs2i_1 vqshlu vqshluq 0 .s8
+       regs2i_1 vqshlu vqshluq 0 .s16
+       regs2i_1 vqshlu vqshluq 0 .s32
+       regs2i_1 vqshlu vqshluq 0 .s64
+
+       .macro qrshift_imm op
+       regn3_1 \op 7 .s16
+       regn3_1 \op 15 .s32
+       regn3_1 \op 31 .s64
+       regn3_1 \op 7 .u16
+       regn3_1 \op 15 .u32
+       regn3_1 \op 31 .u64
+       .endm
+
+       .macro qrshiftu_imm op
+       regn3_1 \op 7 .s16
+       regn3_1 \op 15 .s32
+       regn3_1 \op 31 .s64
+       .endm
+
+       .macro qrshifti_imm op
+       regn3_1 \op 7 .i16
+       regn3_1 \op 15 .i32
+       regn3_1 \op 31 .i64
+       .endm
+
+       qrshift_imm vqshrn
+       qrshift_imm vqrshrn
+       qrshiftu_imm vqshrun
+       qrshiftu_imm vqrshrun
+
+       qrshifti_imm vshrn
+       qrshifti_imm vrshrn
+
+       regl3_1 vshll 1 .s8
+       regl3_1 vshll 1 .s16
+       regl3_1 vshll 1 .s32
+       regl3_1 vshll 1 .u8
+       regl3_1 vshll 1 .u16
+       regl3_1 vshll 1 .u32
+
+       regl3_1 vshll 8 .i8
+       regl3_1 vshll 16 .i16
+       regl3_1 vshll 32 .i32
+
+       .macro convert op opr arg="" t1=".s32.f32" t2=".u32.f32" t3=".f32.s32" t4=".f32.u32"
+       \op\t1 \opr,\opr\arg
+       \op\t2 \opr,\opr\arg
+       \op\t3 \opr,\opr\arg
+       \op\t4 \opr,\opr\arg
+       .endm
+
+       convert vcvt q0
+       convert vcvtq q0
+       convert vcvt d0
+       convert vcvt q0 ",1"
+       convert vcvtq q0 ",1"
+       convert vcvt d0 ",1"
+
+       vmov q0,q0
+       vmov d0,d0
+       vmov.8 d0[0],r0
+       vmov.16 d0[0],r0
+       vmov.32 d0[0],r0
+       vmov d0,r0,r0
+       vmov.s8 r0,d0[0]
+       vmov.s16 r0,d0[0]
+       vmov.u8 r0,d0[0]
+       vmov.u16 r0,d0[0]
+       vmov.32 r0,d0[0]
+       vmov r0,r1,d0
+
+       .macro mov_imm op imm vtype
+       \op\vtype q0,\imm
+       \op\vtype d0,\imm
+       .endm
+
+       mov_imm vmov 0x00000077 .i32
+       mov_imm vmvn 0x00000077 .i32
+       mov_imm vmov 0x00007700 .i32
+       mov_imm vmvn 0x00007700 .i32
+       mov_imm vmov 0x00770000 .i32
+       mov_imm vmvn 0x00770000 .i32
+       mov_imm vmov 0x77000000 .i32
+       mov_imm vmvn 0x77000000 .i32
+       mov_imm vmov 0x0077 .i16
+       mov_imm vmvn 0x0077 .i16
+       mov_imm vmov 0x7700 .i16
+       mov_imm vmvn 0x7700 .i16
+       mov_imm vmov 0x000077ff .i32
+       mov_imm vmvn 0x000077ff .i32
+       mov_imm vmov 0x0077ffff .i32
+       mov_imm vmvn 0x0077ffff .i32
+       mov_imm vmov 0x77 .i8
+       mov_imm vmov 0xff0000ff000000ff .i64
+       mov_imm vmov 0x40880000 .f32
+
+       vmvn q0,q0
+       vmvnq q0,q0
+       vmvn d0,d0
+
+       .macro long_ops op
+       regl3_1 \op d0 .s8
+       regl3_1 \op d0 .s16
+       regl3_1 \op d0 .s32
+       regl3_1 \op d0 .u8
+       regl3_1 \op d0 .u16
+       regl3_1 \op d0 .u32
+       .endm
+
+       long_ops vabal
+       long_ops vabdl
+       long_ops vaddl
+       long_ops vsubl
+
+       .macro long_mac op
+       regl3_1 \op d0 .s8
+       regl3_1 \op d0 .s16
+       regl3_1 \op d0 .s32
+       regl3_1 \op d0 .u8
+       regl3_1 \op d0 .u16
+       regl3_1 \op d0 .u32
+       regl3_1 \op "d0[0]" .s16
+       regl3_1 \op "d0[0]" .s32
+       regl3_1 \op "d0[0]" .u16
+       regl3_1 \op "d0[0]" .u32
+       .endm
+
+       long_mac vmlal
+       long_mac vmlsl
+
+       .macro wide_ops op
+       regw3_1 \op d0 .s8
+       regw3_1 \op d0 .s16
+       regw3_1 \op d0 .s32
+       regw3_1 \op d0 .u8
+       regw3_1 \op d0 .u16
+       regw3_1 \op d0 .u32
+       .endm
+
+       wide_ops vaddw
+       wide_ops vsubw
+
+       .macro narr_ops op
+       regn3_1 \op q0 .i16
+       regn3_1 \op q0 .i32
+       regn3_1 \op q0 .i64
+       .endm
+
+       narr_ops vaddhn
+       narr_ops vraddhn
+       narr_ops vsubhn
+       narr_ops vrsubhn
+
+       .macro long_dmac op
+       regl3_1 \op d0 .s16
+       regl3_1 \op d0 .s32
+       regl3_1 \op "d0[0]" .s16
+       regl3_1 \op "d0[0]" .s32
+       .endm
+
+       long_dmac vqdmlal
+       long_dmac vqdmlsl
+       long_dmac vqdmull
+
+       regl3_1 vmull d0 .s8
+       regl3_1 vmull d0 .s16
+       regl3_1 vmull d0 .s32
+       regl3_1 vmull d0 .u8
+       regl3_1 vmull d0 .u16
+       regl3_1 vmull d0 .u32
+       regl3_1 vmull d0 .p8
+       regl3_1 vmull "d0[0]" .s16
+       regl3_1 vmull "d0[0]" .s32
+       regl3_1 vmull "d0[0]" .u16
+       regl3_1 vmull "d0[0]" .u32
+
+       vext.8 q0,q0,q0,0
+       vextq.8 q0,q0,q0,0
+       vext.8 d0,d0,d0,0
+
+       .macro revs op opq vtype
+       \op\vtype q0,q0
+       \opq\vtype q0,q0
+       \op\vtype d0,d0
+       .endm
+
+       revs vrev64 vrev64q .8
+       revs vrev64 vrev64q .16
+       revs vrev64 vrev64q .32
+       revs vrev32 vrev32q .8
+       revs vrev32 vrev32q .16
+       revs vrev16 vrev16q .8
+
+       .macro dups op opq vtype
+       \op\vtype q0,r0
+       \opq\vtype q0,r0
+       \op\vtype d0,r0
+       \op\vtype q0,d0[0]
+       \opq\vtype q0,d0[0]
+       \op\vtype d0,d0[0]
+       .endm
+
+       dups vdup vdupq .8
+       dups vdup vdupq .16
+       dups vdup vdupq .32
+
+       .macro binop_3typ op op1 op2 t1 t2 t3
+       \op\t1 \op1,\op2
+       \op\t2 \op1,\op2
+       \op\t3 \op1,\op2
+       .endm
+
+       binop_3typ vmovl q0 d0 .s8 .s16 .s32
+       binop_3typ vmovl q0 d0 .u8 .u16 .u32
+       binop_3typ vmovn d0 q0 .i16 .i32 .i64
+       binop_3typ vqmovn d0 q0 .s16 .s32 .s64
+       binop_3typ vqmovn d0 q0 .u16 .u32 .u64
+       binop_3typ vqmovun d0 q0 .s16 .s32 .s64
+
+       .macro binops op opq vtype="" rhs="0"
+       \op\vtype q0,q\rhs
+       \opq\vtype q0,q\rhs
+       \op\vtype d0,d\rhs
+       .endm
+
+       .macro regs2_sz_32 op opq
+       binops \op \opq .8 1
+       binops \op \opq .16 1
+       binops \op \opq .32 1
+       .endm
+
+       regs2_sz_32 vzip vzipq
+       regs2_sz_32 vuzp vuzpq
+
+       .macro regs2_s_32 op opq
+       binops \op \opq .s8
+       binops \op \opq .s16
+       binops \op \opq .s32
+       .endm
+
+       regs2_s_32 vqabs vqabsq
+       regs2_s_32 vqneg vqnegq
+
+       .macro regs2_su_32 op opq
+       regs2_s_32 \op \opq
+       binops \op \opq .u8
+       binops \op \opq .u16
+       binops \op \opq .u32
+       .endm
+
+       regs2_su_32 vpadal vpadalq
+       regs2_su_32 vpaddl vpaddlq
+
+       binops vrecpe vrecpeq .u32
+       binops vrecpe vrecpeq .f32
+       binops vrsqrte vrsqrteq .u32
+       binops vrsqrte vrsqrteq .f32
+
+       regs2_s_32 vcls vclsq
+
+       .macro regs2_i_32 op opq
+       binops \op \opq .i8
+       binops \op \opq .i16
+       binops \op \opq .i32
+       .endm
+
+       regs2_i_32 vclz vclzq
+
+       binops vcnt vcntq .8
+
+       binops vswp vswpq "" 1
+
+       regs2_sz_32 vtrn vtrnq
+
+       vtbl.8 d0,{d0},d0
+       vtbx.8 d0,{d0},d0
+       
diff --git a/gas/testsuite/gas/arm/neon-ldst-es.d b/gas/testsuite/gas/arm/neon-ldst-es.d

new file mode 100644 (file)

index 0000000..c520ac9
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-ldst-es.d
@@ -0,0 +1,57 @@
+# name: Neon element and structure loads and stores
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> f406282f    vst2\.8 {d2-d3}, \[r6, :128\]
+0[0-9a-f]+ <[^>]+> f427140d    vld3\.8 {d1-d3}, \[r7\]!
+0[0-9a-f]+ <[^>]+> f4091553    vst3\.16        {d1,d3,d5}, \[r9, :64\], r3
+0[0-9a-f]+ <[^>]+> f42a208f    vld4\.32        {d2-d5}, \[sl\]
+0[0-9a-f]+ <[^>]+> f40a114f    vst4\.16        {d1,d3,d5,d7}, \[sl\]
+0[0-9a-f]+ <[^>]+> f4aa1c6f    vld1\.16        {d1\[\]-d2\[\]}, \[sl\]
+0[0-9a-f]+ <[^>]+> f4aa1c5f    vld1\.16        {d1\[\]}, \[sl, :16\]
+0[0-9a-f]+ <[^>]+> f4aa1dbf    vld2\.32        {d1\[\],d3\[\]}, \[sl, :64\]
+0[0-9a-f]+ <[^>]+> f4aa3e0c    vld3\.8 {d3\[\]-d5\[\]}, \[sl\], ip
+0[0-9a-f]+ <[^>]+> f4a9af6d    vld4\.16        {d10\[\],d12\[\],d14\[\],d16\[\]}, \[r9\]!
+0[0-9a-f]+ <[^>]+> f4a9af5f    vld4\.16        {d10\[\]-d13\[\]}, \[r9, :64\]
+0[0-9a-f]+ <[^>]+> f4a9af9f    vld4\.32        {d10\[\]-d13\[\]}, \[r9, :64\]
+0[0-9a-f]+ <[^>]+> f4a9afdf    vld4\.32        {d10\[\]-d13\[\]}, \[r9, :128\]
+0[0-9a-f]+ <[^>]+> f4a530ed    vld1\.8 {d3\[7\]}, \[r5\]!
+0[0-9a-f]+ <[^>]+> f48554df    vst1\.16        {d5\[3\]}, \[r5, :16\]
+0[0-9a-f]+ <[^>]+> f4a535dd    vld2\.16        {d3\[3\],d4\[3\]}, \[r5, :32\]!
+0[0-9a-f]+ <[^>]+> f4858a83    vst3\.32        {d8\[1\],d9\[1\],d10\[1\]}, \[r5\], r3
+0[0-9a-f]+ <[^>]+> f4a7804f    vld1\.8 {d8\[2\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7848f    vld1\.16        {d8\[2\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7849f    vld1\.16        {d8\[2\]}, \[r7, :16\]
+0[0-9a-f]+ <[^>]+> f4a7888f    vld1\.32        {d8\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a788bf    vld1\.32        {d8\[1\]}, \[r7, :32\]
+0[0-9a-f]+ <[^>]+> f4a7812f    vld2\.8 {d8\[1\],d9\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7813f    vld2\.8 {d8\[1\],d9\[1\]}, \[r7, :16\]
+0[0-9a-f]+ <[^>]+> f4a7854f    vld2\.16        {d8\[1\],d9\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7855f    vld2\.16        {d8\[1\],d9\[1\]}, \[r7, :32\]
+0[0-9a-f]+ <[^>]+> f4a7856f    vld2\.16        {d8\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7857f    vld2\.16        {d8\[1\],d10\[1\]}, \[r7, :32\]
+0[0-9a-f]+ <[^>]+> f4a7898f    vld2\.32        {d8\[1\],d9\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7899f    vld2\.32        {d8\[1\],d9\[1\]}, \[r7, :64\]
+0[0-9a-f]+ <[^>]+> f4a789cf    vld2\.32        {d8\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a789df    vld2\.32        {d8\[1\],d10\[1\]}, \[r7, :64\]
+0[0-9a-f]+ <[^>]+> f4a7822f    vld3\.8 {d8\[1\],d9\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7864f    vld3\.16        {d8\[1\],d9\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7866f    vld3\.16        {d8\[1\],d10\[1\],d12\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a78a8f    vld3\.32        {d8\[1\],d9\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a78acf    vld3\.32        {d8\[1\],d10\[1\],d12\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7834f    vld4\.8 {d8\[2\],d9\[2\],d10\[2\],d11\[2\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7835f    vld4\.8 {d8\[2\],d9\[2\],d10\[2\],d11\[2\]}, \[r7, :32\]
+0[0-9a-f]+ <[^>]+> f4a7876f    vld4\.16        {d8\[1\],d10\[1\],d12\[1\],d14\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7875f    vld4\.16        {d8\[1\],d9\[1\],d10\[1\],d11\[1\]}, \[r7, :64\]
+0[0-9a-f]+ <[^>]+> f4a78bcf    vld4\.32        {d8\[1\],d10\[1\],d12\[1\],d14\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a78bdf    vld4\.32        {d8\[1\],d10\[1\],d12\[1\],d14\[1\]}, \[r7, :64\]
+0[0-9a-f]+ <[^>]+> f4a78bef    vld4\.32        {d8\[1\],d10\[1\],d12\[1\],d14\[1\]}, \[r7, :128\]
+0[0-9a-f]+ <[^>]+> f3b43805    vtbl\.8 d3, {d4}, d5
+0[0-9a-f]+ <[^>]+> f3b23b05    vtbl\.8 d3, {d2-d5}, d5
+0[0-9a-f]+ <[^>]+> f3be3985    vtbl\.8 d3, {d30-d31}, d5
+0[0-9a-f]+ <[^>]+> f427288f    vld2\.32        {d2-d3}, \[r7\]
+0[0-9a-f]+ <[^>]+> f427208f    vld4\.32        {d2-d5}, \[r7\]
+0[0-9a-f]+ <[^>]+> f467c08f    vld4\.32        {d28-d31}, \[r7\]
diff --git a/gas/testsuite/gas/arm/neon-ldst-es.s b/gas/testsuite/gas/arm/neon-ldst-es.s

new file mode 100644 (file)

index 0000000..5a29a43
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-ldst-es.s
@@ -0,0 +1,59 @@
+@ test element and structure loads and stores.
+
+       .text
+       .arm
+       .syntax unified
+
+       vst2.8 {d2,d3},[r6,:128]
+       vld3.8 {d1,d2,d3},[r7]!
+       vst3.16 {d1,d3,d5},[r9,:64],r3
+       vld4.32 {d2,d3,d4,d5},[r10]
+       vst4.16 {d1,d3,d5,d7},[r10]
+       vld1.16 {d1[],d2[]},[r10]
+       vld1.16 {d1[]},[r10,:16]
+       vld2.32 {d1[],d3[]},[r10,:64]
+       vld3.s8 {d3[],d4[],d5[]},[r10],r12
+       vld4.16 {d10[],d12[],d14[],d16[]},[r9]!
+       vld4.16 {d10[],d11[],d12[],d13[]},[r9,:64]
+       vld4.32 {d10[],d11[],d12[],d13[]},[r9,:64]
+       vld4.32 {d10[],d11[],d12[],d13[]},[r9,:128]
+       vld1.8 {d3[7]},[r5]!
+       vst1.16 {d5[3]},[r5,:16]
+       vld2.16 {d3[3],d4[3]},[r5,:32]!
+       vst3.32 {d8[1],d9[1],d10[1]},[r5],r3
+        
+        vld1.8 {d8[2]},[r7]
+        vld1.16 {d8[2]},[r7]
+        vld1.16 {d8[2]},[r7,:16]
+        vld1.32 {d8[1]},[r7]
+        vld1.32 {d8[1]},[r7,:32]
+        vld2.8 {d8[1],d9[1]},[r7]
+        vld2.8 {d8[1],d9[1]},[r7,:16]
+        vld2.16 {d8[1],d9[1]},[r7]
+        vld2.16 {d8[1],d9[1]},[r7,:32]
+        vld2.16 {d8[1],d10[1]},[r7]
+        vld2.16 {d8[1],d10[1]},[r7,:32]
+        vld2.32 {d8[1],d9[1]},[r7]
+        vld2.32 {d8[1],d9[1]},[r7,:64]
+        vld2.32 {d8[1],d10[1]},[r7]
+        vld2.32 {d8[1],d10[1]},[r7,:64]
+        vld3.8 {d8[1],d9[1],d10[1]},[r7]
+        vld3.16 {d8[1],d9[1],d10[1]},[r7]
+        vld3.16 {d8[1],d10[1],d12[1]},[r7]
+        vld3.32 {d8[1],d9[1],d10[1]},[r7]
+        vld3.32 {d8[1],d10[1],d12[1]},[r7]
+       vld4.8 {d8[2],d9[2],d10[2],d11[2]},[r7]
+       vld4.8 {d8[2],d9[2],d10[2],d11[2]},[r7,:32]
+        vld4.16 {d8[1],d10[1],d12[1],d14[1]},[r7]
+        vld4.16 {d8[1],d9[1],d10[1],d11[1]},[r7,:64]
+        vld4.32 {d8[1],d10[1],d12[1],d14[1]},[r7]
+        vld4.32 {d8[1],d10[1],d12[1],d14[1]},[r7,:64]
+        vld4.32 {d8[1],d10[1],d12[1],d14[1]},[r7,:128]
+
+       vtbl.8 d3,{d4},d5
+       vtbl.8 d3,{q1-q2},d5
+       vtbl.8 d3,{q15},d5
+
+       vld2.32 {q1},[r7]
+       vld4.32 {q1-q2},[r7]
+       vld4.32 {q14-q15},[r7]
diff --git a/gas/testsuite/gas/arm/neon-ldst-rm.d b/gas/testsuite/gas/arm/neon-ldst-rm.d

new file mode 100644 (file)

index 0000000..c538fc9
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-ldst-rm.d
@@ -0,0 +1,63 @@
+# name: Neon single and multiple register loads and stores
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> ec922b02    vldmia  r2, {d2}
+0[0-9a-f]+ <[^>]+> ec922b04    vldmia  r2, {d2-d3}
+0[0-9a-f]+ <[^>]+> ec924b08    vldmia  r2, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecd28b10    vldmia  r2, {d24-d31}
+0[0-9a-f]+ <[^>]+> ec923b20    vldmia  r2, {d3-d18}
+0[0-9a-f]+ <[^>]+> ec922b02    vldmia  r2, {d2}
+0[0-9a-f]+ <[^>]+> ec922b04    vldmia  r2, {d2-d3}
+0[0-9a-f]+ <[^>]+> ec924b08    vldmia  r2, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecd28b10    vldmia  r2, {d24-d31}
+0[0-9a-f]+ <[^>]+> ec923b20    vldmia  r2, {d3-d18}
+0[0-9a-f]+ <[^>]+> ecb22b02    vldmia  r2!, {d2}
+0[0-9a-f]+ <[^>]+> ecb22b04    vldmia  r2!, {d2-d3}
+0[0-9a-f]+ <[^>]+> ecb24b08    vldmia  r2!, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecf28b10    vldmia  r2!, {d24-d31}
+0[0-9a-f]+ <[^>]+> ecb23b20    vldmia  r2!, {d3-d18}
+0[0-9a-f]+ <[^>]+> ed322b02    vldmdb  r2!, {d2}
+0[0-9a-f]+ <[^>]+> ed322b04    vldmdb  r2!, {d2-d3}
+0[0-9a-f]+ <[^>]+> ed324b08    vldmdb  r2!, {d4-d7}
+0[0-9a-f]+ <[^>]+> ed728b10    vldmdb  r2!, {d24-d31}
+0[0-9a-f]+ <[^>]+> ed323b20    vldmdb  r2!, {d3-d18}
+0[0-9a-f]+ <[^>]+> ec822b02    vstmia  r2, {d2}
+0[0-9a-f]+ <[^>]+> ec822b04    vstmia  r2, {d2-d3}
+0[0-9a-f]+ <[^>]+> ec824b08    vstmia  r2, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecc28b10    vstmia  r2, {d24-d31}
+0[0-9a-f]+ <[^>]+> ec823b20    vstmia  r2, {d3-d18}
+0[0-9a-f]+ <[^>]+> ec822b02    vstmia  r2, {d2}
+0[0-9a-f]+ <[^>]+> ec822b04    vstmia  r2, {d2-d3}
+0[0-9a-f]+ <[^>]+> ec824b08    vstmia  r2, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecc28b10    vstmia  r2, {d24-d31}
+0[0-9a-f]+ <[^>]+> ec823b20    vstmia  r2, {d3-d18}
+0[0-9a-f]+ <[^>]+> eca22b02    vstmia  r2!, {d2}
+0[0-9a-f]+ <[^>]+> eca22b04    vstmia  r2!, {d2-d3}
+0[0-9a-f]+ <[^>]+> eca24b08    vstmia  r2!, {d4-d7}
+0[0-9a-f]+ <[^>]+> ece28b10    vstmia  r2!, {d24-d31}
+0[0-9a-f]+ <[^>]+> eca23b20    vstmia  r2!, {d3-d18}
+0[0-9a-f]+ <[^>]+> ed222b02    vstmdb  r2!, {d2}
+0[0-9a-f]+ <[^>]+> ed222b04    vstmdb  r2!, {d2-d3}
+0[0-9a-f]+ <[^>]+> ed224b08    vstmdb  r2!, {d4-d7}
+0[0-9a-f]+ <[^>]+> ed628b10    vstmdb  r2!, {d24-d31}
+0[0-9a-f]+ <[^>]+> ed223b20    vstmdb  r2!, {d3-d18}
+0[0-9a-f]+ <backward> 000001f4         streqd  r0, \[r0\], -r4
+0[0-9a-f]+ <[^>]+> eddf6b0b    vldr    d22, \[pc, #44\]        ; 0[0-9a-f]+ <forward>
+0[0-9a-f]+ <[^>]+> ed935b00    vldr    d5, \[r3\]
+0[0-9a-f]+ <[^>]+> ed135b01    vldr    d5, \[r3, #-4\]
+0[0-9a-f]+ <[^>]+> ed935b01    vldr    d5, \[r3, #4\]
+0[0-9a-f]+ <[^>]+> ed835b00    vstr    d5, \[r3\]
+0[0-9a-f]+ <[^>]+> ed035b01    vstr    d5, \[r3, #-4\]
+0[0-9a-f]+ <[^>]+> ed835b01    vstr    d5, \[r3, #4\]
+0[0-9a-f]+ <[^>]+> ed935b00    vldr    d5, \[r3\]
+0[0-9a-f]+ <[^>]+> ed135b40    vldr    d5, \[r3, #-256\]
+0[0-9a-f]+ <[^>]+> ed935b40    vldr    d5, \[r3, #256\]
+0[0-9a-f]+ <[^>]+> ed835b00    vstr    d5, \[r3\]
+0[0-9a-f]+ <[^>]+> ed035b40    vstr    d5, \[r3, #-256\]
+0[0-9a-f]+ <[^>]+> ed835b40    vstr    d5, \[r3, #256\]
+0[0-9a-f]+ <forward> 000002bc  streqh  r0, \[r0\], -ip
+0[0-9a-f]+ <[^>]+> ed1f7b11    vldr    d7, \[pc, #-68\]        ; 0[0-9a-f]+ <backward>
diff --git a/gas/testsuite/gas/arm/neon-ldst-rm.s b/gas/testsuite/gas/arm/neon-ldst-rm.s

new file mode 100644 (file)

index 0000000..f9421ac
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-ldst-rm.s
@@ -0,0 +1,44 @@
+@ Test single-register (vldr/vstr) and multi-register (vldm/vstm) loads and stores.
+
+       .text
+       .arm
+       .syntax unified
+
+       .macro multi op dir="" wb=""
+       \op\dir r2\wb,{d2}
+       \op\dir r2\wb,{d2-d3}
+       \op\dir r2\wb,{q2-q3}
+       \op\dir r2\wb,{q12-q14,q15}
+       \op\dir r2\wb,{d3,d4,d5-d8,d9,d10,d11,d12-d16,d17-d18}
+       .endm
+
+       multi vldm
+       multi vldm ia
+       multi vldm ia "!"
+       multi vldm db "!"
+
+       multi vstm
+       multi vstm ia
+       multi vstm ia "!"
+       multi vstm db "!"
+
+backward:
+       .word 500
+
+       .macro single op offset=""
+       \op d5,[r3]
+       \op d5,[r3,#-\offset]
+       \op d5,[r3,#\offset]
+       .endm
+
+       vldr d22, forward
+
+       single vldr 4
+       single vstr 4
+       single vldr 256
+       single vstr 256
+
+forward:
+       .word 700
+
+       vldr d7, backward
diff --git a/gas/testsuite/gas/arm/neon-omit.d b/gas/testsuite/gas/arm/neon-omit.d

new file mode 100644 (file)

index 0000000..155fec9
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-omit.d
@@ -0,0 +1,51 @@
+# name: Neon optional register operands
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+0[0-9a-f]+ <[^>]+> f3022746    vabd\.u8        q1, q1, q3
+0[0-9a-f]+ <[^>]+> f26cc0c6    vhadd\.s32      q14, q14, q3
+0[0-9a-f]+ <[^>]+> f2222144    vrhadd\.s32     q1, q1, q2
+0[0-9a-f]+ <[^>]+> f22aa24e    vhsub\.s32      q5, q5, q7
+0[0-9a-f]+ <[^>]+> f3166448    vshl\.u16       q3, q3, q4
+0[0-9a-f]+ <[^>]+> f32aa45c    vqshl\.u32      q5, q5, q6
+0[0-9a-f]+ <[^>]+> f20ee170    vand    q7, q7, q8
+0[0-9a-f]+ <[^>]+> f30ee170    veor    q7, q7, q8
+0[0-9a-f]+ <[^>]+> f3b5a14a    vceq\.i16       q5, q5, #0
+0[0-9a-f]+ <[^>]+> f31aa85a    vceq\.i16       q5, q5, q5
+0[0-9a-f]+ <[^>]+> f3b5a24a    vclt\.s16       q5, q5, #0
+0[0-9a-f]+ <[^>]+> f3b5a34c    vabs\.s16       q5, q6
+0[0-9a-f]+ <[^>]+> f3b57388    vneg\.s16       d7, d8
+0[0-9a-f]+ <[^>]+> f3b97708    vabs\.f32       d7, d8
+0[0-9a-f]+ <[^>]+> f3f927e4    vneg\.f32       q9, q10
+0[0-9a-f]+ <[^>]+> f2211a03    vpmax\.s32      d1, d1, d3
+0[0-9a-f]+ <[^>]+> f2255a17    vpmin\.s32      d5, d5, d7
+0[0-9a-f]+ <[^>]+> f3011f03    vpmax\.f32      d1, d1, d3
+0[0-9a-f]+ <[^>]+> f3255f07    vpmin\.f32      d5, d5, d7
+0[0-9a-f]+ <[^>]+> f2122b46    vqdmulh\.s16    q1, q1, q3
+0[0-9a-f]+ <[^>]+> f3255b07    vqrdmulh\.s32   d5, d5, d7
+0[0-9a-f]+ <[^>]+> f3922c6d    vqdmulh\.s16    q1, q1, d5\[3\]
+0[0-9a-f]+ <[^>]+> f2122056    vqadd\.s16      q1, q1, q3
+0[0-9a-f]+ <[^>]+> f2255017    vqadd\.s32      d5, d5, d7
+0[0-9a-f]+ <[^>]+> f2222944    vmla\.i32       q1, q1, q2
+0[0-9a-f]+ <[^>]+> f2133b14    vpadd\.i16      d3, d3, d4
+0[0-9a-f]+ <[^>]+> f3266948    vmls\.i32       q3, q3, q4
+0[0-9a-f]+ <[^>]+> f3022e54    vacge\.f32      q1, q1, q2
+0[0-9a-f]+ <[^>]+> f3266e58    vacgt\.f32      q3, q3, q4
+0[0-9a-f]+ <[^>]+> f30cae5a    vacge\.f32      q5, q6, q5
+0[0-9a-f]+ <[^>]+> f320eede    vacgt\.f32      q7, q8, q7
+0[0-9a-f]+ <[^>]+> f32ee370    vcge\.u32       q7, q7, q8
+0[0-9a-f]+ <[^>]+> f320e3de    vcge\.u32       q7, q8, q7
+0[0-9a-f]+ <[^>]+> f3a22102    vaddw\.u32      q1, q1, d2
+0[0-9a-f]+ <[^>]+> f2a66304    vsubw\.s32      q3, q3, d4
+0[0-9a-f]+ <[^>]+> f2244856    vtst\.32        q2, q2, q3
+0[0-9a-f]+ <[^>]+> f2011f12    vrecps\.f32     d1, d1, d2
+0[0-9a-f]+ <[^>]+> f29c2052    vshr\.s16       q1, q1, #4
+0[0-9a-f]+ <[^>]+> f28b4254    vrshr\.s8       q2, q2, #5
+0[0-9a-f]+ <[^>]+> f39a6156    vsra\.u16       q3, q3, #6
+0[0-9a-f]+ <[^>]+> f39a8358    vrsra\.u16      q4, q4, #6
+0[0-9a-f]+ <[^>]+> f3954554    vsli\.16        q2, q2, #5
+0[0-9a-f]+ <[^>]+> f3bff69f    vqshlu\.s64     d15, d15, #63
+0[0-9a-f]+ <[^>]+> f2b55306    vext\.8 d5, d5, d6, #3
diff --git a/gas/testsuite/gas/arm/neon-omit.s b/gas/testsuite/gas/arm/neon-omit.s

new file mode 100644 (file)

index 0000000..7b20f12
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-omit.s
@@ -0,0 +1,50 @@
+@ Test Neon instructions assembled with their optional operands omitted.
+
+       .text
+       .arm
+       .syntax unified
+
+       vabd.u8 q1,q3
+       vhadd.s32 q14, q3
+       vrhadd.s32 q1,q2
+       vhsub.s32 q5,q7
+       vshl.u16 q3,q4
+       vqshl.u32 q5,q6
+       vand.64 q7,q8
+       veor.64 q7,q8
+       vceq.i16 q5,#0
+       vceq.i16 q5,q5
+       vclt.s16 q5,#0
+       vabs.s16 q5,q6
+       vneg.s16 d7,d8
+       vabs.f d7,d8
+       vneg.f q9,q10
+       vpmax.s32 d1,d3
+       vpmin.s32 d5,d7
+       vpmax.f32 d1,d3
+       vpmin.f32 d5,d7
+       vqdmulh.s16 q1,q3
+       vqrdmulh.s32 d5,d7
+       vqdmulh.s16 q1,d5[3]
+       vqadd.s16 q1,q3
+       vqadd.s32 d5,d7
+       vmla.i32 q1,q2
+       vpadd.i16 d3,d4
+       vmls.s32 q3,q4
+       vacge.f q1,q2
+       vacgt.f q3,q4
+       vaclt.f q5,q6
+       vacle.f q7,q8
+       vcge.u32 q7,q8
+       vclt.u32 q7,q8
+       vaddw.u32 q1,d2
+       vsubw.s32 q3,d4
+       vtst.i32 q2,q3
+       vrecps.f d1,d2
+       vshr.s16 q1,#4
+        vrshr.s8 q2,#5
+       vsra.u16 q3,#6
+        vrsra.u16 q4,#6
+       vsli.16 q2,#5
+       vqshlu.s64 d15,#63
+       vext.8 d5,d6,#3
diff --git a/gas/testsuite/gas/arm/vfp1.d b/gas/testsuite/gas/arm/vfp1.d

index 672b23de31d58000e5432079cb4916cae1d6609a..3894909539bbd59053267db4719bcce441ae06c6 100644 (file)
--- a/gas/testsuite/gas/arm/vfp1.d
+++ b/gas/testsuite/gas/arm/vfp1.d
@@ -24,20 +24,20 @@ Disassembly of section .text:
  0+038 <[^>]*> ee100b40         fnmscd  d0, d0, d0
  0+03c <[^>]*> ee200b40         fnmuld  d0, d0, d0
  0+040 <[^>]*> ee300b40         fsubd   d0, d0, d0
-0+044 <[^>]*> ed900b00         fldd    d0, \[r0\]
-0+048 <[^>]*> ed800b00         fstd    d0, \[r0\]
-0+04c <[^>]*> ec900b02         fldmiad r0, {d0}
-0+050 <[^>]*> ec900b02         fldmiad r0, {d0}
-0+054 <[^>]*> ecb00b02         fldmiad r0!, {d0}
-0+058 <[^>]*> ecb00b02         fldmiad r0!, {d0}
-0+05c <[^>]*> ed300b02         fldmdbd r0!, {d0}
-0+060 <[^>]*> ed300b02         fldmdbd r0!, {d0}
-0+064 <[^>]*> ec800b02         fstmiad r0, {d0}
-0+068 <[^>]*> ec800b02         fstmiad r0, {d0}
-0+06c <[^>]*> eca00b02         fstmiad r0!, {d0}
-0+070 <[^>]*> eca00b02         fstmiad r0!, {d0}
-0+074 <[^>]*> ed200b02         fstmdbd r0!, {d0}
-0+078 <[^>]*> ed200b02         fstmdbd r0!, {d0}
+0+044 <[^>]*> ed900b00         vldr    d0, \[r0\]
+0+048 <[^>]*> ed800b00         vstr    d0, \[r0\]
+0+04c <[^>]*> ec900b02         vldmia  r0, {d0}
+0+050 <[^>]*> ec900b02         vldmia  r0, {d0}
+0+054 <[^>]*> ecb00b02         vldmia  r0!, {d0}
+0+058 <[^>]*> ecb00b02         vldmia  r0!, {d0}
+0+05c <[^>]*> ed300b02         vldmdb  r0!, {d0}
+0+060 <[^>]*> ed300b02         vldmdb  r0!, {d0}
+0+064 <[^>]*> ec800b02         vstmia  r0, {d0}
+0+068 <[^>]*> ec800b02         vstmia  r0, {d0}
+0+06c <[^>]*> eca00b02         vstmia  r0!, {d0}
+0+070 <[^>]*> eca00b02         vstmia  r0!, {d0}
+0+074 <[^>]*> ed200b02         vstmdb  r0!, {d0}
+0+078 <[^>]*> ed200b02         vstmdb  r0!, {d0}
  0+07c <[^>]*> eeb80bc0         fsitod  d0, s0
  0+080 <[^>]*> eeb80b40         fuitod  d0, s0
  0+084 <[^>]*> eebd0b40         ftosid  s0, d0
@@ -46,10 +46,10 @@ Disassembly of section .text:
  0+090 <[^>]*> eebc0bc0         ftouizd s0, d0
  0+094 <[^>]*> eeb70ac0         fcvtds  d0, s0
  0+098 <[^>]*> eeb70bc0         fcvtsd  s0, d0
-0+09c <[^>]*> ee300b10         fmrdh   r0, d0
-0+0a0 <[^>]*> ee100b10         fmrdl   r0, d0
-0+0a4 <[^>]*> ee200b10         fmdhr   d0, r0
-0+0a8 <[^>]*> ee000b10         fmdlr   d0, r0
+0+09c <[^>]*> ee300b10         vmov\.32        r0, d0\[1\]
+0+0a0 <[^>]*> ee100b10         vmov\.32        r0, d0\[0\]
+0+0a4 <[^>]*> ee200b10         vmov\.32        d0\[1\], r0
+0+0a8 <[^>]*> ee000b10         vmov\.32        d0\[0\], r0
  0+0ac <[^>]*> eeb51b40         fcmpzd  d1
  0+0b0 <[^>]*> eeb52b40         fcmpzd  d2
  0+0b4 <[^>]*> eeb5fb40         fcmpzd  d15
@@ -89,46 +89,46 @@ Disassembly of section .text:
  0+13c <[^>]*> eeb70bc1         fcvtsd  s0, d1
  0+140 <[^>]*> eeb70bc2         fcvtsd  s0, d2
  0+144 <[^>]*> eeb70bcf         fcvtsd  s0, d15
-0+148 <[^>]*> ee301b10         fmrdh   r1, d0
-0+14c <[^>]*> ee30eb10         fmrdh   lr, d0
-0+150 <[^>]*> ee310b10         fmrdh   r0, d1
-0+154 <[^>]*> ee320b10         fmrdh   r0, d2
-0+158 <[^>]*> ee3f0b10         fmrdh   r0, d15
-0+15c <[^>]*> ee101b10         fmrdl   r1, d0
-0+160 <[^>]*> ee10eb10         fmrdl   lr, d0
-0+164 <[^>]*> ee110b10         fmrdl   r0, d1
-0+168 <[^>]*> ee120b10         fmrdl   r0, d2
-0+16c <[^>]*> ee1f0b10         fmrdl   r0, d15
-0+170 <[^>]*> ee201b10         fmdhr   d0, r1
-0+174 <[^>]*> ee20eb10         fmdhr   d0, lr
-0+178 <[^>]*> ee210b10         fmdhr   d1, r0
-0+17c <[^>]*> ee220b10         fmdhr   d2, r0
-0+180 <[^>]*> ee2f0b10         fmdhr   d15, r0
-0+184 <[^>]*> ee001b10         fmdlr   d0, r1
-0+188 <[^>]*> ee00eb10         fmdlr   d0, lr
-0+18c <[^>]*> ee010b10         fmdlr   d1, r0
-0+190 <[^>]*> ee020b10         fmdlr   d2, r0
-0+194 <[^>]*> ee0f0b10         fmdlr   d15, r0
-0+198 <[^>]*> ed910b00         fldd    d0, \[r1\]
-0+19c <[^>]*> ed9e0b00         fldd    d0, \[lr\]
-0+1a0 <[^>]*> ed900b00         fldd    d0, \[r0\]
-0+1a4 <[^>]*> ed900bff         fldd    d0, \[r0, #1020\]
-0+1a8 <[^>]*> ed100bff         fldd    d0, \[r0, #-1020\]
-0+1ac <[^>]*> ed901b00         fldd    d1, \[r0\]
-0+1b0 <[^>]*> ed902b00         fldd    d2, \[r0\]
-0+1b4 <[^>]*> ed90fb00         fldd    d15, \[r0\]
-0+1b8 <[^>]*> ed8ccbc9         fstd    d12, \[ip, #804\]
-0+1bc <[^>]*> ec901b02         fldmiad r0, {d1}
-0+1c0 <[^>]*> ec902b02         fldmiad r0, {d2}
-0+1c4 <[^>]*> ec90fb02         fldmiad r0, {d15}
-0+1c8 <[^>]*> ec900b04         fldmiad r0, {d0-d1}
-0+1cc <[^>]*> ec900b06         fldmiad r0, {d0-d2}
-0+1d0 <[^>]*> ec900b20         fldmiad r0, {d0-d15}
-0+1d4 <[^>]*> ec901b1e         fldmiad r0, {d1-d15}
-0+1d8 <[^>]*> ec902b1c         fldmiad r0, {d2-d15}
-0+1dc <[^>]*> ec90eb04         fldmiad r0, {d14-d15}
-0+1e0 <[^>]*> ec910b02         fldmiad r1, {d0}
-0+1e4 <[^>]*> ec9e0b02         fldmiad lr, {d0}
+0+148 <[^>]*> ee301b10         vmov\.32        r1, d0\[1\]
+0+14c <[^>]*> ee30eb10         vmov\.32        lr, d0\[1\]
+0+150 <[^>]*> ee310b10         vmov\.32        r0, d1\[1\]
+0+154 <[^>]*> ee320b10         vmov\.32        r0, d2\[1\]
+0+158 <[^>]*> ee3f0b10         vmov\.32        r0, d15\[1\]
+0+15c <[^>]*> ee101b10         vmov\.32        r1, d0\[0\]
+0+160 <[^>]*> ee10eb10         vmov\.32        lr, d0\[0\]
+0+164 <[^>]*> ee110b10         vmov\.32        r0, d1\[0\]
+0+168 <[^>]*> ee120b10         vmov\.32        r0, d2\[0\]
+0+16c <[^>]*> ee1f0b10         vmov\.32        r0, d15\[0\]
+0+170 <[^>]*> ee201b10         vmov\.32        d0\[1\], r1
+0+174 <[^>]*> ee20eb10         vmov\.32        d0\[1\], lr
+0+178 <[^>]*> ee210b10         vmov\.32        d1\[1\], r0
+0+17c <[^>]*> ee220b10         vmov\.32        d2\[1\], r0
+0+180 <[^>]*> ee2f0b10         vmov\.32        d15\[1\], r0
+0+184 <[^>]*> ee001b10         vmov\.32        d0\[0\], r1
+0+188 <[^>]*> ee00eb10         vmov\.32        d0\[0\], lr
+0+18c <[^>]*> ee010b10         vmov\.32        d1\[0\], r0
+0+190 <[^>]*> ee020b10         vmov\.32        d2\[0\], r0
+0+194 <[^>]*> ee0f0b10         vmov\.32        d15\[0\], r0
+0+198 <[^>]*> ed910b00         vldr    d0, \[r1\]
+0+19c <[^>]*> ed9e0b00         vldr    d0, \[lr\]
+0+1a0 <[^>]*> ed900b00         vldr    d0, \[r0\]
+0+1a4 <[^>]*> ed900bff         vldr    d0, \[r0, #1020\]
+0+1a8 <[^>]*> ed100bff         vldr    d0, \[r0, #-1020\]
+0+1ac <[^>]*> ed901b00         vldr    d1, \[r0\]
+0+1b0 <[^>]*> ed902b00         vldr    d2, \[r0\]
+0+1b4 <[^>]*> ed90fb00         vldr    d15, \[r0\]
+0+1b8 <[^>]*> ed8ccbc9         vstr    d12, \[ip, #804\]
+0+1bc <[^>]*> ec901b02         vldmia  r0, {d1}
+0+1c0 <[^>]*> ec902b02         vldmia  r0, {d2}
+0+1c4 <[^>]*> ec90fb02         vldmia  r0, {d15}
+0+1c8 <[^>]*> ec900b04         vldmia  r0, {d0-d1}
+0+1cc <[^>]*> ec900b06         vldmia  r0, {d0-d2}
+0+1d0 <[^>]*> ec900b20         vldmia  r0, {d0-d15}
+0+1d4 <[^>]*> ec901b1e         vldmia  r0, {d1-d15}
+0+1d8 <[^>]*> ec902b1c         vldmia  r0, {d2-d15}
+0+1dc <[^>]*> ec90eb04         vldmia  r0, {d14-d15}
+0+1e0 <[^>]*> ec910b02         vldmia  r1, {d0}
+0+1e4 <[^>]*> ec9e0b02         vldmia  lr, {d0}
  0+1e8 <[^>]*> eeb50b40         fcmpzd  d0
  0+1ec <[^>]*> eeb51b40         fcmpzd  d1
  0+1f0 <[^>]*> eeb52b40         fcmpzd  d2
@@ -162,20 +162,20 @@ Disassembly of section .text:
  0+260 <[^>]*> 0e167b4b         fnmscdeq        d7, d6, d11
  0+264 <[^>]*> 0e245b4c         fnmuldeq        d5, d4, d12
  0+268 <[^>]*> 0e3d3b4e         fsubdeq d3, d13, d14
-0+26c <[^>]*> 0d952b00         flddeq  d2, \[r5\]
-0+270 <[^>]*> 0d8c1b00         fstdeq  d1, \[ip\]
-0+274 <[^>]*> 0c911b02         fldmiadeq       r1, {d1}
-0+278 <[^>]*> 0c922b02         fldmiadeq       r2, {d2}
-0+27c <[^>]*> 0cb33b02         fldmiadeq       r3!, {d3}
-0+280 <[^>]*> 0cb44b02         fldmiadeq       r4!, {d4}
-0+284 <[^>]*> 0d355b02         fldmdbdeq       r5!, {d5}
-0+288 <[^>]*> 0d366b02         fldmdbdeq       r6!, {d6}
-0+28c <[^>]*> 0c87fb02         fstmiadeq       r7, {d15}
-0+290 <[^>]*> 0c88eb02         fstmiadeq       r8, {d14}
-0+294 <[^>]*> 0ca9db02         fstmiadeq       r9!, {d13}
-0+298 <[^>]*> 0caacb02         fstmiadeq       sl!, {d12}
-0+29c <[^>]*> 0d2bbb02         fstmdbdeq       fp!, {d11}
-0+2a0 <[^>]*> 0d2cab02         fstmdbdeq       ip!, {d10}
+0+26c <[^>]*> 0d952b00         vldreq  d2, \[r5\]
+0+270 <[^>]*> 0d8c1b00         vstreq  d1, \[ip\]
+0+274 <[^>]*> 0c911b02         vldmiaeq        r1, {d1}
+0+278 <[^>]*> 0c922b02         vldmiaeq        r2, {d2}
+0+27c <[^>]*> 0cb33b02         vldmiaeq        r3!, {d3}
+0+280 <[^>]*> 0cb44b02         vldmiaeq        r4!, {d4}
+0+284 <[^>]*> 0d355b02         vldmdbeq        r5!, {d5}
+0+288 <[^>]*> 0d366b02         vldmdbeq        r6!, {d6}
+0+28c <[^>]*> 0c87fb02         vstmiaeq        r7, {d15}
+0+290 <[^>]*> 0c88eb02         vstmiaeq        r8, {d14}
+0+294 <[^>]*> 0ca9db02         vstmiaeq        r9!, {d13}
+0+298 <[^>]*> 0caacb02         vstmiaeq        sl!, {d12}
+0+29c <[^>]*> 0d2bbb02         vstmdbeq        fp!, {d11}
+0+2a0 <[^>]*> 0d2cab02         vstmdbeq        ip!, {d10}
  0+2a4 <[^>]*> 0eb8fbe0         fsitodeq        d15, s1
  0+2a8 <[^>]*> 0eb81b6f         fuitodeq        d1, s31
  0+2ac <[^>]*> 0efd0b4f         ftosideq        s1, d15
@@ -184,10 +184,10 @@ Disassembly of section .text:
  0+2b8 <[^>]*> 0efc5bc3         ftouizdeq       s11, d3
  0+2bc <[^>]*> 0eb71ac5         fcvtdseq        d1, s10
  0+2c0 <[^>]*> 0ef75bc1         fcvtsdeq        s11, d1
-0+2c4 <[^>]*> 0e318b10         fmrdheq r8, d1
-0+2c8 <[^>]*> 0e1f7b10         fmrdleq r7, d15
-0+2cc <[^>]*> 0e21fb10         fmdhreq d1, pc
-0+2d0 <[^>]*> 0e0f1b10         fmdlreq d15, r1
+0+2c4 <[^>]*> 0e318b10         vmoveq\.32      r8, d1\[1\]
+0+2c8 <[^>]*> 0e1f7b10         vmoveq\.32      r7, d15\[0\]
+0+2cc <[^>]*> 0e21fb10         vmoveq\.32      d1\[1\], pc
+0+2d0 <[^>]*> 0e0f1b10         vmoveq\.32      d15\[0\], r1
  0+2d4 <[^>]*> e1a00000 ?       nop[    ]+\(mov r0,r0\)
  0+2d8 <[^>]*> e1a00000 ?       nop[    ]+\(mov r0,r0\)
  0+2dc <[^>]*> e1a00000 ?       nop[    ]+\(mov r0,r0\)
diff --git a/gas/testsuite/gas/arm/vfp1_t2.d b/gas/testsuite/gas/arm/vfp1_t2.d

index 22c4fd6f01dcf246e290629a2a6485650a2d1980..df9ab9f96019f3272ae5b3d657b1068ae7e80f95 100644 (file)
--- a/gas/testsuite/gas/arm/vfp1_t2.d
+++ b/gas/testsuite/gas/arm/vfp1_t2.d
@@ -24,20 +24,20 @@ Disassembly of section .text:
  0+038 <[^>]*> ee10 0b40        fnmscd  d0, d0, d0
  0+03c <[^>]*> ee20 0b40        fnmuld  d0, d0, d0
  0+040 <[^>]*> ee30 0b40        fsubd   d0, d0, d0
-0+044 <[^>]*> ed90 0b00        fldd    d0, \[r0\]
-0+048 <[^>]*> ed80 0b00        fstd    d0, \[r0\]
-0+04c <[^>]*> ec90 0b02        fldmiad r0, {d0}
-0+050 <[^>]*> ec90 0b02        fldmiad r0, {d0}
-0+054 <[^>]*> ecb0 0b02        fldmiad r0!, {d0}
-0+058 <[^>]*> ecb0 0b02        fldmiad r0!, {d0}
-0+05c <[^>]*> ed30 0b02        fldmdbd r0!, {d0}
-0+060 <[^>]*> ed30 0b02        fldmdbd r0!, {d0}
-0+064 <[^>]*> ec80 0b02        fstmiad r0, {d0}
-0+068 <[^>]*> ec80 0b02        fstmiad r0, {d0}
-0+06c <[^>]*> eca0 0b02        fstmiad r0!, {d0}
-0+070 <[^>]*> eca0 0b02        fstmiad r0!, {d0}
-0+074 <[^>]*> ed20 0b02        fstmdbd r0!, {d0}
-0+078 <[^>]*> ed20 0b02        fstmdbd r0!, {d0}
+0+044 <[^>]*> ed90 0b00        vldr    d0, \[r0\]
+0+048 <[^>]*> ed80 0b00        vstr    d0, \[r0\]
+0+04c <[^>]*> ec90 0b02        vldmia  r0, {d0}
+0+050 <[^>]*> ec90 0b02        vldmia  r0, {d0}
+0+054 <[^>]*> ecb0 0b02        vldmia  r0!, {d0}
+0+058 <[^>]*> ecb0 0b02        vldmia  r0!, {d0}
+0+05c <[^>]*> ed30 0b02        vldmdb  r0!, {d0}
+0+060 <[^>]*> ed30 0b02        vldmdb  r0!, {d0}
+0+064 <[^>]*> ec80 0b02        vstmia  r0, {d0}
+0+068 <[^>]*> ec80 0b02        vstmia  r0, {d0}
+0+06c <[^>]*> eca0 0b02        vstmia  r0!, {d0}
+0+070 <[^>]*> eca0 0b02        vstmia  r0!, {d0}
+0+074 <[^>]*> ed20 0b02        vstmdb  r0!, {d0}
+0+078 <[^>]*> ed20 0b02        vstmdb  r0!, {d0}
  0+07c <[^>]*> eeb8 0bc0        fsitod  d0, s0
  0+080 <[^>]*> eeb8 0b40        fuitod  d0, s0
  0+084 <[^>]*> eebd 0b40        ftosid  s0, d0
@@ -46,10 +46,10 @@ Disassembly of section .text:
  0+090 <[^>]*> eebc 0bc0        ftouizd s0, d0
  0+094 <[^>]*> eeb7 0ac0        fcvtds  d0, s0
  0+098 <[^>]*> eeb7 0bc0        fcvtsd  s0, d0
-0+09c <[^>]*> ee30 0b10        fmrdh   r0, d0
-0+0a0 <[^>]*> ee10 0b10        fmrdl   r0, d0
-0+0a4 <[^>]*> ee20 0b10        fmdhr   d0, r0
-0+0a8 <[^>]*> ee00 0b10        fmdlr   d0, r0
+0+09c <[^>]*> ee30 0b10        vmov\.32        r0, d0\[1\]
+0+0a0 <[^>]*> ee10 0b10        vmov\.32        r0, d0\[0\]
+0+0a4 <[^>]*> ee20 0b10        vmov\.32        d0\[1\], r0
+0+0a8 <[^>]*> ee00 0b10        vmov\.32        d0\[0\], r0
  0+0ac <[^>]*> eeb5 1b40        fcmpzd  d1
  0+0b0 <[^>]*> eeb5 2b40        fcmpzd  d2
  0+0b4 <[^>]*> eeb5 fb40        fcmpzd  d15
@@ -89,46 +89,46 @@ Disassembly of section .text:
  0+13c <[^>]*> eeb7 0bc1        fcvtsd  s0, d1
  0+140 <[^>]*> eeb7 0bc2        fcvtsd  s0, d2
  0+144 <[^>]*> eeb7 0bcf        fcvtsd  s0, d15
-0+148 <[^>]*> ee30 1b10        fmrdh   r1, d0
-0+14c <[^>]*> ee30 eb10        fmrdh   lr, d0
-0+150 <[^>]*> ee31 0b10        fmrdh   r0, d1
-0+154 <[^>]*> ee32 0b10        fmrdh   r0, d2
-0+158 <[^>]*> ee3f 0b10        fmrdh   r0, d15
-0+15c <[^>]*> ee10 1b10        fmrdl   r1, d0
-0+160 <[^>]*> ee10 eb10        fmrdl   lr, d0
-0+164 <[^>]*> ee11 0b10        fmrdl   r0, d1
-0+168 <[^>]*> ee12 0b10        fmrdl   r0, d2
-0+16c <[^>]*> ee1f 0b10        fmrdl   r0, d15
-0+170 <[^>]*> ee20 1b10        fmdhr   d0, r1
-0+174 <[^>]*> ee20 eb10        fmdhr   d0, lr
-0+178 <[^>]*> ee21 0b10        fmdhr   d1, r0
-0+17c <[^>]*> ee22 0b10        fmdhr   d2, r0
-0+180 <[^>]*> ee2f 0b10        fmdhr   d15, r0
-0+184 <[^>]*> ee00 1b10        fmdlr   d0, r1
-0+188 <[^>]*> ee00 eb10        fmdlr   d0, lr
-0+18c <[^>]*> ee01 0b10        fmdlr   d1, r0
-0+190 <[^>]*> ee02 0b10        fmdlr   d2, r0
-0+194 <[^>]*> ee0f 0b10        fmdlr   d15, r0
-0+198 <[^>]*> ed91 0b00        fldd    d0, \[r1\]
-0+19c <[^>]*> ed9e 0b00        fldd    d0, \[lr\]
-0+1a0 <[^>]*> ed90 0b00        fldd    d0, \[r0\]
-0+1a4 <[^>]*> ed90 0bff        fldd    d0, \[r0, #1020\]
-0+1a8 <[^>]*> ed10 0bff        fldd    d0, \[r0, #-1020\]
-0+1ac <[^>]*> ed90 1b00        fldd    d1, \[r0\]
-0+1b0 <[^>]*> ed90 2b00        fldd    d2, \[r0\]
-0+1b4 <[^>]*> ed90 fb00        fldd    d15, \[r0\]
-0+1b8 <[^>]*> ed8c cbc9        fstd    d12, \[ip, #804\]
-0+1bc <[^>]*> ec90 1b02        fldmiad r0, {d1}
-0+1c0 <[^>]*> ec90 2b02        fldmiad r0, {d2}
-0+1c4 <[^>]*> ec90 fb02        fldmiad r0, {d15}
-0+1c8 <[^>]*> ec90 0b04        fldmiad r0, {d0-d1}
-0+1cc <[^>]*> ec90 0b06        fldmiad r0, {d0-d2}
-0+1d0 <[^>]*> ec90 0b20        fldmiad r0, {d0-d15}
-0+1d4 <[^>]*> ec90 1b1e        fldmiad r0, {d1-d15}
-0+1d8 <[^>]*> ec90 2b1c        fldmiad r0, {d2-d15}
-0+1dc <[^>]*> ec90 eb04        fldmiad r0, {d14-d15}
-0+1e0 <[^>]*> ec91 0b02        fldmiad r1, {d0}
-0+1e4 <[^>]*> ec9e 0b02        fldmiad lr, {d0}
+0+148 <[^>]*> ee30 1b10        vmov\.32        r1, d0\[1\]
+0+14c <[^>]*> ee30 eb10        vmov\.32        lr, d0\[1\]
+0+150 <[^>]*> ee31 0b10        vmov\.32        r0, d1\[1\]
+0+154 <[^>]*> ee32 0b10        vmov\.32        r0, d2\[1\]
+0+158 <[^>]*> ee3f 0b10        vmov\.32        r0, d15\[1\]
+0+15c <[^>]*> ee10 1b10        vmov\.32        r1, d0\[0\]
+0+160 <[^>]*> ee10 eb10        vmov\.32        lr, d0\[0\]
+0+164 <[^>]*> ee11 0b10        vmov\.32        r0, d1\[0\]
+0+168 <[^>]*> ee12 0b10        vmov\.32        r0, d2\[0\]
+0+16c <[^>]*> ee1f 0b10        vmov\.32        r0, d15\[0\]
+0+170 <[^>]*> ee20 1b10        vmov\.32        d0\[1\], r1
+0+174 <[^>]*> ee20 eb10        vmov\.32        d0\[1\], lr
+0+178 <[^>]*> ee21 0b10        vmov\.32        d1\[1\], r0
+0+17c <[^>]*> ee22 0b10        vmov\.32        d2\[1\], r0
+0+180 <[^>]*> ee2f 0b10        vmov\.32        d15\[1\], r0
+0+184 <[^>]*> ee00 1b10        vmov\.32        d0\[0\], r1
+0+188 <[^>]*> ee00 eb10        vmov\.32        d0\[0\], lr
+0+18c <[^>]*> ee01 0b10        vmov\.32        d1\[0\], r0
+0+190 <[^>]*> ee02 0b10        vmov\.32        d2\[0\], r0
+0+194 <[^>]*> ee0f 0b10        vmov\.32        d15\[0\], r0
+0+198 <[^>]*> ed91 0b00        vldr    d0, \[r1\]
+0+19c <[^>]*> ed9e 0b00        vldr    d0, \[lr\]
+0+1a0 <[^>]*> ed90 0b00        vldr    d0, \[r0\]
+0+1a4 <[^>]*> ed90 0bff        vldr    d0, \[r0, #1020\]
+0+1a8 <[^>]*> ed10 0bff        vldr    d0, \[r0, #-1020\]
+0+1ac <[^>]*> ed90 1b00        vldr    d1, \[r0\]
+0+1b0 <[^>]*> ed90 2b00        vldr    d2, \[r0\]
+0+1b4 <[^>]*> ed90 fb00        vldr    d15, \[r0\]
+0+1b8 <[^>]*> ed8c cbc9        vstr    d12, \[ip, #804\]
+0+1bc <[^>]*> ec90 1b02        vldmia  r0, {d1}
+0+1c0 <[^>]*> ec90 2b02        vldmia  r0, {d2}
+0+1c4 <[^>]*> ec90 fb02        vldmia  r0, {d15}
+0+1c8 <[^>]*> ec90 0b04        vldmia  r0, {d0-d1}
+0+1cc <[^>]*> ec90 0b06        vldmia  r0, {d0-d2}
+0+1d0 <[^>]*> ec90 0b20        vldmia  r0, {d0-d15}
+0+1d4 <[^>]*> ec90 1b1e        vldmia  r0, {d1-d15}
+0+1d8 <[^>]*> ec90 2b1c        vldmia  r0, {d2-d15}
+0+1dc <[^>]*> ec90 eb04        vldmia  r0, {d14-d15}
+0+1e0 <[^>]*> ec91 0b02        vldmia  r1, {d0}
+0+1e4 <[^>]*> ec9e 0b02        vldmia  lr, {d0}
  0+1e8 <[^>]*> eeb5 0b40        fcmpzd  d0
  0+1ec <[^>]*> eeb5 1b40        fcmpzd  d1
  0+1f0 <[^>]*> eeb5 2b40        fcmpzd  d2
@@ -168,23 +168,23 @@ Disassembly of section .text:
  0+26c <[^>]*> ee24 5b4c        fnmuld(eq|)     d5, d4, d12
  0+270 <[^>]*> bf02             ittt    eq
  0+272 <[^>]*> ee3d 3b4e        fsubd(eq|)      d3, d13, d14
-0+276 <[^>]*> ed95 2b00        fldd(eq|)       d2, \[r5\]
-0+27a <[^>]*> ed8c 1b00        fstd(eq|)       d1, \[ip\]
+0+276 <[^>]*> ed95 2b00        vldr(eq|)       d2, \[r5\]
+0+27a <[^>]*> ed8c 1b00        vstr(eq|)       d1, \[ip\]
  0+27e <[^>]*> bf01             itttt   eq
-0+280 <[^>]*> ec91 1b02        fldmiad(eq|)    r1, {d1}
-0+284 <[^>]*> ec92 2b02        fldmiad(eq|)    r2, {d2}
-0+288 <[^>]*> ecb3 3b02        fldmiad(eq|)    r3!, {d3}
-0+28c <[^>]*> ecb4 4b02        fldmiad(eq|)    r4!, {d4}
+0+280 <[^>]*> ec91 1b02        vldmia(eq|)     r1, {d1}
+0+284 <[^>]*> ec92 2b02        vldmia(eq|)     r2, {d2}
+0+288 <[^>]*> ecb3 3b02        vldmia(eq|)     r3!, {d3}
+0+28c <[^>]*> ecb4 4b02        vldmia(eq|)     r4!, {d4}
  0+290 <[^>]*> bf01             itttt   eq
-0+292 <[^>]*> ed35 5b02        fldmdbd(eq|)    r5!, {d5}
-0+296 <[^>]*> ed36 6b02        fldmdbd(eq|)    r6!, {d6}
-0+29a <[^>]*> ec87 fb02        fstmiad(eq|)    r7, {d15}
-0+29e <[^>]*> ec88 eb02        fstmiad(eq|)    r8, {d14}
+0+292 <[^>]*> ed35 5b02        vldmdb(eq|)     r5!, {d5}
+0+296 <[^>]*> ed36 6b02        vldmdb(eq|)     r6!, {d6}
+0+29a <[^>]*> ec87 fb02        vstmia(eq|)     r7, {d15}
+0+29e <[^>]*> ec88 eb02        vstmia(eq|)     r8, {d14}
  0+2a2 <[^>]*> bf01             itttt   eq
-0+2a4 <[^>]*> eca9 db02        fstmiad(eq|)    r9!, {d13}
-0+2a8 <[^>]*> ecaa cb02        fstmiad(eq|)    sl!, {d12}
-0+2ac <[^>]*> ed2b bb02        fstmdbd(eq|)    fp!, {d11}
-0+2b0 <[^>]*> ed2c ab02        fstmdbd(eq|)    ip!, {d10}
+0+2a4 <[^>]*> eca9 db02        vstmia(eq|)     r9!, {d13}
+0+2a8 <[^>]*> ecaa cb02        vstmia(eq|)     sl!, {d12}
+0+2ac <[^>]*> ed2b bb02        vstmdb(eq|)     fp!, {d11}
+0+2b0 <[^>]*> ed2c ab02        vstmdb(eq|)     ip!, {d10}
  0+2b4 <[^>]*> bf01             itttt   eq
  0+2b6 <[^>]*> eeb8 fbe0        fsitod(eq|)     d15, s1
  0+2ba <[^>]*> eeb8 1b6f        fuitod(eq|)     d1, s31
@@ -196,10 +196,10 @@ Disassembly of section .text:
  0+2d0 <[^>]*> eeb7 1ac5        fcvtds(eq|)     d1, s10
  0+2d4 <[^>]*> eef7 5bc1        fcvtsd(eq|)     s11, d1
  0+2d8 <[^>]*> bf01             itttt   eq
-0+2da <[^>]*> ee31 8b10        fmrdh(eq|)      r8, d1
-0+2de <[^>]*> ee1f 7b10        fmrdl(eq|)      r7, d15
-0+2e2 <[^>]*> ee21 fb10        fmdhr(eq|)      d1, pc
-0+2e6 <[^>]*> ee0f 1b10        fmdlr(eq|)      d15, r1
+0+2da <[^>]*> ee31 8b10        vmov(eq|)\.32   r8, d1\[1\]
+0+2de <[^>]*> ee1f 7b10        vmov(eq|)\.32   r7, d15\[0\]
+0+2e2 <[^>]*> ee21 fb10        vmov(eq|)\.32   d1\[1\], pc
+0+2e6 <[^>]*> ee0f 1b10        vmov(eq|)\.32   d15\[0\], r1
  0+2ea <[^>]*> bf00             nop
  0+2ec <[^>]*> bf00             nop
  0+2ee <[^>]*> bf00             nop
diff --git a/gas/testsuite/gas/arm/vfp1xD.d b/gas/testsuite/gas/arm/vfp1xD.d

index 096b46c86e4cfea60f924e4190448a389a214778..1dab07cea6e4874a1e8e1252601995272bfa2dd9 100644 (file)
--- a/gas/testsuite/gas/arm/vfp1xD.d
+++ b/gas/testsuite/gas/arm/vfp1xD.d
@@ -33,24 +33,24 @@ Disassembly of section .text:
  0+05c <[^>]*> ecb00a01         fldmias r0!, {s0}
  0+060 <[^>]*> ed300a01         fldmdbs r0!, {s0}
  0+064 <[^>]*> ed300a01         fldmdbs r0!, {s0}
-0+068 <[^>]*> ec900b03         fldmiax r0, {d0}
-0+06c <[^>]*> ec900b03         fldmiax r0, {d0}
-0+070 <[^>]*> ecb00b03         fldmiax r0!, {d0}
-0+074 <[^>]*> ecb00b03         fldmiax r0!, {d0}
-0+078 <[^>]*> ed300b03         fldmdbx r0!, {d0}
-0+07c <[^>]*> ed300b03         fldmdbx r0!, {d0}
+0+068 <[^>]*> ec900b03         vldmia  r0, {d0}
+0+06c <[^>]*> ec900b03         vldmia  r0, {d0}
+0+070 <[^>]*> ecb00b03         vldmia  r0!, {d0}
+0+074 <[^>]*> ecb00b03         vldmia  r0!, {d0}
+0+078 <[^>]*> ed300b03         vldmdb  r0!, {d0}
+0+07c <[^>]*> ed300b03         vldmdb  r0!, {d0}
  0+080 <[^>]*> ec800a01         fstmias r0, {s0}
  0+084 <[^>]*> ec800a01         fstmias r0, {s0}
  0+088 <[^>]*> eca00a01         fstmias r0!, {s0}
  0+08c <[^>]*> eca00a01         fstmias r0!, {s0}
  0+090 <[^>]*> ed200a01         fstmdbs r0!, {s0}
  0+094 <[^>]*> ed200a01         fstmdbs r0!, {s0}
-0+098 <[^>]*> ec800b03         fstmiax r0, {d0}
-0+09c <[^>]*> ec800b03         fstmiax r0, {d0}
-0+0a0 <[^>]*> eca00b03         fstmiax r0!, {d0}
-0+0a4 <[^>]*> eca00b03         fstmiax r0!, {d0}
-0+0a8 <[^>]*> ed200b03         fstmdbx r0!, {d0}
-0+0ac <[^>]*> ed200b03         fstmdbx r0!, {d0}
+0+098 <[^>]*> ec800b03         vstmia  r0, {d0}
+0+09c <[^>]*> ec800b03         vstmia  r0, {d0}
+0+0a0 <[^>]*> eca00b03         vstmia  r0!, {d0}
+0+0a4 <[^>]*> eca00b03         vstmia  r0!, {d0}
+0+0a8 <[^>]*> ed200b03         vstmdb  r0!, {d0}
+0+0ac <[^>]*> ed200b03         vstmdb  r0!, {d0}
  0+0b0 <[^>]*> eeb80ac0         fsitos  s0, s0
  0+0b4 <[^>]*> eeb80a40         fuitos  s0, s0
  0+0b8 <[^>]*> eebd0a40         ftosis  s0, s0
@@ -142,17 +142,17 @@ Disassembly of section .text:
  0+210 <[^>]*> ec90fa02         fldmias r0, {s30-s31}
  0+214 <[^>]*> ec910a01         fldmias r1, {s0}
  0+218 <[^>]*> ec9e0a01         fldmias lr, {s0}
-0+21c <[^>]*> ec801b03         fstmiax r0, {d1}
-0+220 <[^>]*> ec802b03         fstmiax r0, {d2}
-0+224 <[^>]*> ec80fb03         fstmiax r0, {d15}
-0+228 <[^>]*> ec800b05         fstmiax r0, {d0-d1}
-0+22c <[^>]*> ec800b07         fstmiax r0, {d0-d2}
-0+230 <[^>]*> ec800b21         fstmiax r0, {d0-d15}
-0+234 <[^>]*> ec801b1f         fstmiax r0, {d1-d15}
-0+238 <[^>]*> ec802b1d         fstmiax r0, {d2-d15}
-0+23c <[^>]*> ec80eb05         fstmiax r0, {d14-d15}
-0+240 <[^>]*> ec810b03         fstmiax r1, {d0}
-0+244 <[^>]*> ec8e0b03         fstmiax lr, {d0}
+0+21c <[^>]*> ec801b03         vstmia  r0, {d1}
+0+220 <[^>]*> ec802b03         vstmia  r0, {d2}
+0+224 <[^>]*> ec80fb03         vstmia  r0, {d15}
+0+228 <[^>]*> ec800b05         vstmia  r0, {d0-d1}
+0+22c <[^>]*> ec800b07         vstmia  r0, {d0-d2}
+0+230 <[^>]*> ec800b21         vstmia  r0, {d0-d15}
+0+234 <[^>]*> ec801b1f         vstmia  r0, {d1-d15}
+0+238 <[^>]*> ec802b1d         vstmia  r0, {d2-d15}
+0+23c <[^>]*> ec80eb05         vstmia  r0, {d14-d15}
+0+240 <[^>]*> ec810b03         vstmia  r1, {d0}
+0+244 <[^>]*> ec8e0b03         vstmia  lr, {d0}
  0+248 <[^>]*> eeb50a40         fcmpzs  s0
  0+24c <[^>]*> eef50a40         fcmpzs  s1
  0+250 <[^>]*> eeb51a40         fcmpzs  s2
@@ -211,24 +211,24 @@ Disassembly of section .text:
  0+324 <[^>]*> 0cf42a01         fldmiaseq       r4!, {s5}
  0+328 <[^>]*> 0d352a01         fldmdbseq       r5!, {s4}
  0+32c <[^>]*> 0d761a01         fldmdbseq       r6!, {s3}
-0+330 <[^>]*> 0c971b03         fldmiaxeq       r7, {d1}
-0+334 <[^>]*> 0c982b03         fldmiaxeq       r8, {d2}
-0+338 <[^>]*> 0cb93b03         fldmiaxeq       r9!, {d3}
-0+33c <[^>]*> 0cba4b03         fldmiaxeq       sl!, {d4}
-0+340 <[^>]*> 0d3b5b03         fldmdbxeq       fp!, {d5}
-0+344 <[^>]*> 0d3c6b03         fldmdbxeq       ip!, {d6}
+0+330 <[^>]*> 0c971b03         vldmiaeq        r7, {d1}
+0+334 <[^>]*> 0c982b03         vldmiaeq        r8, {d2}
+0+338 <[^>]*> 0cb93b03         vldmiaeq        r9!, {d3}
+0+33c <[^>]*> 0cba4b03         vldmiaeq        sl!, {d4}
+0+340 <[^>]*> 0d3b5b03         vldmdbeq        fp!, {d5}
+0+344 <[^>]*> 0d3c6b03         vldmdbeq        ip!, {d6}
  0+348 <[^>]*> 0c8d1a01         fstmiaseq       sp, {s2}
  0+34c <[^>]*> 0cce0a01         fstmiaseq       lr, {s1}
  0+350 <[^>]*> 0ce1fa01         fstmiaseq       r1!, {s31}
  0+354 <[^>]*> 0ca2fa01         fstmiaseq       r2!, {s30}
  0+358 <[^>]*> 0d63ea01         fstmdbseq       r3!, {s29}
  0+35c <[^>]*> 0d24ea01         fstmdbseq       r4!, {s28}
-0+360 <[^>]*> 0c857b03         fstmiaxeq       r5, {d7}
-0+364 <[^>]*> 0c868b03         fstmiaxeq       r6, {d8}
-0+368 <[^>]*> 0ca79b03         fstmiaxeq       r7!, {d9}
-0+36c <[^>]*> 0ca8ab03         fstmiaxeq       r8!, {d10}
-0+370 <[^>]*> 0d29bb03         fstmdbxeq       r9!, {d11}
-0+374 <[^>]*> 0d2acb03         fstmdbxeq       sl!, {d12}
+0+360 <[^>]*> 0c857b03         vstmiaeq        r5, {d7}
+0+364 <[^>]*> 0c868b03         vstmiaeq        r6, {d8}
+0+368 <[^>]*> 0ca79b03         vstmiaeq        r7!, {d9}
+0+36c <[^>]*> 0ca8ab03         vstmiaeq        r8!, {d10}
+0+370 <[^>]*> 0d29bb03         vstmdbeq        r9!, {d11}
+0+374 <[^>]*> 0d2acb03         vstmdbeq        sl!, {d12}
  0+378 <[^>]*> 0ef8dac3         fsitoseq        s27, s6
  0+37c <[^>]*> 0efdca62         ftosiseq        s25, s5
  0+380 <[^>]*> 0efdbac2         ftosizseq       s23, s4
diff --git a/gas/testsuite/gas/arm/vfp1xD_t2.d b/gas/testsuite/gas/arm/vfp1xD_t2.d

index 327383d01c5b4970ea4597f1d11c10a940365c6a..65d6115c968e6d2721b44a29b27dd74e1295d260 100644 (file)
--- a/gas/testsuite/gas/arm/vfp1xD_t2.d
+++ b/gas/testsuite/gas/arm/vfp1xD_t2.d
@@ -33,24 +33,24 @@ Disassembly of section .text:
  0+05c <[^>]*> ecb0 0a01        fldmias r0!, {s0}
  0+060 <[^>]*> ed30 0a01        fldmdbs r0!, {s0}
  0+064 <[^>]*> ed30 0a01        fldmdbs r0!, {s0}
-0+068 <[^>]*> ec90 0b03        fldmiax r0, {d0}
-0+06c <[^>]*> ec90 0b03        fldmiax r0, {d0}
-0+070 <[^>]*> ecb0 0b03        fldmiax r0!, {d0}
-0+074 <[^>]*> ecb0 0b03        fldmiax r0!, {d0}
-0+078 <[^>]*> ed30 0b03        fldmdbx r0!, {d0}
-0+07c <[^>]*> ed30 0b03        fldmdbx r0!, {d0}
+0+068 <[^>]*> ec90 0b03        vldmia  r0, {d0}
+0+06c <[^>]*> ec90 0b03        vldmia  r0, {d0}
+0+070 <[^>]*> ecb0 0b03        vldmia  r0!, {d0}
+0+074 <[^>]*> ecb0 0b03        vldmia  r0!, {d0}
+0+078 <[^>]*> ed30 0b03        vldmdb  r0!, {d0}
+0+07c <[^>]*> ed30 0b03        vldmdb  r0!, {d0}
  0+080 <[^>]*> ec80 0a01        fstmias r0, {s0}
  0+084 <[^>]*> ec80 0a01        fstmias r0, {s0}
  0+088 <[^>]*> eca0 0a01        fstmias r0!, {s0}
  0+08c <[^>]*> eca0 0a01        fstmias r0!, {s0}
  0+090 <[^>]*> ed20 0a01        fstmdbs r0!, {s0}
  0+094 <[^>]*> ed20 0a01        fstmdbs r0!, {s0}
-0+098 <[^>]*> ec80 0b03        fstmiax r0, {d0}
-0+09c <[^>]*> ec80 0b03        fstmiax r0, {d0}
-0+0a0 <[^>]*> eca0 0b03        fstmiax r0!, {d0}
-0+0a4 <[^>]*> eca0 0b03        fstmiax r0!, {d0}
-0+0a8 <[^>]*> ed20 0b03        fstmdbx r0!, {d0}
-0+0ac <[^>]*> ed20 0b03        fstmdbx r0!, {d0}
+0+098 <[^>]*> ec80 0b03        vstmia  r0, {d0}
+0+09c <[^>]*> ec80 0b03        vstmia  r0, {d0}
+0+0a0 <[^>]*> eca0 0b03        vstmia  r0!, {d0}
+0+0a4 <[^>]*> eca0 0b03        vstmia  r0!, {d0}
+0+0a8 <[^>]*> ed20 0b03        vstmdb  r0!, {d0}
+0+0ac <[^>]*> ed20 0b03        vstmdb  r0!, {d0}
  0+0b0 <[^>]*> eeb8 0ac0        fsitos  s0, s0
  0+0b4 <[^>]*> eeb8 0a40        fuitos  s0, s0
  0+0b8 <[^>]*> eebd 0a40        ftosis  s0, s0
@@ -142,17 +142,17 @@ Disassembly of section .text:
  0+210 <[^>]*> ec90 fa02        fldmias r0, {s30-s31}
  0+214 <[^>]*> ec91 0a01        fldmias r1, {s0}
  0+218 <[^>]*> ec9e 0a01        fldmias lr, {s0}
-0+21c <[^>]*> ec80 1b03        fstmiax r0, {d1}
-0+220 <[^>]*> ec80 2b03        fstmiax r0, {d2}
-0+224 <[^>]*> ec80 fb03        fstmiax r0, {d15}
-0+228 <[^>]*> ec80 0b05        fstmiax r0, {d0-d1}
-0+22c <[^>]*> ec80 0b07        fstmiax r0, {d0-d2}
-0+230 <[^>]*> ec80 0b21        fstmiax r0, {d0-d15}
-0+234 <[^>]*> ec80 1b1f        fstmiax r0, {d1-d15}
-0+238 <[^>]*> ec80 2b1d        fstmiax r0, {d2-d15}
-0+23c <[^>]*> ec80 eb05        fstmiax r0, {d14-d15}
-0+240 <[^>]*> ec81 0b03        fstmiax r1, {d0}
-0+244 <[^>]*> ec8e 0b03        fstmiax lr, {d0}
+0+21c <[^>]*> ec80 1b03        vstmia  r0, {d1}
+0+220 <[^>]*> ec80 2b03        vstmia  r0, {d2}
+0+224 <[^>]*> ec80 fb03        vstmia  r0, {d15}
+0+228 <[^>]*> ec80 0b05        vstmia  r0, {d0-d1}
+0+22c <[^>]*> ec80 0b07        vstmia  r0, {d0-d2}
+0+230 <[^>]*> ec80 0b21        vstmia  r0, {d0-d15}
+0+234 <[^>]*> ec80 1b1f        vstmia  r0, {d1-d15}
+0+238 <[^>]*> ec80 2b1d        vstmia  r0, {d2-d15}
+0+23c <[^>]*> ec80 eb05        vstmia  r0, {d14-d15}
+0+240 <[^>]*> ec81 0b03        vstmia  r1, {d0}
+0+244 <[^>]*> ec8e 0b03        vstmia  lr, {d0}
  0+248 <[^>]*> eeb5 0a40        fcmpzs  s0
  0+24c <[^>]*> eef5 0a40        fcmpzs  s1
  0+250 <[^>]*> eeb5 1a40        fcmpzs  s2
@@ -219,13 +219,13 @@ Disassembly of section .text:
  0+334 <[^>]*> bf01             itttt   eq
  0+336 <[^>]*> ed35 2a01        fldmdbs(eq|)    r5!, {s4}
  0+33a <[^>]*> ed76 1a01        fldmdbs(eq|)    r6!, {s3}
-0+33e <[^>]*> ec97 1b03        fldmiax(eq|)    r7, {d1}
-0+342 <[^>]*> ec98 2b03        fldmiax(eq|)    r8, {d2}
+0+33e <[^>]*> ec97 1b03        vldmia(eq|)     r7, {d1}
+0+342 <[^>]*> ec98 2b03        vldmia(eq|)     r8, {d2}
  0+346 <[^>]*> bf01             itttt   eq
-0+348 <[^>]*> ecb9 3b03        fldmiax(eq|)    r9!, {d3}
-0+34c <[^>]*> ecba 4b03        fldmiax(eq|)    sl!, {d4}
-0+350 <[^>]*> ed3b 5b03        fldmdbx(eq|)    fp!, {d5}
-0+354 <[^>]*> ed3c 6b03        fldmdbx(eq|)    ip!, {d6}
+0+348 <[^>]*> ecb9 3b03        vldmia(eq|)     r9!, {d3}
+0+34c <[^>]*> ecba 4b03        vldmia(eq|)     sl!, {d4}
+0+350 <[^>]*> ed3b 5b03        vldmdb(eq|)     fp!, {d5}
+0+354 <[^>]*> ed3c 6b03        vldmdb(eq|)     ip!, {d6}
  0+358 <[^>]*> bf01             itttt   eq
  0+35a <[^>]*> ec8d 1a01        fstmias(eq|)    sp, {s2}
  0+35e <[^>]*> ecce 0a01        fstmias(eq|)    lr, {s1}
@@ -234,13 +234,13 @@ Disassembly of section .text:
  0+36a <[^>]*> bf01             itttt   eq
  0+36c <[^>]*> ed63 ea01        fstmdbs(eq|)    r3!, {s29}
  0+370 <[^>]*> ed24 ea01        fstmdbs(eq|)    r4!, {s28}
-0+374 <[^>]*> ec85 7b03        fstmiax(eq|)    r5, {d7}
-0+378 <[^>]*> ec86 8b03        fstmiax(eq|)    r6, {d8}
+0+374 <[^>]*> ec85 7b03        vstmia(eq|)     r5, {d7}
+0+378 <[^>]*> ec86 8b03        vstmia(eq|)     r6, {d8}
  0+37c <[^>]*> bf01             itttt   eq
-0+37e <[^>]*> eca7 9b03        fstmiax(eq|)    r7!, {d9}
-0+382 <[^>]*> eca8 ab03        fstmiax(eq|)    r8!, {d10}
-0+386 <[^>]*> ed29 bb03        fstmdbx(eq|)    r9!, {d11}
-0+38a <[^>]*> ed2a cb03        fstmdbx(eq|)    sl!, {d12}
+0+37e <[^>]*> eca7 9b03        vstmia(eq|)     r7!, {d9}
+0+382 <[^>]*> eca8 ab03        vstmia(eq|)     r8!, {d10}
+0+386 <[^>]*> ed29 bb03        vstmdb(eq|)     r9!, {d11}
+0+38a <[^>]*> ed2a cb03        vstmdb(eq|)     sl!, {d12}
  0+38e <[^>]*> bf01             itttt   eq
  0+390 <[^>]*> eef8 dac3        fsitos(eq|)     s27, s6
  0+394 <[^>]*> eefd ca62        ftosis(eq|)     s25, s5
diff --git a/gas/testsuite/gas/arm/vfp2.d b/gas/testsuite/gas/arm/vfp2.d

index f9b6096081ebac915e7fa3c9a0aa3e2528d30162..94827f727b4f9acbcbf4c53ec4525715daa8c1d1 100644 (file)
--- a/gas/testsuite/gas/arm/vfp2.d
+++ b/gas/testsuite/gas/arm/vfp2.d
@@ -7,11 +7,11 @@
  .*: +file format .*arm.*
  
  Disassembly of section .text:
-0+000 <[^>]*> ec4a5b10         fmdrr   d0, r5, sl
-0+004 <[^>]*> ec5a5b10         fmrrd   r5, sl, d0
+0+000 <[^>]*> ec4a5b10         vmov    d0, r5, sl
+0+004 <[^>]*> ec5a5b10         vmov    r5, sl, d0
  0+008 <[^>]*> ec4a5a37         fmsrr   r5, sl, {s15, s16}
  0+00c <[^>]*> ec5a5a37         fmrrs   r5, sl, {s15, s16}
-0+010 <[^>]*> ec45ab1f         fmdrr   d15, sl, r5
-0+014 <[^>]*> ec55ab1f         fmrrd   sl, r5, d15
+0+010 <[^>]*> ec45ab1f         vmov    d15, sl, r5
+0+014 <[^>]*> ec55ab1f         vmov    sl, r5, d15
  0+018 <[^>]*> ec45aa38         fmsrr   sl, r5, {s17, s18}
  0+01c <[^>]*> ec55aa38         fmrrs   sl, r5, {s17, s18}
diff --git a/gas/testsuite/gas/arm/vfp2_t2.d b/gas/testsuite/gas/arm/vfp2_t2.d

index bb988e5472e2b377056e7cac335b3222acd726ed..8710e4e0a9d0961dd6f639fe1f1efc8b83ab3344 100644 (file)
--- a/gas/testsuite/gas/arm/vfp2_t2.d
+++ b/gas/testsuite/gas/arm/vfp2_t2.d
@@ -7,11 +7,11 @@
  .*: +file format .*arm.*
  
  Disassembly of section .text:
-0+000 <[^>]*> ec4a 5b10        fmdrr   d0, r5, sl
-0+004 <[^>]*> ec5a 5b10        fmrrd   r5, sl, d0
+0+000 <[^>]*> ec4a 5b10        vmov    d0, r5, sl
+0+004 <[^>]*> ec5a 5b10        vmov    r5, sl, d0
  0+008 <[^>]*> ec4a 5a37        fmsrr   r5, sl, {s15, s16}
  0+00c <[^>]*> ec5a 5a37        fmrrs   r5, sl, {s15, s16}
-0+010 <[^>]*> ec45 ab1f        fmdrr   d15, sl, r5
-0+014 <[^>]*> ec55 ab1f        fmrrd   sl, r5, d15
+0+010 <[^>]*> ec45 ab1f        vmov    d15, sl, r5
+0+014 <[^>]*> ec55 ab1f        vmov    sl, r5, d15
  0+018 <[^>]*> ec45 aa38        fmsrr   sl, r5, {s17, s18}
  0+01c <[^>]*> ec55 aa38        fmrrs   sl, r5, {s17, s18}
diff --git a/gas/testsuite/gas/arm/vfpv3-32drs.d b/gas/testsuite/gas/arm/vfpv3-32drs.d

new file mode 100644 (file)

index 0000000..11f9e93
--- /dev/null
+++ b/gas/testsuite/gas/arm/vfpv3-32drs.d
@@ -0,0 +1,73 @@
+# name: VFPv3 extra D registers
+# as: -mfpu=vfp3
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> eeb03b66    fcpyd   d3, d22
+0[0-9a-f]+ <[^>]+> eef06b43    fcpyd   d22, d3
+0[0-9a-f]+ <[^>]+> eef76acb    fcvtds  d22, s22
+0[0-9a-f]+ <[^>]+> eeb7bbe6    fcvtsd  s22, d22
+0[0-9a-f]+ <[^>]+> ee254b90    vmov\.32        d21\[1\], r4
+0[0-9a-f]+ <[^>]+> ee0b5b90    vmov\.32        d27\[0\], r5
+0[0-9a-f]+ <[^>]+> ee376b90    vmov\.32        r6, d23\[1\]
+0[0-9a-f]+ <[^>]+> ee197b90    vmov\.32        r7, d25\[0\]
+0[0-9a-f]+ <[^>]+> eef86bcb    fsitod  d22, s22
+0[0-9a-f]+ <[^>]+> eef85b6a    fuitod  d21, s21
+0[0-9a-f]+ <[^>]+> eebdab64    ftosid  s20, d20
+0[0-9a-f]+ <[^>]+> eebdabe4    ftosizd s20, d20
+0[0-9a-f]+ <[^>]+> eefc9b63    ftouid  s19, d19
+0[0-9a-f]+ <[^>]+> eefc9be3    ftouizd s19, d19
+0[0-9a-f]+ <[^>]+> edda3b01    vldr    d19, \[sl, #4\]
+0[0-9a-f]+ <[^>]+> edca5b01    vstr    d21, \[sl, #4\]
+0[0-9a-f]+ <[^>]+> ecba5b04    vldmia  sl!, {d5-d6}
+0[0-9a-f]+ <[^>]+> ecfa2b06    vldmia  sl!, {d18-d20}
+0[0-9a-f]+ <[^>]+> ecba5b05    vldmia  sl!, {d5-d6}
+0[0-9a-f]+ <[^>]+> ecfa2b07    vldmia  sl!, {d18-d20}
+0[0-9a-f]+ <[^>]+> ed7a2b05    vldmdb  sl!, {d18-d19}
+0[0-9a-f]+ <[^>]+> ecc94b0a    vstmia  r9, {d20-d24}
+0[0-9a-f]+ <[^>]+> eeb03bc5    fabsd   d3, d5
+0[0-9a-f]+ <[^>]+> eeb0cbe2    fabsd   d12, d18
+0[0-9a-f]+ <[^>]+> eef02be3    fabsd   d18, d19
+0[0-9a-f]+ <[^>]+> eeb13b45    fnegd   d3, d5
+0[0-9a-f]+ <[^>]+> eeb1cb62    fnegd   d12, d18
+0[0-9a-f]+ <[^>]+> eef12b63    fnegd   d18, d19
+0[0-9a-f]+ <[^>]+> eeb13bc5    fsqrtd  d3, d5
+0[0-9a-f]+ <[^>]+> eeb1cbe2    fsqrtd  d12, d18
+0[0-9a-f]+ <[^>]+> eef12be3    fsqrtd  d18, d19
+0[0-9a-f]+ <[^>]+> ee353b06    faddd   d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee32cb84    faddd   d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee732ba4    faddd   d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee353b46    fsubd   d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee32cbc4    fsubd   d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee732be4    fsubd   d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee253b06    fmuld   d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee22cb84    fmuld   d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee632ba4    fmuld   d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee853b06    fdivd   d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee82cb84    fdivd   d12, d18, d4
+0[0-9a-f]+ <[^>]+> eec32ba4    fdivd   d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee053b06    fmacd   d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee02cb84    fmacd   d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee432ba4    fmacd   d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee153b06    fmscd   d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee12cb84    fmscd   d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee532ba4    fmscd   d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee253b46    fnmuld  d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee22cbc4    fnmuld  d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee632be4    fnmuld  d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee053b46    fnmacd  d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee02cbc4    fnmacd  d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee432be4    fnmacd  d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee153b46    fnmscd  d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee12cbc4    fnmscd  d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee532be4    fnmscd  d18, d19, d20
+0[0-9a-f]+ <[^>]+> eeb43b62    fcmpd   d3, d18
+0[0-9a-f]+ <[^>]+> eef42b43    fcmpd   d18, d3
+0[0-9a-f]+ <[^>]+> eef53b40    fcmpzd  d19
+0[0-9a-f]+ <[^>]+> eeb43be2    fcmped  d3, d18
+0[0-9a-f]+ <[^>]+> eef42bc3    fcmped  d18, d3
+0[0-9a-f]+ <[^>]+> eef53bc0    fcmpezd d19
+0[0-9a-f]+ <[^>]+> ec443b3f    vmov    d31, r3, r4
+0[0-9a-f]+ <[^>]+> ec565b3e    vmov    r5, r6, d30
diff --git a/gas/testsuite/gas/arm/vfpv3-32drs.s b/gas/testsuite/gas/arm/vfpv3-32drs.s

new file mode 100644 (file)

index 0000000..ef72c24
--- /dev/null
+++ b/gas/testsuite/gas/arm/vfpv3-32drs.s
@@ -0,0 +1,68 @@
+.arm
+.syntax unified
+       fcpyd d3,d22
+       fcpyd d22,d3
+       fcvtds d22,s22
+       fcvtsd s22,d22
+       fmdhr d21,r4
+       fmdlr d27,r5
+       fmrdh r6,d23
+       fmrdl r7,d25
+       fsitod d22,s22
+       fuitod d21,s21
+       ftosid s20,d20
+       ftosizd s20,d20
+       ftouid s19,d19
+       ftouizd s19,d19
+       fldd d19,[r10,#4]
+       fstd d21,[r10,#4]
+       fldmiad r10!,{d5,d6}
+       fldmiad r10!,{d18,d19,d20}
+       fldmiax r10!,{d5,d6}
+       fldmiax r10!,{d18,d19,d20}
+       fldmdbx r10!,{d18,d19}
+       fstmiad r9,{d20,d21,d22,d23,d24}
+       fabsd d3,d5
+       fabsd d12,d18
+       fabsd d18,d19
+       fnegd d3,d5
+       fnegd d12,d18
+       fnegd d18,d19
+       fsqrtd d3,d5
+       fsqrtd d12,d18
+       fsqrtd d18,d19
+       faddd d3,d5,d6
+       faddd d12,d18,d4
+       faddd d18,d19,d20
+       fsubd d3,d5,d6
+       fsubd d12,d18,d4
+       fsubd d18,d19,d20
+       fmuld d3,d5,d6
+       fmuld d12,d18,d4
+       fmuld d18,d19,d20
+       fdivd d3,d5,d6
+       fdivd d12,d18,d4
+       fdivd d18,d19,d20
+       fmacd d3,d5,d6
+       fmacd d12,d18,d4
+       fmacd d18,d19,d20
+       fmscd d3,d5,d6
+       fmscd d12,d18,d4
+       fmscd d18,d19,d20
+       fnmuld d3,d5,d6
+       fnmuld d12,d18,d4
+       fnmuld d18,d19,d20
+       fnmacd d3,d5,d6
+       fnmacd d12,d18,d4
+       fnmacd d18,d19,d20
+       fnmscd d3,d5,d6
+       fnmscd d12,d18,d4
+       fnmscd d18,d19,d20
+       fcmpd d3,d18
+       fcmpd d18,d3
+       fcmpzd d19
+       fcmped d3,d18
+       fcmped d18,d3
+       fcmpezd d19
+       fmdrr d31,r3,r4
+       fmrrd r5,r6,d30
diff --git a/gas/testsuite/gas/arm/vfpv3-const-conv.d b/gas/testsuite/gas/arm/vfpv3-const-conv.d

new file mode 100644 (file)

index 0000000..ddabd1c
--- /dev/null
+++ b/gas/testsuite/gas/arm/vfpv3-const-conv.d
@@ -0,0 +1,29 @@
+# name: VFPv3 additional constant and conversion ops
+# as: -mfpu=vfp3
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> eef48a00    fconsts s17, #4
+0[0-9a-f]+ <[^>]+> eeb59a00    fconsts s18, #5
+0[0-9a-f]+ <[^>]+> eef69a00    fconsts s19, #6
+0[0-9a-f]+ <[^>]+> eef41b00    fconstd d17, #4
+0[0-9a-f]+ <[^>]+> eef52b00    fconstd d18, #5
+0[0-9a-f]+ <[^>]+> eef63b00    fconstd d19, #6
+0[0-9a-f]+ <[^>]+> eefa8a63    fshtos  s17, #9
+0[0-9a-f]+ <[^>]+> eefa1b63    fshtod  d17, #9
+0[0-9a-f]+ <[^>]+> eefa8aeb    fsltos  s17, #9
+0[0-9a-f]+ <[^>]+> eefa1beb    fsltod  d17, #9
+0[0-9a-f]+ <[^>]+> eefb8a63    fuhtos  s17, #9
+0[0-9a-f]+ <[^>]+> eefb1b63    fuhtod  d17, #9
+0[0-9a-f]+ <[^>]+> eefb8aeb    fultos  s17, #9
+0[0-9a-f]+ <[^>]+> eefb1beb    fultod  d17, #9
+0[0-9a-f]+ <[^>]+> eefe9a64    ftoshs  s19, #7
+0[0-9a-f]+ <[^>]+> eefe3b64    ftoshd  d19, #7
+0[0-9a-f]+ <[^>]+> eefe9aec    ftosls  s19, #7
+0[0-9a-f]+ <[^>]+> eefe3bec    ftosld  d19, #7
+0[0-9a-f]+ <[^>]+> eeff9a64    ftouhs  s19, #7
+0[0-9a-f]+ <[^>]+> eeff3b64    ftouhd  d19, #7
+0[0-9a-f]+ <[^>]+> eeff9aec    ftouls  s19, #7
+0[0-9a-f]+ <[^>]+> eeff3bec    ftould  d19, #7
diff --git a/gas/testsuite/gas/arm/vfpv3-const-conv.s b/gas/testsuite/gas/arm/vfpv3-const-conv.s

new file mode 100644 (file)

index 0000000..c40301c
--- /dev/null
+++ b/gas/testsuite/gas/arm/vfpv3-const-conv.s
@@ -0,0 +1,25 @@
+.arm
+.syntax unified
+       fconsts s17, #4
+       fconsts s18, #5
+       fconsts s19, #6
+       fconstd d17, #4
+       fconstd d18, #5
+       fconstd d19, #6
+       fshtos s17, 9
+       fshtod d17, 9
+       fsltos s17, 9
+       fsltod d17, 9
+       fuhtos s17, 9
+       fuhtod d17, 9
+       fultos s17, 9
+       fultod d17, 9
+
+       ftoshs s19, 7
+       ftoshd d19, 7
+       ftosls s19, 7
+       ftosld d19, 7
+       ftouhs s19, 7
+       ftouhd d19, 7
+       ftouls s19, 7
+       ftould d19, 7
diff --git a/include/opcode/arm.h b/include/opcode/arm.h

index 1d3aa5aeb3d2379632b91f76d98c8b7d633021b4..f142fca9a5c1a08772a9d5dbaaa5cba83df66b9b 100644 (file)
--- a/include/opcode/arm.h
+++ b/include/opcode/arm.h
@@ -58,6 +58,8 @@
  #define FPU_VFP_EXT_V1xD 0x08000000    /* Base VFP instruction set.  */
  #define FPU_VFP_EXT_V1  0x04000000     /* Double-precision insns.    */
  #define FPU_VFP_EXT_V2  0x02000000     /* ARM10E VFPr1.              */
+#define FPU_VFP_EXT_V3  0x01000000     /* VFPv3 insns.               */
+#define FPU_NEON_EXT_V1         0x00800000     /* Neon (SIMD) insns.         */
  
  /* Architectures are the sum of the base and extensions.  The ARM ARM (rev E)
     defines the following: ARMv3, ARMv3M, ARMv4xM, ARMv4, ARMv4TxM, ARMv4T,
@@ -105,7 +107,9 @@
  #define FPU_VFP_V1xD   (FPU_VFP_EXT_V1xD | FPU_ENDIAN_PURE)
  #define FPU_VFP_V1     (FPU_VFP_V1xD | FPU_VFP_EXT_V1)
  #define FPU_VFP_V2     (FPU_VFP_V1 | FPU_VFP_EXT_V2)
-#define FPU_VFP_HARD   (FPU_VFP_EXT_V1xD | FPU_VFP_EXT_V1 | FPU_VFP_EXT_V2)
+#define FPU_VFP_V3     (FPU_VFP_V2 | FPU_VFP_EXT_V3)
+#define FPU_VFP_HARD   (FPU_VFP_EXT_V1xD | FPU_VFP_EXT_V1 | FPU_VFP_EXT_V2 \
+                         | FPU_VFP_EXT_V3 | FPU_NEON_EXT_V1)
  #define FPU_FPA                (FPU_FPA_EXT_V1 | FPU_FPA_EXT_V2)
  
  /* Deprecated */
@@ -117,6 +121,10 @@
  #define FPU_ARCH_VFP_V1xD ARM_FEATURE (0, FPU_VFP_V1xD)
  #define FPU_ARCH_VFP_V1          ARM_FEATURE (0, FPU_VFP_V1)
  #define FPU_ARCH_VFP_V2          ARM_FEATURE (0, FPU_VFP_V2)
+#define FPU_ARCH_VFP_V3          ARM_FEATURE (0, FPU_VFP_V3)
+#define FPU_ARCH_NEON_V1  ARM_FEATURE (0, FPU_NEON_EXT_V1)
+#define FPU_ARCH_VFP_V3_PLUS_NEON_V1 \
+  ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1)
  #define FPU_ARCH_VFP_HARD ARM_FEATURE (0, FPU_VFP_HARD)
  
  #define FPU_ARCH_ENDIAN_PURE ARM_FEATURE (0, FPU_ENDIAN_PURE)
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c

index bc5b52cf9c08086011bf1d8deac694cfc7b05dc3..f6cac6563c3db856513bd875cca7c9be23b442b9 100644 (file)
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@@ -63,6 +63,8 @@ struct opcode16
  
     %c                  print condition code (always bits 28-31)
     %A                  print address for ldc/stc/ldf/stf instruction
+   %B                  print vstm/vldm register list
+   %C                  print vstr/vldr address operand
     %I                   print cirrus signed shift immediate: bits 0..3|4..6
     %F                  print the COUNT field of a LFM/SFM instruction.
     %P                  print floating point precision in arithmetic insn
@@ -71,6 +73,7 @@ struct opcode16
  
     %<bitfield>r                print as an ARM register
     %<bitfield>d                print the bitfield in decimal
+   %<bitfield>k                print immediate for VFPv3 conversion instruction
     %<bitfield>x                print the bitfield in hex
     %<bitfield>X                print the bitfield as 1 hex digit without leading "0x"
     %<bitfield>f                print a floating point constant if >7 else a
@@ -78,15 +81,18 @@ struct opcode16
     %<bitfield>w         print as an iWMMXt width field - [bhwd]ss/us
     %<bitfield>g         print as an iWMMXt 64-bit register
     %<bitfield>G         print as an iWMMXt general purpose or control register
+   %<bitfield>D                print as a NEON D register
+   %<bitfield>Q                print as a NEON Q register
  
-   %<code>y            print a single precision VFP reg.
+   %y<code>            print a single precision VFP reg.
                           Codes: 0=>Sm, 1=>Sd, 2=>Sn, 3=>multi-list, 4=>Sm pair
-   %<code>z            print a double precision VFP reg
+   %z<code>            print a double precision VFP reg
                           Codes: 0=>Dm, 1=>Dd, 2=>Dn, 3=>multi-list
-   %<bitnum>'c         print specified char iff bit is one
-   %<bitnum>`c         print specified char iff bit is zero
-   %<bitnum>?ab                print a if bit is one else print b
  
+   %<bitfield>'c       print specified char iff bitfield is all ones
+   %<bitfield>`c       print specified char iff bitfield is all zeroes
+   %<bitfield>?ab...    select from list of chars, highest value first
+   
     %L                  print as an iWMMXt N/M width field.
     %Z                  print the Immediate of a WSHUFH instruction.
     %l                  like 'A' except use byte offsets for 'B' & 'H'
@@ -201,80 +207,110 @@ static const struct opcode32 coprocessor_opcodes[] =
    {FPU_FPA_EXT_V2, 0x0c000200, 0x0e100f00, "sfm%c\t%12-14f, %F, %A"},
    {FPU_FPA_EXT_V2, 0x0c100200, 0x0e100f00, "lfm%c\t%12-14f, %F, %A"},
  
+  /* Register load/store */
+  {FPU_NEON_EXT_V1, 0x0d000b00, 0x0f300f00, "vstr%c\t%12-15,22D, %C"},
+  {FPU_NEON_EXT_V1, 0x0d100b00, 0x0f300f00, "vldr%c\t%12-15,22D, %C"},
+  {FPU_NEON_EXT_V1, 0x0c800b00, 0x0f900f00, "vstmia%c\t%16-19r%21'!, %B"},
+  {FPU_NEON_EXT_V1, 0x0c900b00, 0x0f900f00, "vldmia%c\t%16-19r%21'!, %B"},
+  {FPU_NEON_EXT_V1, 0x0d000b00, 0x0f900f00, "vstmdb%c\t%16-19r%21'!, %B"},
+  {FPU_NEON_EXT_V1, 0x0d100b00, 0x0f900f00, "vldmdb%c\t%16-19r%21'!, %B"},
+
+  /* Data transfer between ARM and NEON registers */
+  {FPU_NEON_EXT_V1, 0x0e800b10, 0x0ff00f70, "vdup%c.32\t%16-19,7D, %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0e800b30, 0x0ff00f70, "vdup%c.16\t%16-19,7D, %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0ea00b10, 0x0ff00f70, "vdup%c.32\t%16-19,7Q, %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0ea00b30, 0x0ff00f70, "vdup%c.16\t%16-19,7Q, %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0ec00b10, 0x0ff00f70, "vdup%c.8\t%16-19,7D, %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0ee00b10, 0x0ff00f70, "vdup%c.8\t%16-19,7Q, %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0c400b10, 0x0ff00fd0, "vmov%c\t%0-3,5D, %12-15r, %16-19r"},
+  {FPU_NEON_EXT_V1, 0x0c500b10, 0x0ff00fd0, "vmov%c\t%12-15r, %16-19r, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0x0e000b10, 0x0fd00f70, "vmov%c.32\t%16-19,7D[%21d], %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0e100b10, 0x0f500f70, "vmov%c.32\t%12-15r, %16-19,7D[%21d]"},
+  {FPU_NEON_EXT_V1, 0x0e000b30, 0x0fd00f30, "vmov%c.16\t%16-19,7D[%6,21d], %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0e100b30, 0x0f500f30, "vmov%c.%23?us16\t%12-15r, %16-19,7D[%6,21d]"},
+  {FPU_NEON_EXT_V1, 0x0e400b10, 0x0fd00f10, "vmov%c.8\t%16-19,7D[%5,6,21d], %12-15r"},
+  {FPU_NEON_EXT_V1, 0x0e500b10, 0x0f500f10, "vmov%c.%23?us8\t%12-15r, %16-19,7D[%5,6,21d]"},
+
    /* Floating point coprocessor (VFP) instructions */
-  {FPU_VFP_EXT_V1, 0x0eb00bc0, 0x0fff0ff0, "fabsd%c\t%1z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0eb00ac0, 0x0fbf0fd0, "fabss%c\t%1y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0e300b00, 0x0ff00ff0, "faddd%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e300a00, 0x0fb00f50, "fadds%c\t%1y, %2y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0eb40b40, 0x0fff0f70, "fcmp%7'ed%c\t%1z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0eb40a40, 0x0fbf0f50, "fcmp%7'es%c\t%1y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0eb50b40, 0x0fff0f70, "fcmp%7'ezd%c\t%1z"},
-  {FPU_VFP_EXT_V1xD, 0x0eb50a40, 0x0fbf0f70, "fcmp%7'ezs%c\t%1y"},
-  {FPU_VFP_EXT_V1, 0x0eb00b40, 0x0fff0ff0, "fcpyd%c\t%1z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0eb00a40, 0x0fbf0fd0, "fcpys%c\t%1y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0eb70ac0, 0x0fff0fd0, "fcvtds%c\t%1z, %0y"},
-  {FPU_VFP_EXT_V1, 0x0eb70bc0, 0x0fbf0ff0, "fcvtsd%c\t%1y, %0z"},
-  {FPU_VFP_EXT_V1, 0x0e800b00, 0x0ff00ff0, "fdivd%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e800a00, 0x0fb00f50, "fdivs%c\t%1y, %2y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0d100b00, 0x0f700f00, "fldd%c\t%1z, %A"},
-  {FPU_VFP_EXT_V1xD, 0x0c900b00, 0x0fd00f00, "fldmia%0?xd%c\t%16-19r%21'!, %3z"},
-  {FPU_VFP_EXT_V1xD, 0x0d300b00, 0x0ff00f00, "fldmdb%0?xd%c\t%16-19r!, %3z"},
-  {FPU_VFP_EXT_V1xD, 0x0d100a00, 0x0f300f00, "flds%c\t%1y, %A"},
-  {FPU_VFP_EXT_V1xD, 0x0c900a00, 0x0f900f00, "fldmias%c\t%16-19r%21'!, %3y"},
-  {FPU_VFP_EXT_V1xD, 0x0d300a00, 0x0fb00f00, "fldmdbs%c\t%16-19r!, %3y"},
-  {FPU_VFP_EXT_V1, 0x0e000b00, 0x0ff00ff0, "fmacd%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e000a00, 0x0fb00f50, "fmacs%c\t%1y, %2y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0e200b10, 0x0ff00fff, "fmdhr%c\t%2z, %12-15r"},
-  {FPU_VFP_EXT_V1, 0x0e000b10, 0x0ff00fff, "fmdlr%c\t%2z, %12-15r"},
-  {FPU_VFP_EXT_V2, 0x0c400b10, 0x0ff00ff0, "fmdrr%c\t%0z, %12-15r, %16-19r"},
-  {FPU_VFP_EXT_V1, 0x0e300b10, 0x0ff00fff, "fmrdh%c\t%12-15r, %2z"},
-  {FPU_VFP_EXT_V1, 0x0e100b10, 0x0ff00fff, "fmrdl%c\t%12-15r, %2z"},
-  {FPU_VFP_EXT_V1, 0x0c500b10, 0x0ff00ff0, "fmrrd%c\t%12-15r, %16-19r, %0z"},
-  {FPU_VFP_EXT_V2, 0x0c500a10, 0x0ff00fd0, "fmrrs%c\t%12-15r, %16-19r, %4y"},
-  {FPU_VFP_EXT_V1xD, 0x0e100a10, 0x0ff00f7f, "fmrs%c\t%12-15r, %2y"},
    {FPU_VFP_EXT_V1xD, 0x0ef1fa10, 0x0fffffff, "fmstat%c"},
-  {FPU_VFP_EXT_V1xD, 0x0ef00a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpsid"},
-  {FPU_VFP_EXT_V1xD, 0x0ef10a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpscr"},
-  {FPU_VFP_EXT_V1xD, 0x0ef80a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpexc"},
-  {FPU_VFP_EXT_V1xD, 0x0ef90a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpinst\t@ Impl def"},
-  {FPU_VFP_EXT_V1xD, 0x0efa0a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpinst2\t@ Impl def"},
-  {FPU_VFP_EXT_V1xD, 0x0ef00a10, 0x0ff00fff, "fmrx%c\t%12-15r, <impl def 0x%16-19x>"},
-  {FPU_VFP_EXT_V1, 0x0e100b00, 0x0ff00ff0, "fmscd%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e100a00, 0x0fb00f50, "fmscs%c\t%1y, %2y, %0y"},
-  {FPU_VFP_EXT_V1xD, 0x0e000a10, 0x0ff00f7f, "fmsr%c\t%2y, %12-15r"},
-  {FPU_VFP_EXT_V2, 0x0c400a10, 0x0ff00fd0, "fmsrr%c\t%12-15r, %16-19r, %4y"},
-  {FPU_VFP_EXT_V1, 0x0e200b00, 0x0ff00ff0, "fmuld%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e200a00, 0x0fb00f50, "fmuls%c\t%1y, %2y, %0y"},
    {FPU_VFP_EXT_V1xD, 0x0ee00a10, 0x0fff0fff, "fmxr%c\tfpsid, %12-15r"},
    {FPU_VFP_EXT_V1xD, 0x0ee10a10, 0x0fff0fff, "fmxr%c\tfpscr, %12-15r"},
    {FPU_VFP_EXT_V1xD, 0x0ee80a10, 0x0fff0fff, "fmxr%c\tfpexc, %12-15r"},
    {FPU_VFP_EXT_V1xD, 0x0ee90a10, 0x0fff0fff, "fmxr%c\tfpinst, %12-15r\t@ Impl def"},
    {FPU_VFP_EXT_V1xD, 0x0eea0a10, 0x0fff0fff, "fmxr%c\tfpinst2, %12-15r\t@ Impl def"},
+  {FPU_VFP_EXT_V1xD, 0x0ef00a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpsid"},
+  {FPU_VFP_EXT_V1xD, 0x0ef10a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpscr"},
+  {FPU_VFP_EXT_V1xD, 0x0ef80a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpexc"},
+  {FPU_VFP_EXT_V1xD, 0x0ef90a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpinst\t@ Impl def"},
+  {FPU_VFP_EXT_V1xD, 0x0efa0a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpinst2\t@ Impl def"},
+  {FPU_VFP_EXT_V1, 0x0e000b10, 0x0ff00fff, "fmdlr%c\t%z2, %12-15r"},
+  {FPU_VFP_EXT_V1, 0x0e100b10, 0x0ff00fff, "fmrdl%c\t%12-15r, %z2"},
+  {FPU_VFP_EXT_V1, 0x0e200b10, 0x0ff00fff, "fmdhr%c\t%z2, %12-15r"},
+  {FPU_VFP_EXT_V1, 0x0e300b10, 0x0ff00fff, "fmrdh%c\t%12-15r, %z2"},
    {FPU_VFP_EXT_V1xD, 0x0ee00a10, 0x0ff00fff, "fmxr%c\t<impl def 0x%16-19x>, %12-15r"},
-  {FPU_VFP_EXT_V1, 0x0eb10b40, 0x0fff0ff0, "fnegd%c\t%1z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0eb10a40, 0x0fbf0fd0, "fnegs%c\t%1y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0e000b40, 0x0ff00ff0, "fnmacd%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e000a40, 0x0fb00f50, "fnmacs%c\t%1y, %2y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0e100b40, 0x0ff00ff0, "fnmscd%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e100a40, 0x0fb00f50, "fnmscs%c\t%1y, %2y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0e200b40, 0x0ff00ff0, "fnmuld%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e200a40, 0x0fb00f50, "fnmuls%c\t%1y, %2y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0eb80bc0, 0x0fff0fd0, "fsitod%c\t%1z, %0y"},
-  {FPU_VFP_EXT_V1xD, 0x0eb80ac0, 0x0fbf0fd0, "fsitos%c\t%1y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0eb10bc0, 0x0fff0ff0, "fsqrtd%c\t%1z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0eb10ac0, 0x0fbf0fd0, "fsqrts%c\t%1y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0d000b00, 0x0f700f00, "fstd%c\t%1z, %A"},
-  {FPU_VFP_EXT_V1xD, 0x0c800b00, 0x0fd00f00, "fstmia%0?xd%c\t%16-19r%21'!, %3z"},
-  {FPU_VFP_EXT_V1xD, 0x0d200b00, 0x0ff00f00, "fstmdb%0?xd%c\t%16-19r!, %3z"},
-  {FPU_VFP_EXT_V1xD, 0x0d000a00, 0x0f300f00, "fsts%c\t%1y, %A"},
-  {FPU_VFP_EXT_V1xD, 0x0c800a00, 0x0f900f00, "fstmias%c\t%16-19r%21'!, %3y"},
-  {FPU_VFP_EXT_V1xD, 0x0d200a00, 0x0fb00f00, "fstmdbs%c\t%16-19r!, %3y"},
-  {FPU_VFP_EXT_V1, 0x0e300b40, 0x0ff00ff0, "fsubd%c\t%1z, %2z, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0e300a40, 0x0fb00f50, "fsubs%c\t%1y, %2y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0ebc0b40, 0x0fbe0f70, "fto%16?sui%7'zd%c\t%1y, %0z"},
-  {FPU_VFP_EXT_V1xD, 0x0ebc0a40, 0x0fbe0f50, "fto%16?sui%7'zs%c\t%1y, %0y"},
-  {FPU_VFP_EXT_V1, 0x0eb80b40, 0x0fff0fd0, "fuitod%c\t%1z, %0y"},
-  {FPU_VFP_EXT_V1xD, 0x0eb80a40, 0x0fbf0fd0, "fuitos%c\t%1y, %0y"},
+  {FPU_VFP_EXT_V1xD, 0x0ef00a10, 0x0ff00fff, "fmrx%c\t%12-15r, <impl def 0x%16-19x>"},
+  {FPU_VFP_EXT_V1xD, 0x0e000a10, 0x0ff00f7f, "fmsr%c\t%y2, %12-15r"},
+  {FPU_VFP_EXT_V1xD, 0x0e100a10, 0x0ff00f7f, "fmrs%c\t%12-15r, %y2"},
+  {FPU_VFP_EXT_V1xD, 0x0eb50a40, 0x0fbf0f70, "fcmp%7'ezs%c\t%y1"},
+  {FPU_VFP_EXT_V1, 0x0eb50b40, 0x0fbf0f70, "fcmp%7'ezd%c\t%z1"},
+  {FPU_VFP_EXT_V1xD, 0x0eb00a40, 0x0fbf0fd0, "fcpys%c\t%y1, %y0"},
+  {FPU_VFP_EXT_V1xD, 0x0eb00ac0, 0x0fbf0fd0, "fabss%c\t%y1, %y0"},
+  {FPU_VFP_EXT_V1, 0x0eb00b40, 0x0fbf0fd0, "fcpyd%c\t%z1, %z0"},
+  {FPU_VFP_EXT_V1, 0x0eb00bc0, 0x0fbf0fd0, "fabsd%c\t%z1, %z0"},
+  {FPU_VFP_EXT_V1xD, 0x0eb10a40, 0x0fbf0fd0, "fnegs%c\t%y1, %y0"},
+  {FPU_VFP_EXT_V1xD, 0x0eb10ac0, 0x0fbf0fd0, "fsqrts%c\t%y1, %y0"},
+  {FPU_VFP_EXT_V1, 0x0eb10b40, 0x0fbf0fd0, "fnegd%c\t%z1, %z0"},
+  {FPU_VFP_EXT_V1, 0x0eb10bc0, 0x0fbf0fd0, "fsqrtd%c\t%z1, %z0"},
+  {FPU_VFP_EXT_V1, 0x0eb70ac0, 0x0fbf0fd0, "fcvtds%c\t%z1, %y0"},
+  {FPU_VFP_EXT_V1, 0x0eb70bc0, 0x0fbf0fd0, "fcvtsd%c\t%y1, %z0"},
+  {FPU_VFP_EXT_V1xD, 0x0eb80a40, 0x0fbf0fd0, "fuitos%c\t%y1, %y0"},
+  {FPU_VFP_EXT_V1xD, 0x0eb80ac0, 0x0fbf0fd0, "fsitos%c\t%y1, %y0"},
+  {FPU_VFP_EXT_V1, 0x0eb80b40, 0x0fbf0fd0, "fuitod%c\t%z1, %y0"},
+  {FPU_VFP_EXT_V1, 0x0eb80bc0, 0x0fbf0fd0, "fsitod%c\t%z1, %y0"},
+  {FPU_VFP_EXT_V1xD, 0x0eb40a40, 0x0fbf0f50, "fcmp%7'es%c\t%y1, %y0"},
+  {FPU_VFP_EXT_V1, 0x0eb40b40, 0x0fbf0f50, "fcmp%7'ed%c\t%z1, %z0"},
+  {FPU_VFP_EXT_V3, 0x0eba0a40, 0x0fbe0f50, "f%16?us%7?lhtos%c\t%y1, #%5,0-3k"},
+  {FPU_VFP_EXT_V3, 0x0eba0b40, 0x0fbe0f50, "f%16?us%7?lhtod%c\t%z1, #%5,0-3k"},
+  {FPU_VFP_EXT_V1xD, 0x0ebc0a40, 0x0fbe0f50, "fto%16?sui%7'zs%c\t%y1, %y0"},
+  {FPU_VFP_EXT_V1, 0x0ebc0b40, 0x0fbe0f50, "fto%16?sui%7'zd%c\t%y1, %z0"},
+  {FPU_VFP_EXT_V3, 0x0ebe0a40, 0x0fbe0f50, "fto%16?us%7?lhs%c\t%y1, #%5,0-3k"},
+  {FPU_VFP_EXT_V3, 0x0ebe0b40, 0x0fbe0f50, "fto%16?us%7?lhd%c\t%z1, #%5,0-3k"},
+  {FPU_VFP_EXT_V1, 0x0c500b10, 0x0fb00ff0, "fmrrd%c\t%12-15r, %16-19r, %z0"},
+  {FPU_VFP_EXT_V3, 0x0eb00a00, 0x0fb00ff0, "fconsts%c\t%y1, #%16-19,0-3d"},
+  {FPU_VFP_EXT_V3, 0x0eb00b00, 0x0fb00ff0, "fconstd%c\t%z1, #%16-19,0-3d"},
+  {FPU_VFP_EXT_V2, 0x0c400a10, 0x0ff00fd0, "fmsrr%c\t%12-15r, %16-19r, %y4"},
+  {FPU_VFP_EXT_V2, 0x0c400b10, 0x0ff00fd0, "fmdrr%c\t%z0, %12-15r, %16-19r"},
+  {FPU_VFP_EXT_V2, 0x0c500a10, 0x0ff00fd0, "fmrrs%c\t%12-15r, %16-19r, %y4"},
+  {FPU_VFP_EXT_V1xD, 0x0e000a00, 0x0fb00f50, "fmacs%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1xD, 0x0e000a40, 0x0fb00f50, "fnmacs%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1, 0x0e000b00, 0x0fb00f50, "fmacd%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1, 0x0e000b40, 0x0fb00f50, "fnmacd%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1xD, 0x0e100a00, 0x0fb00f50, "fmscs%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1xD, 0x0e100a40, 0x0fb00f50, "fnmscs%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1, 0x0e100b00, 0x0fb00f50, "fmscd%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1, 0x0e100b40, 0x0fb00f50, "fnmscd%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1xD, 0x0e200a00, 0x0fb00f50, "fmuls%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1xD, 0x0e200a40, 0x0fb00f50, "fnmuls%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1, 0x0e200b00, 0x0fb00f50, "fmuld%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1, 0x0e200b40, 0x0fb00f50, "fnmuld%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1xD, 0x0e300a00, 0x0fb00f50, "fadds%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1xD, 0x0e300a40, 0x0fb00f50, "fsubs%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1, 0x0e300b00, 0x0fb00f50, "faddd%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1, 0x0e300b40, 0x0fb00f50, "fsubd%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1xD, 0x0e800a00, 0x0fb00f50, "fdivs%c\t%y1, %y2, %y0"},
+  {FPU_VFP_EXT_V1, 0x0e800b00, 0x0fb00f50, "fdivd%c\t%z1, %z2, %z0"},
+  {FPU_VFP_EXT_V1xD, 0x0d200a00, 0x0fb00f00, "fstmdbs%c\t%16-19r!, %y3"},
+  {FPU_VFP_EXT_V1xD, 0x0d200b00, 0x0fb00f00, "fstmdb%0?xd%c\t%16-19r!, %z3"},
+  {FPU_VFP_EXT_V1xD, 0x0d300a00, 0x0fb00f00, "fldmdbs%c\t%16-19r!, %y3"},
+  {FPU_VFP_EXT_V1xD, 0x0d300b00, 0x0fb00f00, "fldmdb%0?xd%c\t%16-19r!, %z3"},
+  {FPU_VFP_EXT_V1xD, 0x0d000a00, 0x0f300f00, "fsts%c\t%y1, %A"},
+  {FPU_VFP_EXT_V1, 0x0d000b00, 0x0f300f00, "fstd%c\t%z1, %A"},
+  {FPU_VFP_EXT_V1xD, 0x0d100a00, 0x0f300f00, "flds%c\t%y1, %A"},
+  {FPU_VFP_EXT_V1, 0x0d100b00, 0x0f300f00, "fldd%c\t%z1, %A"},
+  {FPU_VFP_EXT_V1xD, 0x0c800a00, 0x0f900f00, "fstmias%c\t%16-19r%21'!, %y3"},
+  {FPU_VFP_EXT_V1xD, 0x0c800b00, 0x0f900f00, "fstmia%0?xd%c\t%16-19r%21'!, %z3"},
+  {FPU_VFP_EXT_V1xD, 0x0c900a00, 0x0f900f00, "fldmias%c\t%16-19r%21'!, %y3"},
+  {FPU_VFP_EXT_V1xD, 0x0c900b00, 0x0f900f00, "fldmia%0?xd%c\t%16-19r%21'!, %z3"},
  
    /* Cirrus coprocessor instructions.  */
    {ARM_CEXT_MAVERICK, 0x0d100400, 0x0f500f00, "cfldrs%c\tmvf%12-15d, %A"},
@@ -381,9 +417,286 @@ static const struct opcode32 coprocessor_opcodes[] =
    {ARM_EXT_V5, 0xfe000000, 0xff000010, "cdp2\t%8-11d, %20-23d, cr%12-15d, cr%16-19d, cr%0-3d, {%5-7d}"},
    {ARM_EXT_V5, 0xfe000010, 0xff100010, "mcr2\t%8-11d, %21-23d, %12-15r, cr%16-19d, cr%0-3d, {%5-7d}"},
    {ARM_EXT_V5, 0xfe100010, 0xff100010, "mrc2\t%8-11d, %21-23d, %12-15r, cr%16-19d, cr%0-3d, {%5-7d}"},
+
    {0, 0, 0, 0}
  };
  
+/* Neon opcode table:  This does not encode the top byte -- that is
+   checked by the print_insn_neon routine, as it depends on whether we are
+   doing thumb32 or arm32 disassembly.  */
+
+/* print_insn_neon recognizes the following format control codes:
+
+   %%                  %
+
+   %A                  print v{st,ld}[1234] operands
+   %B                  print v{st,ld}[1234] any one operands
+   %C                  print v{st,ld}[1234] single->all operands
+   %D                  print scalar
+   %E                  print vmov, vmvn, vorr, vbic encoded constant
+   %F                  print vtbl,vtbx register list
+
+   %<bitfield>r                print as an ARM register
+   %<bitfield>d                print the bitfield in decimal
+   %<bitfield>e         print the 2^N - bitfield in decimal
+   %<bitfield>D                print as a NEON D register
+   %<bitfield>Q                print as a NEON Q register
+   %<bitfield>R                print as a NEON D or Q register
+   %<bitfield>Sn       print byte scaled width limited by n
+   %<bitfield>Tn       print short scaled width limited by n
+   %<bitfield>Un       print long scaled width limited by n
+   
+   %<bitfield>'c       print specified char iff bitfield is all ones
+   %<bitfield>`c       print specified char iff bitfield is all zeroes
+   %<bitfield>?ab...    select from array of values in big endian order  */
+
+static const struct opcode32 neon_opcodes[] =
+{
+  /* Entries are tried in order, so a row with a more specific mask must
+     come before a less specific row that shares its value bits.  */
+
+  /* Extract */
+  {FPU_NEON_EXT_V1, 0xf2b00840, 0xffb00850, "vext.8\t%12-15,22R, %16-19,7R, %0-3,5R, #%8-11d"},
+  {FPU_NEON_EXT_V1, 0xf2b00000, 0xffb00810, "vext.8\t%12-15,22R, %16-19,7R, %0-3,5R, #%8-11d"},
+
+  /* Move data element to all lanes */
+  {FPU_NEON_EXT_V1, 0xf3b40c00, 0xffb70f90, "vdup.32\t%12-15,22R, %0-3,5D[%19d]"},
+  {FPU_NEON_EXT_V1, 0xf3b20c00, 0xffb30f90, "vdup.16\t%12-15,22R, %0-3,5D[%18-19d]"},
+  {FPU_NEON_EXT_V1, 0xf3b10c00, 0xffb10f90, "vdup.8\t%12-15,22R, %0-3,5D[%17-19d]"},
+
+  /* Table lookup */
+  {FPU_NEON_EXT_V1, 0xf3b00800, 0xffb00c50, "vtbl.8\t%12-15,22D, %F, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf3b00840, 0xffb00c50, "vtbx.8\t%12-15,22D, %F, %0-3,5D"},
+  
+  /* Two registers, miscellaneous */
+  {FPU_NEON_EXT_V1, 0xf2880a10, 0xfebf0fd0, "vmovl.%24?us8\t%12-15,22Q, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2900a10, 0xfebf0fd0, "vmovl.%24?us16\t%12-15,22Q, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2a00a10, 0xfebf0fd0, "vmovl.%24?us32\t%12-15,22Q, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf3b00500, 0xffbf0f90, "vcnt.8\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00580, 0xffbf0f90, "vmvn\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b20000, 0xffbf0f90, "vswp\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b20200, 0xffb30fd0, "vmovn.i%18-19S2\t%12-15,22D, %0-3,5Q"},
+  {FPU_NEON_EXT_V1, 0xf3b20240, 0xffb30fd0, "vqmovun.s%18-19T2\t%12-15,22D, %0-3,5Q"},
+  {FPU_NEON_EXT_V1, 0xf3b20280, 0xffb30fd0, "vqmovn.s%18-19T2\t%12-15,22D, %0-3,5Q"},
+  {FPU_NEON_EXT_V1, 0xf3b202c0, 0xffb30fd0, "vqmovn.u%18-19T2\t%12-15,22D, %0-3,5Q"},
+  {FPU_NEON_EXT_V1, 0xf3b20300, 0xffb30fd0, "vshll.i%18-19S2\t%12-15,22Q, %0-3,5D, #%18-19S2"},
+  {FPU_NEON_EXT_V1, 0xf3bb0400, 0xffbf0e90, "vrecpe.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3bb0480, 0xffbf0e90, "vrsqrte.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00000, 0xffb30f90, "vrev64.%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00080, 0xffb30f90, "vrev32.%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00100, 0xffb30f90, "vrev16.%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00400, 0xffb30f90, "vcls.s%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00480, 0xffb30f90, "vclz.i%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00700, 0xffb30f90, "vqabs.s%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00780, 0xffb30f90, "vqneg.s%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b20080, 0xffb30f90, "vtrn.%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b20100, 0xffb30f90, "vuzp.%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b20180, 0xffb30f90, "vzip.%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b10000, 0xffb30b90, "vcgt.%10?fs%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+  {FPU_NEON_EXT_V1, 0xf3b10080, 0xffb30b90, "vcge.%10?fs%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+  {FPU_NEON_EXT_V1, 0xf3b10100, 0xffb30b90, "vceq.%10?fi%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+  {FPU_NEON_EXT_V1, 0xf3b10180, 0xffb30b90, "vcle.%10?fs%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+  {FPU_NEON_EXT_V1, 0xf3b10200, 0xffb30b90, "vclt.%10?fs%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+  {FPU_NEON_EXT_V1, 0xf3b10300, 0xffb30b90, "vabs.%10?fs%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b10380, 0xffb30b90, "vneg.%10?fs%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00200, 0xffb30f10, "vpaddl.%7?us%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b00600, 0xffb30f10, "vpadal.%7?us%18-19S2\t%12-15,22R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3b30600, 0xffb30e10, "vcvt.%7-8?usff%18-19Sa.%7-8?ffus%18-19Sa\t%12-15,22R, %0-3,5R"},
+
+  /* Three registers of the same length */
+  {FPU_NEON_EXT_V1, 0xf2000110, 0xffb00f10, "vand\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2100110, 0xffb00f10, "vbic\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2200110, 0xffb00f10, "vorr\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2300110, 0xffb00f10, "vorn\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000110, 0xffb00f10, "veor\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3100110, 0xffb00f10, "vbsl\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3200110, 0xffb00f10, "vbit\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3300110, 0xffb00f10, "vbif\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000d00, 0xffa00f10, "vadd.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000d10, 0xffa00f10, "vmla.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000e00, 0xffa00f10, "vceq.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000f00, 0xffa00f10, "vmax.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000f10, 0xffa00f10, "vrecps.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2200d00, 0xffa00f10, "vsub.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2200d10, 0xffa00f10, "vmls.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2200f00, 0xffa00f10, "vmin.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2200f10, 0xffa00f10, "vrsqrts.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000d00, 0xffa00f10, "vpadd.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000d10, 0xffa00f10, "vmul.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000e00, 0xffa00f10, "vcge.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000e10, 0xffa00f10, "vacge.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000f00, 0xffa00f10, "vpmax.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3200d00, 0xffa00f10, "vabd.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3200e00, 0xffa00f10, "vcgt.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3200e10, 0xffa00f10, "vacgt.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3200f00, 0xffa00f10, "vpmin.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000800, 0xff800f10, "vadd.i%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000810, 0xff800f10, "vtst.%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000900, 0xff800f10, "vmla.i%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000b00, 0xff800f10, "vqdmulh.s%20-21S6\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000b10, 0xff800f10, "vpadd.i%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000800, 0xff800f10, "vsub.i%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000810, 0xff800f10, "vceq.i%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000900, 0xff800f10, "vmls.i%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf3000b00, 0xff800f10, "vqrdmulh.s%20-21S6\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000000, 0xfe800f10, "vhadd.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000010, 0xfe800f10, "vqadd.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000100, 0xfe800f10, "vrhadd.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000200, 0xfe800f10, "vhsub.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000210, 0xfe800f10, "vqsub.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000300, 0xfe800f10, "vcgt.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000310, 0xfe800f10, "vcge.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000400, 0xfe800f10, "vshl.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000410, 0xfe800f10, "vqshl.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000500, 0xfe800f10, "vrshl.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000510, 0xfe800f10, "vqrshl.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000600, 0xfe800f10, "vmax.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000610, 0xfe800f10, "vmin.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000700, 0xfe800f10, "vabd.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000710, 0xfe800f10, "vaba.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000910, 0xfe800f10, "vmul.%24?pi%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000a00, 0xfe800f10, "vpmax.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+  {FPU_NEON_EXT_V1, 0xf2000a10, 0xfe800f10, "vpmin.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+
+  /* One register and an immediate value */
+  {FPU_NEON_EXT_V1, 0xf2800e10, 0xfeb80fb0, "vmov.i8\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800e30, 0xfeb80fb0, "vmov.i64\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800f10, 0xfeb80fb0, "vmov.f32\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800810, 0xfeb80db0, "vmov.i16\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800830, 0xfeb80db0, "vmvn.i16\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800910, 0xfeb80db0, "vorr.i16\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800930, 0xfeb80db0, "vbic.i16\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800c10, 0xfeb80eb0, "vmov.i32\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800c30, 0xfeb80eb0, "vmvn.i32\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800110, 0xfeb809b0, "vorr.i32\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800130, 0xfeb809b0, "vbic.i32\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800010, 0xfeb808b0, "vmov.i32\t%12-15,22R, %E"},
+  {FPU_NEON_EXT_V1, 0xf2800030, 0xfeb808b0, "vmvn.i32\t%12-15,22R, %E"},
+
+  /* Two registers and a shift amount.  VSHLL widens, so its destination
+     is a Q register and its source a D register.  The 64-bit narrowing
+     shifts are selected by bit 21 (the top bit of the immediate field);
+     bit 22 is part of the destination register number printed by
+     %12-15,22D and is therefore left out of their masks.  */
+  {FPU_NEON_EXT_V1, 0xf2880810, 0xffb80fd0, "vshrn.i16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880850, 0xffb80fd0, "vrshrn.i16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880810, 0xfeb80fd0, "vqshrun.s16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880850, 0xfeb80fd0, "vqrshrun.s16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880910, 0xfeb80fd0, "vqshrn.%24?us16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880950, 0xfeb80fd0, "vqrshrn.%24?us16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880a10, 0xfeb80fd0, "vshll.%24?us8\t%12-15,22Q, %0-3,5D, #%16-18d"},
+  {FPU_NEON_EXT_V1, 0xf2900810, 0xffb00fd0, "vshrn.i32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900850, 0xffb00fd0, "vrshrn.i32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2880510, 0xffb80f90, "vshl.%24?us8\t%12-15,22R, %0-3,5R, #%16-18d"},
+  {FPU_NEON_EXT_V1, 0xf3880410, 0xffb80f90, "vsri.8\t%12-15,22R, %0-3,5R, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf3880510, 0xffb80f90, "vsli.8\t%12-15,22R, %0-3,5R, #%16-18d"},
+  {FPU_NEON_EXT_V1, 0xf3880610, 0xffb80f90, "vqshlu.s8\t%12-15,22R, %0-3,5R, #%16-18d"},
+  {FPU_NEON_EXT_V1, 0xf2900810, 0xfeb00fd0, "vqshrun.s32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900850, 0xfeb00fd0, "vqrshrun.s32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900910, 0xfeb00fd0, "vqshrn.%24?us32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900950, 0xfeb00fd0, "vqrshrn.%24?us32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900a10, 0xfeb00fd0, "vshll.%24?us16\t%12-15,22Q, %0-3,5D, #%16-19d"},
+  {FPU_NEON_EXT_V1, 0xf2880010, 0xfeb80f90, "vshr.%24?us8\t%12-15,22R, %0-3,5R, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880110, 0xfeb80f90, "vsra.%24?us8\t%12-15,22R, %0-3,5R, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880210, 0xfeb80f90, "vrshr.%24?us8\t%12-15,22R, %0-3,5R, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880310, 0xfeb80f90, "vrsra.%24?us8\t%12-15,22R, %0-3,5R, #%16-18e"},
+  {FPU_NEON_EXT_V1, 0xf2880710, 0xfeb80f90, "vqshl.%24?us8\t%12-15,22R, %0-3,5R, #%16-18d"},
+  {FPU_NEON_EXT_V1, 0xf2a00810, 0xffa00fd0, "vshrn.i64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00850, 0xffa00fd0, "vrshrn.i64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2900510, 0xffb00f90, "vshl.%24?us16\t%12-15,22R, %0-3,5R, #%16-19d"},
+  {FPU_NEON_EXT_V1, 0xf3900410, 0xffb00f90, "vsri.16\t%12-15,22R, %0-3,5R, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf3900510, 0xffb00f90, "vsli.16\t%12-15,22R, %0-3,5R, #%16-19d"},
+  {FPU_NEON_EXT_V1, 0xf3900610, 0xffb00f90, "vqshlu.s16\t%12-15,22R, %0-3,5R, #%16-19d"},
+  {FPU_NEON_EXT_V1, 0xf2a00a10, 0xfea00fd0, "vshll.%24?us32\t%12-15,22Q, %0-3,5D, #%16-20d"},
+  {FPU_NEON_EXT_V1, 0xf2900010, 0xfeb00f90, "vshr.%24?us16\t%12-15,22R, %0-3,5R, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900110, 0xfeb00f90, "vsra.%24?us16\t%12-15,22R, %0-3,5R, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900210, 0xfeb00f90, "vrshr.%24?us16\t%12-15,22R, %0-3,5R, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900310, 0xfeb00f90, "vrsra.%24?us16\t%12-15,22R, %0-3,5R, #%16-19e"},
+  {FPU_NEON_EXT_V1, 0xf2900710, 0xfeb00f90, "vqshl.%24?us16\t%12-15,22R, %0-3,5R, #%16-19d"},
+  {FPU_NEON_EXT_V1, 0xf2a00810, 0xfea00fd0, "vqshrun.s64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00850, 0xfea00fd0, "vqrshrun.s64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00910, 0xfea00fd0, "vqshrn.%24?us64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00950, 0xfea00fd0, "vqrshrn.%24?us64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00510, 0xffa00f90, "vshl.%24?us32\t%12-15,22R, %0-3,5R, #%16-20d"},
+  {FPU_NEON_EXT_V1, 0xf3a00410, 0xffa00f90, "vsri.32\t%12-15,22R, %0-3,5R, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf3a00510, 0xffa00f90, "vsli.32\t%12-15,22R, %0-3,5R, #%16-20d"},
+  {FPU_NEON_EXT_V1, 0xf3a00610, 0xffa00f90, "vqshlu.s32\t%12-15,22R, %0-3,5R, #%16-20d"},
+  {FPU_NEON_EXT_V1, 0xf2a00010, 0xfea00f90, "vshr.%24?us32\t%12-15,22R, %0-3,5R, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00110, 0xfea00f90, "vsra.%24?us32\t%12-15,22R, %0-3,5R, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00210, 0xfea00f90, "vrshr.%24?us32\t%12-15,22R, %0-3,5R, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00310, 0xfea00f90, "vrsra.%24?us32\t%12-15,22R, %0-3,5R, #%16-20e"},
+  {FPU_NEON_EXT_V1, 0xf2a00710, 0xfea00f90, "vqshl.%24?us32\t%12-15,22R, %0-3,5R, #%16-20d"},
+  {FPU_NEON_EXT_V1, 0xf2800590, 0xff800f90, "vshl.%24?us64\t%12-15,22R, %0-3,5R, #%16-21d"},
+  {FPU_NEON_EXT_V1, 0xf3800490, 0xff800f90, "vsri.64\t%12-15,22R, %0-3,5R, #%16-21e"},
+  {FPU_NEON_EXT_V1, 0xf3800590, 0xff800f90, "vsli.64\t%12-15,22R, %0-3,5R, #%16-21d"},
+  {FPU_NEON_EXT_V1, 0xf3800690, 0xff800f90, "vqshlu.s64\t%12-15,22R, %0-3,5R, #%16-21d"},
+  {FPU_NEON_EXT_V1, 0xf2800090, 0xfe800f90, "vshr.%24?us64\t%12-15,22R, %0-3,5R, #%16-21e"},
+  {FPU_NEON_EXT_V1, 0xf2800190, 0xfe800f90, "vsra.%24?us64\t%12-15,22R, %0-3,5R, #%16-21e"},
+  {FPU_NEON_EXT_V1, 0xf2800290, 0xfe800f90, "vrshr.%24?us64\t%12-15,22R, %0-3,5R, #%16-21e"},
+  {FPU_NEON_EXT_V1, 0xf2800390, 0xfe800f90, "vrsra.%24?us64\t%12-15,22R, %0-3,5R, #%16-21e"},
+  {FPU_NEON_EXT_V1, 0xf2800790, 0xfe800f90, "vqshl.%24?us64\t%12-15,22R, %0-3,5R, #%16-21d"},
+  {FPU_NEON_EXT_V1, 0xf2a00e10, 0xfea00e90, "vcvt.%24,8?usff32.%24,8?ffus32\t%12-15,22R, %0-3,5R, #%16-20e"},
+
+  /* Three registers of different lengths */
+  {FPU_NEON_EXT_V1, 0xf2800e00, 0xfea00f50, "vmull.p%20S0\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800400, 0xff800f50, "vaddhn.i%20-21T2\t%12-15,22D, %16-19,7Q, %0-3,5Q"},
+  {FPU_NEON_EXT_V1, 0xf2800600, 0xff800f50, "vsubhn.i%20-21T2\t%12-15,22D, %16-19,7Q, %0-3,5Q"},
+  {FPU_NEON_EXT_V1, 0xf2800900, 0xff800f50, "vqdmlal.s%20-21S6\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800b00, 0xff800f50, "vqdmlsl.s%20-21S6\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800d00, 0xff800f50, "vqdmull.s%20-21S6\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf3800400, 0xff800f50, "vraddhn.i%20-21T2\t%12-15,22D, %16-19,7Q, %0-3,5Q"},
+  {FPU_NEON_EXT_V1, 0xf3800600, 0xff800f50, "vrsubhn.i%20-21T2\t%12-15,22D, %16-19,7Q, %0-3,5Q"},
+  {FPU_NEON_EXT_V1, 0xf2800000, 0xfe800f50, "vaddl.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800100, 0xfe800f50, "vaddw.%24?us%20-21S2\t%12-15,22Q, %16-19,7Q, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800200, 0xfe800f50, "vsubl.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800300, 0xfe800f50, "vsubw.%24?us%20-21S2\t%12-15,22Q, %16-19,7Q, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800500, 0xfe800f50, "vabal.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800700, 0xfe800f50, "vabdl.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800800, 0xfe800f50, "vmlal.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800a00, 0xfe800f50, "vmlsl.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+  {FPU_NEON_EXT_V1, 0xf2800c00, 0xfe800f50, "vmull.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+
+  /* Two registers and a scalar */
+  {FPU_NEON_EXT_V1, 0xf2800040, 0xff800f50, "vmla.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800140, 0xff800f50, "vmla.f%20-21Sa\t%12-15,22D, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800340, 0xff800f50, "vqdmlal.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800440, 0xff800f50, "vmls.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800540, 0xff800f50, "vmls.f%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800740, 0xff800f50, "vqdmlsl.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800840, 0xff800f50, "vmul.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800940, 0xff800f50, "vmul.f%20-21Sa\t%12-15,22D, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800b40, 0xff800f50, "vqdmull.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800c40, 0xff800f50, "vqdmulh.s%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800d40, 0xff800f50, "vqrdmulh.s%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf3800040, 0xff800f50, "vmla.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+  {FPU_NEON_EXT_V1, 0xf3800140, 0xff800f50, "vmla.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
+  {FPU_NEON_EXT_V1, 0xf3800440, 0xff800f50, "vmls.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+  {FPU_NEON_EXT_V1, 0xf3800540, 0xff800f50, "vmls.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
+  {FPU_NEON_EXT_V1, 0xf3800840, 0xff800f50, "vmul.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+  {FPU_NEON_EXT_V1, 0xf3800940, 0xff800f50, "vmul.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
+  {FPU_NEON_EXT_V1, 0xf3800c40, 0xff800f50, "vqdmulh.s%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+  {FPU_NEON_EXT_V1, 0xf3800d40, 0xff800f50, "vqrdmulh.s%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800240, 0xfe800f50, "vmlal.%24?us%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800640, 0xfe800f50, "vmlsl.%24?us%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+  {FPU_NEON_EXT_V1, 0xf2800a40, 0xfe800f50, "vmull.%24?us%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+
+  /* Element and structure load/store */
+  {FPU_NEON_EXT_V1, 0xf4a00fc0, 0xffb00fc0, "vld4.32\t%C"},
+  {FPU_NEON_EXT_V1, 0xf4a00c00, 0xffb00f00, "vld1.%6-7S2\t%C"},
+  {FPU_NEON_EXT_V1, 0xf4a00d00, 0xffb00f00, "vld2.%6-7S2\t%C"},
+  {FPU_NEON_EXT_V1, 0xf4a00e00, 0xffb00f00, "vld3.%6-7S2\t%C"},
+  {FPU_NEON_EXT_V1, 0xf4a00f00, 0xffb00f00, "vld4.%6-7S2\t%C"},
+  {FPU_NEON_EXT_V1, 0xf4000200, 0xff900f00, "v%21?ls%21?dt1.%6-7S3\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000300, 0xff900f00, "v%21?ls%21?dt2.%6-7S2\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000400, 0xff900f00, "v%21?ls%21?dt3.%6-7S2\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000500, 0xff900f00, "v%21?ls%21?dt3.%6-7S2\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000600, 0xff900f00, "v%21?ls%21?dt1.%6-7S3\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000700, 0xff900f00, "v%21?ls%21?dt1.%6-7S3\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000800, 0xff900f00, "v%21?ls%21?dt2.%6-7S2\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000900, 0xff900f00, "v%21?ls%21?dt2.%6-7S2\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000a00, 0xff900f00, "v%21?ls%21?dt1.%6-7S3\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4000000, 0xff900e00, "v%21?ls%21?dt4.%6-7S2\t%A"},
+  {FPU_NEON_EXT_V1, 0xf4800000, 0xff900300, "v%21?ls%21?dt1.%10-11S2\t%B"},
+  {FPU_NEON_EXT_V1, 0xf4800100, 0xff900300, "v%21?ls%21?dt2.%10-11S2\t%B"},
+  {FPU_NEON_EXT_V1, 0xf4800200, 0xff900300, "v%21?ls%21?dt3.%10-11S2\t%B"},
+  {FPU_NEON_EXT_V1, 0xf4800300, 0xff900300, "v%21?ls%21?dt4.%10-11S2\t%B"},
+
+  /* End-of-table marker.  */
+  {0, 0, 0, 0}
+};
+
  /* Opcode tables: ARM, 16-bit Thumb, 32-bit Thumb.  All three are partially
     ordered: they must be searched linearly from the top to obtain a correct
     match.  */
@@ -410,10 +723,10 @@ static const struct opcode32 coprocessor_opcodes[] =
     %<bitfield>W         print the bitfield plus one in decimal 
     %<bitfield>x                print the bitfield in hex
     %<bitfield>X                print the bitfield as 1 hex digit without leading "0x"
-
-   %<bitnum>'c         print specified char iff bit is one
-   %<bitnum>`c         print specified char iff bit is zero
-   %<bitnum>?ab                print a if bit is one else print b
+   
+   %<bitfield>'c       print specified char iff bitfield is all ones
+   %<bitfield>`c       print specified char iff bitfield is all zeroes
+   %<bitfield>?ab...    select from array of values in big endian order
  
     %e                   print arm SMI operand (bits 0..7,8..19).
     %E                  print the LSB and WIDTH fields of a BFI or BFC instruction.
@@ -844,9 +1157,9 @@ static const struct opcode16 thumb_opcodes[] =
         %<bitfield>r    print bitfield as an ARM register
         %<bitfield>c    print bitfield as a condition code
  
-       %<bitnum>'c     print "c" iff bit is one
-       %<bitnum>`c     print "c" iff bit is zero
-       %<bitnum>?ab    print "a" if bit is one, else "b"
+       %<bitfield>'c   print specified char iff bitfield is all ones
+       %<bitfield>`c   print specified char iff bitfield is all zeroes
+       %<bitfield>?ab... select from array of values in big endian order
  
     With one exception at the bottom (done because BL and BLX(1) need
     to come dead last), this table was machine-sorted first in
@@ -1149,6 +1462,43 @@ get_arm_regnames (int option, const char **setname, const char **setdescription,
    return 16;
  }
  
+/* Decode a bitfield of the form matching regexp (N(-N)?,)*N(-N)?.
+   Each N or N-M names bit N, or bits N through M inclusive, of INSN.
+   When several comma-separated ranges are given, the first range
+   supplies the least significant bits of the result and each later
+   range is placed immediately above the ones before it.  Calls abort
+   if the second number of a range is smaller than the first.
+
+   Returns pointer to following character of the format string and
+   fills in *VALUEP and *WIDTHP with the extracted value and number of
+   bits extracted.  WIDTHP can be NULL.  */
+
+static const char *
+arm_decode_bitfield (const char *ptr, unsigned long insn,
+                    unsigned long *valuep, int *widthp)
+{
+  unsigned long value = 0;
+  int width = 0;  /* Number of bits gathered into VALUE so far.  */
+  
+  do 
+    {
+      int start, end;
+      int bits;
+
+      for (start = 0; *ptr >= '0' && *ptr <= '9'; ptr++)
+       start = start * 10 + *ptr - '0';
+      if (*ptr == '-')
+       for (end = 0, ptr++; *ptr >= '0' && *ptr <= '9'; ptr++)
+         end = end * 10 + *ptr - '0';
+      else
+       end = start;  /* A lone number selects a single bit.  */
+      bits = end - start;  /* Range width, minus one.  */
+      if (bits < 0)
+       abort ();
+      value |= ((insn >> start) & ((2ul << bits) - 1)) << width;
+      width += bits + 1;  /* The next range goes above this one.  */
+    }
+  while (*ptr++ == ',');  /* A comma introduces another range.  */
+  *valuep = value;
+  if (widthp)
+    *widthp = width;
+  return ptr - 1;  /* Undo the increment made by the final loop test.  */
+}
+
  static void
  arm_decode_shift (long given, fprintf_ftype func, void *stream)
  {
@@ -1185,7 +1535,7 @@ arm_decode_shift (long given, fprintf_ftype func, void *stream)
     recognised coprocessor instruction.  */
  
  static bfd_boolean
-print_insn_coprocessor (struct disassemble_info *info, long given,
+print_insn_coprocessor (bfd_vma pc, struct disassemble_info *info, long given,
                         bfd_boolean thumb)
  {
    const struct opcode32 *insn;
@@ -1265,6 +1615,46 @@ print_insn_coprocessor (struct disassemble_info *info, long given,
                         }
                       break;
  
+                   case 'B':
+                     {
+                       int regno = ((given >> 12) & 0xf) | ((given >> (22 - 4)) & 0x10);
+                       int offset = (given >> 1) & 0x3f;
+                       
+                       if (offset == 1)
+                         func (stream, "{d%d}", regno);
+                       else if (regno + offset > 32)
+                         func (stream, "{d%d-<overflow reg d%d>}", regno, regno + offset - 1);
+                       else
+                         func (stream, "{d%d-d%d}", regno, regno + offset - 1);
+                     }
+                     break;
+                     
+                   case 'C':
+                     {
+                       int rn = (given >> 16) & 0xf;
+                       int offset = (given & 0xff) * 4;
+                       int add = (given >> 23) & 1;
+                       
+                       func (stream, "[%s", arm_regnames[rn]);
+                       
+                       if (offset)
+                         {
+                           if (!add)
+                             offset = -offset;
+                           func (stream, ", #%d", offset);
+                         }
+                       func (stream, "]");
+                       if (rn == 15)
+                         {
+                           func (stream, "\t; ");
+                            /* FIXME: Unsure if info->bytes_per_chunk is the
+                               right thing to use here.  */
+                           info->print_address_func (offset + pc
+                              + info->bytes_per_chunk * 2, info);
+                         }
+                     }
+                     break;
+      
                     case 'c':
                       func (stream, "%s",
                             arm_conditional [(given >> 28) & 0xf]);
@@ -1360,206 +1750,158 @@ print_insn_coprocessor (struct disassemble_info *info, long given,
                     case '0': case '1': case '2': case '3': case '4':
                     case '5': case '6': case '7': case '8': case '9':
                       {
-                       int bitstart = *c++ - '0';
-                       int bitend = 0;
-                       while (*c >= '0' && *c <= '9')
-                         bitstart = (bitstart * 10) + *c++ - '0';
+                       int width;
+                       unsigned long value;
+
+                       c = arm_decode_bitfield (c, given, &value, &width);
  
                         switch (*c)
                           {
-                         case '-':
-                           c++;
-
-                           while (*c >= '0' && *c <= '9')
-                             bitend = (bitend * 10) + *c++ - '0';
-
-                           if (!bitend)
-                             abort ();
-
-                           switch (*c)
-                             {
-                             case 'r':
-                               {
-                                 long reg;
-
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
+                         case 'r':
+                           func (stream, "%s", arm_regnames[value]);
+                           break;
+                         case 'D':
+                           func (stream, "d%ld", value);
+                           break;
+                         case 'Q':
+                           if (value & 1)
+                             func (stream, "<illegal reg q%ld.5>", value >> 1);
+                           else
+                             func (stream, "q%ld", value >> 1);
+                           break;
+                         case 'd':
+                           func (stream, "%ld", value);
+                           break;
+                          case 'k':
+                            {
+                              int from = (given & (1 << 7)) ? 32 : 16;
+                              func (stream, "%ld", from - value);
+                            }
+                            break;
+                            
+                         case 'f':
+                           if (value > 7)
+                             func (stream, "#%s", arm_fp_const[value & 7]);
+                           else
+                             func (stream, "f%ld", value);
+                           break;
  
-                                 func (stream, "%s", arm_regnames[reg]);
-                               }
-                               break;
-                             case 'd':
-                               {
-                                 long reg;
+                         case 'w':
+                           if (width == 2)
+                             func (stream, "%s", iwmmxt_wwnames[value]);
+                           else
+                             func (stream, "%s", iwmmxt_wwssnames[value]);
+                           break;
  
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
+                         case 'g':
+                           func (stream, "%s", iwmmxt_regnames[value]);
+                           break;
+                         case 'G':
+                           func (stream, "%s", iwmmxt_cregnames[value]);
+                           break;
+                         case '`':
+                           c++;
+                           if (value == 0)
+                             func (stream, "%c", *c);
+                           break;
+                         case '\'':
+                           c++;
+                           if (value == ((1ul << width) - 1))
+                             func (stream, "%c", *c);
+                           break;
+                         case '?':
+                           func (stream, "%c", c[(1 << width) - (int)value]);
+                           c += 1 << width;
+                           break;
+                         default:
+                           abort ();
+                         }
+                       break;
  
-                                 func (stream, "%ld", reg);
-                               }
-                               break;
-                             case 'f':
+                     case 'y':
+                     case 'z':
+                       {
+                         int single = *c++ == 'y';
+                         int regno;
+                         
+                         switch (*c)
+                           {
+                           case '4': /* Sm pair */
+                             func (stream, "{");
+                             /* Fall through.  */
+                           case '0': /* Sm, Dm */
+                             regno = given & 0x0000000f;
+                             if (single)
                                 {
-                                 long reg;
-
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
-
-                                 if (reg > 7)
-                                   func (stream, "#%s",
-                                         arm_fp_const[reg & 7]);
-                                 else
-                                   func (stream, "f%ld", reg);
+                                 regno <<= 1;
+                                 regno += (given >> 5) & 1;
                                 }
-                               break;
+                              else
+                                regno += ((given >> 5) & 1) << 4;
+                             break;
  
-                             case 'w':
+                           case '1': /* Sd, Dd */
+                             regno = (given >> 12) & 0x0000000f;
+                             if (single)
                                 {
-                                 long reg;
-
-                                 if (bitstart != bitend)
-                                   {
-                                     reg = given >> bitstart;
-                                     reg &= (2 << (bitend - bitstart)) - 1;
-                                     if (bitend - bitstart == 1)
-                                       func (stream, "%s", iwmmxt_wwnames[reg]);
-                                     else
-                                       func (stream, "%s", iwmmxt_wwssnames[reg]);
-                                   }
-                                 else
-                                   {
-                                     reg = (((given >> 8)  & 0x1) |
-                                            ((given >> 22) & 0x1));
-                                     func (stream, "%s", iwmmxt_wwnames[reg]);
-                                   }
+                                 regno <<= 1;
+                                 regno += (given >> 22) & 1;
                                 }
-                               break;
+                              else
+                                regno += ((given >> 22) & 1) << 4;
+                             break;
  
-                             case 'g':
+                           case '2': /* Sn, Dn */
+                             regno = (given >> 16) & 0x0000000f;
+                             if (single)
                                 {
-                                 long reg;
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
-                                 func (stream, "%s", iwmmxt_regnames[reg]);
+                                 regno <<= 1;
+                                 regno += (given >> 7) & 1;
                                 }
-                               break;
-
-                             case 'G':
+                              else
+                                regno += ((given >> 7) & 1) << 4;
+                             break;
+                             
+                           case '3': /* List */
+                             func (stream, "{");
+                             regno = (given >> 12) & 0x0000000f;
+                             if (single)
                                 {
-                                 long reg;
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
-                                 func (stream, "%s", iwmmxt_cregnames[reg]);
+                                 regno <<= 1;
+                                 regno += (given >> 22) & 1;
                                 }
-                               break;
+                              else
+                                regno += ((given >> 22) & 1) << 4;
+                             break;
+                             
+                           default:
+                             abort ();
+                           }
  
-                             default:
-                               abort ();
-                             }
-                           break;
+                         func (stream, "%c%d", single ? 's' : 'd', regno);
  
-                         case 'y':
-                         case 'z':
+                         if (*c == '3')
                             {
-                             int single = *c == 'y';
-                             int regno;
-
-                             switch (bitstart)
-                               {
-                               case 4: /* Sm pair */
-                                 func (stream, "{");
-                                 /* Fall through.  */
-                               case 0: /* Sm, Dm */
-                                 regno = given & 0x0000000f;
-                                 if (single)
-                                   {
-                                     regno <<= 1;
-                                     regno += (given >> 5) & 1;
-                                   }
-                                 break;
-
-                               case 1: /* Sd, Dd */
-                                 regno = (given >> 12) & 0x0000000f;
-                                 if (single)
-                                   {
-                                     regno <<= 1;
-                                     regno += (given >> 22) & 1;
-                                   }
-                                 break;
-
-                               case 2: /* Sn, Dn */
-                                 regno = (given >> 16) & 0x0000000f;
-                                 if (single)
-                                   {
-                                     regno <<= 1;
-                                     regno += (given >> 7) & 1;
-                                   }
-                                 break;
-
-                               case 3: /* List */
-                                 func (stream, "{");
-                                 regno = (given >> 12) & 0x0000000f;
-                                 if (single)
-                                   {
-                                     regno <<= 1;
-                                     regno += (given >> 22) & 1;
-                                   }
-                                 break;
-
-
-                               default:
-                                 abort ();
-                               }
-
-                             func (stream, "%c%d", single ? 's' : 'd', regno);
-
-                             if (bitstart == 3)
+                             int count = given & 0xff;
+                             
+                             if (single == 0)
+                               count >>= 1;
+                             
+                             if (--count)
                                 {
-                                 int count = given & 0xff;
-
-                                 if (single == 0)
-                                   count >>= 1;
-
-                                 if (--count)
-                                   {
-                                     func (stream, "-%c%d",
-                                           single ? 's' : 'd',
-                                           regno + count);
-                                   }
-
-                                 func (stream, "}");
+                                 func (stream, "-%c%d",
+                                       single ? 's' : 'd',
+                                       regno + count);
                                 }
-                             else if (bitstart == 4)
-                               func (stream, ", %c%d}", single ? 's' : 'd',
-                                     regno + 1);
-
-                             break;
+                             
+                             func (stream, "}");
                             }
-
-                           break;
-
-                         case '`':
-                           c++;
-                           if ((given & (1 << bitstart)) == 0)
-                             func (stream, "%c", *c);
-                           break;
-                         case '\'':
-                           c++;
-                           if ((given & (1 << bitstart)) != 0)
-                             func (stream, "%c", *c);
-                           break;
-                         case '?':
-                           ++c;
-                           if ((given & (1 << bitstart)) != 0)
-                             func (stream, "%c", *c++);
-                           else
-                             func (stream, "%c", *++c);
-                           break;
-                         default:
-                           abort ();
-                         }
+                         else if (*c == '4')
+                           func (stream, ", %c%d}", single ? 's' : 'd',
+                                 regno + 1);
+                       }
                         break;
-
+                           
                       case 'L':
                         switch (given & 0x00400100)
                           {
@@ -1712,6 +2054,436 @@ print_arm_address (bfd_vma pc, struct disassemble_info *info, long given)
      }
  }
  
+/* Print one Neon instruction on INFO->STREAM.  GIVEN is the instruction
+   word; if THUMB, it is first rewritten into the equivalent ARM encoding.
+   Return TRUE if an entry of neon_opcodes matched, FALSE otherwise (also
+   for the invalid index/alignment encodings rejected under %B).  */
+static bfd_boolean
+print_insn_neon (struct disassemble_info *info, long given, bfd_boolean thumb)
+{
+  const struct opcode32 *insn;
+  void *stream = info->stream;
+  fprintf_ftype func = info->fprintf_func;
+
+  if (thumb)
+    {
+      if ((given & 0xef000000) == 0xef000000)
+       {
+         /* Move bit 28 to bit 24 to translate Thumb2 to ARM encoding.  */
+         unsigned long bit28 = given & (1 << 28);
+
+         given &= 0x00ffffff;
+         if (bit28)
+            given |= 0xf3000000;
+          else
+           given |= 0xf2000000;
+       }
+      else if ((given & 0xff000000) == 0xf9000000)
+       given ^= 0xf9000000 ^ 0xf4000000;  /* Top byte 0xf9 -> 0xf4.  */
+      else
+       return FALSE;
+    }
+  /* The first table entry whose mask/value pair matches GIVEN is used.  */
+  for (insn = neon_opcodes; insn->assembler; insn++)
+    {
+      if ((given & insn->mask) == insn->value)
+       {
+         const char *c;
+         /* Copy the entry's template, expanding each %-escape in turn.  */
+         for (c = insn->assembler; *c; c++)
+           {
+             if (*c == '%')
+               {
+                 switch (*++c)
+                   {
+                   case '%':
+                     func (stream, "%%");
+                     break;
+                   /* %A: "{<D register list>}, [Rn{, :align}]" then "!" or ", Rm".  */
+                   case 'A':
+                     {
+                       static const unsigned char enc[16] = 
+                       {
+                         0x4, 0x14, /* st4 0,1 */
+                         0x4, /* st1 2 */
+                         0x4, /* st2 3 */
+                         0x3, /* st3 4 */
+                         0x13, /* st3 5 */
+                         0x3, /* st1 6 */
+                         0x1, /* st1 7 */
+                         0x2, /* st2 8 */
+                         0x12, /* st2 9 */
+                         0x2, /* st1 10 */
+                         0, 0, 0, 0, 0
+                       };
+                       int rd = ((given >> 12) & 0xf) | (((given >> 22) & 1) << 4);
+                       int rn = ((given >> 16) & 0xf);
+                       int rm = ((given >> 0) & 0xf);
+                       int align = ((given >> 4) & 0x3);
+                       int type = ((given >> 8) & 0xf);
+                       int n = enc[type] & 0xf;  /* Number of registers listed.  */
+                       int stride = (enc[type] >> 4) + 1;  /* Register spacing.  */
+                       int ix;
+
+                       func (stream, "{");
+                       if (stride > 1)
+                         for (ix = 0; ix != n; ix++)
+                           func (stream, "%sd%d", ix ? "," : "", rd + ix * stride);
+                       else if (n == 1)
+                         func (stream, "d%d", rd);
+                       else
+                         func (stream, "d%d-d%d", rd, rd + n - 1);
+                       func (stream, "}, [%s", arm_regnames[rn]);
+                       if (align)
+                         func (stream, ", :%d", 32 << align);
+                       func (stream, "]");
+                       if (rm == 0xd)
+                         func (stream, "!");
+                       else if (rm != 0xf)
+                         func (stream, ", %s", arm_regnames[rm]);
+                     }
+                     break;
+                   /* %B: "{dN[idx],...}, [Rn{, :align}]" then "!" or ", Rm".  */
+                   case 'B':
+                     {
+                       int rd = ((given >> 12) & 0xf) | (((given >> 22) & 1) << 4);
+                       int rn = ((given >> 16) & 0xf);
+                       int rm = ((given >> 0) & 0xf);
+                       int idx_align = ((given >> 4) & 0xf);
+                        int align = 0;
+                       int size = ((given >> 10) & 0x3);
+                       int idx = idx_align >> (size + 1);
+                        int length = ((given >> 8) & 3) + 1;
+                        int stride = 1;
+                        int i;
+
+                        if (length > 1 && size > 0)
+                          stride = (idx_align & (1 << size)) ? 2 : 1;
+
+                        switch (length)
+                          {
+                          case 1:
+                            {
+                              int amask = (1 << size) - 1;
+                              if ((idx_align & (1 << size)) != 0)
+                                return FALSE;
+                              if (size > 0)
+                                {
+                                  if ((idx_align & amask) == amask)
+                                    align = 8 << size;
+                                  else if ((idx_align & amask) != 0)
+                                    return FALSE;
+                                }
+                              }
+                            break;
+
+                          case 2:
+                            if (size == 2 && (idx_align & 2) != 0)
+                              return FALSE;
+                            align = (idx_align & 1) ? 16 << size : 0;
+                            break;
+
+                          case 3:
+                            if ((size == 2 && (idx_align & 3) != 0)
+                                || (idx_align & 1) != 0)
+                              return FALSE;
+                            break;
+
+                          case 4:
+                            if (size == 2)
+                              {
+                                if ((idx_align & 3) == 3)
+                                  return FALSE;
+                                align = (idx_align & 3) * 64;
+                              }
+                            else
+                              align = (idx_align & 1) ? 32 << size : 0;
+                            break;
+
+                          default:
+                            abort ();
+                          }
+
+                       func (stream, "{");
+                        for (i = 0; i < length; i++)
+                          func (stream, "%sd%d[%d]", (i == 0) ? "" : ",",
+                            rd + i * stride, idx);
+                        func (stream, "}, [%s", arm_regnames[rn]);
+                       if (align)
+                         func (stream, ", :%d", align);
+                       func (stream, "]");
+                       if (rm == 0xd)
+                         func (stream, "!");
+                       else if (rm != 0xf)
+                         func (stream, ", %s", arm_regnames[rm]);
+                     }
+                     break;
+                   /* %C: "{dN[],...}, [Rn{, :align}]" then "!" or ", Rm".  */
+                   case 'C':
+                     {
+                       int rd = ((given >> 12) & 0xf) | (((given >> 22) & 1) << 4);
+                       int rn = ((given >> 16) & 0xf);
+                       int rm = ((given >> 0) & 0xf);
+                       int align = ((given >> 4) & 0x1);
+                       int size = ((given >> 6) & 0x3);
+                       int type = ((given >> 8) & 0x3);
+                       int n = type + 1;
+                       int stride = ((given >> 5) & 0x1);
+                       int ix;
+
+                       if (stride && (n == 1))
+                         n++;
+                       else
+                         stride++;
+
+                       func (stream, "{");
+                       if (stride > 1)
+                         for (ix = 0; ix != n; ix++)
+                           func (stream, "%sd%d[]", ix ? "," : "", rd + ix * stride);
+                       else if (n == 1)
+                         func (stream, "d%d[]", rd);
+                       else
+                         func (stream, "d%d[]-d%d[]", rd, rd + n - 1);
+                       func (stream, "}, [%s", arm_regnames[rn]);
+                       if (align)
+                         {
+                            int align = (8 * (type + 1)) << size;
+                            if (type == 3)
+                              align = (size > 1) ? align >> 1 : align;
+                           if (type == 2 || (type == 0 && !size))
+                             func (stream, ", :<bad align %d>", align);
+                           else
+                             func (stream, ", :%d", align);
+                         }
+                       func (stream, "]");
+                       if (rm == 0xd)
+                         func (stream, "!");
+                       else if (rm != 0xf)
+                         func (stream, ", %s", arm_regnames[rm]);
+                     }
+                     break;
+                   /* %D: scalar "dN[x]"; bits 20-21 decide the register/index split.  */
+                   case 'D':
+                     {
+                       int raw_reg = (given & 0xf) | ((given >> 1) & 0x10);
+                       int size = (given >> 20) & 3;
+                       int reg = raw_reg & ((4 << size) - 1);
+                       int ix = raw_reg >> size >> 2;
+
+                       func (stream, "d%d[%d]", reg, ix);
+                     }
+                     break;
+
+                   case 'E':
+                     /* Neon encoded constant for mov, mvn, vorr, vbic */
+                     {
+                       int bits = 0;
+                       int cmode = (given >> 8) & 0xf;
+                       int op = (given >> 5) & 0x1;
+                       unsigned long value = 0, hival = 0;
+                       unsigned shift;
+                        int size = 0;
+
+                       bits |= ((given >> 24) & 1) << 7;
+                       bits |= ((given >> 16) & 7) << 4;
+                       bits |= ((given >> 0) & 15) << 0;
+
+                       if (cmode < 8)
+                         {
+                           shift = (cmode >> 1) & 3;
+                           value = (unsigned long)bits << (8 * shift);
+                            size = 32;
+                         }
+                       else if (cmode < 12)
+                         {
+                           shift = (cmode >> 1) & 1;
+                           value = (unsigned long)bits << (8 * shift);
+                            size = 16;
+                         }
+                       else if (cmode < 14)
+                         {
+                           shift = (cmode & 1) + 1;
+                           value = (unsigned long)bits << (8 * shift);
+                           value |= (1ul << (8 * shift)) - 1;
+                            size = 32;
+                         }
+                       else if (cmode == 14)
+                         {
+                           if (op)
+                             {
+                               /* bit replication into bytes */
+                               int ix;
+                               unsigned long mask;
+
+                               value = 0;
+                                hival = 0;
+                               for (ix = 7; ix >= 0; ix--)
+                                 {
+                                   mask = ((bits >> ix) & 1) ? 0xff : 0;
+                                    if (ix <= 3)
+                                     value = (value << 8) | mask;
+                                    else
+                                      hival = (hival << 8) | mask;
+                                 }
+                                size = 64;
+                             }
+                            else
+                              {
+                                /* byte replication */
+                                value = (unsigned long)bits;
+                                size = 8;
+                              }
+                         }
+                       else if (!op)
+                         {
+                           /* floating point encoding */
+                           int tmp;
+
+                           value = (unsigned long)(bits & 0x7f) << (24 - 6);
+                           value |= (unsigned long)(bits & 0x80) << 24;
+                           tmp = bits & 0x40 ? 0x3c : 0x40;
+                           value |= (unsigned long)tmp << 24;
+                            size = 32;
+                         }
+                       else
+                         {
+                           func (stream, "<illegal constant %.8x:%x:%x>",
+                                  bits, cmode, op);
+                            size = 32;
+                           break;
+                         }
+                        switch (size)
+                          {
+                          case 8:
+                           func (stream, "#%ld\t; 0x%.2lx", value, value);
+                            break;
+
+                          case 16:
+                            func (stream, "#%ld\t; 0x%.4lx", value, value);
+                            break;
+
+                          case 32:
+                            func (stream, "#%ld\t; 0x%.8lx", value, value);
+                            break;
+
+                          case 64:
+                            func (stream, "#0x%.8lx%.8lx", hival, value);
+                            break;
+
+                          default:
+                            abort ();
+                          }
+                     }
+                     break;
+                   /* %F: "{dN}" or "{dN-dM}", M = N + bits 8-9, N from bits 16-19 and 7.  */
+                   case 'F':
+                     {
+                       int regno = ((given >> 16) & 0xf) | ((given >> (7 - 4)) & 0x10);
+                       int num = (given >> 8) & 0x3;
+
+                       if (!num)
+                         func (stream, "{d%d}", regno);
+                       else if (num + regno >= 32)
+                         func (stream, "{d%d-<overflow reg d%d>}", regno, regno + num);
+                       else
+                         func (stream, "{d%d-d%d}", regno, regno + num);
+                     }
+                     break;
+
+                   /* %<bitfield><c>: arm_decode_bitfield supplies VALUE and WIDTH; <c> picks the format.  */
+                   case '0': case '1': case '2': case '3': case '4':
+                   case '5': case '6': case '7': case '8': case '9':
+                     {
+                       int width;
+                       unsigned long value;
+
+                       c = arm_decode_bitfield (c, given, &value, &width);
+
+                       switch (*c)
+                         {
+                         case 'r':
+                           func (stream, "%s", arm_regnames[value]);
+                           break;
+                         case 'd':
+                           func (stream, "%ld", value);
+                           break;
+                         case 'e':
+                           func (stream, "%ld", (1ul << width) - value);
+                           break;
+
+                         case 'S':
+                         case 'T':
+                         case 'U':
+                           /* various width encodings */
+                           {
+                             int base = 8 << (*c - 'S'); /* 8,16 or 32 */
+                             int limit;
+                             unsigned low, high;
+
+                             c++;
+                             if (*c >= '0' && *c <= '9')
+                               limit = *c - '0';
+                             else if (*c >= 'a' && *c <= 'f')
+                               limit = *c - 'a' + 10;
+                             else
+                               abort ();
+                             low = limit >> 2;
+                             high = limit & 3;
+
+                             if (value < low || value > high)
+                               func (stream, "<illegal width %d>", base << value);
+                             else
+                               func (stream, "%d", base << value);
+                           }
+                           break;
+                         case 'R':
+                           if (given & (1 << 6))
+                             goto Q;
+                           /* FALLTHROUGH */
+                         case 'D':
+                           func (stream, "d%ld", value);
+                           break;
+                         case 'Q':
+                         Q:
+                           if (value & 1)
+                             func (stream, "<illegal reg q%ld.5>", value >> 1);
+                           else
+                             func (stream, "q%ld", value >> 1);
+                           break;
+
+                         case '`':
+                           c++;
+                           if (value == 0)
+                             func (stream, "%c", *c);
+                           break;
+                         case '\'':
+                           c++;
+                           if (value == ((1ul << width) - 1))
+                             func (stream, "%c", *c);
+                           break;
+                         case '?':
+                           func (stream, "%c", c[(1 << width) - (int)value]);
+                           c += 1 << width;
+                           break;
+                         default:
+                           abort ();
+                         }
+                       break;
+
+                     default:
+                       abort ();
+                     }
+                   }
+               }
+             else
+               func (stream, "%c", *c);
+           }
+         return TRUE;
+       }
+    }
+  return FALSE;
+}
+
  /* Print one ARM instruction from PC on INFO->STREAM.  */
  
  static void
@@ -1721,7 +2493,10 @@ print_insn_arm (bfd_vma pc, struct disassemble_info *info, long given)
    void *stream = info->stream;
    fprintf_ftype func = info->fprintf_func;
  
-  if (print_insn_coprocessor (info, given, FALSE))
+  if (print_insn_coprocessor (pc, info, given, FALSE))
+    return;
+
+  if (print_insn_neon (info, given, FALSE))
      return;
  
    for (insn = arm_opcodes; insn->assembler; insn++)
@@ -1964,102 +2739,51 @@ print_insn_arm (bfd_vma pc, struct disassemble_info *info, long given)
                     case '0': case '1': case '2': case '3': case '4':
                     case '5': case '6': case '7': case '8': case '9':
                       {
-                       int bitstart = *c++ - '0';
-                       int bitend = 0;
-                       while (*c >= '0' && *c <= '9')
-                         bitstart = (bitstart * 10) + *c++ - '0';
+                       int width;
+                       unsigned long value;
  
+                       c = arm_decode_bitfield (c, given, &value, &width);
+                       
                         switch (*c)
                           {
-                         case '-':
-                           c++;
-
-                           while (*c >= '0' && *c <= '9')
-                             bitend = (bitend * 10) + *c++ - '0';
-
-                           if (!bitend)
-                             abort ();
-
-                           switch (*c)
-                             {
-                             case 'r':
-                               {
-                                 long reg;
-
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
-
-                                 func (stream, "%s", arm_regnames[reg]);
-                               }
-                               break;
-                             case 'd':
-                               {
-                                 long reg;
-
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
-
-                                 func (stream, "%ld", reg);
-                               }
-                               break;
-                             case 'W':
-                               {
-                                 long reg;
-                                 
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
-                                 
-                                 func (stream, "%ld", reg + 1);
-                               }
-                               break;
-                             case 'x':
-                               {
-                                 long reg;
-
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
-
-                                 func (stream, "0x%08lx", reg);
-
-                                 /* Some SWI instructions have special
-                                    meanings.  */
-                                 if ((given & 0x0fffffff) == 0x0FF00000)
-                                   func (stream, "\t; IMB");
-                                 else if ((given & 0x0fffffff) == 0x0FF00001)
-                                   func (stream, "\t; IMBRange");
-                               }
-                               break;
-                             case 'X':
-                               {
-                                 long reg;
-
-                                 reg = given >> bitstart;
-                                 reg &= (2 << (bitend - bitstart)) - 1;
-
-                                 func (stream, "%01lx", reg & 0xf);
-                               }
-                               break;
-                             default:
-                               abort ();
-                             }
+                         case 'r':
+                           func (stream, "%s", arm_regnames[value]);
+                           break;
+                         case 'd':
+                           func (stream, "%ld", value);
+                           break;
+                         case 'b':
+                           func (stream, "%ld", value * 8);
+                           break;
+                         case 'W':
+                           func (stream, "%ld", value + 1);
+                           break;
+                         case 'x':
+                           func (stream, "0x%08lx", value);
+
+                           /* Some SWI instructions have special
+                              meanings.  */
+                           if ((given & 0x0fffffff) == 0x0FF00000)
+                             func (stream, "\t; IMB");
+                           else if ((given & 0x0fffffff) == 0x0FF00001)
+                             func (stream, "\t; IMBRange");
+                           break;
+                         case 'X':
+                           func (stream, "%01lx", value & 0xf);
                             break;
-
                           case '`':
                             c++;
-                           if ((given & (1 << bitstart)) == 0)
+                           if (value == 0)
                               func (stream, "%c", *c);
                             break;
                           case '\'':
                             c++;
-                           if ((given & (1 << bitstart)) != 0)
+                           if (value == ((1ul << width) - 1))
                               func (stream, "%c", *c);
                             break;
                           case '?':
-                           ++c;
-                           if ((given & (1 << bitstart)) != 0)
-                             func (stream, "%c", *c++);
-                           else
-                             func (stream, "%c", *++c);
+                           func (stream, "%c", c[(1 << width) - (int)value]);
+                           c += 1 << width;
                             break;
                           default:
                             abort ();
@@ -2373,7 +3097,10 @@ print_insn_thumb32 (bfd_vma pc, struct disassemble_info *info, long given)
    void *stream = info->stream;
    fprintf_ftype func = info->fprintf_func;
  
-  if (print_insn_coprocessor (info, given, TRUE))
+  if (print_insn_coprocessor (pc, info, given, TRUE))
+    return;
+
+  if (print_insn_neon (info, given, TRUE))
      return;
  
    for (insn = thumb32_opcodes; insn->assembler; insn++)
@@ -2752,30 +3479,15 @@ print_insn_thumb32 (bfd_vma pc, struct disassemble_info *info, long given)
               case '0': case '1': case '2': case '3': case '4':
               case '5': case '6': case '7': case '8': case '9':
                 {
-                 int bitstart = *c++ - '0';
-                 int bitend = 0;
-                 unsigned int val;
-                 while (*c >= '0' && *c <= '9')
-                   bitstart = (bitstart * 10) + *c++ - '0';
-
-                 if (*c == '-')
-                   {
-                     c++;
-                     while (*c >= '0' && *c <= '9')
-                       bitend = (bitend * 10) + *c++ - '0';
-                     if (!bitend)
-                       abort ();
-
-                     val = given >> bitstart;
-                     val &= (2 << (bitend - bitstart)) - 1;
-                   }
-                 else
-                   val = (given >> bitstart) & 1;
+                 int width;
+                 unsigned long val;
  
+                 c = arm_decode_bitfield (c, given, &val, &width);
+                       
                   switch (*c)
                     {
-                   case 'd': func (stream, "%u", val); break;
-                   case 'W': func (stream, "%u", val * 4); break;
+                   case 'd': func (stream, "%lu", val); break;
+                   case 'W': func (stream, "%lu", val * 4); break;
                     case 'r': func (stream, "%s", arm_regnames[val]); break;
  
                     case 'c':
@@ -2786,20 +3498,20 @@ print_insn_thumb32 (bfd_vma pc, struct disassemble_info *info, long given)
                       break;
  
                     case '\'':
-                     if (val)
-                       func (stream, "%c", c[1]);
                       c++;
+                     if (val == ((1ul << width) - 1))
+                       func (stream, "%c", *c);
                       break;
                       
                     case '`':
-                     if (!val)
-                       func (stream, "%c", c[1]);
                       c++;
+                     if (val == 0)
+                       func (stream, "%c", *c);
                       break;
  
                     case '?':
-                     func (stream, "%c", val ? c[1] : c[2]);
-                     c += 2;
+                     func (stream, "%c", c[(1 << width) - (int)val]);
+                     c += 1 << width;
                       break;
  
                     default:
author	Julian Brown <julian@codesourcery.com>
	Mon, 3 Apr 2006 00:03:34 +0000 (00:03 +0000)
committer	Julian Brown <julian@codesourcery.com>
	Mon, 3 Apr 2006 00:03:34 +0000 (00:03 +0000)
ChangeLog.csl		patch \| blob \| blame \| history
binutils/readelf.c		patch \| blob \| blame \| history
gas/config/tc-arm.c		patch \| blob \| blame \| history
gas/testsuite/gas/arm/copro.d		patch \| blob \| blame \| history
gas/testsuite/gas/arm/copro.s		patch \| blob \| blame \| history
gas/testsuite/gas/arm/neon-cond.d	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-cond.s	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-cov.d	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-cov.s	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-ldst-es.d	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-ldst-es.s	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-ldst-rm.d	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-ldst-rm.s	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-omit.d	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/neon-omit.s	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/vfp1.d		patch \| blob \| blame \| history
gas/testsuite/gas/arm/vfp1_t2.d		patch \| blob \| blame \| history
gas/testsuite/gas/arm/vfp1xD.d		patch \| blob \| blame \| history
gas/testsuite/gas/arm/vfp1xD_t2.d		patch \| blob \| blame \| history
gas/testsuite/gas/arm/vfp2.d		patch \| blob \| blame \| history
gas/testsuite/gas/arm/vfp2_t2.d		patch \| blob \| blame \| history
gas/testsuite/gas/arm/vfpv3-32drs.d	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/vfpv3-32drs.s	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/vfpv3-const-conv.d	[new file with mode: 0644]	patch \| blob
gas/testsuite/gas/arm/vfpv3-const-conv.s	[new file with mode: 0644]	patch \| blob
include/opcode/arm.h		patch \| blob \| blame \| history
opcodes/arm-dis.c		patch \| blob \| blame \| history