Update copyright years.

[thirdparty/gcc.git] / gcc / config / aarch64 / aarch64-simd.md
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index fd971bf5c6debac24631f035909d5e3afbc27173..4e28cf97516df19e1d502e56c776f6b34f15c116 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1,5 +1,5 @@
  ;; Machine description for AArch64 AdvSIMD architecture.
-;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
+;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
  ;; Contributed by ARM Ltd.
  ;;
  ;; This file is part of GCC.
@@ -19,8 +19,8 @@
  ;; <http://www.gnu.org/licenses/>.
  
  (define_expand "mov<mode>"
-  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
-       (match_operand:VALL_F16 1 "general_operand" ""))]
+  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
+       (match_operand:VALL_F16 1 "general_operand"))]
    "TARGET_SIMD"
    "
    /* Force the operand into a register if it is not an
@@ -39,8 +39,8 @@
  )
  
  (define_expand "movmisalign<mode>"
-  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
-        (match_operand:VALL 1 "general_operand" ""))]
+  [(set (match_operand:VALL 0 "nonimmediate_operand")
+        (match_operand:VALL 1 "general_operand"))]
    "TARGET_SIMD"
  {
    /* This pattern is not permitted to fail during expansion: if both arguments
@@ -131,7 +131,7 @@
  
  (define_insn "*aarch64_simd_mov<VQ:mode>"
    [(set (match_operand:VQ 0 "nonimmediate_operand"
-               "=w, Umq,  m,  w, ?r, ?w, ?r, w")
+               "=w, Umn,  m,  w, ?r, ?w, ?r, w")
         (match_operand:VQ 1 "general_operand"
                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
    "TARGET_SIMD
@@ -177,34 +177,62 @@
    [(set_attr "type" "neon_store1_1reg<q>")]
  )
  
-(define_insn "load_pair<mode>"
-  [(set (match_operand:VD 0 "register_operand" "=w")
-       (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
-   (set (match_operand:VD 2 "register_operand" "=w")
-       (match_operand:VD 3 "memory_operand" "m"))]
+(define_insn "load_pair<DREG:mode><DREG2:mode>"
+  [(set (match_operand:DREG 0 "register_operand" "=w")
+       (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
+   (set (match_operand:DREG2 2 "register_operand" "=w")
+       (match_operand:DREG2 3 "memory_operand" "m"))]
    "TARGET_SIMD
     && rtx_equal_p (XEXP (operands[3], 0),
                    plus_constant (Pmode,
                                   XEXP (operands[1], 0),
-                                 GET_MODE_SIZE (<MODE>mode)))"
+                                 GET_MODE_SIZE (<DREG:MODE>mode)))"
    "ldp\\t%d0, %d2, %1"
    [(set_attr "type" "neon_ldp")]
  )
  
-(define_insn "store_pair<mode>"
-  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
-       (match_operand:VD 1 "register_operand" "w"))
-   (set (match_operand:VD 2 "memory_operand" "=m")
-       (match_operand:VD 3 "register_operand" "w"))]
+(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
+  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
+       (match_operand:DREG 1 "register_operand" "w"))
+   (set (match_operand:DREG2 2 "memory_operand" "=m")
+       (match_operand:DREG2 3 "register_operand" "w"))]
    "TARGET_SIMD
     && rtx_equal_p (XEXP (operands[2], 0),
                    plus_constant (Pmode,
                                   XEXP (operands[0], 0),
-                                 GET_MODE_SIZE (<MODE>mode)))"
+                                 GET_MODE_SIZE (<DREG:MODE>mode)))"
    "stp\\t%d1, %d3, %0"
    [(set_attr "type" "neon_stp")]
  )
  
+(define_insn "load_pair<VQ:mode><VQ2:mode>"
+  [(set (match_operand:VQ 0 "register_operand" "=w")
+       (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
+   (set (match_operand:VQ2 2 "register_operand" "=w")
+       (match_operand:VQ2 3 "memory_operand" "m"))]
+  "TARGET_SIMD
+    && rtx_equal_p (XEXP (operands[3], 0),
+                   plus_constant (Pmode,
+                              XEXP (operands[1], 0),
+                              GET_MODE_SIZE (<VQ:MODE>mode)))"
+  "ldp\\t%q0, %q2, %1"
+  [(set_attr "type" "neon_ldp_q")]
+)
+
+(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
+  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
+       (match_operand:VQ 1 "register_operand" "w"))
+   (set (match_operand:VQ2 2 "memory_operand" "=m")
+       (match_operand:VQ2 3 "register_operand" "w"))]
+  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
+               plus_constant (Pmode,
+                              XEXP (operands[0], 0),
+                              GET_MODE_SIZE (<VQ:MODE>mode)))"
+  "stp\\t%q1, %q3, %0"
+  [(set_attr "type" "neon_stp_q")]
+)
+
+
  (define_split
    [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
@@ -229,7 +257,7 @@
    DONE;
  })
  
-(define_expand "aarch64_split_simd_mov<mode>"
+(define_expand "@aarch64_split_simd_mov<mode>"
    [(set (match_operand:VQ 0)
          (match_operand:VQ 1))]
    "TARGET_SIMD"
@@ -391,6 +419,81 @@
  }
  )
  
+;; The fcadd and fcmla patterns are explicitly made UNSPEC due to the
+;; fact that their usage needs to guarantee that the source vectors are
+;; contiguous.  It would be wrong to describe the operation without being able
+;; to describe the permute that is also required, but even if that is done
+;; the permute would have been created as a LOAD_LANES which means the values
+;; in the registers are in the wrong order.
+(define_insn "aarch64_fcadd<rot><mode>"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+                      (match_operand:VHSDF 2 "register_operand" "w")]
+                      FCADD))]
+  "TARGET_COMPLEX"
+  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
+  [(set_attr "type" "neon_fcadd")]
+)
+
+(define_insn "aarch64_fcmla<rot><mode>"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
+                   (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
+                                  (match_operand:VHSDF 3 "register_operand" "w")]
+                                  FCMLA)))]
+  "TARGET_COMPLEX"
+  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
+  [(set_attr "type" "neon_fcmla")]
+)
+
+
+(define_insn "aarch64_fcmla_lane<rot><mode>"
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
+                   (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
+                                  (match_operand:VHSDF 3 "register_operand" "w")
+                                  (match_operand:SI 4 "const_int_operand" "n")]
+                                  FCMLA)))]
+  "TARGET_COMPLEX"
+{
+  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
+  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
+}
+  [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "aarch64_fcmla_laneq<rot>v4hf"
+  [(set (match_operand:V4HF 0 "register_operand" "=w")
+       (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
+                  (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
+                                (match_operand:V8HF 3 "register_operand" "w")
+                                (match_operand:SI 4 "const_int_operand" "n")]
+                                FCMLA)))]
+  "TARGET_COMPLEX"
+{
+  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
+  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
+}
+  [(set_attr "type" "neon_fcmla")]
+)
+
+(define_insn "aarch64_fcmlaq_lane<rot><mode>"
+  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
+       (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
+                    (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
+                                    (match_operand:<VHALF> 3 "register_operand" "w")
+                                    (match_operand:SI 4 "const_int_operand" "n")]
+                                    FCMLA)))]
+  "TARGET_COMPLEX"
+{
+  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
+  operands[4]
+    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
+  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
+}
+  [(set_attr "type" "neon_fcmla")]
+)
+
  ;; These instructions map to the __builtins for the Dot Product operations.
  (define_insn "aarch64_<sur>dot<vsi2qi>"
    [(set (match_operand:VS 0 "register_operand" "=w")
@@ -400,7 +503,7 @@
                 DOTPROD)))]
    "TARGET_DOTPROD"
    "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
-  [(set_attr "type" "neon_dot")]
+  [(set_attr "type" "neon_dot<q>")]
  )
  
  ;; These expands map to the Dot Product optab the vectorizer checks for.
@@ -452,7 +555,7 @@
      operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
      return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
    }
-  [(set_attr "type" "neon_dot")]
+  [(set_attr "type" "neon_dot<q>")]
  )
  
  (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
@@ -467,7 +570,7 @@
      operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
      return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
    }
-  [(set_attr "type" "neon_dot")]
+  [(set_attr "type" "neon_dot<q>")]
  )
  
  (define_expand "copysign<mode>3"
@@ -531,7 +634,7 @@
    [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
  )
  
-(define_insn "aarch64_rsqrte<mode>"
+(define_insn "@aarch64_rsqrte<mode>"
    [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                      UNSPEC_RSQRTE))]
@@ -539,7 +642,7 @@
    "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
    [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
  
-(define_insn "aarch64_rsqrts<mode>"
+(define_insn "@aarch64_rsqrts<mode>"
    [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
@@ -549,8 +652,8 @@
    [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
  
  (define_expand "rsqrt<mode>2"
-  [(set (match_operand:VALLF 0 "register_operand" "=w")
-       (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
+  [(set (match_operand:VALLF 0 "register_operand")
+       (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
                      UNSPEC_RSQRT))]
    "TARGET_SIMD"
  {
@@ -602,13 +705,22 @@
    [(set_attr "type" "neon_abs<q>")]
  )
  
-(define_insn "abd<mode>_3"
+;; It's tempting to represent SABD as ABS (MINUS op1 op2).
+;; This isn't accurate as ABS always treats its input as a signed value.
+;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
+;; Whereas SABD would return 192 (-64 signed) on the above example.
+;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
+(define_insn "aarch64_<su>abd<mode>_3"
    [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
-       (abs:VDQ_BHSI (minus:VDQ_BHSI
-                      (match_operand:VDQ_BHSI 1 "register_operand" "w")
-                      (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
-  "TARGET_SIMD"
-  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+       (minus:VDQ_BHSI
+         (USMAX:VDQ_BHSI
+           (match_operand:VDQ_BHSI 1 "register_operand" "w")
+           (match_operand:VDQ_BHSI 2 "register_operand" "w"))
+         (<max_opp>:VDQ_BHSI
+           (match_dup 1)
+           (match_dup 2))))]
+  "TARGET_SIMD"
+  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
    [(set_attr "type" "neon_abd<q>")]
  )
  
@@ -652,7 +764,16 @@
  ;; UABAL       tmp.8h, op1.16b, op2.16b
  ;; UADALP      op3.4s, tmp.8h
  ;; MOV         op0, op3 // should be eliminated in later passes.
-;; The signed version just uses the signed variants of the above instructions.
+;;
+;; For TARGET_DOTPROD we do:
+;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
+;; UABD        tmp2.16b, op1.16b, op2.16b
+;; UDOT        op3.4s, tmp2.16b, tmp1.16b
+;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
+;;
+;; The signed version just uses the signed variants of the above instructions
+;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
+;; unsigned.
  
  (define_expand "<sur>sadv16qi"
    [(use (match_operand:V4SI 0 "register_operand"))
@@ -661,6 +782,15 @@
     (use (match_operand:V4SI 3 "register_operand"))]
    "TARGET_SIMD"
    {
+    if (TARGET_DOTPROD)
+      {
+       rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
+       rtx abd = gen_reg_rtx (V16QImode);
+       emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
+       emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
+                                         abd, ones));
+       DONE;
+      }
      rtx reduc = gen_reg_rtx (V8HImode);
      emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
                                                operands[2]));
@@ -823,6 +953,21 @@
    [(set_attr "type" "neon_ins<q>")]
  )
  
+(define_expand "signbit<mode>2"
+  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
+   (use (match_operand:VDQSF 1 "register_operand"))]
+  "TARGET_SIMD"
+{
+  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
+  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+                                                        shift_amount);
+  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+
+  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
+                                                 shift_vector));
+  DONE;
+})
+
  (define_insn "aarch64_simd_lshr<mode>"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
         (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
@@ -841,6 +986,18 @@
    [(set_attr "type" "neon_shift_imm<q>")]
  )
  
+(define_insn "*aarch64_simd_sra<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+       (plus:VDQ_I
+          (SHIFTRT:VDQ_I
+               (match_operand:VDQ_I 1 "register_operand" "w")
+               (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
+          (match_operand:VDQ_I 3 "register_operand" "0")))]
+  "TARGET_SIMD"
+  "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
+  [(set_attr "type" "neon_shift_acc<q>")]
+)
+
  (define_insn "aarch64_simd_imm_shl<mode>"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
         (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
@@ -880,9 +1037,9 @@
  )
  
  (define_expand "ashl<mode>3"
-  [(match_operand:VDQ_I 0 "register_operand" "")
-   (match_operand:VDQ_I 1 "register_operand" "")
-   (match_operand:SI  2 "general_operand" "")]
+  [(match_operand:VDQ_I 0 "register_operand")
+   (match_operand:VDQ_I 1 "register_operand")
+   (match_operand:SI  2 "general_operand")]
   "TARGET_SIMD"
  {
    int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
@@ -927,9 +1084,9 @@
  )
  
  (define_expand "lshr<mode>3"
-  [(match_operand:VDQ_I 0 "register_operand" "")
-   (match_operand:VDQ_I 1 "register_operand" "")
-   (match_operand:SI  2 "general_operand" "")]
+  [(match_operand:VDQ_I 0 "register_operand")
+   (match_operand:VDQ_I 1 "register_operand")
+   (match_operand:SI  2 "general_operand")]
   "TARGET_SIMD"
  {
    int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
@@ -974,9 +1131,9 @@
  )
  
  (define_expand "ashr<mode>3"
-  [(match_operand:VDQ_I 0 "register_operand" "")
-   (match_operand:VDQ_I 1 "register_operand" "")
-   (match_operand:SI  2 "general_operand" "")]
+  [(match_operand:VDQ_I 0 "register_operand")
+   (match_operand:VDQ_I 1 "register_operand")
+   (match_operand:SI  2 "general_operand")]
   "TARGET_SIMD"
  {
    int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
@@ -1021,9 +1178,9 @@
  )
  
  (define_expand "vashl<mode>3"
- [(match_operand:VDQ_I 0 "register_operand" "")
-  (match_operand:VDQ_I 1 "register_operand" "")
-  (match_operand:VDQ_I 2 "register_operand" "")]
+ [(match_operand:VDQ_I 0 "register_operand")
+  (match_operand:VDQ_I 1 "register_operand")
+  (match_operand:VDQ_I 2 "register_operand")]
   "TARGET_SIMD"
  {
    emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
@@ -1035,9 +1192,9 @@
  ;; Negating individual lanes most certainly offsets the
  ;; gain from vectorization.
  (define_expand "vashr<mode>3"
- [(match_operand:VDQ_BHSI 0 "register_operand" "")
-  (match_operand:VDQ_BHSI 1 "register_operand" "")
-  (match_operand:VDQ_BHSI 2 "register_operand" "")]
+ [(match_operand:VDQ_BHSI 0 "register_operand")
+  (match_operand:VDQ_BHSI 1 "register_operand")
+  (match_operand:VDQ_BHSI 2 "register_operand")]
   "TARGET_SIMD"
  {
    rtx neg = gen_reg_rtx (<MODE>mode);
@@ -1049,9 +1206,9 @@
  
  ;; DI vector shift
  (define_expand "aarch64_ashr_simddi"
-  [(match_operand:DI 0 "register_operand" "=w")
-   (match_operand:DI 1 "register_operand" "w")
-   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:SI 2 "aarch64_shift_imm64_di")]
    "TARGET_SIMD"
    {
      /* An arithmetic shift right by 64 fills the result with copies of the sign
@@ -1065,9 +1222,9 @@
  )
  
  (define_expand "vlshr<mode>3"
- [(match_operand:VDQ_BHSI 0 "register_operand" "")
-  (match_operand:VDQ_BHSI 1 "register_operand" "")
-  (match_operand:VDQ_BHSI 2 "register_operand" "")]
+ [(match_operand:VDQ_BHSI 0 "register_operand")
+  (match_operand:VDQ_BHSI 1 "register_operand")
+  (match_operand:VDQ_BHSI 2 "register_operand")]
   "TARGET_SIMD"
  {
    rtx neg = gen_reg_rtx (<MODE>mode);
@@ -1078,9 +1235,9 @@
  })
  
  (define_expand "aarch64_lshr_simddi"
-  [(match_operand:DI 0 "register_operand" "=w")
-   (match_operand:DI 1 "register_operand" "w")
-   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (match_operand:SI 2 "aarch64_shift_imm64_di")]
    "TARGET_SIMD"
    {
      if (INTVAL (operands[2]) == 64)
@@ -1108,9 +1265,9 @@
  )
  
  (define_expand "vec_set<mode>"
-  [(match_operand:VALL_F16 0 "register_operand" "+w")
-   (match_operand:<VEL> 1 "register_operand" "w")
-   (match_operand:SI 2 "immediate_operand" "")]
+  [(match_operand:VALL_F16 0 "register_operand")
+   (match_operand:<VEL> 1 "register_operand")
+   (match_operand:SI 2 "immediate_operand")]
    "TARGET_SIMD"
    {
      HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
@@ -1249,9 +1406,9 @@
  )
  
  (define_expand "<su><maxmin>v2di3"
- [(set (match_operand:V2DI 0 "register_operand" "")
-       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
-                    (match_operand:V2DI 2 "register_operand" "")))]
+ [(set (match_operand:V2DI 0 "register_operand")
+       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
+                    (match_operand:V2DI 2 "register_operand")))]
   "TARGET_SIMD"
  {
    enum rtx_code cmp_operator;
@@ -1324,9 +1481,8 @@
     fmov\\t%d0, %1
     dup\\t%d0, %1"
    [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
-   (set_attr "simd" "yes,*,yes")
-   (set_attr "fp" "*,yes,*")
-   (set_attr "length" "4")]
+   (set_attr "length" "4")
+   (set_attr "arch" "simd,fp,simd")]
  )
  
  (define_insn "move_lo_quad_internal_<mode>"
@@ -1340,9 +1496,8 @@
     fmov\\t%d0, %1
     dup\\t%d0, %1"
    [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
-   (set_attr "simd" "yes,*,yes")
-   (set_attr "fp" "*,yes,*")
-   (set_attr "length" "4")]
+   (set_attr "length" "4")
+   (set_attr "arch" "simd,fp,simd")]
  )
  
  (define_insn "move_lo_quad_internal_be_<mode>"
@@ -1356,9 +1511,8 @@
     fmov\\t%d0, %1
     dup\\t%d0, %1"
    [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
-   (set_attr "simd" "yes,*,yes")
-   (set_attr "fp" "*,yes,*")
-   (set_attr "length" "4")]
+   (set_attr "length" "4")
+   (set_attr "arch" "simd,fp,simd")]
  )
  
  (define_insn "move_lo_quad_internal_be_<mode>"
@@ -1372,9 +1526,8 @@
     fmov\\t%d0, %1
     dup\\t%d0, %1"
    [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
-   (set_attr "simd" "yes,*,yes")
-   (set_attr "fp" "*,yes,*")
-   (set_attr "length" "4")]
+   (set_attr "length" "4")
+   (set_attr "arch" "simd,fp,simd")]
  )
  
  (define_expand "move_lo_quad_<mode>"
@@ -1424,8 +1577,8 @@
  )
  
  (define_expand "move_hi_quad_<mode>"
- [(match_operand:VQ 0 "register_operand" "")
-  (match_operand:<VHALF> 1 "register_operand" "")]
+ [(match_operand:VQ 0 "register_operand")
+  (match_operand:<VHALF> 1 "register_operand")]
   "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
@@ -1450,9 +1603,9 @@
  )
  
  (define_expand "vec_pack_trunc_<mode>"
- [(match_operand:<VNARROWD> 0 "register_operand" "")
-  (match_operand:VDN 1 "register_operand" "")
-  (match_operand:VDN 2 "register_operand" "")]
+ [(match_operand:<VNARROWD> 0 "register_operand")
+  (match_operand:VDN 1 "register_operand")
+  (match_operand:VDN 2 "register_operand")]
   "TARGET_SIMD"
  {
    rtx tempreg = gen_reg_rtx (<VDBL>mode);
@@ -1492,7 +1645,7 @@
                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
                             )))]
    "TARGET_SIMD"
-  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
+  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
    [(set_attr "type" "neon_shift_imm_long")]
  )
  
@@ -1503,12 +1656,12 @@
                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
                             )))]
    "TARGET_SIMD"
-  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
+  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
    [(set_attr "type" "neon_shift_imm_long")]
  )
  
  (define_expand "vec_unpack<su>_hi_<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "")
+  [(match_operand:<VWIDE> 0 "register_operand")
     (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
    "TARGET_SIMD"
    {
@@ -1520,8 +1673,8 @@
  )
  
  (define_expand "vec_unpack<su>_lo_<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "")
-   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
    "TARGET_SIMD"
    {
      rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
@@ -1639,9 +1792,9 @@
  )
  
  (define_expand "vec_widen_<su>mult_lo_<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "")
-   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
-   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
+   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
   "TARGET_SIMD"
   {
     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
@@ -1666,9 +1819,9 @@
  )
  
  (define_expand "vec_widen_<su>mult_hi_<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "")
-   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
-   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
+   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
   "TARGET_SIMD"
   {
     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -1733,9 +1886,9 @@
  )
  
  (define_expand "div<mode>3"
- [(set (match_operand:VHSDF 0 "register_operand" "=w")
-       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
-                 (match_operand:VHSDF 2 "register_operand" "w")))]
+ [(set (match_operand:VHSDF 0 "register_operand")
+       (div:VHSDF (match_operand:VHSDF 1 "register_operand")
+                 (match_operand:VHSDF 2 "register_operand")))]
   "TARGET_SIMD"
  {
    if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
@@ -2070,8 +2223,8 @@
  ;; other big-endian patterns their behavior is as required.
  
  (define_expand "vec_unpacks_lo_<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "")
-   (match_operand:VQ_HSF 1 "register_operand" "")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQ_HSF 1 "register_operand")]
    "TARGET_SIMD"
    {
      rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
@@ -2093,8 +2246,8 @@
  )
  
  (define_expand "vec_unpacks_hi_<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "")
-   (match_operand:VQ_HSF 1 "register_operand" "")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQ_HSF 1 "register_operand")]
    "TARGET_SIMD"
    {
      rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -2146,9 +2299,9 @@
  )
  
  (define_expand "aarch64_float_truncate_hi_<Vdbl>"
-  [(match_operand:<VDBL> 0 "register_operand" "=w")
-   (match_operand:VDF 1 "register_operand" "0")
-   (match_operand:<VWIDE> 2 "register_operand" "w")]
+  [(match_operand:<VDBL> 0 "register_operand")
+   (match_operand:VDF 1 "register_operand")
+   (match_operand:<VWIDE> 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
@@ -2205,8 +2358,9 @@
  ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
  ;; expression like:
  ;;      a = (b < c) ? b : c;
-;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
-;; either explicitly or indirectly via -ffast-math.
+;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
+;; -fno-signed-zeros are enabled either explicitly or indirectly via
+;; -ffast-math.
  ;;
  ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
  ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
@@ -2240,8 +2394,8 @@
  ;; 'across lanes' add.
  
  (define_expand "reduc_plus_scal_<mode>"
-  [(match_operand:<VEL> 0 "register_operand" "=w")
-   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
+  [(match_operand:<VEL> 0 "register_operand")
+   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
                UNSPEC_ADDV)]
    "TARGET_SIMD"
    {
@@ -2993,36 +3147,38 @@
  (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
    [(set (match_operand:GPI 0 "register_operand" "=r")
         (sign_extend:GPI
-         (vec_select:<VEL>
+         (vec_select:<VDQQH:VEL>
             (match_operand:VDQQH 1 "register_operand" "w")
             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
    "TARGET_SIMD"
    {
-    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
+    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
+                                          INTVAL (operands[2]));
      return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
    }
-  [(set_attr "type" "neon_to_gp<q>")]
+  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
  )
  
-(define_insn "*aarch64_get_lane_zero_extendsi<mode>"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-       (zero_extend:SI
-         (vec_select:<VEL>
+(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+       (zero_extend:GPI
+         (vec_select:<VDQQH:VEL>
             (match_operand:VDQQH 1 "register_operand" "w")
             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
    "TARGET_SIMD"
    {
-    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
-    return "umov\\t%w0, %1.<Vetype>[%2]";
+    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
+                                          INTVAL (operands[2]));
+    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
    }
-  [(set_attr "type" "neon_to_gp<q>")]
+  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
  )
  
  ;; Lane extraction of a value, neither sign nor zero extension
  ;; is guaranteed so upper bits should be considered undefined.
  ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
  (define_insn "aarch64_get_lane<mode>"
-  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
+  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
         (vec_select:<VEL>
           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
@@ -3059,7 +3215,7 @@
  )
  
  (define_insn "store_pair_lanes<mode>"
-  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
+  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
         (vec_concat:<VDBL>
            (match_operand:VDC 1 "register_operand" "w, r")
            (match_operand:VDC 2 "register_operand" "w, r")))]
@@ -3073,7 +3229,7 @@
  ;; In this insn, operand 1 should be low, and operand 2 the high part of the
  ;; dest vector.
  
-(define_insn "*aarch64_combinez<mode>"
+(define_insn "@aarch64_combinez<mode>"
    [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
         (vec_concat:<VDBL>
           (match_operand:VDC 1 "general_operand" "w,?r,m")
@@ -3084,11 +3240,10 @@
     fmov\t%d0, %1
     ldr\\t%d0, %1"
    [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
-   (set_attr "simd" "yes,*,yes")
-   (set_attr "fp" "*,yes,*")]
+   (set_attr "arch" "simd,fp,simd")]
  )
  
-(define_insn "*aarch64_combinez_be<mode>"
+(define_insn "@aarch64_combinez_be<mode>"
    [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
          (vec_concat:<VDBL>
           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
@@ -3099,8 +3254,7 @@
     fmov\t%d0, %1
     ldr\\t%d0, %1"
    [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
-   (set_attr "simd" "yes,*,yes")
-   (set_attr "fp" "*,yes,*")]
+   (set_attr "arch" "simd,fp,simd")]
  )
  
  (define_expand "aarch64_combine<mode>"
@@ -3115,7 +3269,7 @@
  }
  )
  
-(define_expand "aarch64_simd_combine<mode>"
+(define_expand "@aarch64_simd_combine<mode>"
    [(match_operand:<VDBL> 0 "register_operand")
     (match_operand:VDC 1 "register_operand")
     (match_operand:VDC 2 "register_operand")]
@@ -3158,9 +3312,9 @@
  
  
  (define_expand "aarch64_saddl2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VQW 1 "register_operand" "w")
-   (match_operand:VQW 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3170,9 +3324,9 @@
  })
  
  (define_expand "aarch64_uaddl2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VQW 1 "register_operand" "w")
-   (match_operand:VQW 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3182,9 +3336,9 @@
  })
  
  (define_expand "aarch64_ssubl2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VQW 1 "register_operand" "w")
-   (match_operand:VQW 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3194,9 +3348,9 @@
  })
  
  (define_expand "aarch64_usubl2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VQW 1 "register_operand" "w")
-   (match_operand:VQW 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQW 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3219,10 +3373,10 @@
  ;; <su><addsub>w<q>.
  
  (define_expand "widen_ssum<mode>3"
-  [(set (match_operand:<VDBLW> 0 "register_operand" "")
+  [(set (match_operand:<VDBLW> 0 "register_operand")
         (plus:<VDBLW> (sign_extend:<VDBLW> 
-                       (match_operand:VQW 1 "register_operand" ""))
-                     (match_operand:<VDBLW> 2 "register_operand" "")))]
+                       (match_operand:VQW 1 "register_operand"))
+                     (match_operand:<VDBLW> 2 "register_operand")))]
    "TARGET_SIMD"
    {
      rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
@@ -3236,10 +3390,10 @@
  )
  
  (define_expand "widen_ssum<mode>3"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "")
+  [(set (match_operand:<VWIDE> 0 "register_operand")
         (plus:<VWIDE> (sign_extend:<VWIDE>
-                       (match_operand:VD_BHSI 1 "register_operand" ""))
-                     (match_operand:<VWIDE> 2 "register_operand" "")))]
+                       (match_operand:VD_BHSI 1 "register_operand"))
+                     (match_operand:<VWIDE> 2 "register_operand")))]
    "TARGET_SIMD"
  {
    emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
@@ -3247,10 +3401,10 @@
  })
  
  (define_expand "widen_usum<mode>3"
-  [(set (match_operand:<VDBLW> 0 "register_operand" "")
+  [(set (match_operand:<VDBLW> 0 "register_operand")
         (plus:<VDBLW> (zero_extend:<VDBLW> 
-                       (match_operand:VQW 1 "register_operand" ""))
-                     (match_operand:<VDBLW> 2 "register_operand" "")))]
+                       (match_operand:VQW 1 "register_operand"))
+                     (match_operand:<VDBLW> 2 "register_operand")))]
    "TARGET_SIMD"
    {
      rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
@@ -3264,54 +3418,90 @@
  )
  
  (define_expand "widen_usum<mode>3"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "")
+  [(set (match_operand:<VWIDE> 0 "register_operand")
         (plus:<VWIDE> (zero_extend:<VWIDE>
-                       (match_operand:VD_BHSI 1 "register_operand" ""))
-                     (match_operand:<VWIDE> 2 "register_operand" "")))]
+                       (match_operand:VD_BHSI 1 "register_operand"))
+                     (match_operand:<VWIDE> 2 "register_operand")))]
    "TARGET_SIMD"
  {
    emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
    DONE;
  })
  
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
+(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
-                       (ANY_EXTEND:<VWIDE>
-                         (match_operand:VD_BHSI 2 "register_operand" "w"))))]
+       (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+         (ANY_EXTEND:<VWIDE>
+           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
    "TARGET_SIMD"
-  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
-  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+  [(set_attr "type" "neon_sub_widen")]
  )
  
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
+(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
-                       (ANY_EXTEND:<VWIDE>
-                         (vec_select:<VHALF>
-                          (match_operand:VQW 2 "register_operand" "w")
-                          (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
+       (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+         (ANY_EXTEND:<VWIDE>
+           (vec_select:<VHALF>
+             (match_operand:VQW 2 "register_operand" "w")
+             (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
    "TARGET_SIMD"
-  "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
-  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
+  [(set_attr "type" "neon_sub_widen")]
  )
  
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
+(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
    [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
-                       (ANY_EXTEND:<VWIDE>
-                         (vec_select:<VHALF>
-                          (match_operand:VQW 2 "register_operand" "w")
-                          (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
+       (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
+         (ANY_EXTEND:<VWIDE>
+           (vec_select:<VHALF>
+             (match_operand:VQW 2 "register_operand" "w")
+             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
+  "TARGET_SIMD"
+  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+  [(set_attr "type" "neon_sub_widen")]
+)
+
+(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+       (plus:<VWIDE>
+         (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
+         (match_operand:<VWIDE> 1 "register_operand" "w")))]
    "TARGET_SIMD"
-  "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
-  [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
+  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+  [(set_attr "type" "neon_add_widen")]
+)
+
+;; Widening add taking half of a VQW-mode vector: operand 2 is reduced to
+;; <VHALF> by a vec_select whose selector (operand 3) must satisfy
+;; vect_par_cnst_lo_half, extended with ANY_EXTEND to <VWIDE>, and added to
+;; operand 1.  The assembly printed is <su>addw with operand 2 shown using
+;; <Vhalftype>; operand 3 does not appear in the output.
+;; NOTE(review): the extend is written as the first operand of the plus,
+;; presumably to follow RTL canonical order for commutative codes -- confirm.
+(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+       (plus:<VWIDE>
+         (ANY_EXTEND:<VWIDE>
+           (vec_select:<VHALF>
+             (match_operand:VQW 2 "register_operand" "w")
+             (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+         (match_operand:<VWIDE> 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
+  [(set_attr "type" "neon_add_widen")]
+)
+
+;; Counterpart of the addw<mode>_internal pattern for the other half of the
+;; VQW-mode vector: the vec_select selector (operand 3) must satisfy
+;; vect_par_cnst_hi_half.  The selected half is extended with ANY_EXTEND to
+;; <VWIDE> and added to operand 1.  The assembly printed is <su>addw2 with
+;; operand 2 shown using the full <Vtype>; operand 3 is not printed.
+(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+       (plus:<VWIDE>
+         (ANY_EXTEND:<VWIDE>
+           (vec_select:<VHALF>
+             (match_operand:VQW 2 "register_operand" "w")
+             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+         (match_operand:<VWIDE> 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
+  [(set_attr "type" "neon_add_widen")]
  )
  
  (define_expand "aarch64_saddw2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQW 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3321,9 +3511,9 @@
  })
  
  (define_expand "aarch64_uaddw2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQW 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3334,9 +3524,9 @@
  
  
  (define_expand "aarch64_ssubw2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQW 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3346,9 +3536,9 @@
  })
  
  (define_expand "aarch64_usubw2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQW 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQW 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3359,6 +3549,22 @@
  
  ;; <su><r>h<addsub>.
  
+;; Expander named <u>avg<mode>3_floor.  It has no preparation code, so it
+;; emits its template unchanged: operand 0 is set to an unspec, drawn from the
+;; HADD iterator, of register operands 1 and 2 (all in a VDQ_BHSI mode).
+;; NOTE(review): presumably recognised by the aarch64_<sur>h<addsub><mode>
+;; insn that follows -- confirm the iterator values line up.
+(define_expand "<u>avg<mode>3_floor"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand")
+       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
+                         (match_operand:VDQ_BHSI 2 "register_operand")]
+                        HADD))]
+  "TARGET_SIMD"
+)
+
+;; Expander named <u>avg<mode>3_ceil.  It has no preparation code, so it
+;; emits its template unchanged: operand 0 is set to an unspec, drawn from the
+;; RHADD iterator, of register operands 1 and 2 (all in a VDQ_BHSI mode).
+;; NOTE(review): presumably recognised by the aarch64_<sur>h<addsub><mode>
+;; insn that follows -- confirm the iterator values line up.
+(define_expand "<u>avg<mode>3_ceil"
+  [(set (match_operand:VDQ_BHSI 0 "register_operand")
+       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
+                         (match_operand:VDQ_BHSI 2 "register_operand")]
+                        RHADD))]
+  "TARGET_SIMD"
+)
+
  (define_insn "aarch64_<sur>h<addsub><mode>"
    [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
          (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
@@ -3865,10 +4071,10 @@
  )
  
  (define_expand "aarch64_sqdmlal2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:VQ_HSI 3 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")
+   (match_operand:VQ_HSI 3 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3878,10 +4084,10 @@
  })
  
  (define_expand "aarch64_sqdmlsl2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:VQ_HSI 3 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")
+   (match_operand:VQ_HSI 3 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3945,11 +4151,11 @@
  )
  
  (define_expand "aarch64_sqdmlal2_lane<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
-   (match_operand:SI 4 "immediate_operand" "i")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")
+   (match_operand:<VCOND> 3 "register_operand")
+   (match_operand:SI 4 "immediate_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3960,11 +4166,11 @@
  })
  
  (define_expand "aarch64_sqdmlal2_laneq<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
-   (match_operand:SI 4 "immediate_operand" "i")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")
+   (match_operand:<VCONQ> 3 "register_operand")
+   (match_operand:SI 4 "immediate_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3975,11 +4181,11 @@
  })
  
  (define_expand "aarch64_sqdmlsl2_lane<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
-   (match_operand:SI 4 "immediate_operand" "i")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")
+   (match_operand:<VCOND> 3 "register_operand")
+   (match_operand:SI 4 "immediate_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -3990,11 +4196,11 @@
  })
  
  (define_expand "aarch64_sqdmlsl2_laneq<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
-   (match_operand:SI 4 "immediate_operand" "i")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")
+   (match_operand:<VCONQ> 3 "register_operand")
+   (match_operand:SI 4 "immediate_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -4024,10 +4230,10 @@
  )
  
  (define_expand "aarch64_sqdmlal2_n<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VEL> 3 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")
+   (match_operand:<VEL> 3 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -4038,10 +4244,10 @@
  })
  
  (define_expand "aarch64_sqdmlsl2_n<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:<VWIDE> 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")
-   (match_operand:<VEL> 3 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:<VWIDE> 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")
+   (match_operand:<VEL> 3 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -4193,9 +4399,9 @@
  )
  
  (define_expand "aarch64_sqdmull2<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:VQ_HSI 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQ_HSI 1 "register_operand")
+   (match_operand:VQ_HSI 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -4253,10 +4459,10 @@
  )
  
  (define_expand "aarch64_sqdmull2_lane<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
-   (match_operand:SI 3 "immediate_operand" "i")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQ_HSI 1 "register_operand")
+   (match_operand:<VCOND> 2 "register_operand")
+   (match_operand:SI 3 "immediate_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -4267,10 +4473,10 @@
  })
  
  (define_expand "aarch64_sqdmull2_laneq<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
-   (match_operand:SI 3 "immediate_operand" "i")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQ_HSI 1 "register_operand")
+   (match_operand:<VCONQ> 2 "register_operand")
+   (match_operand:SI 3 "immediate_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -4301,9 +4507,9 @@
  )
  
  (define_expand "aarch64_sqdmull2_n<mode>"
-  [(match_operand:<VWIDE> 0 "register_operand" "=w")
-   (match_operand:VQ_HSI 1 "register_operand" "w")
-   (match_operand:<VEL> 2 "register_operand" "w")]
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (match_operand:VQ_HSI 1 "register_operand")
+   (match_operand:<VEL> 2 "register_operand")]
    "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
@@ -4705,8 +4911,8 @@
  ;; sqrt
  
  (define_expand "sqrt<mode>2"
-  [(set (match_operand:VHSDF 0 "register_operand" "=w")
-       (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
+  [(set (match_operand:VHSDF 0 "register_operand")
+       (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
    "TARGET_SIMD"
  {
    if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
@@ -4759,8 +4965,8 @@
  )
  
  (define_expand "vec_load_lanesoi<mode>"
-  [(set (match_operand:OI 0 "register_operand" "=w")
-       (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
+  [(set (match_operand:OI 0 "register_operand")
+       (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_LD2))]
    "TARGET_SIMD"
@@ -4803,8 +5009,8 @@
  )
  
  (define_expand "vec_store_lanesoi<mode>"
-  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
-       (unspec:OI [(match_operand:OI 1 "register_operand" "w")
+  [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
+       (unspec:OI [(match_operand:OI 1 "register_operand")
                      (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                     UNSPEC_ST2))]
    "TARGET_SIMD"
@@ -4857,8 +5063,8 @@
  )
  
  (define_expand "vec_load_lanesci<mode>"
-  [(set (match_operand:CI 0 "register_operand" "=w")
-       (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
+  [(set (match_operand:CI 0 "register_operand")
+       (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_LD3))]
    "TARGET_SIMD"
@@ -4901,8 +5107,8 @@
  )
  
  (define_expand "vec_store_lanesci<mode>"
-  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
-       (unspec:CI [(match_operand:CI 1 "register_operand" "w")
+  [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
+       (unspec:CI [(match_operand:CI 1 "register_operand")
                      (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                     UNSPEC_ST3))]
    "TARGET_SIMD"
@@ -4955,8 +5161,8 @@
  )
  
  (define_expand "vec_load_lanesxi<mode>"
-  [(set (match_operand:XI 0 "register_operand" "=w")
-       (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
+  [(set (match_operand:XI 0 "register_operand")
+       (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_LD4))]
    "TARGET_SIMD"
@@ -4999,8 +5205,8 @@
  )
  
  (define_expand "vec_store_lanesxi<mode>"
-  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
-       (unspec:XI [(match_operand:XI 1 "register_operand" "w")
+  [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
+       (unspec:XI [(match_operand:XI 1 "register_operand")
                      (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                     UNSPEC_ST4))]
    "TARGET_SIMD"
@@ -5045,8 +5251,8 @@
  ;; Reload patterns for AdvSIMD register list operands.
  
  (define_expand "mov<mode>"
-  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
-       (match_operand:VSTRUCT 1 "general_operand" ""))]
+  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
+       (match_operand:VSTRUCT 1 "general_operand"))]
    "TARGET_SIMD"
  {
    if (can_create_pseudo_p ())
@@ -5056,6 +5262,114 @@
      }
  })
  
+
+;; Expand aarch64_ld1x3<mode>.  Operand 1 is a DImode register holding the
+;; address; it is wrapped in a CImode MEM and passed, together with the CImode
+;; destination operand 0, to the aarch64_ld1_x3_<mode> insn.
+(define_expand "aarch64_ld1x3<VALLDIF:mode>"
+  [(match_operand:CI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  rtx dest = operands[0];
+  rtx src = gen_rtx_MEM (CImode, operands[1]);
+  emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (dest, src));
+  DONE;
+})
+
+;; LD1 insn emitted by the aarch64_ld1x3 expander: sets CImode register
+;; operand 0 from the structure memory operand 1 (constraint "Utv") via an
+;; UNSPEC_LD1 unspec.  The register list is printed from %S0 to %U0 and the
+;; insn is typed neon_load1_3reg<q>.
+;; NOTE(review): the inner UNSPEC_VSTRUCTDUMMY unspec looks like a placeholder
+;; that only ties the pattern to a VALLDIF mode -- confirm.
+(define_insn "aarch64_ld1_x3_<mode>"
+  [(set (match_operand:CI 0 "register_operand" "=w")
+        (unspec:CI
+         [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
+          (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load1_3reg<q>")]
+)
+
+;; Expand aarch64_ld1x4<mode>.  Operand 1 is a DImode register holding the
+;; address; it is wrapped in an XImode MEM and passed, together with the XImode
+;; destination operand 0, to the aarch64_ld1_x4_<mode> insn.  The operands
+;; carry predicates only: constraint strings are not used in a define_expand,
+;; and omitting them matches the aarch64_ld1x3 expander.
+(define_expand "aarch64_ld1x4<VALLDIF:mode>"
+  [(match_operand:XI 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  rtx mem = gen_rtx_MEM (XImode, operands[1]);
+  emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
+  DONE;
+})
+
+;; LD1 insn emitted by the aarch64_ld1x4 expander: sets XImode register
+;; operand 0 from the structure memory operand 1 (constraint "Utv") via an
+;; UNSPEC_LD1 unspec.  The register list is printed from %S0 to %V0 and the
+;; insn is typed neon_load1_4reg<q>.
+;; NOTE(review): the inner UNSPEC_VSTRUCTDUMMY unspec looks like a placeholder
+;; that only ties the pattern to a VALLDIF mode -- confirm.
+(define_insn "aarch64_ld1_x4_<mode>"
+  [(set (match_operand:XI 0 "register_operand" "=w")
+       (unspec:XI
+         [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
+          (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
+       UNSPEC_LD1))]
+  "TARGET_SIMD"
+  "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
+  [(set_attr "type" "neon_load1_4reg<q>")]
+)
+
+;; Expand aarch64_st1x2<mode>.  Operand 0 is a DImode register holding the
+;; address; it is wrapped in an OImode MEM that becomes the destination of the
+;; aarch64_st1_x2_<mode> insn, with OImode register operand 1 as the source.
+(define_expand "aarch64_st1x2<VALLDIF:mode>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:OI 1 "register_operand")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  rtx addr = operands[0];
+  rtx dest = gen_rtx_MEM (OImode, addr);
+  emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (dest, operands[1]));
+  DONE;
+})
+
+(define_insn "aarch64_st1_x2_<mode>"
+   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
+        (unspec:OI
+         [(match_operand:OI 1 "register_operand" "w")
+          (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
+  "TARGET_SIMD"
+  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
+  [(set_attr "type" "neon_store1_2reg<q>")]
+)
+
+;; Expand aarch64_st1x3<mode>.  Operand 0 is a DImode register holding the
+;; address; it is wrapped in a CImode MEM that becomes the destination of the
+;; aarch64_st1_x3_<mode> insn, with CImode register operand 1 as the source.
+(define_expand "aarch64_st1x3<VALLDIF:mode>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:CI 1 "register_operand")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  rtx addr = operands[0];
+  rtx dest = gen_rtx_MEM (CImode, addr);
+  emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (dest, operands[1]));
+  DONE;
+})
+
+;; ST1 insn emitted by the aarch64_st1x3 expander: sets the structure memory
+;; operand 0 (constraint "=Utv") from CImode register operand 1 via an
+;; UNSPEC_ST1 unspec.  The register list is printed from %S1 to %U1 and the
+;; insn is typed neon_store1_3reg<q>.
+;; NOTE(review): the inner UNSPEC_VSTRUCTDUMMY unspec looks like a placeholder
+;; that only ties the pattern to a VALLDIF mode -- confirm.
+(define_insn "aarch64_st1_x3_<mode>"
+   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
+       (unspec:CI
+         [(match_operand:CI 1 "register_operand" "w")
+         (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
+  "TARGET_SIMD"
+  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
+  [(set_attr "type" "neon_store1_3reg<q>")]
+)
+
+;; Expand aarch64_st1x4<mode>.  Operand 0 is a DImode register holding the
+;; address; it is wrapped in an XImode MEM that becomes the destination of the
+;; aarch64_st1_x4_<mode> insn, with XImode register operand 1 as the source.
+;; The operands carry predicates only: constraint strings (even empty ones)
+;; are not used in a define_expand, and omitting them matches the
+;; aarch64_st1x2 and aarch64_st1x3 expanders.
+(define_expand "aarch64_st1x4<VALLDIF:mode>"
+  [(match_operand:DI 0 "register_operand")
+   (match_operand:XI 1 "register_operand")
+   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
+  "TARGET_SIMD"
+{
+  rtx mem = gen_rtx_MEM (XImode, operands[0]);
+  emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
+  DONE;
+})
+
+;; ST1 insn emitted by the aarch64_st1x4 expander: sets the structure memory
+;; operand 0 (constraint "=Utv") from XImode register operand 1 via an
+;; UNSPEC_ST1 unspec.  The register list is printed from %S1 to %V1 and the
+;; insn is typed neon_store1_4reg<q>.
+;; NOTE(review): the inner UNSPEC_VSTRUCTDUMMY unspec looks like a placeholder
+;; that only ties the pattern to a VALLDIF mode -- confirm.
+(define_insn "aarch64_st1_x4_<mode>"
+  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
+       (unspec:XI
+          [(match_operand:XI 1 "register_operand" "w")
+          (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
+       UNSPEC_ST1))]
+  "TARGET_SIMD"
+  "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
+  [(set_attr "type" "neon_store1_4reg<q>")]
+)
+
  (define_insn "*aarch64_mov<mode>"
    [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
@@ -5189,8 +5503,8 @@
  })
  
  (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
-  [(match_operand:VSTRUCT 0 "register_operand" "=w")
-   (match_operand:DI 1 "register_operand" "w")
+  [(match_operand:VSTRUCT 0 "register_operand")
+   (match_operand:DI 1 "register_operand")
     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
    "TARGET_SIMD"
  {
@@ -5264,8 +5578,8 @@
  )
  
  (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
- [(match_operand:VSTRUCT 0 "register_operand" "=w")
-  (match_operand:DI 1 "register_operand" "r")
+ [(match_operand:VSTRUCT 0 "register_operand")
+  (match_operand:DI 1 "register_operand")
    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
    "TARGET_SIMD"
  {
@@ -5292,8 +5606,8 @@
  })
  
  (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
- [(match_operand:VSTRUCT 0 "register_operand" "=w")
-  (match_operand:DI 1 "register_operand" "r")
+ [(match_operand:VSTRUCT 0 "register_operand")
+  (match_operand:DI 1 "register_operand")
    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
    "TARGET_SIMD"
  {
@@ -5305,8 +5619,8 @@
  })
  
  (define_expand "aarch64_ld1x2<VQ:mode>"
- [(match_operand:OI 0 "register_operand" "=w")
-  (match_operand:DI 1 "register_operand" "r")
+ [(match_operand:OI 0 "register_operand")
+  (match_operand:DI 1 "register_operand")
    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
    "TARGET_SIMD"
  {
@@ -5318,8 +5632,8 @@
  })
  
  (define_expand "aarch64_ld1x2<VDC:mode>"
- [(match_operand:OI 0 "register_operand" "=w")
-  (match_operand:DI 1 "register_operand" "r")
+ [(match_operand:OI 0 "register_operand")
+  (match_operand:DI 1 "register_operand")
    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
    "TARGET_SIMD"
  {
@@ -5332,10 +5646,10 @@
  
  
  (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
-  [(match_operand:VSTRUCT 0 "register_operand" "=w")
-       (match_operand:DI 1 "register_operand" "w")
-       (match_operand:VSTRUCT 2 "register_operand" "0")
-       (match_operand:SI 3 "immediate_operand" "i")
+  [(match_operand:VSTRUCT 0 "register_operand")
+       (match_operand:DI 1 "register_operand")
+       (match_operand:VSTRUCT 2 "register_operand")
+       (match_operand:SI 3 "immediate_operand")
         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
    "TARGET_SIMD"
  {
@@ -5355,9 +5669,9 @@
  ;; D-register list.
  
  (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
- [(match_operand:VDC 0 "register_operand" "=w")
-  (match_operand:VSTRUCT 1 "register_operand" "w")
-  (match_operand:SI 2 "immediate_operand" "i")]
+ [(match_operand:VDC 0 "register_operand")
+  (match_operand:VSTRUCT 1 "register_operand")
+  (match_operand:SI 2 "immediate_operand")]
    "TARGET_SIMD"
  {
    int part = INTVAL (operands[2]);
@@ -5372,9 +5686,9 @@
  ;; Q-register list.
  
  (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
- [(match_operand:VQ 0 "register_operand" "=w")
-  (match_operand:VSTRUCT 1 "register_operand" "w")
-  (match_operand:SI 2 "immediate_operand" "i")]
+ [(match_operand:VQ 0 "register_operand")
+  (match_operand:VSTRUCT 1 "register_operand")
+  (match_operand:SI 2 "immediate_operand")]
    "TARGET_SIMD"
  {
    int part = INTVAL (operands[2]);
@@ -5511,13 +5825,13 @@
  ;; This instruction's pattern is generated directly by
  ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
  ;; need corresponding changes there.
-(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
+(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
    [(set (match_operand:VALL_F16 0 "register_operand" "=w")
         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
                           (match_operand:VALL_F16 2 "register_operand" "w")]
          PERMUTE))]
    "TARGET_SIMD"
-  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+  "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
    [(set_attr "type" "neon_permute<q>")]
  )
  
@@ -5613,8 +5927,8 @@
  )
  
  (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
- [(match_operand:DI 0 "register_operand" "r")
-  (match_operand:VSTRUCT 1 "register_operand" "w")
+ [(match_operand:DI 0 "register_operand")
+  (match_operand:VSTRUCT 1 "register_operand")
    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
    "TARGET_SIMD"
  {
@@ -5626,8 +5940,8 @@
  })
  
  (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
- [(match_operand:DI 0 "register_operand" "r")
-  (match_operand:VSTRUCT 1 "register_operand" "w")
+ [(match_operand:DI 0 "register_operand")
+  (match_operand:VSTRUCT 1 "register_operand")
    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
    "TARGET_SIMD"
  {
@@ -5639,8 +5953,8 @@
  })
  
  (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
- [(match_operand:DI 0 "register_operand" "r")
-  (match_operand:VSTRUCT 1 "register_operand" "w")
+ [(match_operand:DI 0 "register_operand")
+  (match_operand:VSTRUCT 1 "register_operand")
    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
    (match_operand:SI 2 "immediate_operand")]
    "TARGET_SIMD"
@@ -5676,10 +5990,10 @@
  ;; extend them in arm_neon.h and insert the resulting Q-regs.
  
  (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
- [(match_operand:VSTRUCT 0 "register_operand" "+w")
-  (match_operand:VSTRUCT 1 "register_operand" "0")
-  (match_operand:VQ 2 "register_operand" "w")
-  (match_operand:SI 3 "immediate_operand" "i")]
+ [(match_operand:VSTRUCT 0 "register_operand")
+  (match_operand:VSTRUCT 1 "register_operand")
+  (match_operand:VQ 2 "register_operand")
+  (match_operand:SI 3 "immediate_operand")]
    "TARGET_SIMD"
  {
    int part = INTVAL (operands[3]);
@@ -5694,7 +6008,16 @@
  ;; Standard pattern name vec_init<mode><Vel>.
  
  (define_expand "vec_init<mode><Vel>"
-  [(match_operand:VALL_F16 0 "register_operand" "")
+  [(match_operand:VALL_F16 0 "register_operand")
+   (match_operand 1 "" "")]
+  "TARGET_SIMD"
+{
+  aarch64_expand_vector_init (operands[0], operands[1]);
+  DONE;
+})
+
+(define_expand "vec_init<mode><Vhalf>"
+  [(match_operand:VQ_NO2E 0 "register_operand")
     (match_operand 1 "" "")]
    "TARGET_SIMD"
  {
@@ -5732,25 +6055,26 @@
  )
  
  
-(define_insn "aarch64_frecpe<mode>"
-  [(set (match_operand:VHSDF 0 "register_operand" "=w")
-       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
+(define_insn "@aarch64_frecpe<mode>"
+  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
+       (unspec:VHSDF_HSDF
+        [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
          UNSPEC_FRECPE))]
    "TARGET_SIMD"
-  "frecpe\\t%0.<Vtype>, %1.<Vtype>"
+  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
    [(set_attr "type" "neon_fp_recpe_<stype><q>")]
  )
  
-(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
+(define_insn "aarch64_frecpx<mode>"
    [(set (match_operand:GPF_F16 0 "register_operand" "=w")
         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
-        FRECP))]
+        UNSPEC_FRECPX))]
    "TARGET_SIMD"
-  "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
-  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
+  "frecpx\t%<s>0, %<s>1"
+  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
  )
  
-(define_insn "aarch64_frecps<mode>"
+(define_insn "@aarch64_frecps<mode>"
    [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
         (unspec:VHSDF_HSDF
           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
@@ -5772,9 +6096,9 @@
  ;; Standard pattern name vec_extract<mode><Vel>.
  
  (define_expand "vec_extract<mode><Vel>"
-  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
-   (match_operand:VALL_F16 1 "register_operand" "")
-   (match_operand:SI 2 "immediate_operand" "")]
+  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
+   (match_operand:VALL_F16 1 "register_operand")
+   (match_operand:SI 2 "immediate_operand")]
    "TARGET_SIMD"
  {
      emit_insn
@@ -5786,33 +6110,23 @@
  
  (define_insn "aarch64_crypto_aes<aes_op>v16qi"
    [(set (match_operand:V16QI 0 "register_operand" "=w")
-        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
-                      (match_operand:V16QI 2 "register_operand" "w")]
+       (unspec:V16QI
+               [(xor:V16QI
+                (match_operand:V16QI 1 "register_operand" "%0")  ;; "%": operands 1 and 2 may be swapped; "0": tied to operand 0.
+                (match_operand:V16QI 2 "register_operand" "w"))]
           CRYPTO_AES))]
    "TARGET_SIMD && TARGET_AES"
    "aes<aes_op>\\t%0.16b, %2.16b"
    [(set_attr "type" "crypto_aese")]
  )
  
-;; When AES/AESMC fusion is enabled we want the register allocation to
-;; look like:
-;;    AESE Vn, _
-;;    AESMC Vn, Vn
-;; So prefer to tie operand 1 to operand 0 when fusing.
-
  (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
-       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
+       (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
          CRYPTO_AESMC))]
    "TARGET_SIMD && TARGET_AES"
    "aes<aesmc_op>\\t%0.16b, %1.16b"
-  [(set_attr "type" "crypto_aesmc")
-   (set_attr_alternative "enabled"
-     [(if_then_else (match_test
-                      "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
-                    (const_string "yes" )
-                    (const_string "no"))
-      (const_string "yes")])]
+  [(set_attr "type" "crypto_aesmc")]
  )
  
  ;; When AESE/AESMC fusion is enabled we really want to keep the two together
@@ -5821,12 +6135,14 @@
  ;;  Mash the two together during combine.
  
  (define_insn "*aarch64_crypto_aese_fused"
-  [(set (match_operand:V16QI 0 "register_operand" "=&w")
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
         (unspec:V16QI
           [(unspec:V16QI
-           [(match_operand:V16QI 1 "register_operand" "0")
-            (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
-         ] UNSPEC_AESMC))]
+          [(xor:V16QI
+               (match_operand:V16QI 1 "register_operand" "%0")
+               (match_operand:V16QI 2 "register_operand" "w"))]
+            UNSPEC_AESE)]
+       UNSPEC_AESMC))]
    "TARGET_SIMD && TARGET_AES
     && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
    "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
@@ -5840,12 +6156,14 @@
  ;;  Mash the two together during combine.
  
  (define_insn "*aarch64_crypto_aesd_fused"
-  [(set (match_operand:V16QI 0 "register_operand" "=&w")
+  [(set (match_operand:V16QI 0 "register_operand" "=w")
         (unspec:V16QI
           [(unspec:V16QI
-           [(match_operand:V16QI 1 "register_operand" "0")
-            (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
-         ] UNSPEC_AESIMC))]
+                   [(xor:V16QI
+                       (match_operand:V16QI 1 "register_operand" "%0")
+                       (match_operand:V16QI 2 "register_operand" "w"))]
+               UNSPEC_AESD)]
+         UNSPEC_AESIMC))]
    "TARGET_SIMD && TARGET_AES
     && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
    "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
@@ -6097,11 +6415,11 @@
  ;; fp16fml
  
  (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
-  [(set (match_operand:VDQSF 0 "register_operand" "=w")
+  [(set (match_operand:VDQSF 0 "register_operand")
         (unspec:VDQSF
-        [(match_operand:VDQSF 1 "register_operand" "0")
-         (match_operand:<VFMLA_W> 2 "register_operand" "w")
-         (match_operand:<VFMLA_W> 3 "register_operand" "w")]
+        [(match_operand:VDQSF 1 "register_operand")
+         (match_operand:<VFMLA_W> 2 "register_operand")
+         (match_operand:<VFMLA_W> 3 "register_operand")]
          VFMLA16_LOW))]
    "TARGET_F16FML"
  {
@@ -6120,11 +6438,11 @@
  })
  
  (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
-  [(set (match_operand:VDQSF 0 "register_operand" "=w")
+  [(set (match_operand:VDQSF 0 "register_operand")
         (unspec:VDQSF
-        [(match_operand:VDQSF 1 "register_operand" "0")
-         (match_operand:<VFMLA_W> 2 "register_operand" "w")
-         (match_operand:<VFMLA_W> 3 "register_operand" "w")]
+        [(match_operand:VDQSF 1 "register_operand")
+         (match_operand:<VFMLA_W> 2 "register_operand")
+         (match_operand:<VFMLA_W> 3 "register_operand")]
          VFMLA16_HIGH))]
    "TARGET_F16FML"
  {
@@ -6210,11 +6528,11 @@
  )
  
  (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
-                          (match_operand:V4HF 2 "register_operand" "")
-                          (match_operand:V4HF 3 "register_operand" "")
-                          (match_operand:SI 4 "aarch64_imm2" "")]
+  [(set (match_operand:V2SF 0 "register_operand")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
+                          (match_operand:V4HF 2 "register_operand")
+                          (match_operand:V4HF 3 "register_operand")
+                          (match_operand:SI 4 "aarch64_imm2")]
          VFMLA16_LOW))]
    "TARGET_F16FML"
  {
@@ -6231,11 +6549,11 @@
  )
  
  (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
-                          (match_operand:V4HF 2 "register_operand" "")
-                          (match_operand:V4HF 3 "register_operand" "")
-                          (match_operand:SI 4 "aarch64_imm2" "")]
+  [(set (match_operand:V2SF 0 "register_operand")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
+                          (match_operand:V4HF 2 "register_operand")
+                          (match_operand:V4HF 3 "register_operand")
+                          (match_operand:SI 4 "aarch64_imm2")]
          VFMLA16_HIGH))]
    "TARGET_F16FML"
  {
@@ -6325,11 +6643,11 @@
  )
  
  (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
-                          (match_operand:V8HF 2 "register_operand" "")
-                          (match_operand:V8HF 3 "register_operand" "")
-                          (match_operand:SI 4 "aarch64_lane_imm3" "")]
+  [(set (match_operand:V4SF 0 "register_operand")
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                          (match_operand:V8HF 2 "register_operand")
+                          (match_operand:V8HF 3 "register_operand")
+                          (match_operand:SI 4 "aarch64_lane_imm3")]
          VFMLA16_LOW))]
    "TARGET_F16FML"
  {
@@ -6345,11 +6663,11 @@
  })
  
  (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
-                          (match_operand:V8HF 2 "register_operand" "")
-                          (match_operand:V8HF 3 "register_operand" "")
-                          (match_operand:SI 4 "aarch64_lane_imm3" "")]
+  [(set (match_operand:V4SF 0 "register_operand")
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                          (match_operand:V8HF 2 "register_operand")
+                          (match_operand:V8HF 3 "register_operand")
+                          (match_operand:SI 4 "aarch64_lane_imm3")]
          VFMLA16_HIGH))]
    "TARGET_F16FML"
  {
@@ -6439,11 +6757,11 @@
  )
  
  (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
-                     (match_operand:V4HF 2 "register_operand" "")
-                     (match_operand:V8HF 3 "register_operand" "")
-                     (match_operand:SI 4 "aarch64_lane_imm3" "")]
+  [(set (match_operand:V2SF 0 "register_operand")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
+                     (match_operand:V4HF 2 "register_operand")
+                     (match_operand:V8HF 3 "register_operand")
+                     (match_operand:SI 4 "aarch64_lane_imm3")]
          VFMLA16_LOW))]
    "TARGET_F16FML"
  {
@@ -6460,11 +6778,11 @@
  })
  
  (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "")
-       (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
-                     (match_operand:V4HF 2 "register_operand" "")
-                     (match_operand:V8HF 3 "register_operand" "")
-                     (match_operand:SI 4 "aarch64_lane_imm3" "")]
+  [(set (match_operand:V2SF 0 "register_operand")
+       (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
+                     (match_operand:V4HF 2 "register_operand")
+                     (match_operand:V8HF 3 "register_operand")
+                     (match_operand:SI 4 "aarch64_lane_imm3")]
          VFMLA16_HIGH))]
    "TARGET_F16FML"
  {
@@ -6555,11 +6873,11 @@
  )
  
  (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
-                     (match_operand:V8HF 2 "register_operand" "")
-                     (match_operand:V4HF 3 "register_operand" "")
-                     (match_operand:SI 4 "aarch64_imm2" "")]
+  [(set (match_operand:V4SF 0 "register_operand")
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                     (match_operand:V8HF 2 "register_operand")
+                     (match_operand:V4HF 3 "register_operand")
+                     (match_operand:SI 4 "aarch64_imm2")]
          VFMLA16_LOW))]
    "TARGET_F16FML"
  {
@@ -6575,11 +6893,11 @@
  })
  
  (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
-  [(set (match_operand:V4SF 0 "register_operand" "")
-       (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
-                     (match_operand:V8HF 2 "register_operand" "")
-                     (match_operand:V4HF 3 "register_operand" "")
-                     (match_operand:SI 4 "aarch64_imm2" "")]
+  [(set (match_operand:V4SF 0 "register_operand")
+       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
+                     (match_operand:V8HF 2 "register_operand")
+                     (match_operand:V4HF 3 "register_operand")
+                     (match_operand:SI 4 "aarch64_imm2")]
          VFMLA16_HIGH))]
    "TARGET_F16FML"
  {
@@ -6689,3 +7007,21 @@
    "pmull2\\t%0.1q, %1.2d, %2.2d"
    [(set_attr "type" "crypto_pmull")]
  )
+
+;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
+(define_insn "<optab><Vnarrowq><mode>2"
+  [(set (match_operand:VQN 0 "register_operand" "=w")  ;; wide destination
+       (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"  ;; <su>: presumably "s"/"u" per extension code -- confirm in iterators.md
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+;; Truncate a 128-bit integer vector to a 64-bit vector.
+(define_insn "trunc<mode><Vnarrowq>2"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")  ;; narrow destination
+       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "xtn\t%0.<Vntype>, %1.<Vtype>"  ;; a single xtn implements the whole truncate
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)