aarch64: [US]Q(R)SHR(U)N2 refactoring
Author:     Kyrylo Tkachov <kyrylo.tkachov@arm.com>
AuthorDate: Wed, 7 Jun 2023 10:20:01 +0000 (11:20 +0100)
Commit:     Kyrylo Tkachov <kyrylo.tkachov@arm.com>
CommitDate: Fri, 16 Jun 2023 12:52:24 +0000 (13:52 +0100)
This patch is large in lines of code, but it is a fairly regular
extension of the first patch: it converts the high-half patterns to
standard RTL codes in the same way that the first patch handled the
low-half ones.
This allows us to remove the unspec codes for these instructions, as
no uses of them remain.
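
To make the change concrete, here is a condensed before/after sketch
of the high-half pattern body (abridged from the full patterns below):

    ;; Before: an opaque unspec, parameterised by the VQSHRN_N iterator.
    (unspec:<VNARROWQ>
      [(match_operand:VQN 2 "register_operand" "w")
       (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
      VQSHRN_N)

    ;; After: standard RTL codes (truncate of a shift) that the RTL
    ;; optimizers can reason about.
    (ALL_TRUNC:<VNARROWQ>
      (<TRUNC_SHIFT>:VQN
        (match_operand:VQN 2 "register_operand" "w")
        (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))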

Bootstrapped and tested on aarch64-none-linux-gnu and
aarch64_be-none-elf.
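
As a usage sketch, here is one of the affected intrinsics (the
signature matches its declaration in arm_neon.h below; the expected
instruction is an assumption based on the new patterns):

    #include <arm_neon.h>

    /* Round each 16-bit lane of b, shift it right by 2, truncate to
       8 bits, and pack the results into the high half of the 128-bit
       result, keeping a as the low half.  Expected to emit a single
       rshrn2 v0.16b, v1.8h, #2.  */
    int8x16_t
    narrow_high (int8x8_t a, int16x8_t b)
    {
      return vrshrn_high_n_s16 (a, b, 2);
    }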

gcc/ChangeLog:

* config/aarch64/aarch64-simd-builtins.def (shrn2): Rename builtins to...
(shrn2_n): ... This.
(rshrn2): Rename builtins to...
(rshrn2_n): ... This.
* config/aarch64/arm_neon.h (vrshrn_high_n_s16): Adjust for the above.
(vrshrn_high_n_s32): Likewise.
(vrshrn_high_n_s64): Likewise.
(vrshrn_high_n_u16): Likewise.
(vrshrn_high_n_u32): Likewise.
(vrshrn_high_n_u64): Likewise.
(vshrn_high_n_s16): Likewise.
(vshrn_high_n_s32): Likewise.
(vshrn_high_n_s64): Likewise.
(vshrn_high_n_u16): Likewise.
(vshrn_high_n_u32): Likewise.
(vshrn_high_n_u64): Likewise.
* config/aarch64/aarch64-simd.md (*aarch64_<srn_op>shrn<mode>2_vect_le):
Delete.
(*aarch64_<srn_op>shrn<mode>2_vect_be): Likewise.
(aarch64_shrn2<mode>_insn_le): Likewise.
(aarch64_shrn2<mode>_insn_be): Likewise.
(aarch64_shrn2<mode>): Likewise.
(aarch64_rshrn2<mode>_insn_le): Likewise.
(aarch64_rshrn2<mode>_insn_be): Likewise.
(aarch64_rshrn2<mode>): Likewise.
(aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le): Likewise.
(aarch64_<shrn_op>shrn2_n<mode>_insn_le): New define_insn.
(aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be): Delete.
(aarch64_<shrn_op>shrn2_n<mode>_insn_be): New define_insn.
(aarch64_<sur>q<r>shr<u>n2_n<mode>): Delete.
(aarch64_<shrn_op>shrn2_n<mode>): New define_expand.
(aarch64_<shrn_op>rshrn2_n<mode>_insn_le): New define_insn.
(aarch64_<shrn_op>rshrn2_n<mode>_insn_be): New define_insn.
(aarch64_<shrn_op>rshrn2_n<mode>): New define_expand.
(aarch64_sqshrun2_n<mode>_insn_le): New define_insn.
(aarch64_sqshrun2_n<mode>_insn_be): New define_insn.
(aarch64_sqshrun2_n<mode>): New define_expand.
(aarch64_sqrshrun2_n<mode>_insn_le): New define_insn.
(aarch64_sqrshrun2_n<mode>_insn_be): New define_insn.
(aarch64_sqrshrun2_n<mode>): New define_expand.
* config/aarch64/iterators.md (UNSPEC_SQSHRUN, UNSPEC_SQRSHRUN,
UNSPEC_SQSHRN, UNSPEC_UQSHRN, UNSPEC_SQRSHRN, UNSPEC_UQRSHRN):
Delete unspec values.
(VQSHRN_N): Delete int iterator.

gcc/config/aarch64/aarch64-simd-builtins.def
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/arm_neon.h
gcc/config/aarch64/iterators.md

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 87af8f3689c7a4e0c93a28c9590ac2e6ac4267f7..01cd85d64fd8e215430432706ce24f5f0a5bfc6d 100644
   BUILTIN_VQN (SHIFTIMM, shrn_n, 0, NONE)
   BUILTIN_VQN (USHIFTIMM, shrn_n, 0, NONE)
 
-  /* Implemented by aarch64_shrn2<mode>.  */
-  BUILTIN_VQN (SHIFT2IMM, shrn2, 0, NONE)
-  BUILTIN_VQN (USHIFT2IMM, shrn2, 0, NONE)
+  BUILTIN_VQN (SHIFT2IMM, shrn2_n, 0, NONE)
+  BUILTIN_VQN (USHIFT2IMM, shrn2_n, 0, NONE)
 
   BUILTIN_VQN (SHIFTIMM, rshrn_n, 0, NONE)
   BUILTIN_VQN (USHIFTIMM, rshrn_n, 0, NONE)
 
-  /* Implemented by aarch64_rshrn2<mode>.  */
-  BUILTIN_VQN (SHIFT2IMM, rshrn2, 0, NONE)
-  BUILTIN_VQN (USHIFT2IMM, rshrn2, 0, NONE)
+  BUILTIN_VQN (SHIFT2IMM, rshrn2_n, 0, NONE)
+  BUILTIN_VQN (USHIFT2IMM, rshrn2_n, 0, NONE)
 
   /* Implemented by aarch64_<su>mlsl<mode>.  */
   BUILTIN_VD_BHSI (TERNOP, smlsl, 0, NONE)
   BUILTIN_SD_HSDI (USHIFTIMM, uqshrn_n, 0, NONE)
   BUILTIN_SD_HSDI (SHIFTIMM, sqrshrn_n, 0, NONE)
   BUILTIN_SD_HSDI (USHIFTIMM, uqrshrn_n, 0, NONE)
-  /* Implemented by aarch64_<sur>q<r>shr<u>n2_n<mode>.  */
   BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, NONE)
   BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, NONE)
   BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ce5885e7bb1dce22953fccf3d0df2e5c15b210c4..b31c7130708652826c4ad306d93150318b7d016d 100644
  }
 )
 
-(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (match_operand:<VNARROWQ> 1 "register_operand" "0")
-         (truncate:<VNARROWQ>
-           (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
-             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (truncate:<VNARROWQ>
-           (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
-             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
-         (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
   [(set_attr "type" "neon_permute<q>")]
 )
 
-(define_insn "aarch64_shrn2<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (match_operand:<VNARROWQ> 1 "register_operand" "0")
-         (truncate:<VNARROWQ>
-           (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
-             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_shrn2<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (truncate:<VNARROWQ>
-           (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
-             (match_operand:VQN 3
-               "aarch64_simd_shift_imm_vec_<vn_mode>")))
-         (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_shrn2<mode>"
-  [(match_operand:<VNARROWQ2> 0 "register_operand")
-   (match_operand:<VNARROWQ> 1 "register_operand")
-   (match_operand:VQN 2 "register_operand")
-   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
-  "TARGET_SIMD"
-  {
-    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                                INTVAL (operands[3]));
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
-                                                 operands[2], operands[3]));
-    else
-      emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
-                                                 operands[2], operands[3]));
-    DONE;
-  }
-)
-
-(define_insn "aarch64_rshrn2<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (match_operand:<VNARROWQ> 1 "register_operand" "0")
-         (truncate:<VNARROWQ>
-           (lshiftrt:VQN
-             (plus:VQN (match_operand:VQN 2 "register_operand" "w")
-                       (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
-             (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN
-   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
-      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[4], 0)) - 1))"
-  "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %4"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_rshrn2<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (truncate:<VNARROWQ>
-           (lshiftrt:VQN
-             (plus:VQN (match_operand:VQN 2 "register_operand" "w")
-                       (match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
-             (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_<vn_mode>")))
-         (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN
-   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
-      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[4], 0)) - 1))"
-  "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %4"
-  [(set_attr "type" "neon_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_rshrn2<mode>"
-  [(match_operand:<VNARROWQ2> 0 "register_operand")
-   (match_operand:<VNARROWQ> 1 "register_operand")
-   (match_operand:VQN 2 "register_operand")
-   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
-  "TARGET_SIMD"
-  {
-    if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
-      {
-       rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
-       emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
-                                             operands[2], tmp));
-      }
-    else
-      {
-       rtx shft
-         = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                              HOST_WIDE_INT_1U
-                                               << (INTVAL (operands[3]) - 1));
-
-       operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
-                                                        INTVAL (operands[3]));
-       if (BYTES_BIG_ENDIAN)
-         emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
-                                                      operands[1],
-                                                      operands[2],
-                                                      shft,
-                                                      operands[3]));
-       else
-         emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
-                                                      operands[1],
-                                                      operands[2],
-                                                      shft,
-                                                      operands[3]));
-      }
-    DONE;
-  }
-)
-
 ;; Widening operations.
 
 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
   }
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
+(define_insn "aarch64_<shrn_op>shrn2_n<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
          (match_operand:<VNARROWQ> 1 "register_operand" "0")
-         (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
-                             (match_operand:VQN 3
-                               "aarch64_simd_shift_imm_vec_<vn_mode>")]
-                            VQSHRN_N)))]
+         (ALL_TRUNC:<VNARROWQ>
+           (<TRUNC_SHIFT>:VQN
+             (match_operand:VQN 2 "register_operand" "w")
+             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+  "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be"
+(define_insn "aarch64_<shrn_op>shrn2_n<mode>_insn_be"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
-          (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
-                             (match_operand:VQN 3
-                               "aarch64_simd_shift_imm_vec_<vn_mode>")]
-                            VQSHRN_N)
+         (ALL_TRUNC:<VNARROWQ>
+           (<TRUNC_SHIFT>:VQN
+             (match_operand:VQN 2 "register_operand" "w")
+             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
          (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
+  "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
 
-(define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
+(define_expand "aarch64_<shrn_op>shrn2_n<mode>"
   [(match_operand:<VNARROWQ2> 0 "register_operand")
    (match_operand:<VNARROWQ> 1 "register_operand")
-   (unspec:<VNARROWQ>
-       [(match_operand:VQN 2 "register_operand")
-        (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
-        VQSHRN_N)]
+   (ALL_TRUNC:<VNARROWQ>
+     (match_operand:VQN 2 "register_operand"))
+   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
   "TARGET_SIMD"
   {
     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                 INTVAL (operands[3]));
 
     if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be (operands[0],
+      emit_insn (gen_aarch64_<shrn_op>shrn2_n<mode>_insn_be (operands[0],
                                operands[1], operands[2], operands[3]));
     else
-      emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le (operands[0],
+      emit_insn (gen_aarch64_<shrn_op>shrn2_n<mode>_insn_le (operands[0],
                                operands[1], operands[2], operands[3]));
     DONE;
   }
 )
 
+(define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+         (match_operand:<VNARROWQ> 1 "register_operand" "0")
+         (ALL_TRUNC:<VNARROWQ>
+           (<TRUNC_SHIFT>:<V2XWIDE>
+             (plus:<V2XWIDE>
+               (<TRUNCEXTEND>:<V2XWIDE>
+                 (match_operand:VQN 2 "register_operand" "w"))
+               (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN
+   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
+  "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+         (ALL_TRUNC:<VNARROWQ>
+           (<TRUNC_SHIFT>:<V2XWIDE>
+             (plus:<V2XWIDE>
+               (<TRUNCEXTEND>:<V2XWIDE>
+                 (match_operand:VQN 2 "register_operand" "w"))
+               (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
+         (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN
+   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
+  "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
+  [(match_operand:<VNARROWQ2> 0 "register_operand")
+   (match_operand:<VNARROWQ> 1 "register_operand")
+   (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
+   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
+  "TARGET_SIMD"
+  {
+    if (<CODE> == TRUNCATE
+       && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
+      {
+       rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
+       emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
+                                             operands[2], tmp));
+       DONE;
+      }
+    /* Use this expander to create the rounding constant vector, which is
+       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
+       RTL is generated when handling the DImode expanders.  */
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
+    rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
+    operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
+                                                             operands[1],
+                                                             operands[2],
+                                                             operands[3],
+                                                             rnd));
+    else
+      emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
+                                                             operands[1],
+                                                             operands[2],
+                                                             operands[3],
+                                                             rnd));
+    DONE;
+  }
+)
+
+(define_insn "aarch64_sqshrun2_n<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+         (match_operand:<VNARROWQ> 1 "register_operand" "0")
+         (truncate:<VNARROWQ>
+           (smin:VQN
+             (smax:VQN
+               (ashiftrt:VQN
+                 (match_operand:VQN 2 "register_operand" "w")
+                 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+               (match_operand:VQN 4 "aarch64_simd_imm_zero"))
+             (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_sqshrun2_n<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+         (truncate:<VNARROWQ>
+           (smin:VQN
+             (smax:VQN
+               (ashiftrt:VQN
+                 (match_operand:VQN 2 "register_operand" "w")
+                 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+               (match_operand:VQN 4 "aarch64_simd_imm_zero"))
+             (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
+         (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqshrun2_n<mode>"
+  [(match_operand:<VNARROWQ2> 0 "register_operand")
+   (match_operand:<VNARROWQ> 1 "register_operand")
+   (match_operand:VQN 2 "register_operand")
+   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
+  "TARGET_SIMD"
+  {
+    operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                                                INTVAL (operands[3]));
+    rtx zeros = CONST0_RTX (<MODE>mode);
+    rtx half_umax
+      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+                       GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
+                               operands[1], operands[2], operands[3],
+                               zeros, half_umax));
+    else
+      emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
+                               operands[1], operands[2], operands[3],
+                               zeros, half_umax));
+    DONE;
+  }
+)
+
+(define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+         (match_operand:<VNARROWQ> 1 "register_operand" "0")
+         (truncate:<VNARROWQ>
+           (smin:<V2XWIDE>
+             (smax:<V2XWIDE>
+               (ashiftrt:<V2XWIDE>
+                 (plus:<V2XWIDE>
+                   (sign_extend:<V2XWIDE>
+                     (match_operand:VQN 2 "register_operand" "w"))
+                   (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+                 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+               (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
+             (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN
+   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
+  "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+       (vec_concat:<VNARROWQ2>
+         (truncate:<VNARROWQ>
+           (smin:<V2XWIDE>
+             (smax:<V2XWIDE>
+               (ashiftrt:<V2XWIDE>
+                 (plus:<V2XWIDE>
+                   (sign_extend:<V2XWIDE>
+                     (match_operand:VQN 2 "register_operand" "w"))
+                   (match_operand:<V2XWIDE> 4 "aarch64_simd_rsra_rnd_imm_vec"))
+                 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
+               (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
+             (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
+         (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN
+   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
+  "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
+
+(define_expand "aarch64_sqrshrun2_n<mode>"
+  [(match_operand:<VNARROWQ2> 0 "register_operand")
+   (match_operand:<VNARROWQ> 1 "register_operand")
+   (match_operand:VQN 2 "register_operand")
+   (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
+  "TARGET_SIMD"
+  {
+    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
+    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
+    rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
+    rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
+    rtx zero = CONST0_RTX (<V2XWIDE>mode);
+    rtx half_umax
+      = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
+                       GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
+    operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
+                               operands[1], operands[2], operands[3], rnd,
+                               zero, half_umax));
+    else
+      emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
+                               operands[1], operands[2], operands[3], rnd,
+                               zero, half_umax));
+    DONE;
+  }
+)
 
 ;; cm(eq|ge|gt|lt|le)
 ;; Note, we have constraints for Dz and Z as different expanders
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 2a46a31b6175914d9d1d8c4caf81a8a2b588849e..d350d9e7c013782891be1d26149a36dde5c53f50 100644
@@ -5532,42 +5532,42 @@ __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v8hi (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv8hi (__a, __b, __c);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v4si (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv4si (__a, __b, __c);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v2di (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv2di (__a, __b, __c);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v8hi_uuus (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv8hi_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v4si_uuus (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv4si_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c)
 {
-  return __builtin_aarch64_rshrn2v2di_uuus (__a, __b, __c);
+  return __builtin_aarch64_rshrn2_nv2di_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline int8x8_t
@@ -5630,42 +5630,42 @@ __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_s16 (int8x8_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v8hi (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv8hi (__a, __b, __c);
 }
 
 __extension__ extern __inline int16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v4si (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv4si (__a, __b, __c);
 }
 
 __extension__ extern __inline int32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_s64 (int32x2_t __a, int64x2_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v2di (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv2di (__a, __b, __c);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u16 (uint8x8_t __a, uint16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v8hi_uuus (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv8hi_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u32 (uint16x4_t __a, uint32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v4si_uuus (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv4si_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline uint32x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vshrn_high_n_u64 (uint32x2_t __a, uint64x2_t __b, const int __c)
 {
-  return __builtin_aarch64_shrn2v2di_uuus (__a, __b, __c);
+  return __builtin_aarch64_shrn2_nv2di_uuus (__a, __b, __c);
 }
 
 __extension__ extern __inline poly8x8_t
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 15436c8ef37a07d4cafdd855351cac1a6723f8f9..7f9a512809d7ca12d4944257b34ce6522327cbba 100644
     UNSPEC_SQSHLU      ; Used in aarch64-simd.md.
     UNSPEC_SQSHL       ; Used in aarch64-simd.md.
     UNSPEC_UQSHL       ; Used in aarch64-simd.md.
-    UNSPEC_SQSHRUN     ; Used in aarch64-simd.md.
-    UNSPEC_SQRSHRUN    ; Used in aarch64-simd.md.
-    UNSPEC_SQSHRN      ; Used in aarch64-simd.md.
-    UNSPEC_UQSHRN      ; Used in aarch64-simd.md.
-    UNSPEC_SQRSHRN     ; Used in aarch64-simd.md.
-    UNSPEC_UQRSHRN     ; Used in aarch64-simd.md.
     UNSPEC_SSHL                ; Used in aarch64-simd.md.
     UNSPEC_USHL                ; Used in aarch64-simd.md.
     UNSPEC_SRSHL       ; Used in aarch64-simd.md.
 
 (define_int_iterator VQSHL_N [UNSPEC_SQSHLU UNSPEC_SQSHL UNSPEC_UQSHL])
 
-(define_int_iterator VQSHRN_N [UNSPEC_SQSHRUN UNSPEC_SQRSHRUN
-                               UNSPEC_SQSHRN UNSPEC_UQSHRN
-                               UNSPEC_SQRSHRN UNSPEC_UQRSHRN])
-
 (define_int_iterator SQRDMLH_AS [UNSPEC_SQRDMLAH UNSPEC_SQRDMLSH])
 
 (define_int_iterator PERMUTE [UNSPEC_ZIP1 UNSPEC_ZIP2
                      (UNSPEC_URSHR  "ur") (UNSPEC_SRSHR  "sr")
                      (UNSPEC_SQSHLU "s") (UNSPEC_SQSHL   "s")
                      (UNSPEC_UQSHL  "u")
-                     (UNSPEC_SQSHRUN "s") (UNSPEC_SQRSHRUN "s")
-                      (UNSPEC_SQSHRN "s")  (UNSPEC_UQSHRN "u")
-                      (UNSPEC_SQRSHRN "s") (UNSPEC_UQRSHRN "u")
                      (UNSPEC_USHL  "u")   (UNSPEC_SSHL  "s")
                      (UNSPEC_USHLL  "u")  (UNSPEC_SSHLL "s")
                      (UNSPEC_URSHL  "ur") (UNSPEC_SRSHL  "sr")
 ])
 
 (define_int_attr r [(UNSPEC_SQDMULH "") (UNSPEC_SQRDMULH "r")
-                   (UNSPEC_SQSHRUN "") (UNSPEC_SQRSHRUN "r")
-                    (UNSPEC_SQSHRN "")  (UNSPEC_UQSHRN "")
-                    (UNSPEC_SQRSHRN "r") (UNSPEC_UQRSHRN "r")
                     (UNSPEC_SQSHL   "")  (UNSPEC_UQSHL  "")
                     (UNSPEC_SQRSHL   "r")(UNSPEC_UQRSHL  "r")
                    (UNSPEC_SMULHS "") (UNSPEC_UMULHS "")
                     (UNSPEC_SLI   "l") (UNSPEC_SRI   "r")])
 
 (define_int_attr u [(UNSPEC_SQSHLU "u") (UNSPEC_SQSHL "") (UNSPEC_UQSHL "")
-                   (UNSPEC_SQSHRUN "u") (UNSPEC_SQRSHRUN "u")
-                   (UNSPEC_SQSHRN "")  (UNSPEC_UQSHRN "")
-                   (UNSPEC_SQRSHRN "") (UNSPEC_UQRSHRN "")
                    (UNSPEC_SHADD "") (UNSPEC_UHADD "u")
                    (UNSPEC_SRHADD "") (UNSPEC_URHADD "u")])