aarch64: PR target/99195 annotate simple narrowing patterns for vec-concat-zero
author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>
           Wed, 10 May 2023 09:40:06 +0000 (10:40 +0100)
committer  Kyrylo Tkachov <kyrylo.tkachov@arm.com>
           Wed, 10 May 2023 09:42:22 +0000 (10:42 +0100)
This patch cleans up some almost-duplicate patterns for the XTN, SQXTN and UQXTN instructions.
Using the <vczle><vczbe> attributes we can remove the separate BYTES_BIG_ENDIAN and
!BYTES_BIG_ENDIAN patterns, as well as the intrinsic expanders that select between the two.
Tests are also added.  Thankfully the diffstat comes out negative \O/.
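
For illustration (this is just one of the new tests in pr99195_4.c expanded by hand,
not part of the patch itself), the kind of source the annotation targets is a narrowing
intrinsic whose result is combined with a zero upper half; with the <vczle><vczbe>
variants this should collapse to a single XTN with no separate zeroing MOV/FMOV:

  #include <arm_neon.h>

  uint8x16_t
  foo_movn_u8 (uint16x8_t a)
  {
    /* Narrow each 16-bit lane to 8 bits and place the result in the low
       half of a 128-bit vector whose high half is zero.  The annotated
       trunc<mode><Vnarrowq>2<vczle><vczbe> pattern can match this
       vec_concat-with-zero form directly.  */
    uint8x8_t zeros = vcreate_u8 (0);
    return vcombine_u8 (vmovn_u16 (a), zeros);
  }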

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le): Delete.
(aarch64_xtn<mode>_insn_be): Likewise.
(trunc<mode><Vnarrowq>2): Rename to...
(trunc<mode><Vnarrowq>2<vczle><vczbe>): ... This.
(aarch64_xtn<mode>): Move under the above.  Just emit the truncate RTL.
(aarch64_<su>qmovn<mode>): Likewise.
(aarch64_<su>qmovn<mode><vczle><vczbe>): New define_insn.
(aarch64_<su>qmovn<mode>_insn_le): Delete.
(aarch64_<su>qmovn<mode>_insn_be): Likewise.

gcc/testsuite/ChangeLog:

PR target/99195
* gcc.target/aarch64/simd/pr99195_4.c: Add tests for vmovn, vqmovn.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 46038889573a332ec14dcf80e81748a20289755d..9ad0489f79a7ad6e68d920b1b345ee274e372b6c 100644
 
 ;; Narrowing operations.
 
-(define_insn "aarch64_xtn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
-         (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_insn "aarch64_xtn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_expand "aarch64_xtn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
-       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
-  "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
-                               CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
-                               CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                <VNARROWQ2>mode));
-    DONE;
-  }
-)
-
 (define_insn "aarch64_xtn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
 
 ;; sqmovn and uqmovn
 
-(define_insn "aarch64_<su>qmovn<mode>"
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
          (match_operand:SD_HSDI 1 "register_operand" "w")))]
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<su>qmovn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (SAT_TRUNC:<VNARROWQ>
-           (match_operand:VQN 1 "register_operand" "w"))
-         (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<su>qmovn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-         (SAT_TRUNC:<VNARROWQ>
-           (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_<su>qmovn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
-         (match_operand:VQN 1 "register_operand")))]
+         (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
-                               CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
-                               CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                <VNARROWQ2>mode));
-    DONE;
-  }
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
 (define_insn "aarch64_<su>qxtn2<mode>_le"
 )
 
 ;; Truncate a 128-bit integer vector to a 64-bit vector.
-(define_insn "trunc<mode><Vnarrowq>2"
+(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
+;; Expander for the intrinsics that only takes one mode unlike the two-mode
+;; trunc optab.
+(define_expand "aarch64_xtn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {}
+)
+
 (define_insn "aarch64_bfdot<mode>"
   [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (plus:VDQSF
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
index b6ef15b6a972366979125252c60cc5d6996151ff..6127cb26781bbd2b727b3ac11489cf0a673e4597 100644
@@ -15,7 +15,6 @@ foo_##OP##_##OS (IT a, IT b)                     \
   return vcombine_##OS (v##OP##_##IS (a, b), zeros);      \
 }
 
-
 #define FUNC(OT,IT,IMT,IS,OS)                  \
 MYOP (OT, IT, IMT, addhn, IS, OS)              \
 MYOP (OT, IT, IMT, subhn, IS, OS)              \
@@ -30,6 +29,27 @@ FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
 FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
 FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
 
+#undef MYOP
+#define MYOP(OT,IT,IMT,OP,IS,OS)               \
+OT                                             \
+foo_##OP##_##OS (IT a)                         \
+{                                              \
+  IMT zeros = vcreate_##OS (0);                        \
+  return vcombine_##OS (v##OP##_##IS (a), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(OP)                                       \
+MYOP (int8x16_t, int16x8_t, int8x8_t, OP, s16, s8)     \
+MYOP (int16x8_t, int32x4_t, int16x4_t, OP, s32, s16)   \
+MYOP (int32x4_t, int64x2_t, int32x2_t, OP, s64, s32)   \
+MYOP (uint8x16_t, uint16x8_t, uint8x8_t, OP, u16, u8)  \
+MYOP (uint16x8_t, uint32x4_t, uint16x4_t, OP, u32, u16)        \
+MYOP (uint32x4_t, uint64x2_t, uint32x2_t, OP, u64, u32)        \
+
+FUNC (movn)
+FUNC (qmovn)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
 /* { dg-final { scan-assembler-not {\tmov\t} } }  */