aarch64: PR target/99195 annotate simple narrowing patterns for vec-concat-zero
author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>
           Wed, 10 May 2023 09:40:06 +0000 (10:40 +0100)
committer  Kyrylo Tkachov <kyrylo.tkachov@arm.com>
           Wed, 10 May 2023 09:42:22 +0000 (10:42 +0100)
This patch cleans up some almost-duplicate patterns for the XTN, SQXTN and UQXTN instructions.
Using the <vczle><vczbe> attributes we can remove the separate BYTES_BIG_ENDIAN and
!BYTES_BIG_ENDIAN patterns, as well as the intrinsic expanders that select between the two.
Tests are also added.  Thankfully the diffstat comes out negative \O/.
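
For illustration (this is just one of the new tests in pr99195_4.c expanded by hand,
not part of the patch itself), the kind of source the annotation targets is a narrowing
intrinsic whose result is combined with a zero upper half; with the <vczle><vczbe>
variants this should collapse to a single XTN with no separate zeroing MOV/FMOV:

  #include <arm_neon.h>

  uint8x16_t
  foo_movn_u8 (uint16x8_t a)
  {
    /* Narrow each 16-bit lane to 8 bits and place the result in the low
       half of a 128-bit vector whose high half is zero.  The annotated
       trunc<mode><Vnarrowq>2<vczle><vczbe> pattern can match this
       vec_concat-with-zero form directly.  */
    uint8x8_t zeros = vcreate_u8 (0);
    return vcombine_u8 (vmovn_u16 (a), zeros);
  }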

Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.

gcc/ChangeLog:

PR target/99195
* config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le): Delete.
(aarch64_xtn<mode>_insn_be): Likewise.
(trunc<mode><Vnarrowq>2): Rename to...
(trunc<mode><Vnarrowq>2<vczle><vczbe>): ... This.
(aarch64_xtn<mode>): Move under the above.  Just emit the truncate RTL.
(aarch64_<su>qmovn<mode>): Likewise.
(aarch64_<su>qmovn<mode><vczle><vczbe>): New define_insn.
(aarch64_<su>qmovn<mode>_insn_le): Delete.
(aarch64_<su>qmovn<mode>_insn_be): Likewise.

gcc/testsuite/ChangeLog:

PR target/99195
* gcc.target/aarch64/simd/pr99195_4.c: Add tests for vmovn, vqmovn.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 46038889573a332ec14dcf80e81748a20289755d..9ad0489f79a7ad6e68d920b1b345ee274e372b6c 100644
 
 ;; Narrowing operations.
 
-(define_insn "aarch64_xtn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
-         (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_insn "aarch64_xtn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "xtn\\t%0.<Vntype>, %1.<Vtype>"
-  [(set_attr "type" "neon_move_narrow_q")]
-)
-
-(define_expand "aarch64_xtn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
-       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
-  "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
-                               CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
-                               CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                <VNARROWQ2>mode));
-    DONE;
-  }
-)
-
 (define_insn "aarch64_xtn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (vec_concat:<VNARROWQ2>
 
 ;; sqmovn and uqmovn
 
-(define_insn "aarch64_<su>qmovn<mode>"
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
          (match_operand:SD_HSDI 1 "register_operand" "w")))]
   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
-(define_insn "aarch64_<su>qmovn<mode>_insn_le"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (SAT_TRUNC:<VNARROWQ>
-           (match_operand:VQN 1 "register_operand" "w"))
-         (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_insn "aarch64_<su>qmovn<mode>_insn_be"
-  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
-       (vec_concat:<VNARROWQ2>
-         (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
-         (SAT_TRUNC:<VNARROWQ>
-           (match_operand:VQN 1 "register_operand" "w"))))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
-  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
-  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
-)
-
-(define_expand "aarch64_<su>qmovn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+(define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (SAT_TRUNC:<VNARROWQ>
-         (match_operand:VQN 1 "register_operand")))]
+         (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
-  {
-    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
-    if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
-                               CONST0_RTX (<VNARROWQ>mode)));
-    else
-      emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
-                               CONST0_RTX (<VNARROWQ>mode)));
-
-    /* The intrinsic expects a narrow result, so emit a subreg that will get
-       optimized away as appropriate.  */
-    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
-                                                <VNARROWQ2>mode));
-    DONE;
-  }
+  "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
+  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
 )
 
 (define_insn "aarch64_<su>qxtn2<mode>_le"
 )
 
 ;; Truncate a 128-bit integer vector to a 64-bit vector.
-(define_insn "trunc<mode><Vnarrowq>2"
+(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
   "TARGET_SIMD"
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
+;; Expander for the intrinsics that only takes one mode unlike the two-mode
+;; trunc optab.
+(define_expand "aarch64_xtn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {}
+)
+
 (define_insn "aarch64_bfdot<mode>"
   [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (plus:VDQSF
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99195_4.c
index b6ef15b6a972366979125252c60cc5d6996151ff..6127cb26781bbd2b727b3ac11489cf0a673e4597 100644
@@ -15,7 +15,6 @@ foo_##OP##_##OS (IT a, IT b)                     \
   return vcombine_##OS (v##OP##_##IS (a, b), zeros);      \
 }
 
-
 #define FUNC(OT,IT,IMT,IS,OS)                  \
 MYOP (OT, IT, IMT, addhn, IS, OS)              \
 MYOP (OT, IT, IMT, subhn, IS, OS)              \
@@ -30,6 +29,27 @@ FUNC (uint8x16_t, uint16x8_t, uint8x8_t, u16, u8)
 FUNC (uint16x8_t, uint32x4_t, uint16x4_t, u32, u16)
 FUNC (uint32x4_t, uint64x2_t, uint32x2_t, u64, u32)
 
+#undef MYOP
+#define MYOP(OT,IT,IMT,OP,IS,OS)               \
+OT                                             \
+foo_##OP##_##OS (IT a)                         \
+{                                              \
+  IMT zeros = vcreate_##OS (0);                        \
+  return vcombine_##OS (v##OP##_##IS (a), zeros);      \
+}
+
+#undef FUNC
+#define FUNC(OP)                                       \
+MYOP (int8x16_t, int16x8_t, int8x8_t, OP, s16, s8)     \
+MYOP (int16x8_t, int32x4_t, int16x4_t, OP, s32, s16)   \
+MYOP (int32x4_t, int64x2_t, int32x2_t, OP, s64, s32)   \
+MYOP (uint8x16_t, uint16x8_t, uint8x8_t, OP, u16, u8)  \
+MYOP (uint16x8_t, uint32x4_t, uint16x4_t, OP, u32, u16)        \
+MYOP (uint32x4_t, uint64x2_t, uint32x2_t, OP, u64, u32)        \
+
+FUNC (movn)
+FUNC (qmovn)
+
 /* { dg-final { scan-assembler-not {\tfmov\t} } }  */
 /* { dg-final { scan-assembler-not {\tmov\t} } }  */