From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed, 26 Apr 2023 14:10:18 +0000 (+0100)
Subject: aarch64: Reimplement RSHRN intrinsic patterns with standard RTL codes
X-Git-Tag: basepoints/gcc-15~9873
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=80afac3121778e509e1847be90a1999236cff03a;p=thirdparty%2Fgcc.git

aarch64: Reimplement RSHRN intrinsic patterns with standard RTL codes

This patch reimplements the backend patterns for the rshrn intrinsics
using standard RTL codes rather than UNSPECs.  We already represent
shrn as a truncate of a shift.  rshrn can be represented as
truncate ((src + (1 << (shft - 1))) >> shft), similar to how LLVM
treats it (see the scalar sketch after the patch).  I have a
follow-up patch to do the same for the rshrn2 pattern, which will
allow us to remove UNSPEC_RSHRN entirely.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (aarch64_rshrn<mode>_insn_le):
	Reimplement with standard RTL codes instead of an UNSPEC.
	(aarch64_rshrn<mode>_insn_be): Likewise.
	(aarch64_rshrn<mode>): Adjust for the above.
	* config/aarch64/predicates.md (aarch64_simd_rshrn_imm_vec):
	Define.
---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index cb2223d29c2d..f8913107aad6 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1990,11 +1990,15 @@
 (define_insn "aarch64_rshrn<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
-	  (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
-			      (match_operand:VQN 2
-				"aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
-	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+	  (truncate:<VNARROWQ>
+	    (lshiftrt:VQN
+	      (plus:VQN (match_operand:VQN 1 "register_operand" "w")
+			(match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
+	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
+	  (match_operand:<VNARROWQ> 4 "aarch64_simd_or_scalar_imm_zero")))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN
+   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
+      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
   "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
@@ -2002,11 +2006,15 @@
 (define_insn "aarch64_rshrn<mode>_insn_be"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
-	  (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
-	  (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
-			      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
-			      UNSPEC_RSHRN)))]
-  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+	  (match_operand:<VNARROWQ> 4 "aarch64_simd_or_scalar_imm_zero")
+	  (truncate:<VNARROWQ>
+	    (lshiftrt:VQN
+	      (plus:VQN (match_operand:VQN 1 "register_operand" "w")
+			(match_operand:VQN 3 "aarch64_simd_rshrn_imm_vec"))
+	      (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN
+   && INTVAL (CONST_VECTOR_ELT (operands[3], 0))
+      == (HOST_WIDE_INT_1 << (INTVAL (CONST_VECTOR_ELT (operands[2], 0)) - 1))"
   "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
   [(set_attr "type" "neon_shift_imm_narrow_q")]
 )
@@ -2024,18 +2032,22 @@
     }
   else
     {
+      rtx shft
+	= aarch64_simd_gen_const_vector_dup (<MODE>mode,
+					     HOST_WIDE_INT_1U
+					      << (INTVAL (operands[2]) - 1));
       rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
       operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       INTVAL (operands[2]));
       if (BYTES_BIG_ENDIAN)
	 emit_insn (
	       gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
-						operands[2],
+						operands[2], shft,
						CONST0_RTX (<VNARROWQ>mode)));
       else
	 emit_insn (
	       gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
-						operands[2],
+						operands[2], shft,
						CONST0_RTX (<VNARROWQ>mode)));
 
       /* The intrinsic expects a narrow result, so emit a subreg that will
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 3f5f4df8c468..242f10aea1f4 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -587,6 +587,12 @@
   (and (match_code "const_vector")
        (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 64)")))
 
+(define_predicate "aarch64_simd_rshrn_imm_vec"
+  (and (match_code "const_vector")
+       (match_test "aarch64_const_vec_all_same_in_range_p (op, 1,
+				HOST_WIDE_INT_1U
+				<< (GET_MODE_UNIT_BITSIZE (mode) - 1))")))
+
 (define_predicate "aarch64_simd_shift_imm_bitsize_qi"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 8)")))
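
As a scalar sketch of the rounding-shift-narrow representation described
above (illustrative C only, not code from the patch; rshrn_lane is an
invented name), assuming a 16-bit element narrowed to 8 bits:

#include <stdint.h>
#include <stdio.h>

/* One lane of RSHRN modelled as
   truncate ((src + (1 << (shft - 1))) >> shft):
   adding half of the shifted-out range before the shift rounds the
   result to nearest instead of truncating towards zero.  */
static uint8_t
rshrn_lane (uint16_t src, unsigned shft)
{
  return (uint8_t) ((src + (1u << (shft - 1))) >> shft);
}

int
main (void)
{
  /* Low byte 0x34 < 0x80, so the rounding addend does not carry.  */
  printf ("%#x\n", rshrn_lane (0x1234, 8));	/* prints 0x12 */
  /* Low byte 0xb4 >= 0x80, so the addend carries into the kept bits.  */
  printf ("%#x\n", rshrn_lane (0x12b4, 8));	/* prints 0x13 */
  return 0;
}

The insn conditions above require operand 3 to be exactly this
1 << (shft - 1) addend, so the new RTL form describes RSHRN semantics
and nothing else.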
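
The bounds in the new aarch64_simd_rshrn_imm_vec predicate follow from
that addend.  A minimal sketch of the invariant (again illustrative C;
rshrn_round_addend is an invented name):

#include <assert.h>
#include <stdint.h>

/* The predicate accepts a same-element constant vector in
   [1, 1 << (BITS - 1)] for a BITS-bit wide element, which covers every
   rounding addend 1 << (shft - 1) for shft in [1, BITS]; the insn
   conditions then tie the addend to the actual shift amount.  */
static uint64_t
rshrn_round_addend (unsigned bits, unsigned shft)
{
  assert (shft >= 1 && shft <= bits);
  uint64_t addend = UINT64_C (1) << (shft - 1);
  assert (addend >= 1 && addend <= (UINT64_C (1) << (bits - 1)));
  return addend;
}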