From: Kyrylo Tkachov Date: Mon, 8 Mar 2021 15:05:21 +0000 (+0000) Subject: aarch64: Fix PR99437 - tighten shift predicates for narrowing shift patterns X-Git-Tag: basepoints/gcc-12~701 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=0d9a70ea3881c284b7689b691d54d047b55b486d;p=thirdparty%2Fgcc.git aarch64: Fix PR99437 - tighten shift predicates for narrowing shift patterns In this bug combine forms the (R)SHRN(2) instructions with an invalid shift amount. The intrinsic expanders for these patterns validate the right shift amount but if the final patterns end up being matched by combine (or other RTL passes I suppose) they still let the wrong const_vector through. This patch tightens up the predicates for the instructions involved by using predicates for the right shift amount const_vectors. gcc/ChangeLog: PR target/99437 * config/aarch64/predicates.md (aarch64_simd_shift_imm_vec_qi): Define. (aarch64_simd_shift_imm_vec_hi): Likewise. (aarch64_simd_shift_imm_vec_si): Likewise. (aarch64_simd_shift_imm_vec_di): Likewise. * config/aarch64/aarch64-simd.md (aarch64_shrn_insn_le): Use predicate from above. (aarch64_shrn_insn_be): Likewise. (aarch64_rshrn_insn_le): Likewise. (aarch64_rshrn_insn_be): Likewise. (aarch64_shrn2_insn_le): Likewise. (aarch64_shrn2_insn_be): Likewise. (aarch64_rshrn2_insn_le): Likewise. (aarch64_rshrn2_insn_be): Likewise. gcc/testsuite/ChangeLog: PR target/99437 * gcc.target/aarch64/simd/pr99437.c: New test. 
--- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 71aa77dd0102..348a43d835d4 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1738,7 +1738,7 @@ (vec_concat:<VNARROWQ2> (truncate:<VNARROWQ> (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "aarch64_simd_rshift_imm"))) + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))) (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "shrn\\t%0.<Vntype>, %1.<Vtype>, %2" @@ -1751,7 +1751,7 @@ (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero") (truncate:<VNARROWQ> (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "aarch64_simd_rshift_imm")))))] + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "shrn\\t%0.<Vntype>, %1.<Vtype>, %2" [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -1786,8 +1786,8 @@ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") (vec_concat:<VNARROWQ2> (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "aarch64_simd_rshift_imm") - ] UNSPEC_RSHRN) + (match_operand:VQN 2 + "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN) (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2" @@ -1799,8 +1799,8 @@ (vec_concat:<VNARROWQ2> (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero") (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w") - (match_operand:VQN 2 "aarch64_simd_rshift_imm") - ] UNSPEC_RSHRN)))] + (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")] + UNSPEC_RSHRN)))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2" [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -1836,7 +1836,7 @@ (match_operand:<VNARROWQ> 1 "register_operand" "0") (truncate:<VNARROWQ> (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w") - (match_operand:VQN 3 "aarch64_simd_rshift_imm")))))] + (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3" [(set_attr 
"type" "neon_shift_imm_narrow_q")] @@ -1847,7 +1847,8 @@ (vec_concat:<VNARROWQ2> (truncate:<VNARROWQ> (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w") - (match_operand:VQN 3 "aarch64_simd_rshift_imm"))) + (match_operand:VQN 3 + "aarch64_simd_shift_imm_vec_<vn_mode>"))) (match_operand:<VNARROWQ> 1 "register_operand" "0")))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3" @@ -1878,7 +1879,8 @@ (vec_concat:<VNARROWQ2> (match_operand:<VNARROWQ> 1 "register_operand" "0") (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w") - (match_operand:VQN 3 "aarch64_simd_rshift_imm")] UNSPEC_RSHRN)))] + (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")] + UNSPEC_RSHRN)))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3" [(set_attr "type" "neon_shift_imm_narrow_q")] @@ -1888,7 +1890,8 @@ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w") (vec_concat:<VNARROWQ2> (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w") - (match_operand:VQN 3 "aarch64_simd_rshift_imm")] UNSPEC_RSHRN) + (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")] + UNSPEC_RSHRN) (match_operand:<VNARROWQ> 1 "register_operand" "0")))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3" diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 75612fd9f66c..c55842b9c747 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -545,6 +545,22 @@ (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 1, 64)"))) +(define_predicate "aarch64_simd_shift_imm_vec_qi" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 8)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_hi" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 16)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_si" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 1, 32)"))) + +(define_predicate "aarch64_simd_shift_imm_vec_di" + (and (match_code "const_vector") + (match_test 
"aarch64_const_vec_all_same_in_range_p (op, 1, 64)"))) + (define_predicate "aarch64_simd_shift_imm_bitsize_qi" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 8)"))) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/pr99437.c b/gcc/testsuite/gcc.target/aarch64/simd/pr99437.c new file mode 100644 index 000000000000..976fac4567aa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/pr99437.c @@ -0,0 +1,17 @@ +/* { dg-do assemble } */ +/* { dg-options "-O" } */ + +#include <arm_neon.h> + +uint8x16_t +foo (uint16x8_t a, uint8x8_t b) +{ + return vcombine_u8 (vmovn_u16 (vshrq_n_u16 (a, 9)), b); +} + +uint8x16_t +foo2 (uint16x8_t a, uint8x8_t b) +{ + return vcombine_u8 (b, vmovn_u16 (vshrq_n_u16 (a, 15))); +} +