From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 19 Mar 2021 12:48:44 +0000 (+0100)
Subject: arm: Fix mve_vshlq* [PR99593]
X-Git-Tag: basepoints/gcc-12~524
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=009528d61c796608affd1eaa18ae31a3679eb46d;p=thirdparty%2Fgcc.git

arm: Fix mve_vshlq* [PR99593]

As mentioned in the PR, before the r11-6708-gbfab355012ca0f5219da8beb04f2fdaf757d34b7
change v[al]shr<mode>3 expanders were expanding the shifts by register
to gen_ashl<mode>3_{,un}signed which don't support immediate CONST_VECTOR
shift amounts, but now expand to mve_vshlq_<supf><mode> which does.
The testcase ICEs, because the constraint doesn't match the predicate and
because LRA works solely with the constraints, so it can e.g. from REG_EQUAL
propagate there a CONST_VECTOR which matches the constraint but fails the
predicate and only later on other passes will notice the predicate fails
and ICE.

Fixed by adding a constraint that matches the immediate part of the
predicate.

	PR target/99593
	* config/arm/constraints.md (Ds): New constraint.
	* config/arm/vec-common.md (mve_vshlq_<supf><mode>): Use w,Ds
	constraint instead of w,Dm.

	* g++.target/arm/pr99593.C: New test.
---

diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 919f2995f1b0..de0ca8ecbfe4 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -32,7 +32,7 @@
 
 ;; The following multi-letter normal constraints have been used:
 ;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, DN, Dm, Dl, DL, Do, Dv, Dy, Di,
-;;			 Dt, Dp, Dz, Tu, Te
+;;			 Ds, Dt, Dp, Dz, Tu, Te
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra,
 ;;		     Rg, Ri
@@ -412,6 +412,14 @@
   (and (match_code "const_double")
        (match_test "TARGET_32BIT && vfp3_const_double_for_fract_bits (op)")))
 
+(define_constraint "Ds"
+ "@internal
+  In ARM/Thumb-2 state a const_vector which can be used as immediate
+  in vshl instruction."
+ (and (match_code "const_vector")
+      (match_test "TARGET_32BIT
+		   && imm_for_neon_lshift_operand (op, GET_MODE (op))")))
+
 (define_constraint "Dp"
  "@internal
   In ARM/ Thumb2 a const_double which can be used with a vcvt.s32.f32 with bits operation"
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 345ada055235..d7011c6ec13d 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -299,7 +299,7 @@
 (define_insn "mve_vshlq_<supf><mode>"
   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
 	(unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w,w")
-		       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")]
+		       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Ds")]
 	 VSHLQ))]
   "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT"
   "@
diff --git a/gcc/testsuite/g++.target/arm/pr99593.C b/gcc/testsuite/g++.target/arm/pr99593.C
new file mode 100644
index 000000000000..ba2bb9825f02
--- /dev/null
+++ b/gcc/testsuite/g++.target/arm/pr99593.C
@@ -0,0 +1,129 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c++17 -O2 -mfloat-abi=hard -mcpu=generic-armv7-a" } */
+// { dg-require-effective-target arm_hard_ok }
+// { dg-require-effective-target arm_neon_ok }
+// { dg-add-options arm_neon }
+
+#include <arm_neon.h>
+
+typedef uint16x4_t e;
+typedef int16x4_t f;
+typedef int32x4_t g;
+typedef float32x4_t h;
+typedef uint32x4_t i;
+g j, p;
+g k(int l) { return vdupq_n_s32(l); }
+i n(f l) { return (i)vmovl_u16((e)l); }
+template <int, typename> struct q;
+template <int r, typename aa> q<r, aa> operator<(aa s, q<r, aa> t) {
+  return q<r, aa>(s) < t;
+}
+template <typename ab, typename ac, int r> q<r, ab> ad(const q<r, ac> &);
+typedef q<4, int> ae;
+template <> class q<4, float> {
+ public:
+ q(h af) : ag(af) {}
+  q(float) {}
+  static q ah(void *ai) {
+    float *l = (float *)ai;
+    return vld1q_f32(l);
+  }
+  q operator+(q o) {
+    h l = ag, m = o.ag;
+    return vaddq_f32(l, m);
+  }
+  q operator*(q) {
+    h l = ag, m;
+    return vmulq_f32(l, m);
+  }
+  h ag;
+};
+template <> class q<4, unsigned short> {
+ public:
+ q(f af) : ag(af) {}
+  static q ah(void *ai) {
+    unsigned short *l = (unsigned short *)ai;
+    return (f)vld1_s16((int16_t *)l);
+  }
+  void aj() {
+    f m = ag;
+    vst1_u16(0, (e)m);
+  }
+  f ag;
+};
+template <> class q<4, int> {
+ public:
+ q(g af) : ag(af) {}
+  q(int u) { ag = k(u); }
+  static q ah(void *ai) {
+    int32_t *l = (int32_t *)ai;
+    return vld1q_s32(l);
+  }
+  q operator&(q o) {
+    g v = ag & o.ag;
+    return v;
+  }
+  q operator|(q o) {
+    g w = ag | o.ag;
+    return w;
+  }
+  q operator^(q) {
+    g x = ag ^ p;
+    return x;
+  }
+  q operator>>(int ak) { return ag >> q(ak).ag; }
+  q operator<(q) {
+    g y, z = j < ag;
+    y = (g)z;
+    return y;
+  }
+  g ag;
+};
+template <> ae ad(const q<4, unsigned short> &al) { return g(n(al.ag)); }
+template <> q<4, unsigned short> ad(const ae &al) {
+  i l(i(al.ag));
+  return (f)vmovn_s32((g)l);
+}
+q<4, float> am(long long an) {
+  q ao = q<4, unsigned short>::ah(&an);
+  ae ak = ad<int>(ao), ap = ak & 8000, aq = ak ^ ap, ar = 55 < aq, as(aq);
+  q at = as & ar;
+  ae au = ap | at;
+  return q<4, float>::ah(&au);
+}
+q<4, unsigned short> av(q<4, float> aw) {
+  ae ak = ae::ah(&aw), ap = ak & 80000000, aq = ap, ax = 5, as = aq >> 3,
+    ay = 6;
+  q az = ax & as;
+  ae au = ay | az;
+  return ad<unsigned short>(au);
+}
+struct ba {
+  typedef int bb;
+  static q<4, float> bc(int s) { return am(s); }
+};
+q<4, float> bd(q<4, float> s) { return s * 0; }
+template <typename be> void bf(void *bg, void *al, int bh, int bi) {
+  int bj;
+  auto bk(static_cast<typename be::bb *>(al) + bh),
+    d = static_cast<typename be::bb *>(bg),
+    bl = be::bc(static_cast<typename be::bb *>(al)[0]), bm = be::bc(0),
+    c = bm;
+  for (; bi;) {
+    auto a = c, bn = be::bc(static_cast<typename be::bb *>(al)[1]),
+      bo = be::bc(1);
+    q bp = bn;
+    q bq = bp;
+    auto b = bq + bo;
+    bl = be::bc(static_cast<typename be::bb *>(al)[2]);
+    bm = be::bc(bk[2]);
+    c = bl + bm;
+    q br = a + b;
+    auto bs = br;
+    q bt = bd(bs);
+    av(bt).aj();
+    d[0] = bj;
+  }
+}
+int bu;
+void bv() { bf<ba>(0, 0, 0, bu); }