git.ipfire.org Git - thirdparty/gcc.git/commitdiff
re PR tree-optimization/51581 (Integer division by constant is not vectorized)
author Jakub Jelinek <jakub@redhat.com>
Fri, 15 Jun 2012 11:07:47 +0000 (13:07 +0200)
committer Jakub Jelinek <jakub@gcc.gnu.org>
Fri, 15 Jun 2012 11:07:47 +0000 (13:07 +0200)
PR tree-optimization/51581
* expr.h (choose_multiplier): New prototype.
* expmed.c (choose_multiplier): No longer static.
Change multiplier_ptr from rtx * to UHWI *.
(expand_divmod): Adjust callers.
* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
Renamed to...
(vect_recog_divmod_pattern): ... this.  Pass bb_vinfo as last
argument to new_stmt_vec_info.  Attempt to optimize also divisions
by non-pow2 constants if integer vector division isn't supported.
* tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
don't look at pattern stmts and sequences.

* gcc.c-torture/execute/pr51581-1.c: New test.
* gcc.c-torture/execute/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-1.c: New test.
* gcc.dg/vect/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-3.c: New test.
* gcc.target/i386/avx-pr51581-1.c: New test.
* gcc.target/i386/avx-pr51581-2.c: New test.
* gcc.target/i386/avx2-pr51581-1.c: New test.
* gcc.target/i386/avx2-pr51581-2.c: New test.
* gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.

From-SVN: r188656

16 files changed:
gcc/ChangeLog
gcc/expmed.c
gcc/expr.h
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.c-torture/execute/pr51581-1.c [new file with mode: 0644]
gcc/testsuite/gcc.c-torture/execute/pr51581-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/pr51581-1.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/pr51581-2.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/pr51581-3.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/slp-26.c
gcc/testsuite/gcc.target/i386/avx-pr51581-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-pr51581-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx2-pr51581-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx2-pr51581-2.c [new file with mode: 0644]
gcc/tree-vect-patterns.c
gcc/tree-vect-stmts.c

index 0322fbf9f428aa0985ab32f01e612fe84fe89587..00cffd6b4b7900e79869f797207a3ff4b638d395 100644 (file)
@@ -1,3 +1,18 @@
+2012-06-15  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/51581
+       * expr.h (choose_multiplier): New prototype.
+       * expmed.c (choose_multiplier): No longer static.
+       Change multiplier_ptr from rtx * to UHWI *.
+       (expand_divmod): Adjust callers.
+       * tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
+       Renamed to...
+       (vect_recog_divmod_pattern): ... this.  Pass bb_vinfo as last
+       argument to new_stmt_vec_info.  Attempt to optimize also divisions
+       by non-pow2 constants if integer vector division isn't supported.
+       * tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
+       don't look at pattern stmts and sequences.
+
 2012-06-15  Eric Botcazou  <ebotcazou@adacore.com>
 
        PR middle-end/53590
index 98f7c0916c3aa64d4961cc4cdf1c52e78d59c06e..b456bac177c71fbd3f2b8fa534bf6e7f563c5cdc 100644 (file)
@@ -2363,8 +2363,6 @@ static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
                                 struct algorithm *, enum mult_variant *, int);
 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
                              const struct algorithm *, enum mult_variant);
-static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
-                                                int, rtx *, int *, int *);
 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
 static rtx extract_high_half (enum machine_mode, rtx);
 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
@@ -3293,10 +3291,10 @@ ceil_log2 (unsigned HOST_WIDE_INT x)
    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
 
-static
 unsigned HOST_WIDE_INT
 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
-                  rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
+                  unsigned HOST_WIDE_INT *multiplier_ptr,
+                  int *post_shift_ptr, int *lgup_ptr)
 {
   HOST_WIDE_INT mhigh_hi, mlow_hi;
   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
@@ -3368,12 +3366,12 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
   if (n < HOST_BITS_PER_WIDE_INT)
     {
       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
-      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
+      *multiplier_ptr = mhigh_lo & mask;
       return mhigh_lo >= mask;
     }
   else
     {
-      *multiplier_ptr = GEN_INT (mhigh_lo);
+      *multiplier_ptr = mhigh_lo;
       return mhigh_hi;
     }
 }
@@ -4053,10 +4051,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
          {
            if (unsignedp)
              {
-               unsigned HOST_WIDE_INT mh;
+               unsigned HOST_WIDE_INT mh, ml;
                int pre_shift, post_shift;
                int dummy;
-               rtx ml;
                unsigned HOST_WIDE_INT d = (INTVAL (op1)
                                            & GET_MODE_MASK (compute_mode));
 
@@ -4118,7 +4115,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
                              = (shift_cost[speed][compute_mode][post_shift - 1]
                                 + shift_cost[speed][compute_mode][1]
                                 + 2 * add_cost[speed][compute_mode]);
-                           t1 = expand_mult_highpart (compute_mode, op0, ml,
+                           t1 = expand_mult_highpart (compute_mode, op0,
+                                                      GEN_INT (ml),
                                                       NULL_RTX, 1,
                                                       max_cost - extra_cost);
                            if (t1 == 0)
@@ -4149,7 +4147,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
                            extra_cost
                              = (shift_cost[speed][compute_mode][pre_shift]
                                 + shift_cost[speed][compute_mode][post_shift]);
-                           t2 = expand_mult_highpart (compute_mode, t1, ml,
+                           t2 = expand_mult_highpart (compute_mode, t1,
+                                                      GEN_INT (ml),
                                                       NULL_RTX, 1,
                                                       max_cost - extra_cost);
                            if (t2 == 0)
@@ -4262,8 +4261,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
                else if (size <= HOST_BITS_PER_WIDE_INT)
                  {
                    choose_multiplier (abs_d, size, size - 1,
-                                      &mlr, &post_shift, &lgup);
-                   ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
+                                      &ml, &post_shift, &lgup);
                    if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
                      {
                        rtx t1, t2, t3;
@@ -4275,8 +4273,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
                                      + shift_cost[speed][compute_mode][size - 1]
                                      + add_cost[speed][compute_mode]);
-                       t1 = expand_mult_highpart (compute_mode, op0, mlr,
-                                                  NULL_RTX, 0,
+                       t1 = expand_mult_highpart (compute_mode, op0,
+                                                  GEN_INT (ml), NULL_RTX, 0,
                                                   max_cost - extra_cost);
                        if (t1 == 0)
                          goto fail1;
@@ -4356,10 +4354,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
       /* We will come here only for signed operations.  */
        if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
          {
-           unsigned HOST_WIDE_INT mh;
+           unsigned HOST_WIDE_INT mh, ml;
            int pre_shift, lgup, post_shift;
            HOST_WIDE_INT d = INTVAL (op1);
-           rtx ml;
 
            if (d > 0)
              {
@@ -4399,8 +4396,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
                        extra_cost = (shift_cost[speed][compute_mode][post_shift]
                                      + shift_cost[speed][compute_mode][size - 1]
                                      + 2 * add_cost[speed][compute_mode]);
-                       t3 = expand_mult_highpart (compute_mode, t2, ml,
-                                                  NULL_RTX, 1,
+                       t3 = expand_mult_highpart (compute_mode, t2,
+                                                  GEN_INT (ml), NULL_RTX, 1,
                                                   max_cost - extra_cost);
                        if (t3 != 0)
                          {
index 0096367a7272bc2622a8c1835274c6bd75641e93..7444baf166d6080ab51aeb496d481b410e20c2b0 100644 (file)
@@ -243,6 +243,13 @@ extern rtx emit_store_flag (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
 /* Like emit_store_flag, but always succeeds.  */
 extern rtx emit_store_flag_force (rtx, enum rtx_code, rtx, rtx,
                                  enum machine_mode, int, int);
+
+/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
+   replace division by D, and put the least significant N bits of the result
+   in *MULTIPLIER_PTR and return the most significant bit.  */
+extern unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
+                                                int, unsigned HOST_WIDE_INT *,
+                                                int *, int *);
 \f
 /* Functions from builtins.c:  */
 extern rtx expand_builtin (tree, rtx, rtx, enum machine_mode, int);
index cbf9c02549989e17225bbedb2ba5e60e96fde7cb..f3d10e3fb5e581f17ee352f630a04067ab7938a3 100644 (file)
@@ -1,3 +1,17 @@
+2012-06-15  Jakub Jelinek  <jakub@redhat.com>
+
+       PR tree-optimization/51581
+       * gcc.c-torture/execute/pr51581-1.c: New test.
+       * gcc.c-torture/execute/pr51581-2.c: New test.
+       * gcc.dg/vect/pr51581-1.c: New test.
+       * gcc.dg/vect/pr51581-2.c: New test.
+       * gcc.dg/vect/pr51581-3.c: New test.
+       * gcc.target/i386/avx-pr51581-1.c: New test.
+       * gcc.target/i386/avx-pr51581-2.c: New test.
+       * gcc.target/i386/avx2-pr51581-1.c: New test.
+       * gcc.target/i386/avx2-pr51581-2.c: New test.
+       * gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.
+
 2012-06-15  Richard Guenther  <rguenther@suse.de>
 
        * gcc.c-torture/execute/20120615-1.c: New testcase.
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr51581-1.c b/gcc/testsuite/gcc.c-torture/execute/pr51581-1.c
new file mode 100644 (file)
index 0000000..396b7aa
--- /dev/null
@@ -0,0 +1,157 @@
+/* PR tree-optimization/51581 */
+
+extern void abort (void);
+
+#define N 4096
+int a[N], c[N];
+unsigned int b[N], d[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = a[i] / 3;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = b[i] / 3;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = a[i] / 18;
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = b[i] / 18;
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = a[i] / 19;
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = b[i] / 19;
+}
+
+#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+__attribute__((noinline, noclone)) void
+f7 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31);
+}
+
+__attribute__((noinline, noclone)) void
+f8 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1);
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31);
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2;
+}
+
+__attribute__((noinline, noclone)) void
+f11 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31);
+}
+
+__attribute__((noinline, noclone)) void
+f12 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32;
+      d[i] = (((b[i] - tmp) >> 1) + tmp) >> 4;
+    }
+}
+#endif
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      a[i] = i - N / 2;
+      b[i] = i;
+    }
+  a[0] = -__INT_MAX__ - 1;
+  a[1] = -__INT_MAX__;
+  a[N - 1] = __INT_MAX__;
+  b[N - 1] = ~0;
+  f1 ();
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] / 3 || d[i] != b[i] / 3)
+      abort ();
+  f3 ();
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] / 18 || d[i] != b[i] / 18)
+      abort ();
+  f5 ();
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] / 19 || d[i] != b[i] / 19)
+      abort ();
+#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+  f7 ();
+  f8 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] / 3 || d[i] != b[i] / 3)
+      abort ();
+  f9 ();
+  f10 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] / 18 || d[i] != b[i] / 18)
+      abort ();
+  f11 ();
+  f12 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] / 19 || d[i] != b[i] / 19)
+      abort ();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr51581-2.c b/gcc/testsuite/gcc.c-torture/execute/pr51581-2.c
new file mode 100644 (file)
index 0000000..dc111c4
--- /dev/null
@@ -0,0 +1,173 @@
+/* PR tree-optimization/51581 */
+
+extern void abort (void);
+
+#define N 4096
+int a[N], c[N];
+unsigned int b[N], d[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = a[i] % 3;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = b[i] % 3;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = a[i] % 18;
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = b[i] % 18;
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    c[i] = a[i] % 19;
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    d[i] = b[i] % 19;
+}
+
+#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+__attribute__((noinline, noclone)) void
+f7 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      int x = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31);
+      c[i] = a[i] - x * 3;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f8 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      unsigned int x = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1);
+      d[i] = b[i] - x * 3;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      int x = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31);
+      c[i] = a[i] - x * 18;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      unsigned int x = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2;
+      d[i] = b[i] - x * 18;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f11 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      int x = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31);
+      c[i] = a[i] - x * 19;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f12 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32;
+      unsigned int x = (((b[i] - tmp) >> 1) + tmp) >> 4;
+      d[i] = b[i] - x * 19;
+    }
+}
+#endif
+
+int
+main ()
+{
+  int i;
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      a[i] = i - N / 2;
+      b[i] = i;
+    }
+  a[0] = -__INT_MAX__ - 1;
+  a[1] = -__INT_MAX__;
+  a[N - 1] = __INT_MAX__;
+  b[N - 1] = ~0;
+  f1 ();
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] % 3 || d[i] != b[i] % 3)
+      abort ();
+  f3 ();
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] % 18 || d[i] != b[i] % 18)
+      abort ();
+  f5 ();
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] % 19 || d[i] != b[i] % 19)
+      abort ();
+#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
+  f7 ();
+  f8 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] % 3 || d[i] != b[i] % 3)
+      abort ();
+  f9 ();
+  f10 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] % 18 || d[i] != b[i] % 18)
+      abort ();
+  f11 ();
+  f12 ();
+  for (i = 0; i < N; i++)
+    if (c[i] != a[i] % 19 || d[i] != b[i] % 19)
+      abort ();
+#endif
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr51581-1.c b/gcc/testsuite/gcc.dg/vect/pr51581-1.c
new file mode 100644 (file)
index 0000000..d8d61be
--- /dev/null
@@ -0,0 +1,18 @@
+/* PR tree-optimization/51581 */
+
+#include "tree-vect.h"
+
+#define main main1
+#include "../../gcc.c-torture/execute/pr51581-1.c"
+#undef main
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  asm ("");
+  return main1 ();
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr51581-2.c b/gcc/testsuite/gcc.dg/vect/pr51581-2.c
new file mode 100644 (file)
index 0000000..13b8ba9
--- /dev/null
@@ -0,0 +1,18 @@
+/* PR tree-optimization/51581 */
+
+#include "tree-vect.h"
+
+#define main main1
+#include "../../gcc.c-torture/execute/pr51581-2.c"
+#undef main
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  asm ("");
+  return main1 ();
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/pr51581-3.c b/gcc/testsuite/gcc.dg/vect/pr51581-3.c
new file mode 100644 (file)
index 0000000..a478136
--- /dev/null
@@ -0,0 +1,118 @@
+/* PR tree-optimization/51581 */
+
+#include "tree-vect.h"
+
+int a[8], b[8];
+unsigned int c[8], d[8];
+
+void
+f1 (void)
+{
+  a[0] = b[0] / 8;
+  a[1] = b[1] / 8;
+  a[2] = b[2] / 8;
+  a[3] = b[3] / 8;
+  a[4] = b[4] / 8;
+  a[5] = b[5] / 8;
+  a[6] = b[6] / 8;
+  a[7] = b[7] / 8;
+}
+
+void
+f2 (void)
+{
+  c[0] = d[0] / 3;
+  c[1] = d[1] / 3;
+  c[2] = d[2] / 3;
+  c[3] = d[3] / 3;
+  c[4] = d[4] / 3;
+  c[5] = d[5] / 3;
+  c[6] = d[6] / 3;
+  c[7] = d[7] / 3;
+}
+
+void
+f3 (void)
+{
+  a[0] = b[0] / 8;
+  a[1] = b[1] / 4;
+  a[2] = b[2] / 8;
+  a[3] = b[3] / 4;
+  a[4] = b[4] / 8;
+  a[5] = b[5] / 4;
+  a[6] = b[6] / 8;
+  a[7] = b[7] / 4;
+}
+
+void
+f4 (void)
+{
+  c[0] = d[0] / 3;
+  c[1] = d[1] / 5;
+  c[2] = d[2] / 3;
+  c[3] = d[3] / 5;
+  c[4] = d[4] / 3;
+  c[5] = d[5] / 5;
+  c[6] = d[6] / 3;
+  c[7] = d[7] / 5;
+}
+
+void
+f5 (void)
+{
+  a[0] = b[0] / 14;
+  a[1] = b[1] / 15;
+  a[2] = b[2] / 14;
+  a[3] = b[3] / 15;
+  a[4] = b[4] / 14;
+  a[5] = b[5] / 15;
+  a[6] = b[6] / 14;
+  a[7] = b[7] / 15;
+}
+
+void
+f6 (void)
+{
+  c[0] = d[0] / 6;
+  c[1] = d[1] / 5;
+  c[2] = d[2] / 6;
+  c[3] = d[3] / 5;
+  c[4] = d[4] / 6;
+  c[5] = d[5] / 5;
+  c[6] = d[6] / 13;
+  c[7] = d[7] / 5;
+}
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  asm ("");
+  for (i = 0; i < 8; i++)
+    {
+      asm ("");
+      b[i] = i - 4;
+      d[i] = i - 4;
+    }
+  f1 ();
+  f2 ();
+  for (i = 0; i < 8; i++)
+    if (a[i] != b[i] / 8 || c[i] != d[i] / 3)
+      abort ();
+  f3 ();
+  f4 ();
+  for (i = 0; i < 8; i+= 2)
+    if (a[i] != b[i] / 8 || a[i + 1] != b[i + 1] / 4
+       || c[i] != d[i] / 3 || c[i + 1] != d[i + 1] / 5)
+      abort ();
+  f5 ();
+  f6 ();
+  for (i = 0; i < 8; i+= 2)
+    if (a[i] != b[i] / 14 || a[i + 1] != b[i + 1] / 15
+       || c[i] != d[i] / (i == 6 ? 13 : 6) || c[i + 1] != d[i + 1] / 5)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
index 6821b2ced0cb378c1184e1cdea1aa9b4e2102fb1..09a1ecd9c427232d2b7a1e97dfe15c4753b27e68 100644 (file)
@@ -10,7 +10,7 @@ main1 ()
 {
   int i;
   unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
-  unsigned short out[N*8], a[N], b[N] = {3,6,9,12,15,18,21,24};
+  unsigned short out[N*8], a[N], b[N] = {3,0x8031,0x7fff,0x8032,0xffff,0,0x8030,0x8000};
 
   /* Partial SLP is not supported.  */
   for (i = 0; i < N; i++)
@@ -20,7 +20,7 @@ main1 ()
       out[i*4 + 2] = in[i*4 + 2];
       out[i*4 + 3] = in[i*4 + 3];
 
-      a[i] = b[i] / 3;
+      a[i] = b[i] / 0x8031;
     }
 
   /* check results:  */
@@ -30,7 +30,7 @@ main1 ()
          || out[i*4 + 1] != in[i*4 + 1]
          || out[i*4 + 2] != in[i*4 + 2]
          || out[i*4 + 3] != in[i*4 + 3]
-         || a[i] != b[i] / 3)
+        || a[i] != b[i] / 0x8031)
         abort ();
     }
 
diff --git a/gcc/testsuite/gcc.target/i386/avx-pr51581-1.c b/gcc/testsuite/gcc.target/i386/avx-pr51581-1.c
new file mode 100644 (file)
index 0000000..a1d84bf
--- /dev/null
@@ -0,0 +1,23 @@
+/* PR tree-optimization/51581 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#define main main1
+#include "../../gcc.c-torture/execute/pr51581-1.c"
+#undef main
+
+#include CHECK_H
+
+static void
+TEST (void)
+{
+  main1 ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-pr51581-2.c b/gcc/testsuite/gcc.target/i386/avx-pr51581-2.c
new file mode 100644 (file)
index 0000000..6ff54d9
--- /dev/null
@@ -0,0 +1,23 @@
+/* PR tree-optimization/51581 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx } */
+
+#ifndef CHECK_H
+#define CHECK_H "avx-check.h"
+#endif
+#ifndef TEST
+#define TEST avx_test
+#endif
+
+#define main main1
+#include "../../gcc.c-torture/execute/pr51581-2.c"
+#undef main
+
+#include CHECK_H
+
+static void
+TEST (void)
+{
+  main1 ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr51581-1.c b/gcc/testsuite/gcc.target/i386/avx2-pr51581-1.c
new file mode 100644 (file)
index 0000000..74d507f
--- /dev/null
@@ -0,0 +1,9 @@
+/* PR tree-optimization/51581 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx2 } */
+
+#define CHECK_H "avx2-check.h"
+#define TEST avx2_test
+
+#include "avx-pr51581-1.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr51581-2.c b/gcc/testsuite/gcc.target/i386/avx2-pr51581-2.c
new file mode 100644 (file)
index 0000000..bf063c2
--- /dev/null
@@ -0,0 +1,9 @@
+/* PR tree-optimization/51581 */
+/* { dg-do run } */
+/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */
+/* { dg-require-effective-target avx2 } */
+
+#define CHECK_H "avx2-check.h"
+#define TEST avx2_test
+
+#include "avx-pr51581-2.c"
index b4fadf8b69e0621c9126833eea4df0e90f5a383a..aeb7eb23eb402436f2f91390b416853a6cf1a50c 100644 (file)
@@ -53,8 +53,8 @@ static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                        tree *, tree *);
 static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
-static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
-                                              tree *, tree *);
+static gimple vect_recog_divmod_pattern (VEC (gimple, heap) **,
+                                        tree *, tree *);
 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
 static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
@@ -66,7 +66,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_shift_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_vector_vector_shift_pattern,
-       vect_recog_sdivmod_pow2_pattern,
+       vect_recog_divmod_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};
 
@@ -1585,29 +1585,30 @@ vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
   return pattern_stmt;
 }
 
-/* Detect a signed division by power of two constant that wouldn't be
+/* Detect a division by a constant that wouldn't be
    otherwise vectorized:
 
    type a_t, b_t;
 
    S1 a_t = b_t / N;
 
-  where type 'type' is a signed integral type and N is a constant positive
-  power of two.
+  where type 'type' is an integral type and N is a constant.
 
-  Similarly handle signed modulo by power of two constant:
+  Similarly handle modulo by a constant:
 
    S4 a_t = b_t % N;
 
   Input/Output:
 
   * STMTS: Contains a stmt from which the pattern search begins,
+    i.e. the division stmt.  If N is a power of two constant and
+    type is signed, S1 is replaced by:
+    of two constant and type is signed:
   S3  y_t = b_t < 0 ? N - 1 : 0;
   S2  x_t = b_t + y_t;
   S1' a_t = x_t >> log2 (N);
 
+    If N is a power of two constant and type is signed, S4 is
+    replaced by (where *_T temporaries have unsigned type):
+    type is signed by (where *_T temporaries have unsigned type):
   S9  y_T = b_t < 0 ? -1U : 0U;
   S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
   S7  z_t = (type) z_T;
@@ -1625,16 +1626,22 @@ vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
     S1 or modulo S4 stmt.  */
 
 static gimple
-vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
-                                tree *type_in, tree *type_out)
+vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
+                          tree *type_in, tree *type_out)
 {
   gimple last_stmt = VEC_pop (gimple, *stmts);
-  tree oprnd0, oprnd1, vectype, itype, cond;
+  tree oprnd0, oprnd1, vectype, itype, witype, vecwtype, cond;
   gimple pattern_stmt, def_stmt;
   enum tree_code rhs_code;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
   optab optab;
+  tree dummy, q;
+  enum tree_code dummy_code;
+  int dummy_int, prec;
+  VEC (tree, heap) *dummy_vec;
+  stmt_vec_info def_stmt_vinfo;
 
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -1658,10 +1665,7 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
   if (TREE_CODE (oprnd0) != SSA_NAME
       || TREE_CODE (oprnd1) != INTEGER_CST
       || TREE_CODE (itype) != INTEGER_TYPE
-      || TYPE_UNSIGNED (itype)
-      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
-      || !integer_pow2p (oprnd1)
-      || tree_int_cst_sgn (oprnd1) != 1)
+      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
     return NULL;
 
   vectype = get_vectype_for_scalar_type (itype);
@@ -1680,104 +1684,438 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
        return NULL;
     }
 
-  /* Pattern detected.  */
-  if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
-
-  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
-  if (rhs_code == TRUNC_DIV_EXPR)
+  prec = TYPE_PRECISION (itype);
+  if (integer_pow2p (oprnd1))
     {
-      tree var = vect_recog_temp_ssa_var (itype, NULL);
-      def_stmt
-       = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
-                                        fold_build2 (MINUS_EXPR, itype,
-                                                     oprnd1,
-                                                     build_int_cst (itype,
-                                                                    1)),
-                                        build_int_cst (itype, 0));
-      new_pattern_def_seq (stmt_vinfo, def_stmt);
-      var = vect_recog_temp_ssa_var (itype, NULL);
-      def_stmt
-       = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
-                                       gimple_assign_lhs (def_stmt));
-      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
+       return NULL;
 
-      pattern_stmt
-       = gimple_build_assign_with_ops (RSHIFT_EXPR,
-                                       vect_recog_temp_ssa_var (itype, NULL),
-                                       var,
-                                       build_int_cst (itype,
-                                                      tree_log2 (oprnd1)));
+      /* Pattern detected.  */
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");
+
+      cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
+                    build_int_cst (itype, 0));
+      if (rhs_code == TRUNC_DIV_EXPR)
+       {
+         tree var = vect_recog_temp_ssa_var (itype, NULL);
+         tree shift;
+         def_stmt
+           = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
+                                            fold_build2 (MINUS_EXPR, itype,
+                                                         oprnd1,
+                                                         build_int_cst (itype,
+                                                                        1)),
+                                            build_int_cst (itype, 0));
+         new_pattern_def_seq (stmt_vinfo, def_stmt);
+         var = vect_recog_temp_ssa_var (itype, NULL);
+         def_stmt
+           = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
+                                           gimple_assign_lhs (def_stmt));
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+         shift = build_int_cst (itype, tree_log2 (oprnd1));
+         pattern_stmt
+           = gimple_build_assign_with_ops (RSHIFT_EXPR,
+                                           vect_recog_temp_ssa_var (itype,
+                                                                    NULL),
+                                           var, shift);
+       }
+      else
+       {
+         tree signmask;
+         STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
+         if (compare_tree_int (oprnd1, 2) == 0)
+           {
+             signmask = vect_recog_temp_ssa_var (itype, NULL);
+             def_stmt
+               = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
+                                                build_int_cst (itype, 1),
+                                                build_int_cst (itype, 0));
+             append_pattern_def_seq (stmt_vinfo, def_stmt);
+           }
+         else
+           {
+             tree utype
+               = build_nonstandard_integer_type (prec, 1);
+             tree vecutype = get_vectype_for_scalar_type (utype);
+             tree shift
+               = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
+                                       - tree_log2 (oprnd1));
+             tree var = vect_recog_temp_ssa_var (utype, NULL);
+
+             def_stmt
+               = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
+                                                build_int_cst (utype, -1),
+                                                build_int_cst (utype, 0));
+             def_stmt_vinfo
+               = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+             set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+             STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
+             append_pattern_def_seq (stmt_vinfo, def_stmt);
+             var = vect_recog_temp_ssa_var (utype, NULL);
+             def_stmt
+               = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
+                                               gimple_assign_lhs (def_stmt),
+                                               shift);
+             def_stmt_vinfo
+               = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+             set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+             STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
+             append_pattern_def_seq (stmt_vinfo, def_stmt);
+             signmask = vect_recog_temp_ssa_var (itype, NULL);
+             def_stmt
+               = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
+                                               NULL_TREE);
+             append_pattern_def_seq (stmt_vinfo, def_stmt);
+           }
+         def_stmt
+           = gimple_build_assign_with_ops (PLUS_EXPR,
+                                           vect_recog_temp_ssa_var (itype,
+                                                                    NULL),
+                                           oprnd0, signmask);
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+         def_stmt
+           = gimple_build_assign_with_ops (BIT_AND_EXPR,
+                                           vect_recog_temp_ssa_var (itype,
+                                                                    NULL),
+                                           gimple_assign_lhs (def_stmt),
+                                           fold_build2 (MINUS_EXPR, itype,
+                                                        oprnd1,
+                                                        build_int_cst (itype,
+                                                                       1)));
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+         pattern_stmt
+           = gimple_build_assign_with_ops (MINUS_EXPR,
+                                           vect_recog_temp_ssa_var (itype,
+                                                                    NULL),
+                                           gimple_assign_lhs (def_stmt),
+                                           signmask);
+       }
+
+      if (vect_print_dump_info (REPORT_DETAILS))
+       print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+
+      VEC_safe_push (gimple, heap, *stmts, last_stmt);
+
+      *type_in = vectype;
+      *type_out = vectype;
+      return pattern_stmt;
     }
-  else
+
+  if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
+      || integer_zerop (oprnd1)
+      || prec > HOST_BITS_PER_WIDE_INT)
+    return NULL;
+
+  witype = build_nonstandard_integer_type (prec * 2,
+                                          TYPE_UNSIGNED (itype));
+  vecwtype = get_vectype_for_scalar_type (witype);
+  if (vecwtype == NULL_TREE)
+    return NULL;
+
+  if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
+                                      vecwtype, vectype,
+                                      &dummy, &dummy, &dummy_code,
+                                      &dummy_code, &dummy_int, &dummy_vec))
+    return NULL;
+
+  STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
+
+  if (TYPE_UNSIGNED (itype))
     {
-      tree signmask;
-      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
-      if (compare_tree_int (oprnd1, 2) == 0)
+      unsigned HOST_WIDE_INT mh, ml;
+      int pre_shift, post_shift;
+      unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
+                                & GET_MODE_MASK (TYPE_MODE (itype));
+      tree t1, t2, t3, t4, t5, t6;
+
+      if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
+       /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0.  */
+       return NULL;
+
+      /* Find a suitable multiplier and right shift count
+        instead of dividing by D.  */
+      mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
+
+      /* If the suggested multiplier is more than PREC bits, we can do better
+        for even divisors, using an initial right shift.  */
+      if (mh != 0 && (d & 1) == 0)
+       {
+         pre_shift = floor_log2 (d & -d);
+         mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
+                                 &ml, &post_shift, &dummy_int);
+         gcc_assert (!mh);
+       }
+      else
+       pre_shift = 0;
+
+      if (mh != 0)
        {
-         signmask = vect_recog_temp_ssa_var (itype, NULL);
+         if (post_shift - 1 >= prec)
+           return NULL;
+
+         /* t1 = oprnd0 w* ml;
+            t2 = t1 >> prec;
+            t3 = (type) t2;
+            t4 = oprnd0 - t3;
+            t5 = t4 >> 1;
+            t6 = t3 + t5;
+            q = t6 >> (post_shift - 1);  */
+         t1 = vect_recog_temp_ssa_var (witype, NULL);
          def_stmt
-           = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
-                                            build_int_cst (itype, 1),
-                                            build_int_cst (itype, 0));
+           = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0,
+                                           build_int_cst (itype, ml));
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+         set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
+
+         t2 = vect_recog_temp_ssa_var (witype, NULL);
+         def_stmt
+           = gimple_build_assign_with_ops (RSHIFT_EXPR, t2, t1,
+                                           build_int_cst (itype, prec));
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+         set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
+
+         t3 = vect_recog_temp_ssa_var (itype, NULL);
+         def_stmt
+           = gimple_build_assign_with_ops (NOP_EXPR, t3, t2, NULL_TREE);
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+         t4 = vect_recog_temp_ssa_var (itype, NULL);
+         def_stmt
+           = gimple_build_assign_with_ops (MINUS_EXPR, t4, oprnd0, t3);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+         t5 = vect_recog_temp_ssa_var (itype, NULL);
+         def_stmt
+           = gimple_build_assign_with_ops (RSHIFT_EXPR, t5, t4,
+                                           integer_one_node);
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+         t6 = vect_recog_temp_ssa_var (itype, NULL);
+         def_stmt
+           = gimple_build_assign_with_ops (PLUS_EXPR, t6, t3, t5);
+
+         if (post_shift != 1)
+           {
+             append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+             q = vect_recog_temp_ssa_var (witype, NULL);
+             pattern_stmt
+               = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t6,
+                                               build_int_cst (itype,
+                                                              post_shift
+                                                              - 1));
+           }
+         else
+           {
+             q = t6;
+             pattern_stmt = def_stmt;
+           }
        }
       else
        {
-         tree utype
-           = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
-         tree vecutype = get_vectype_for_scalar_type (utype);
-         tree shift
-           = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
-                                   - tree_log2 (oprnd1));
-         tree var = vect_recog_temp_ssa_var (utype, NULL);
-         stmt_vec_info def_stmt_vinfo;
+         if (pre_shift >= prec || post_shift >= prec)
+           return NULL;
+
+         /* t1 = oprnd0 >> pre_shift;
+            t2 = t1 w* ml;
+            t3 = t2 >> (prec + post_shift);
+            q = (type) t3;  */
+         if (pre_shift)
+           {
+             t1 = vect_recog_temp_ssa_var (itype, NULL);
+             def_stmt
+               = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
+                                               build_int_cst (NULL,
+                                                              pre_shift));
+             append_pattern_def_seq (stmt_vinfo, def_stmt);
+           }
+         else
+           t1 = oprnd0;
 
+         t2 = vect_recog_temp_ssa_var (witype, NULL);
          def_stmt
-           = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
-                                            build_int_cst (utype, -1),
-                                            build_int_cst (utype, 0));
-         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
+           = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t2, t1,
+                                           build_int_cst (itype, ml));
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
+         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
+
+         t3 = vect_recog_temp_ssa_var (witype, NULL);
+         def_stmt
+           = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
+                                           build_int_cst (itype, post_shift
+                                                                 + prec));
          append_pattern_def_seq (stmt_vinfo, def_stmt);
-         var = vect_recog_temp_ssa_var (utype, NULL);
+         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+         set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
+
+         q = vect_recog_temp_ssa_var (itype, NULL);
+         pattern_stmt
+           = gimple_build_assign_with_ops (NOP_EXPR, q, t3, NULL_TREE);
+       }
+    }
+  else
+    {
+      unsigned HOST_WIDE_INT ml;
+      int lgup, post_shift;
+      HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
+      unsigned HOST_WIDE_INT abs_d;
+      bool add = false;
+      tree uwitype = NULL, vecuwtype = NULL;
+      tree t1, t2, t3, t4, t5, t6, t7;
+
+      /* Give up for -1.  */
+      if (d == -1)
+       return NULL;
+
+      if (!vect_supportable_shift (RSHIFT_EXPR, witype))
+       {
+         uwitype = build_nonstandard_integer_type (prec * 2, 1);
+         vecuwtype = get_vectype_for_scalar_type (uwitype);
+         if (vecuwtype == NULL_TREE)
+           return NULL;
+       }
+
+      /* Since d might be INT_MIN, we have to cast to
+        unsigned HOST_WIDE_INT before negating to avoid
+        undefined signed overflow.  */
+      abs_d = (d >= 0
+              ? (unsigned HOST_WIDE_INT) d
+              : - (unsigned HOST_WIDE_INT) d);
+
+      /* n rem d = n rem -d */
+      if (rhs_code == TRUNC_MOD_EXPR && d < 0)
+       {
+         d = abs_d;
+         oprnd1 = build_int_cst (itype, abs_d);
+       }
+      else if (HOST_BITS_PER_WIDE_INT >= prec
+              && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
+       /* This case is not handled correctly below.  */
+       return NULL;
+
+      choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &lgup);
+      if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
+       {
+         add = true;
+         ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
+       }
+      if (post_shift >= prec)
+       return NULL;
+
+      /* t1 = oprnd0 w* ml;  */
+      t1 = vect_recog_temp_ssa_var (witype, NULL);
+      def_stmt
+       = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0,
+                                       build_int_cst (itype, ml));
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
+
+      if (vecuwtype != NULL)
+       {
+         /* t2 = (uwtype) t1;  */
+         t2 = vect_recog_temp_ssa_var (uwitype, NULL);
          def_stmt
-           = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
-                                           gimple_assign_lhs (def_stmt),
-                                           shift);
-         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
+           = gimple_build_assign_with_ops (NOP_EXPR, t2, t1, NULL_TREE);
+         append_pattern_def_seq (stmt_vinfo, def_stmt);
+         def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
-         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
+         STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype;
+       }
+      else
+       t2 = t1;
+
+      /* t3 = t2 >> prec;  or t3 = t2 >> (prec + post_shift);  */
+      t3 = vect_recog_temp_ssa_var (vecuwtype ? uwitype : witype, NULL);
+      def_stmt
+       = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
+                                       build_int_cst (itype,
+                                                      prec
+                                                      + (!add
+                                                         && vecuwtype == NULL
+                                                         ? post_shift : 0)));
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+      def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
+      set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
+      STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype ? vecuwtype : vecwtype;
+
+      /* t4 = (type) t3;  */
+      t4 = vect_recog_temp_ssa_var (itype, NULL);
+      def_stmt
+       = gimple_build_assign_with_ops (NOP_EXPR, t4, t3, NULL_TREE);
+      append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+      if (add)
+       {
+         /* t5 = t4 + oprnd0;  */
+         t5 = vect_recog_temp_ssa_var (itype, NULL);
+         def_stmt
+           = gimple_build_assign_with_ops (PLUS_EXPR, t5, t4, oprnd0);
          append_pattern_def_seq (stmt_vinfo, def_stmt);
-         signmask = vect_recog_temp_ssa_var (itype, NULL);
+       }
+      else
+       t5 = t4;
+
+      if ((add || vecuwtype != NULL) && post_shift)
+       {
+         /* t6 = t5 >> post_shift;  */
+         t6 = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
-           = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
-                                           NULL_TREE);
+           = gimple_build_assign_with_ops (RSHIFT_EXPR, t6, t5,
+                                           build_int_cst (itype, post_shift));
          append_pattern_def_seq (stmt_vinfo, def_stmt);
        }
+      else
+       t6 = t5;
+
+      /* t7 = oprnd0 >> (prec - 1);  */
+      t7 = vect_recog_temp_ssa_var (itype, NULL);
       def_stmt
-       = gimple_build_assign_with_ops (PLUS_EXPR,
-                                       vect_recog_temp_ssa_var (itype, NULL),
-                                       oprnd0, signmask);
+       = gimple_build_assign_with_ops (RSHIFT_EXPR, t7, oprnd0,
+                                       build_int_cst (itype, prec - 1));
       append_pattern_def_seq (stmt_vinfo, def_stmt);
+
+      /* q = t6 - t7;  or q = t7 - t6;  */
+      q = vect_recog_temp_ssa_var (itype, NULL);
+      pattern_stmt
+       = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t7 : t6,
+                                       d < 0 ? t6 : t7);
+    }
+
+  if (rhs_code == TRUNC_MOD_EXPR)
+    {
+      tree r, t1;
+
+      /* We divided.  Now finish by:
+        t1 = q * oprnd1;
+        r = oprnd0 - t1;  */
+      append_pattern_def_seq (stmt_vinfo, pattern_stmt);
+
+      t1 = vect_recog_temp_ssa_var (itype, NULL);
       def_stmt
-       = gimple_build_assign_with_ops (BIT_AND_EXPR,
-                                       vect_recog_temp_ssa_var (itype, NULL),
-                                       gimple_assign_lhs (def_stmt),
-                                       fold_build2 (MINUS_EXPR, itype,
-                                                    oprnd1,
-                                                    build_int_cst (itype,
-                                                                   1)));
+       = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
       append_pattern_def_seq (stmt_vinfo, def_stmt);
 
+      r = vect_recog_temp_ssa_var (itype, NULL);
       pattern_stmt
-       = gimple_build_assign_with_ops (MINUS_EXPR,
-                                       vect_recog_temp_ssa_var (itype, NULL),
-                                       gimple_assign_lhs (def_stmt),
-                                       signmask);
+       = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
     }
 
+  /* Pattern detected.  */
+  if (vect_print_dump_info (REPORT_DETAILS))
+    fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");
+
   if (vect_print_dump_info (REPORT_DETAILS))
     print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
 
index 9fa57e1985c042b840f847807950305c451d8022..5853d4fdec9392529cbacadb71f665d69ec30a45 100644 (file)
@@ -5361,7 +5361,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
 
      Pattern statement needs to be analyzed instead of the original statement
      if the original statement is not relevant.  Otherwise, we analyze both
-     statements.  */
+     statements.  In basic blocks we are called from some SLP instance
+     traversal; in that case don't analyze pattern stmts instead, since
+     the pattern stmts will already be part of the SLP instance.  */
 
   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
   if (!STMT_VINFO_RELEVANT_P (stmt_info)
@@ -5390,6 +5392,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
         }
     }
   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+          && node == NULL
            && pattern_stmt
            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
@@ -5406,6 +5409,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
    }
 
   if (is_pattern_stmt_p (stmt_info)
+      && node == NULL
       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
     {
       gimple_stmt_iterator si;