git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386: Fix ix86_fold_builtin shift folding [PR93418]
author Jakub Jelinek <jakub@redhat.com>
Tue, 28 Jan 2020 07:46:23 +0000 (08:46 +0100)
committer Jakub Jelinek <jakub@redhat.com>
Thu, 13 Feb 2020 20:27:53 +0000 (21:27 +0100)
The following testcase is miscompiled because the left operand of the
variable shift, { -1, -1, -1, -1 }, is represented as a VECTOR_CST with
VECTOR_CST_NPATTERNS 1 and VECTOR_CST_NELTS_PER_PATTERN 1, so when
we call builder.new_unary_operation, builder.encoded_nelts () will be just 1
and thus we encode the resulting vector as if all the elements were the
same.
For non-masked is_vshift, we could perhaps call builder.new_binary_operation
(TREE_TYPE (args[0]), args[0], args[1], false), but then there are masked
shifts; for non-is_vshift we could perhaps call it too, but with args[2]
instead of args[1], yet there is no builder.new_ternary_operation.
All this stuff is primarily for aarch64 anyway, on x86 we don't have any
variable length vectors, and it is not a big deal to compute all elements
and just let builder.finalize () find the most efficient VECTOR_CST
representation of the vector.  So, instead of doing too much, this just
keeps using new_unary_operation only if only one VECTOR_CST is involved
(i.e. non-masked shift by constant) and for the rest just compute all elts.

2020-01-28  Jakub Jelinek  <jakub@redhat.com>

PR target/93418
* config/i386/i386.c (ix86_fold_builtin) <do_shift>: If mask is not
-1 or is_vshift is true, use new_vector with number of elts npatterns
rather than new_unary_operation.

* gcc.target/i386/avx2-pr93418.c: New test.

gcc/ChangeLog
gcc/config/i386/i386.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx2-pr93418.c [new file with mode: 0644]

index 2029c67bf0204d87b27ce0e5a02073e9337f249c..ca09488b59d17ce086cfc05bf39d44200db96f6c 100644 (file)
@@ -1,6 +1,13 @@
 2020-02-13  Jakub Jelinek  <jakub@redhat.com>
 
        Backported from mainline
+       2020-01-28  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/93418
+       * config/i386/i386.c (ix86_fold_builtin) <do_shift>: If mask is not
+       -1 or is_vshift is true, use new_vector with number of elts npatterns
+       rather than new_unary_operation.
+
        2020-01-23  Jakub Jelinek  <jakub@redhat.com>
 
        PR rtl-optimization/93402
index 6ee6aea2389dccacece0bbd14de735c20fd93bce..779e8111379aa0521242330f14830f3e21922b30 100644 (file)
@@ -33418,8 +33418,13 @@ ix86_fold_builtin (tree fndecl, int n_args,
                    countt = build_int_cst (integer_type_node, count);
                }
              tree_vector_builder builder;
-             builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
-                                          false);
+             if (mask != HOST_WIDE_INT_M1U || is_vshift)
+               builder.new_vector (TREE_TYPE (args[0]),
+                                   TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
+                                   1);
+             else
+               builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
+                                            false);
              unsigned int cnt = builder.encoded_nelts ();
              for (unsigned int i = 0; i < cnt; ++i)
                {
index bec5eba503327b968bdca1da2aebee966aff62d6..532f8dbef6ccbe7ac7fd389e006fb6fae24dc269 100644 (file)
@@ -1,6 +1,11 @@
 2020-02-13  Jakub Jelinek  <jakub@redhat.com>
 
        Backported from mainline
+       2020-01-28  Jakub Jelinek  <jakub@redhat.com>
+
+       PR target/93418
+       * gcc.target/i386/avx2-pr93418.c: New test.
+
        2020-01-23  Jakub Jelinek  <jakub@redhat.com>
 
        PR rtl-optimization/93402
diff --git a/gcc/testsuite/gcc.target/i386/avx2-pr93418.c b/gcc/testsuite/gcc.target/i386/avx2-pr93418.c
new file mode 100644 (file)
index 0000000..67ed33d
--- /dev/null
@@ -0,0 +1,20 @@
+/* PR target/93418 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
+
+#include <x86intrin.h>
+
+void link_error (void);
+
+void
+foo (void)
+{
+  __m128i a = _mm_set1_epi32 (0xffffffffU);
+  __m128i b = _mm_setr_epi32 (16, 31, -34, 3);
+  __m128i c = _mm_sllv_epi32 (a, b);
+  __v4su d = (__v4su) c;
+  if (d[0] != 0xffff0000U || d[1] != 0x80000000U
+      || d[2] != 0 || d[3] != 0xfffffff8U)
+    link_error ();
+}