]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
i386: Improve split of *extendv2di2_highpart_stv_noavx512vl.
author: Roger Sayle <roger@nextmovesoftware.com>
Thu, 15 Aug 2024 21:02:05 +0000 (22:02 +0100)
committer: Roger Sayle <roger@nextmovesoftware.com>
Thu, 15 Aug 2024 21:02:05 +0000 (22:02 +0100)
This patch follows up on the previous patch to fix PR target/116275 by
improving the code STV (ultimately) generates for highpart sign extensions
like (x<<8)>>8.  The arithmetic right shift is able to take advantage of
the available common subexpressions from the preceding left shift.

Hence previously with -O2 -m32 -mavx -mno-avx512vl we'd generate:

        vpsllq  $8, %xmm0, %xmm0
        vpsrad  $8, %xmm0, %xmm1
        vpsrlq  $8, %xmm0, %xmm0
        vpblendw        $51, %xmm0, %xmm1, %xmm0

But with improved splitting, we now generate three instructions:

        vpslld  $8, %xmm1, %xmm0
        vpsrad  $8, %xmm0, %xmm0
        vpblendw        $51, %xmm1, %xmm0, %xmm0

This patch also implements Uros' suggestion that the pre-reload
splitter could introduce a new pseudo to hold the intermediate result
to potentially help reload with register allocation, which applies
when not performing the above optimization, i.e. on TARGET_XOP.

2024-08-15  Roger Sayle  <roger@nextmovesoftware.com>
    Uros Bizjak  <ubizjak@gmail.com>

gcc/ChangeLog
* config/i386/i386.md (*extendv2di2_highpart_stv_noavx512vl): Split
to an improved implementation on !TARGET_XOP.  On TARGET_XOP, use
a new pseudo for the intermediate to simplify register allocation.

gcc/testsuite/ChangeLog
* g++.target/i386/pr116275-2.C: New test case.

gcc/config/i386/i386.md
gcc/testsuite/g++.target/i386/pr116275-2.C [new file with mode: 0644]

index efbab2f25ec3b71066fe320e56103838283e15fe..36108e5c2c9eeb10b826367332e8e7af71d58bf5 100644 (file)
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"
-  [(set (match_dup 0)
+  [(set (match_dup 4)
        (ashift:V2DI (match_dup 1) (match_dup 2)))
    (set (match_dup 0)
-       (ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
+       (ashiftrt:V2DI (match_dup 4) (match_dup 2)))]
+{
+  if (!TARGET_XOP)
+    {
+      rtx op0 = operands[0];
+      rtx op2 = operands[2];
+      rtx tmp1 = gen_reg_rtx (V4SImode);
+      rtx tmp2 = gen_reg_rtx (V4SImode);
+      rtx tmp3 = gen_reg_rtx (V4SImode);
+      rtx tmp4 = gen_reg_rtx (V4SImode);
+      emit_move_insn (tmp1, lowpart_subreg (V4SImode, operands[1], V2DImode));
+      emit_insn (gen_ashlv4si3 (tmp2, tmp1, op2));
+      emit_insn (gen_ashrv4si3 (tmp3, tmp2, op2));
+      vec_perm_builder sel (4, 4, 1);
+      sel.quick_grow (4);
+      sel[0] = 0;
+      sel[1] = 5;
+      sel[2] = 2;
+      sel[3] = 7;
+      vec_perm_indices indices(sel, 2, 4);
+      bool ok = targetm.vectorize.vec_perm_const (V4SImode, V4SImode, tmp4,
+                                                 tmp1, tmp3, indices);
+      gcc_assert (ok);
+      emit_move_insn (op0, lowpart_subreg (V2DImode, tmp4, V4SImode));
+      DONE;
+    }
+  else
+    operands[4] = gen_reg_rtx (V2DImode);
+})
 \f
 ;; Rotate instructions
 
diff --git a/gcc/testsuite/g++.target/i386/pr116275-2.C b/gcc/testsuite/g++.target/i386/pr116275-2.C
new file mode 100644 (file)
index 0000000..98d3c19
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -mavx -mno-avx512vl -std=c++11" } */
+
+struct SymbolDesc push_back(SymbolDesc);
+struct SymbolDesc {
+  long long ELFLocalSymIdx;
+};
+struct Expected {
+  long long &operator*();
+};
+void SymbolizableObjectFileaddSymbol() {
+  Expected SymbolAddressOrErr;
+  long long SymbolAddress = *SymbolAddressOrErr << 8 >> 8;
+  push_back({SymbolAddress});
+}
+
+/* { dg-final { scan-assembler "vpslld" } } */
+/* { dg-final { scan-assembler-not "vpsllq" } } */
+/* { dg-final { scan-assembler-not "vpsrlq" } } */