git.ipfire.org Git - thirdparty/gcc.git/commitdiff
target/121230 - x86 vector CTOR cost with 387 math
author Richard Biener <rguenther@suse.de>
Mon, 8 Dec 2025 13:36:58 +0000 (14:36 +0100)
committer Richard Biener <rguenther@suse.de>
Tue, 9 Dec 2025 14:08:57 +0000 (15:08 +0100)
The following adjusts the costing of vector construction from scalars
for FP modes which, with 387 math, can reside in x87 FP registers and
therefore need to be spilled and reloaded to get into XMM registers.
I've played it safe with mixed SSE/387 math.

PR target/121230
* config/i386/i386.cc (ix86_vector_costs::add_stmt_cost):
With FP mode and 387 math cost spill/reload.

* gcc.target/i386/pr121230.c: New testcase.

gcc/config/i386/i386.cc
gcc/testsuite/gcc.target/i386/pr121230.c [new file with mode: 0644]

index db43045753bf4c6ae4932f1560b4fd3a7929f745..75a9cb6211a14c941fe830fd6baf5bc724d5b815 100644 (file)
@@ -26397,7 +26397,20 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                                (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
            {
              if (fp)
-               m_num_sse_needed[where]++;
+               {
+                 /* Scalar FP values residing in x87 registers need to be
+                    spilled and reloaded.  */
+                 auto mode2 = TYPE_MODE (TREE_TYPE (op));
+                 if (IS_STACK_MODE (mode2))
+                   {
+                     int cost
+                       = (ix86_cost->hard_register.fp_store[mode2 == SFmode
+                                                            ? 0 : 1]
+                          + ix86_cost->sse_load[sse_store_index (mode2)]);
+                     stmt_cost += COSTS_N_INSNS (cost) / 2;
+                   }
+                 m_num_sse_needed[where]++;
+               }
              else
                {
                  m_num_gpr_needed[where]++;
diff --git a/gcc/testsuite/gcc.target/i386/pr121230.c b/gcc/testsuite/gcc.target/i386/pr121230.c
new file mode 100644 (file)
index 0000000..67c9c5c
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O3 -march=athlon-xp -mfpmath=387 -fexcess-precision=standard" } */
+
+typedef struct {
+    float a;
+    float b;
+} f32_2;
+
+f32_2 add32_2(f32_2 x, f32_2 y) {
+    return (f32_2){ x.a + y.a, x.b + y.b};
+}
+
+/* We do not want the vectorizer to vectorize the store and/or the
+   conversion (with IA32 we do not support V2SF add) given that this
+   spills FP regs to reload them to XMM.  */
+/* { dg-final { scan-assembler-not "movss\[ \\t\]+\[0-9\]*\\\(%esp\\\), %xmm" } } */