target/121230 - x86 vector CTOR cost with 387 math

author Richard Biener <rguenther@suse.de>

Mon, 8 Dec 2025 13:36:58 +0000 (14:36 +0100)

committer Richard Biener <rguenther@suse.de>

Tue, 9 Dec 2025 14:08:57 +0000 (15:08 +0100)
author Richard Biener <rguenther@suse.de>
Mon, 8 Dec 2025 13:36:58 +0000 (14:36 +0100)
committer Richard Biener <rguenther@suse.de>
Tue, 9 Dec 2025 14:08:57 +0000 (15:08 +0100)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc

index db43045753bf4c6ae4932f1560b4fd3a7929f745..75a9cb6211a14c941fe830fd6baf5bc724d5b815 100644 (file)
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -26397,7 +26397,20 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
                                 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))))
             {
               if (fp)
-               m_num_sse_needed[where]++;
+               {
+                 /* Scalar FP values residing in x87 registers need to be
+                    spilled and reloaded.  */
+                 auto mode2 = TYPE_MODE (TREE_TYPE (op));
+                 if (IS_STACK_MODE (mode2))
+                   {
+                     int cost
+                       = (ix86_cost->hard_register.fp_store[mode2 == SFmode
+                                                            ? 0 : 1]
+                          + ix86_cost->sse_load[sse_store_index (mode2)]);
+                     stmt_cost += COSTS_N_INSNS (cost) / 2;
+                   }
+                 m_num_sse_needed[where]++;
+               }
               else
                 {
                   m_num_gpr_needed[where]++;
diff --git a/gcc/testsuite/gcc.target/i386/pr121230.c b/gcc/testsuite/gcc.target/i386/pr121230.c

new file mode 100644 (file)

index 0000000..67c9c5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr121230.c
@@ -0,0 +1,16 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O3 -march=athlon-xp -mfpmath=387 -fexcess-precision=standard" } */
+
+typedef struct {  /* Pair of floats passed and returned by value.  */
+    float a;
+    float b;
+} f32_2;
+
+f32_2 add32_2(f32_2 x, f32_2 y) {  /* Member-wise sum of X and Y.  */
+    return (f32_2){ x.a + y.a, x.b + y.b};
+}
+
+/* We do not want the vectorizer to vectorize the store and/or the
+   conversion (with IA32 we do not support V2SF add), given that doing so
+   spills FP regs only to reload them into XMM registers.  */
+/* { dg-final { scan-assembler-not "movss\[ \\t\]+\[0-9\]*\\\(%esp\\\), %xmm" } } */
author	Richard Biener <rguenther@suse.de>
	Mon, 8 Dec 2025 13:36:58 +0000 (14:36 +0100)
committer	Richard Biener <rguenther@suse.de>
	Tue, 9 Dec 2025 14:08:57 +0000 (15:08 +0100)
gcc/config/i386/i386.cc		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/i386/pr121230.c	[new file with mode: 0644]	patch \| blob