From b5d88fa6c36bf17d506c235812e20191e4675986 Mon Sep 17 00:00:00 2001
From: Adhemerval Zanella
Date: Tue, 9 Sep 2025 17:47:21 -0300
Subject: [PATCH] math: Fix x86_64 build for -Os (BZ 33367)

The compiler might not inline the trunc function call for
USE_TRUNC_BUILTIN [1]. This patch adds an optimized __trunc/__truncf
for x86 used on modf ifunc variant to avoid the trunc libcall.

Checked on x86_64, x86_64-v2, x86_64-v3, and x86_64-v4. Used -O2 and
-Os options. Performed a full make check on x86_64 with both
optimizations.

[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121861

Reviewed-by: H.J. Lu
---
 sysdeps/x86/fpu/math_private.h                | 27 +++++++++++++++++++
 sysdeps/x86_64/fpu/multiarch/s_modf-avx.c     |  3 +++
 sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c  |  3 +++
 sysdeps/x86_64/fpu/multiarch/s_modf.c         |  2 ++
 sysdeps/x86_64/fpu/multiarch/s_modff-avx.c    |  3 +++
 sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c |  3 +++
 sysdeps/x86_64/fpu/multiarch/s_modff.c        |  2 ++
 7 files changed, 43 insertions(+)

diff --git a/sysdeps/x86/fpu/math_private.h b/sysdeps/x86/fpu/math_private.h
index 132f011809..d30d580cea 100644
--- a/sysdeps/x86/fpu/math_private.h
+++ b/sysdeps/x86/fpu/math_private.h
@@ -19,6 +19,7 @@
 #ifndef X86_MATH_PRIVATE_H
 #define X86_MATH_PRIVATE_H 1
 
+#include
 #include_next
 
 __extern_always_inline long double
@@ -29,4 +30,30 @@ __NTH (__ieee754_atan2l (long double y, long double x))
   return ret;
 }
 
+__extern_always_inline double
+__trunc (double x)
+{
+#ifdef __AVX__
+  asm ("vroundsd $11, %1, %1, %0" : "=v" (x) : "v" (x));
+#elif defined __SSE4_1__
+  asm ("roundsd $11, %1, %0" : "=x" (x) : "x" (x));
+#else
+  x = trunc (x);
+#endif
+  return x;
+}
+
+__extern_always_inline float
+__truncf (float x)
+{
+#ifdef __AVX__
+  asm ("vroundss $11, %1, %1, %0" : "=v" (x) : "v" (x));
+#elif defined __SSE4_1__
+  asm ("roundss $11, %1, %0" : "=x" (x) : "x" (x));
+#else
+  x = truncf (x);
+#endif
+  return x;
+}
+
 #endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c
index ab4f03db0e..0982280d25 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c
@@ -1,3 +1,6 @@
+#include
+
 #define __modf __modf_avx
+#define trunc __trunc
 
 #include
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c
index 00aa8cd736..f6fb996f97 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c
@@ -1,3 +1,6 @@
+#include
+
 #define __modf __modf_sse41
+#define trunc __trunc
 
 #include
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf.c b/sysdeps/x86_64/fpu/multiarch/s_modf.c
index e365bfcef7..a108ae5dc6 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_modf.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf.c
@@ -38,4 +38,6 @@ libm_alias_double (__modf, modf)
 # define __modf __modf_sse2
 # endif
 #endif
+#include
+#define trunc __trunc
 #include
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c
index 07cb9c1036..b2afe1efe3 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c
@@ -1,3 +1,6 @@
+#include
+
 #define __modff __modff_avx
+#define truncf __truncf
 
 #include
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c
index 060c5e3979..0352c3ea4b 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c
@@ -1,3 +1,6 @@
+#include
+
 #define __modff __modff_sse41
+#define truncf __truncf
 
 #include
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff.c b/sysdeps/x86_64/fpu/multiarch/s_modff.c
index a4b5429037..62d7645bc7 100644
--- a/sysdeps/x86_64/fpu/multiarch/s_modff.c
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff.c
@@ -38,4 +38,6 @@ libm_alias_float (__modf, modf)
 # define __modff __modff_sse2
 # endif
 #endif
+#include
+#define truncf __truncf
 #include
-- 
2.47.3