From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Date: Mon, 16 Jun 2025 13:17:37 +0000 (-0300)
Subject: x86_64: Optimize modf/modff for x86_64-v2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;p=thirdparty%2Fglibc.git

x86_64: Optimize modf/modff for x86_64-v2

SSE4.1 provides a direct instruction for trunc, which improves
modf/modff performance with a smaller text size.  On Ryzen 9 (zen3)
with gcc 14.2.1:

x86_64-v2
reciprocal-throughput        master        patch       difference
workload-0_1                 7.9610       7.7914            2.13%
workload-1_maxint            9.4323       7.8021           17.28%
workload-maxint_maxfloat     8.7379       7.8049           10.68%
workload-integral            7.9492       7.7991            1.89%

latency                      master        patch       difference
workload-0_1                 7.9511      10.8910          -36.97%
workload-1_maxint           15.8278      10.9048           31.10%
workload-maxint_maxfloat    11.3495      10.9139            3.84%
workload-integral           11.5938      10.9071            5.92%

x86_64-v3
reciprocal-throughput        master        patch       difference
workload-0_1                 8.7522       7.9781            8.84%
workload-1_maxint            9.6690       7.9872           17.39%
workload-maxint_maxfloat     8.7634       7.9857            8.87%
workload-integral            8.7397       7.9893            8.59%

latency                      master        patch       difference
workload-0_1                 8.7447       9.5589           -9.31%
workload-1_maxint           13.7480       9.5690           30.40%
workload-maxint_maxfloat    10.0092       9.5680            4.41%
workload-integral            9.7518       9.5743            1.82%

For x86_64-v1 the optimization is done through a new ifunc selector.
The AVX variants follow the other SSE4_1 optimizations (like trunc) to
avoid the ifunc for x86_64-v3.

Checked on x86_64-linux-gnu.
Tested-by: Carlos O'Donell <carlos@redhat.com>
Reviewed-by: Carlos O'Donell <carlos@redhat.com>
---

diff --git a/sysdeps/x86_64/fpu/math-use-builtins-trunc.h b/sysdeps/x86_64/fpu/math-use-builtins-trunc.h
new file mode 100644
index 0000000000..c2387eb3da
--- /dev/null
+++ b/sysdeps/x86_64/fpu/math-use-builtins-trunc.h
@@ -0,0 +1,9 @@
+#ifdef __SSE4_1__
+# define USE_TRUNC_BUILTIN 1
+# define USE_TRUNCF_BUILTIN 1
+#else
+# define USE_TRUNC_BUILTIN 0
+# define USE_TRUNCF_BUILTIN 0
+#endif
+#define USE_TRUNCL_BUILTIN 0
+#define USE_TRUNCF128_BUILTIN 0
diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile
index 3403422443..5fe872b898 100644
--- a/sysdeps/x86_64/fpu/multiarch/Makefile
+++ b/sysdeps/x86_64/fpu/multiarch/Makefile
@@ -28,11 +28,17 @@ CFLAGS-s_sincosf-fma.c = -mfma -mavx2
 
 # Check if ISA level is 3 or above.
 ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above)))
+sysdep_routines += \
+  s_modf-avx \
+  s_modff-avx \
+# sysdep_routines
 libm-sysdep_routines += \
   s_ceil-avx \
   s_ceilf-avx \
   s_floor-avx \
   s_floorf-avx \
+  s_modf-avx \
+  s_modff-avx \
   s_nearbyint-avx \
   s_nearbyintf-avx \
   s_rint-avx \
@@ -56,6 +62,10 @@ libm-sysdep_routines += \
   s_tan-fma4 \
 # libm-sysdep_routines
 endif
+sysdep_routines += \
+  s_modf-sse4_1 \
+  s_modff-sse4_1 \
+# sysdep_routines
 libm-sysdep_routines += \
   e_asin-fma \
   e_atan2-avx \
@@ -85,6 +95,8 @@ libm-sysdep_routines += \
   s_floor-sse4_1 \
   s_floorf-sse4_1 \
   s_log1p-fma \
+  s_modf-sse4_1 \
+  s_modff-sse4_1 \
   s_nearbyint-sse4_1 \
   s_nearbyintf-sse4_1 \
   s_rint-sse4_1 \
@@ -106,11 +118,17 @@ libm-sysdep_routines += \
   s_truncf-sse4_1 \
 # libm-sysdep_routines
 ifeq ($(have-x86-isa-level),baseline)
+sysdep_routines += \
+  s_modf-c \
+  s_modff-c \
+# sysdep_routines
 libm-sysdep_routines += \
   s_ceil-c \
   s_ceilf-c \
   s_floor-c \
   s_floorf-c \
+  s_modf-c \
+  s_modff-c \
   s_nearbyint-c \
   s_nearbyintf-c \
   s_rint-c \
@@ -140,6 +158,12 @@ CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_tan-avx.c = -msse2avx -DSSE2AVX
 CFLAGS-s_sincos-avx.c = -msse2avx -DSSE2AVX
+
+CFLAGS-s_modf-sse4_1.c = -msse4.1 -fno-builtin-modff32x -fno-builtin-modff64
+CFLAGS-s_modff-sse4_1.c = -msse4.1 -fno-builtin-modff32
+
+CFLAGS-s_modf-avx.c = -msse2avx -DSSE2AVX -fno-builtin-modff32x -fno-builtin-modff64
+CFLAGS-s_modff-avx.c = -msse2avx -DSSE2AVX -fno-builtin-modff32
 endif
 
 ifeq ($(subdir),mathvec)
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c
new file mode 100644
index 0000000000..db32398b4a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c
@@ -0,0 +1 @@
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-c.c b/sysdeps/x86_64/fpu/multiarch/s_modf-c.c
new file mode 100644
index 0000000000..6679d322cc
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf-c.c
@@ -0,0 +1,2 @@
+#define __modf __modf_c
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c
new file mode 100644
index 0000000000..6ef4c19a7e
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c
@@ -0,0 +1,7 @@
+#include <sysdeps/x86/isa-level.h>
+
+#if MINIMUM_X86_ISA_LEVEL != SSE4_1_X86_ISA_LEVEL
+# define __modf __modf_sse41
+#endif
+
+#include <sysdeps/ieee754/dbl-64/s_modf.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf.c b/sysdeps/x86_64/fpu/multiarch/s_modf.c
new file mode 100644
index 0000000000..d65977d69a
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modf.c
@@ -0,0 +1,35 @@
+/* Multiple versions of modf
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-double.h>
+
+# define modf __redirect_modf
+# define __modf __redirect___modf
+# include <math.h>
+# undef modf
+# undef __modf
+
+# define SYMBOL_NAME modf
+# include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_modf, __modf, IFUNC_SELECTOR ());
+libm_alias_double (__modf, modf)
+#endif
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c
new file mode 100644
index 0000000000..804b0426b7
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c
@@ -0,0 +1 @@
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-c.c b/sysdeps/x86_64/fpu/multiarch/s_modff-c.c
new file mode 100644
index 0000000000..f54f8eacb6
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff-c.c
@@ -0,0 +1,2 @@
+#define __modff __modff_c
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c
new file mode 100644
index 0000000000..0437e044f3
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c
@@ -0,0 +1,7 @@
+#include <sysdeps/x86/isa-level.h>
+
+#if MINIMUM_X86_ISA_LEVEL != SSE4_1_X86_ISA_LEVEL
+# define __modff __modff_sse41
+#endif
+
+#include <sysdeps/ieee754/flt-32/s_modff.c>
diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff.c b/sysdeps/x86_64/fpu/multiarch/s_modff.c
new file mode 100644
index 0000000000..89d0a32c23
--- /dev/null
+++ b/sysdeps/x86_64/fpu/multiarch/s_modff.c
@@ -0,0 +1,35 @@
+/* Multiple versions of modff
+   Copyright (C) 2025 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdeps/x86/isa-level.h>
+#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL
+# define NO_MATH_REDIRECT
+# include <libm-alias-float.h>
+
+# define modff __redirect_modff
+# define __modff __redirect___modff
+# include <math.h>
+# undef modff
+# undef __modff
+
+# define SYMBOL_NAME modff
+# include "ifunc-sse4_1.h"
+
+libc_ifunc_redirected (__redirect_modff, __modff, IFUNC_SELECTOR ());
+libm_alias_float (__modf, modf)
+#endif