From: Adhemerval Zanella Date: Mon, 16 Jun 2025 13:17:37 +0000 (-0300) Subject: x86_64: Optimize modf/modff for x86_64-v2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;p=thirdparty%2Fglibc.git x86_64: Optimize modf/modff for x86_64-v2 SSE4.1 provides a direct instruction for trunc, which improves modf/modff performance with a smaller text size. On Ryzen 9 (zen3) with gcc 14.2.1: x86_64-v2 reciprocal-throughput master patch difference workload-0_1 7.9610 7.7914 2.13% workload-1_maxint 9.4323 7.8021 17.28% workload-maxint_maxfloat 8.7379 7.8049 10.68% workload-integral 7.9492 7.7991 1.89% latency master patch difference workload-0_1 7.9511 10.8910 -36.97% workload-1_maxint 15.8278 10.9048 31.10% workload-maxint_maxfloat 11.3495 10.9139 3.84% workload-integral 11.5938 10.9071 5.92% x86_64-v3 reciprocal-throughput master patch difference workload-0_1 8.7522 7.9781 8.84% workload-1_maxint 9.6690 7.9872 17.39% workload-maxint_maxfloat 8.7634 7.9857 8.87% workload-integral 8.7397 7.9893 8.59% latency master patch difference workload-0_1 8.7447 9.5589 -9.31% workload-1_maxint 13.7480 9.5690 30.40% workload-maxint_maxfloat 10.0092 9.5680 4.41% workload-integral 9.7518 9.5743 1.82% For x86_64-v1 the optimization is done through a new ifunc selector. The avx variant follows the other SSE4_1 optimizations (like trunc) to avoid the ifunc for x86_64-v3. Checked on x86_64-linux-gnu. 
Tested-by: Carlos O'Donell Reviewed-by: Carlos O'Donell --- diff --git a/sysdeps/x86_64/fpu/math-use-builtins-trunc.h b/sysdeps/x86_64/fpu/math-use-builtins-trunc.h new file mode 100644 index 0000000000..c2387eb3da --- /dev/null +++ b/sysdeps/x86_64/fpu/math-use-builtins-trunc.h @@ -0,0 +1,9 @@ +#ifdef __SSE4_1__ +# define USE_TRUNC_BUILTIN 1 +# define USE_TRUNCF_BUILTIN 1 +#else +# define USE_TRUNC_BUILTIN 0 +# define USE_TRUNCF_BUILTIN 0 +#endif +#define USE_TRUNCL_BUILTIN 0 +#define USE_TRUNCF128_BUILTIN 0 diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile index 3403422443..5fe872b898 100644 --- a/sysdeps/x86_64/fpu/multiarch/Makefile +++ b/sysdeps/x86_64/fpu/multiarch/Makefile @@ -28,11 +28,17 @@ CFLAGS-s_sincosf-fma.c = -mfma -mavx2 # Check if ISA level is 3 or above. ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above))) +sysdep_routines += \ + s_modf-avx \ + s_modff-avx \ +# sysdep_routines libm-sysdep_routines += \ s_ceil-avx \ s_ceilf-avx \ s_floor-avx \ s_floorf-avx \ + s_modf-avx \ + s_modff-avx \ s_nearbyint-avx \ s_nearbyintf-avx \ s_rint-avx \ @@ -56,6 +62,10 @@ libm-sysdep_routines += \ s_tan-fma4 \ # libm-sysdep_routines endif +sysdep_routines += \ + s_modf-sse4_1 \ + s_modff-sse4_1 \ +# sysdep_routines libm-sysdep_routines += \ e_asin-fma \ e_atan2-avx \ @@ -85,6 +95,8 @@ libm-sysdep_routines += \ s_floor-sse4_1 \ s_floorf-sse4_1 \ s_log1p-fma \ + s_modf-sse4_1 \ + s_modff-sse4_1 \ s_nearbyint-sse4_1 \ s_nearbyintf-sse4_1 \ s_rint-sse4_1 \ @@ -106,11 +118,17 @@ libm-sysdep_routines += \ s_truncf-sse4_1 \ # libm-sysdep_routines ifeq ($(have-x86-isa-level),baseline) +sysdep_routines += \ + s_modf-c \ + s_modff-c \ +# sysdep-routines libm-sysdep_routines += \ s_ceil-c \ s_ceilf-c \ s_floor-c \ s_floorf-c \ + s_modf-c \ + s_modff-c \ s_nearbyint-c \ s_nearbyintf-c \ s_rint-c \ @@ -140,6 +158,12 @@ CFLAGS-s_atan-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_sin-avx.c = -msse2avx -DSSE2AVX CFLAGS-s_tan-avx.c = 
-msse2avx -DSSE2AVX CFLAGS-s_sincos-avx.c = -msse2avx -DSSE2AVX + +CFLAGS-s_modf-sse4_1.c = -msse4.1 -fno-builtin-modff32x -fno-builtin-modff64 +CFLAGS-s_modff-sse4_1.c = -msse4.1 -fno-builtin-modff32 + +CFLAGS-s_modf-avx.c = -msse2avx -DSSE2AVX -fno-builtin-modff32x -fno-builtin-modff64 +CFLAGS-s_modff-avx.c = -msse2avx -DSSE2AVX -fno-builtin-modff32 endif ifeq ($(subdir),mathvec) diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c new file mode 100644 index 0000000000..db32398b4a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modf-avx.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-c.c b/sysdeps/x86_64/fpu/multiarch/s_modf-c.c new file mode 100644 index 0000000000..6679d322cc --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modf-c.c @@ -0,0 +1,2 @@ +#define __modf __modf_c +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c new file mode 100644 index 0000000000..6ef4c19a7e --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modf-sse4_1.c @@ -0,0 +1,7 @@ +#include + +#if MINIMUM_X86_ISA_LEVEL != SSE4_1_X86_ISA_LEVEL +# define __modf __modf_sse41 +#endif + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_modf.c b/sysdeps/x86_64/fpu/multiarch/s_modf.c new file mode 100644 index 0000000000..d65977d69a --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modf.c @@ -0,0 +1,35 @@ +/* Multiple versions of modf + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include + +# define modf __redirect_modf +# define __modf __redirect___modf +# include +# undef modf +# undef __modf + +# define SYMBOL_NAME modf +# include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_modf, __modf, IFUNC_SELECTOR ()); +libm_alias_double (__modf, modf) +#endif diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c new file mode 100644 index 0000000000..804b0426b7 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modff-avx.c @@ -0,0 +1 @@ +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-c.c b/sysdeps/x86_64/fpu/multiarch/s_modff-c.c new file mode 100644 index 0000000000..f54f8eacb6 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modff-c.c @@ -0,0 +1,2 @@ +#define __modff __modff_c +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c new file mode 100644 index 0000000000..0437e044f3 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modff-sse4_1.c @@ -0,0 +1,7 @@ +#include + +#if MINIMUM_X86_ISA_LEVEL != SSE4_1_X86_ISA_LEVEL +# define __modff __modff_sse41 +#endif + +#include diff --git a/sysdeps/x86_64/fpu/multiarch/s_modff.c b/sysdeps/x86_64/fpu/multiarch/s_modff.c new file mode 100644 index 0000000000..89d0a32c23 --- /dev/null +++ b/sysdeps/x86_64/fpu/multiarch/s_modff.c @@ -0,0 +1,35 @@ +/* Multiple versions of modff + Copyright (C) 2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#include +#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL +# define NO_MATH_REDIRECT +# include + +# define modff __redirect_modff +# define __modff __redirect___modff +# include +# undef modff +# undef __modff + +# define SYMBOL_NAME modff +# include "ifunc-sse4_1.h" + +libc_ifunc_redirected (__redirect_modff, __modff, IFUNC_SELECTOR ()); +libm_alias_float (__modf, modf) +#endif