From: Matthias Kretz Date: Thu, 24 Jun 2021 13:20:15 +0000 (+0100) Subject: libstdc++: Avoid raising fp exceptions in trunc, floor, and ceil X-Git-Tag: basepoints/gcc-13~6527 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d5125819d86274383041571daa3698d72bbac661;p=thirdparty%2Fgcc.git libstdc++: Avoid raising fp exceptions in trunc, floor, and ceil Signed-off-by: Matthias Kretz libstdc++-v3/ChangeLog: * include/experimental/bits/simd_x86.h (_S_trunc, _S_floor) (_S_ceil): Set bit 8 (_MM_FROUND_NO_EXC) on AVX and SSE4.1 roundp[sd] calls. --- diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h index 5706bf638459..34633c096b1e 100644 --- a/libstdc++-v3/include/experimental/bits/simd_x86.h +++ b/libstdc++-v3/include/experimental/bits/simd_x86.h @@ -2657,13 +2657,13 @@ template else if constexpr (__is_avx512_pd<_Tp, _Np>()) return _mm512_roundscale_pd(__x, 0x0b); else if constexpr (__is_avx_ps<_Tp, _Np>()) - return _mm256_round_ps(__x, 0x3); + return _mm256_round_ps(__x, 0xb); else if constexpr (__is_avx_pd<_Tp, _Np>()) - return _mm256_round_pd(__x, 0x3); + return _mm256_round_pd(__x, 0xb); else if constexpr (__have_sse4_1 && __is_sse_ps<_Tp, _Np>()) - return __auto_bitcast(_mm_round_ps(__to_intrin(__x), 0x3)); + return __auto_bitcast(_mm_round_ps(__to_intrin(__x), 0xb)); else if constexpr (__have_sse4_1 && __is_sse_pd<_Tp, _Np>()) - return _mm_round_pd(__x, 0x3); + return _mm_round_pd(__x, 0xb); else if constexpr (__is_sse_ps<_Tp, _Np>()) { auto __truncated @@ -2786,13 +2786,13 @@ template else if constexpr (__is_avx512_pd<_Tp, _Np>()) return _mm512_roundscale_pd(__x, 0x09); else if constexpr (__is_avx_ps<_Tp, _Np>()) - return _mm256_round_ps(__x, 0x1); + return _mm256_round_ps(__x, 0x9); else if constexpr (__is_avx_pd<_Tp, _Np>()) - return _mm256_round_pd(__x, 0x1); + return _mm256_round_pd(__x, 0x9); else if constexpr (__have_sse4_1 && __is_sse_ps<_Tp, _Np>()) - return __auto_bitcast(_mm_floor_ps(__to_intrin(__x))); + return __auto_bitcast(_mm_round_ps(__to_intrin(__x), 0x9)); else if constexpr (__have_sse4_1 && __is_sse_pd<_Tp, _Np>()) - return _mm_floor_pd(__x); + return _mm_round_pd(__x, 0x9); else return _Base::_S_floor(__x); } @@ -2808,13 +2808,13 @@ template else if constexpr (__is_avx512_pd<_Tp, _Np>()) return _mm512_roundscale_pd(__x, 0x0a); else if constexpr (__is_avx_ps<_Tp, _Np>()) - return _mm256_round_ps(__x, 0x2); + return _mm256_round_ps(__x, 0xa); else if constexpr (__is_avx_pd<_Tp, _Np>()) - return _mm256_round_pd(__x, 0x2); + return _mm256_round_pd(__x, 0xa); else if constexpr (__have_sse4_1 && __is_sse_ps<_Tp, _Np>()) - return __auto_bitcast(_mm_ceil_ps(__to_intrin(__x))); + return __auto_bitcast(_mm_round_ps(__to_intrin(__x), 0xa)); else if constexpr (__have_sse4_1 && __is_sse_pd<_Tp, _Np>()) - return _mm_ceil_pd(__x); + return _mm_round_pd(__x, 0xa); else return _Base::_S_ceil(__x); }