From: Matthias Kretz Date: Thu, 24 Jun 2021 13:20:14 +0000 (+0100) Subject: libstdc++: Improve simd fixed_size codegen X-Git-Tag: basepoints/gcc-13~6531 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=343f01f4cde17003b6721722361e190bd6cffd15;p=thirdparty%2Fgcc.git libstdc++: Improve simd fixed_size codegen Sometimes fixed_size objects will get unnecessarily copied on the stack. The simd implementation should never pass _SimdTuple by value to avoid requiring the optimizer to see through these copies. Signed-off-by: Matthias Kretz libstdc++-v3/ChangeLog: * include/experimental/bits/simd_converter.h (_SimdConverter::operator()): Pass _SimdTuple by const-ref. * include/experimental/bits/simd_fixed_size.h (_GLIBCXX_SIMD_FIXED_OP): Pass binary operator _SimdTuple arguments by const-ref. (_S_masked_unary): Pass _SimdTuple by const-ref. --- diff --git a/libstdc++-v3/include/experimental/bits/simd_converter.h b/libstdc++-v3/include/experimental/bits/simd_converter.h index 9c8bf382df94..11999df25e49 100644 --- a/libstdc++-v3/include/experimental/bits/simd_converter.h +++ b/libstdc++-v3/include/experimental/bits/simd_converter.h @@ -316,7 +316,7 @@ template _GLIBCXX_SIMD_INTRINSIC constexpr typename _SimdTraits<_To, _Ap>::_SimdMember - operator()(_Arg __x) const noexcept + operator()(const _Arg& __x) const noexcept { if constexpr (_Arg::_S_tuple_size == 1) return __vector_convert<__vector_type_t<_To, _Np>>(__x.first); diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h index b6fb47cdf39f..dc2fb90b9b2e 100644 --- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h +++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h @@ -1480,7 +1480,7 @@ template #define _GLIBCXX_SIMD_FIXED_OP(name_, op_) \ template \ static inline constexpr _SimdTuple<_Tp, _As...> name_( \ - const _SimdTuple<_Tp, _As...> __x, const _SimdTuple<_Tp, _As...> __y) \ + const _SimdTuple<_Tp, _As...>& __x, const _SimdTuple<_Tp, _As...>& __y)\ { \ return __x._M_apply_per_chunk( \ [](auto __impl, auto __xx, auto __yy) constexpr { \ @@ -1780,8 +1780,7 @@ template // _S_masked_unary {{{2 template