libstdc++: Fix ODR issues with different -m flags

author Matthias Kretz <m.kretz@gsi.de>
Mon, 1 Feb 2021 09:58:03 +0000 (10:58 +0100)
committer Matthias Kretz <m.kretz@gsi.de>
Sat, 15 Jan 2022 20:05:17 +0000 (21:05 +0100)
diff --git a/libstdc++-v3/include/experimental/bits/simd.h b/libstdc++-v3/include/experimental/bits/simd.h

index 00569cee55459e6de4b6a7a3d15a00d317b19350..d837d0efb21e9c52bce437078046af273b14cc50 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd.h
+++ b/libstdc++-v3/include/experimental/bits/simd.h
@@ -35,6 +35,7 @@
  #include <cstdio> // for stderr
  #endif
  #include <cstring>
+#include <cmath>
  #include <functional>
  #include <iosfwd>
  #include <utility>
@@ -206,9 +207,170 @@ template <size_t _Np>
  // }}}
  template <size_t _Xp>
    using _SizeConstant = integral_constant<size_t, _Xp>;
+// constexpr feature detection{{{
+// x86 feature flags. Each constant mirrors the corresponding
+// _GLIBCXX_SIMD_HAVE_* macro as a constexpr bool so that it can be used in
+// `if constexpr` conditions and ordinary expressions.
+constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
+constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
+constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
+constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
+constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
+constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
+constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
+constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
+constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
+constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
+constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
+constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
+constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
+constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
+constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
+constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
+constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
+constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
+constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
+constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
+constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
+constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
+// Combined flags: true only if both named AVX-512 sub-extensions are enabled.
+constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
+constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
+constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
+constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
+constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
+constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
+constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
+constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
+constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
+constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;
+
+// ARM NEON feature flags, mirroring the _GLIBCXX_SIMD_HAVE_NEON* macros.
+constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
+constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
+constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
+// __support_neon_float is true when __GCC_IEC_559 is defined and equal to 0.
+// If that macro is not defined at all, the flag is true only if __FAST_MATH__
+// is defined; otherwise it is false.
+// NOTE(review): presumably this gates NEON use for floating point when strict
+// IEC 559 conformance is not requested - confirm against the users of the flag.
+constexpr inline bool __support_neon_float =
+#if defined __GCC_IEC_559
+  __GCC_IEC_559 == 0;
+#elif defined __FAST_MATH__
+  true;
+#else
+  false;
+#endif
+
+// POWER vector feature flags.
+// __have_power10vec and __have_power9vec depend only on their own predefined
+// macro. Each of the following levels (power8vec, VSX, VMX) is true if its own
+// macro is defined and otherwise inherits the value of the level declared just
+// before it, i.e. power9vec => power8vec => VSX => VMX.
+#ifdef _ARCH_PWR10
+constexpr inline bool __have_power10vec = true;
+#else
+constexpr inline bool __have_power10vec = false;
+#endif
+#ifdef __POWER9_VECTOR__
+constexpr inline bool __have_power9vec = true;
+#else
+constexpr inline bool __have_power9vec = false;
+#endif
+#if defined __POWER8_VECTOR__
+constexpr inline bool __have_power8vec = true;
+#else
+// No __POWER8_VECTOR__: fall back to the POWER9 flag.
+constexpr inline bool __have_power8vec = __have_power9vec;
+#endif
+#if defined __VSX__
+constexpr inline bool __have_power_vsx = true;
+#else
+// No __VSX__: fall back to the POWER8 flag.
+constexpr inline bool __have_power_vsx = __have_power8vec;
+#endif
+#if defined __ALTIVEC__
+constexpr inline bool __have_power_vmx = true;
+#else
+// No __ALTIVEC__: fall back to the VSX flag.
+constexpr inline bool __have_power_vmx = __have_power_vsx;
+#endif
+
+// }}}
  
  namespace __detail
  {
+  /**@internal
+   * Packs the floating-point related compilation settings of the current
+   * translation unit into an integer:
+   *  - bit 0:     set if math_errhandling includes MATH_ERREXCEPT
+   *  - bits 1-2:  1 if __FAST_MATH__ is defined, else 2 if
+   *               __FINITE_MATH_ONLY__ is nonzero, else 3 if
+   *               __GCC_IEC_559 < 2, else 0
+   *  - bits 3...: __FLT_EVAL_METHOD__ + 1
+   *
+   * The result is used as a template argument of _MachineFlagsTemplate, so the
+   * encoding must stay stable.
+   */
+  constexpr std::uint_least64_t
+  __floating_point_flags()
+  {
+    std::uint_least64_t __flags = 0;
+    // bit 0: floating-point exceptions are used for math error reporting
+    if constexpr (math_errhandling & MATH_ERREXCEPT)
+      __flags |= 1;
+    // bits 1-2: the first matching condition wins (fast-math, then
+    // finite-math-only, then reduced IEC 559 conformance)
+#ifdef __FAST_MATH__
+    __flags |= 1 << 1;
+#elif __FINITE_MATH_ONLY__
+    __flags |= 2 << 1;
+#elif __GCC_IEC_559 < 2
+    __flags |= 3 << 1;
+#endif
+    // bits 3 and up: evaluation method, offset by one
+    __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
+    return __flags;
+  }
+
+  constexpr std::uint_least64_t
+  __machine_flags()
+  {
+    if constexpr (__have_mmx || __have_sse)
+      return __have_mmx
+                | (__have_sse                << 1)
+                | (__have_sse2               << 2)
+                | (__have_sse3               << 3)
+                | (__have_ssse3              << 4)
+                | (__have_sse4_1             << 5)
+                | (__have_sse4_2             << 6)
+                | (__have_xop                << 7)
+                | (__have_avx                << 8)
+                | (__have_avx2               << 9)
+                | (__have_bmi                << 10)
+                | (__have_bmi2               << 11)
+                | (__have_lzcnt              << 12)
+                | (__have_sse4a              << 13)
+                | (__have_fma                << 14)
+                | (__have_fma4               << 15)
+                | (__have_f16c               << 16)
+                | (__have_popcnt             << 17)
+                | (__have_avx512f            << 18)
+                | (__have_avx512dq           << 19)
+                | (__have_avx512vl           << 20)
+                | (__have_avx512bw           << 21)
+                | (__have_avx512bitalg       << 22)
+                | (__have_avx512vbmi2        << 23)
+                | (__have_avx512vbmi         << 24)
+                | (__have_avx512ifma         << 25)
+                | (__have_avx512cd           << 26)
+                | (__have_avx512vnni         << 27)
+                | (__have_avx512vpopcntdq    << 28)
+                | (__have_avx512vp2intersect << 29);
+    else if constexpr (__have_neon)
+      return __have_neon
+              | (__have_neon_a32 << 1)
+              | (__have_neon_a64 << 2)
+              | (__have_neon_a64 << 2)
+              | (__support_neon_float << 3);
+    else if constexpr (__have_power_vmx)
+      return __have_power_vmx
+              | (__have_power_vsx  << 1)
+              | (__have_power8vec  << 2)
+              | (__have_power9vec  << 3)
+              | (__have_power10vec << 4);
+    else
+      return 0;
+  }
+
+  // _OdrEnforcer is declared in an unnamed namespace, which makes it a
+  // distinct type in every translation unit. __odr_helper selects it when
+  // __machine_flags() is 0.
+  namespace
+  {
+    struct _OdrEnforcer {};
+  }
+
+  // Empty tag type identified solely by its integer template arguments.
+  // __odr_helper instantiates it with __machine_flags() and
+  // __floating_point_flags().
+  template <std::uint_least64_t...>
+    struct _MachineFlagsTemplate {};
+
+  /**@internal
+   * Use this type as a default template argument for all function templates
+   * that are not declared always_inline. It ensures that a function
+   * specialization which the compiler decides not to inline has either a
+   * unique symbol (_OdrEnforcer) or a symbol that encodes the
+   * machine/architecture flags (_MachineFlagsTemplate). This helps to avoid ODR
+   * violations when users link TUs compiled with different flags, which is
+   * especially important when simd is used in libraries.
+   *
+   * _OdrEnforcer is chosen when __machine_flags() is 0; otherwise
+   * _MachineFlagsTemplate is instantiated with __machine_flags() and
+   * __floating_point_flags().
+   */
+  using __odr_helper
+    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
+                   _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
+
    struct _Minimum
    {
      template <typename _Tp>
@@ -472,71 +634,6 @@ template <int _Np>
  template <typename _Tp>
    inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
  
-// }}}
-// constexpr feature detection{{{
-constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
-constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
-constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
-constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
-constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
-constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
-constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
-constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
-constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
-constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
-constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
-constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
-constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
-constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
-constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
-constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
-constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
-constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
-constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
-constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
-constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
-constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
-constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
-constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
-
-constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
-constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
-constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
-constexpr inline bool __support_neon_float =
-#if defined __GCC_IEC_559
-  __GCC_IEC_559 == 0;
-#elif defined __FAST_MATH__
-  true;
-#else
-  false;
-#endif
-
-#ifdef _ARCH_PWR10
-constexpr inline bool __have_power10vec = true;
-#else
-constexpr inline bool __have_power10vec = false;
-#endif
-#ifdef __POWER9_VECTOR__
-constexpr inline bool __have_power9vec = true;
-#else
-constexpr inline bool __have_power9vec = false;
-#endif
-#if defined __POWER8_VECTOR__
-constexpr inline bool __have_power8vec = true;
-#else
-constexpr inline bool __have_power8vec = __have_power9vec;
-#endif
-#if defined __VSX__
-constexpr inline bool __have_power_vsx = true;
-#else
-constexpr inline bool __have_power_vsx = __have_power8vec;
-#endif
-#if defined __ALTIVEC__
-constexpr inline bool __have_power_vmx = true;
-#else
-constexpr inline bool __have_power_vmx = __have_power_vsx;
-#endif
-
  // }}}
  // __is_scalar_abi {{{
  template <typename _Abi>
@@ -4004,7 +4101,7 @@ template <typename _Tp, typename _A0, typename... _As>
  
  // }}}
  // concat(simd...) {{{
-template <typename _Tp, typename... _As>
+template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
    inline _GLIBCXX_SIMD_CONSTEXPR
    simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
    concat(const simd<_Tp, _As>&... __xs)
@@ -4589,6 +4686,7 @@ template <typename _Tp, typename _Abi>
        template <typename _Up, typename _A2,
                 typename
                 = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
+       _GLIBCXX_SIMD_ALWAYS_INLINE
         operator simd_mask<_Up, _A2>() &&
         {
           using namespace std::experimental::__proposed;
@@ -4826,121 +4924,153 @@ find_last_set(_ExactBool)
  
  /// @cond undocumented
  // _SimdIntOperators{{{1
-template <typename _V, typename _Impl, bool>
+template <typename _V, typename _Tp, typename _Abi, bool>
    class _SimdIntOperators {};
  
-template <typename _V, typename _Impl>
-  class _SimdIntOperators<_V, _Impl, true>
+template <typename _V, typename _Tp, typename _Abi>
+  class _SimdIntOperators<_V, _Tp, _Abi, true>
    {
+    using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;
+
      _GLIBCXX_SIMD_INTRINSIC const _V& __derived() const
      { return *static_cast<const _V*>(this); }
  
-    template <typename _Tp>
+    template <typename _Up>
        _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
-      _S_make_derived(_Tp&& __d)
-      { return {__private_init, static_cast<_Tp&&>(__d)}; }
+      _S_make_derived(_Up&& __d)
+      { return {__private_init, static_cast<_Up&&>(__d)}; }
  
    public:
-    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator%=(_V& __lhs, const _V& __x)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V&
+    operator%=(_V& __lhs, const _V& __x)
      { return __lhs = __lhs % __x; }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator&=(_V& __lhs, const _V& __x)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V&
+    operator&=(_V& __lhs, const _V& __x)
      { return __lhs = __lhs & __x; }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator|=(_V& __lhs, const _V& __x)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V&
+    operator|=(_V& __lhs, const _V& __x)
      { return __lhs = __lhs | __x; }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator^=(_V& __lhs, const _V& __x)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V&
+    operator^=(_V& __lhs, const _V& __x)
      { return __lhs = __lhs ^ __x; }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, const _V& __x)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V&
+    operator<<=(_V& __lhs, const _V& __x)
      { return __lhs = __lhs << __x; }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, const _V& __x)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V&
+    operator>>=(_V& __lhs, const _V& __x)
      { return __lhs = __lhs >> __x; }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator<<=(_V& __lhs, int __x)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V&
+    operator<<=(_V& __lhs, int __x)
      { return __lhs = __lhs << __x; }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V& operator>>=(_V& __lhs, int __x)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V&
+    operator>>=(_V& __lhs, int __x)
      { return __lhs = __lhs >> __x; }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V operator%(const _V& __x, const _V& __y)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V
+    operator%(const _V& __x, const _V& __y)
      {
        return _SimdIntOperators::_S_make_derived(
         _Impl::_S_modulus(__data(__x), __data(__y)));
      }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V operator&(const _V& __x, const _V& __y)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V
+    operator&(const _V& __x, const _V& __y)
      {
        return _SimdIntOperators::_S_make_derived(
         _Impl::_S_bit_and(__data(__x), __data(__y)));
      }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V operator|(const _V& __x, const _V& __y)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V
+    operator|(const _V& __x, const _V& __y)
      {
        return _SimdIntOperators::_S_make_derived(
         _Impl::_S_bit_or(__data(__x), __data(__y)));
      }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V operator^(const _V& __x, const _V& __y)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V
+    operator^(const _V& __x, const _V& __y)
      {
        return _SimdIntOperators::_S_make_derived(
         _Impl::_S_bit_xor(__data(__x), __data(__y)));
      }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, const _V& __y)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V
+    operator<<(const _V& __x, const _V& __y)
      {
        return _SimdIntOperators::_S_make_derived(
         _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
      }
  
-    _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, const _V& __y)
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V
+    operator>>(const _V& __x, const _V& __y)
      {
        return _SimdIntOperators::_S_make_derived(
         _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
      }
  
-    template <typename _VV = _V>
-      _GLIBCXX_SIMD_CONSTEXPR friend _V operator<<(const _V& __x, int __y)
-      {
-       using _Tp = typename _VV::value_type;
-       if (__y < 0)
-         __invoke_ub("The behavior is undefined if the right operand of a "
-                     "shift operation is negative. [expr.shift]\nA shift by "
-                     "%d was requested",
-                     __y);
-       if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
-         __invoke_ub(
-           "The behavior is undefined if the right operand of a "
-           "shift operation is greater than or equal to the width of the "
-           "promoted left operand. [expr.shift]\nA shift by %d was requested",
-           __y);
-       return _SimdIntOperators::_S_make_derived(
-         _Impl::_S_bit_shift_left(__data(__x), __y));
-      }
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V
+    operator<<(const _V& __x, int __y)
+    {
+      if (__y < 0)
+       __invoke_ub("The behavior is undefined if the right operand of a "
+                   "shift operation is negative. [expr.shift]\nA shift by "
+                   "%d was requested",
+                   __y);
+      if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
+       __invoke_ub(
+         "The behavior is undefined if the right operand of a "
+         "shift operation is greater than or equal to the width of the "
+         "promoted left operand. [expr.shift]\nA shift by %d was requested",
+         __y);
+      return _SimdIntOperators::_S_make_derived(
+       _Impl::_S_bit_shift_left(__data(__x), __y));
+    }
  
-    template <typename _VV = _V>
-      _GLIBCXX_SIMD_CONSTEXPR friend _V operator>>(const _V& __x, int __y)
-      {
-       using _Tp = typename _VV::value_type;
-       if (__y < 0)
-         __invoke_ub(
-           "The behavior is undefined if the right operand of a shift "
-           "operation is negative. [expr.shift]\nA shift by %d was requested",
-           __y);
-       if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
-         __invoke_ub(
-           "The behavior is undefined if the right operand of a shift "
-           "operation is greater than or equal to the width of the promoted "
-           "left operand. [expr.shift]\nA shift by %d was requested",
-           __y);
-       return _SimdIntOperators::_S_make_derived(
-         _Impl::_S_bit_shift_right(__data(__x), __y));
-      }
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
+    _V
+    operator>>(const _V& __x, int __y)
+    {
+      if (__y < 0)
+       __invoke_ub(
+         "The behavior is undefined if the right operand of a shift "
+         "operation is negative. [expr.shift]\nA shift by %d was requested",
+         __y);
+      if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
+       __invoke_ub(
+         "The behavior is undefined if the right operand of a shift "
+         "operation is greater than or equal to the width of the promoted "
+         "left operand. [expr.shift]\nA shift by %d was requested",
+         __y);
+      return _SimdIntOperators::_S_make_derived(
+       _Impl::_S_bit_shift_right(__data(__x), __y));
+    }
  
      // unary operators (for integral _Tp)
-    _GLIBCXX_SIMD_CONSTEXPR _V operator~() const
+    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
+    _V
+    operator~() const
      { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
    };
  
@@ -4950,7 +5080,7 @@ template <typename _V, typename _Impl>
  // simd {{{
  template <typename _Tp, typename _Abi>
    class simd : public _SimdIntOperators<
-                simd<_Tp, _Abi>, typename _SimdTraits<_Tp, _Abi>::_SimdImpl,
+                simd<_Tp, _Abi>, _Tp, _Abi,
                  conjunction<is_integral<_Tp>,
                              typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
                public _SimdTraits<_Tp, _Abi>::_SimdBase
@@ -4964,7 +5094,7 @@ template <typename _Tp, typename _Abi>
    public:
      using _Impl = typename _Traits::_SimdImpl;
      friend _Impl;
-    friend _SimdIntOperators<simd, _Impl, true>;
+    friend _SimdIntOperators<simd, _Tp, _Abi, true>;
  
      using value_type = _Tp;
      using reference = _SmartReference<_MemberType, _Impl, value_type>;
diff --git a/libstdc++-v3/include/experimental/bits/simd_builtin.h b/libstdc++-v3/include/experimental/bits/simd_builtin.h

index 6d3ba5b45f0e692a10fc8dd2bce72b9f810b8e4c..019b98e8c37551e6544914d54e8e3800b350d4ef 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd_builtin.h
+++ b/libstdc++-v3/include/experimental/bits/simd_builtin.h
@@ -50,7 +50,8 @@ template <typename _V, typename = _VectorTraits<_V>>
  //}}}
  // __vector_permute<Indices...>{{{
  // Index == -1 requests zeroing of the output element
-template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
+template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>,
+         typename = __detail::__odr_helper>
    _Tp
    __vector_permute(_Tp __x)
    {
@@ -62,7 +63,8 @@ template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  // }}}
  // __vector_shuffle<Indices...>{{{
  // Index == -1 requests zeroing of the output element
-template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
+template <int... _Indices, typename _Tp, typename _TVT = _VectorTraits<_Tp>,
+         typename = __detail::__odr_helper>
    _Tp
    __vector_shuffle(_Tp __x, _Tp __y)
    {
@@ -820,10 +822,12 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
      // _SimdBase / base class for simd, providing extra conversions {{{
      struct _SimdBase2
      {
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        explicit operator __intrinsic_type_t<_Tp, _Np>() const
        {
         return __to_intrin(static_cast<const simd<_Tp, _Abi>*>(this)->_M_data);
        }
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        explicit operator __vector_type_t<_Tp, _Np>() const
        {
         return static_cast<const simd<_Tp, _Abi>*>(this)->_M_data.__builtin();
@@ -832,6 +836,7 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
  
      struct _SimdBase1
      {
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        explicit operator __intrinsic_type_t<_Tp, _Np>() const
        { return __data(*static_cast<const simd<_Tp, _Abi>*>(this)); }
      };
@@ -844,11 +849,13 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
      // _MaskBase {{{
      struct _MaskBase2
      {
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        explicit operator __intrinsic_type_t<_Tp, _Np>() const
        {
         return static_cast<const simd_mask<_Tp, _Abi>*>(this)
           ->_M_data.__intrin();
        }
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        explicit operator __vector_type_t<_Tp, _Np>() const
        {
         return static_cast<const simd_mask<_Tp, _Abi>*>(this)->_M_data._M_data;
@@ -857,6 +864,7 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
  
      struct _MaskBase1
      {
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        explicit operator __intrinsic_type_t<_Tp, _Np>() const
        { return __data(*static_cast<const simd_mask<_Tp, _Abi>*>(this)); }
      };
@@ -874,7 +882,9 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
        _Up _M_data;
  
      public:
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        _MaskCastType(_Up __x) : _M_data(__x) {}
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        operator _MaskMember() const { return _M_data; }
      };
  
@@ -887,7 +897,9 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
        _SimdMember _M_data;
  
      public:
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        _SimdCastType1(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        operator _SimdMember() const { return _M_data; }
      };
  
@@ -898,8 +910,11 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
        _SimdMember _M_data;
  
      public:
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        _SimdCastType2(_Ap __a) : _M_data(__vector_bitcast<_Tp>(__a)) {}
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        _SimdCastType2(_Bp __b) : _M_data(__b) {}
+      _GLIBCXX_SIMD_ALWAYS_INLINE
        operator _SimdMember() const { return _M_data; }
      };
  
@@ -913,14 +928,14 @@ template <typename _Tp, typename _Mp, typename _Abi, size_t _Np>
  struct _CommonImplX86;
  struct _CommonImplNeon;
  struct _CommonImplBuiltin;
-template <typename _Abi> struct _SimdImplBuiltin;
-template <typename _Abi> struct _MaskImplBuiltin;
-template <typename _Abi> struct _SimdImplX86;
-template <typename _Abi> struct _MaskImplX86;
-template <typename _Abi> struct _SimdImplNeon;
-template <typename _Abi> struct _MaskImplNeon;
-template <typename _Abi> struct _SimdImplPpc;
-template <typename _Abi> struct _MaskImplPpc;
+template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplBuiltin;
+template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplBuiltin;
+template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplX86;
+template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplX86;
+template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplNeon;
+template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplNeon;
+template <typename _Abi, typename = __detail::__odr_helper> struct _SimdImplPpc;
+template <typename _Abi, typename = __detail::__odr_helper> struct _MaskImplPpc;
  
  // simd_abi::_VecBuiltin {{{
  template <int _UsedBytes>
@@ -1369,7 +1384,7 @@ struct _CommonImplBuiltin
  
  // }}}
  // _SimdImplBuiltin {{{1
-template <typename _Abi>
+template <typename _Abi, typename>
    struct _SimdImplBuiltin
    {
      // member types {{{2
@@ -2618,7 +2633,7 @@ struct _MaskImplBuiltinMixin
  };
  
  // _MaskImplBuiltin {{{1
-template <typename _Abi>
+template <typename _Abi, typename>
    struct _MaskImplBuiltin : _MaskImplBuiltinMixin
    {
      using _MaskImplBuiltinMixin::_S_to_bits;
@@ -2953,4 +2968,4 @@ _GLIBCXX_SIMD_END_NAMESPACE
  #endif // __cplusplus >= 201703L
  #endif // _GLIBCXX_EXPERIMENTAL_SIMD_ABIS_H_
  
-// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=80
+// vim: foldmethod=marker foldmarker={{{,}}} sw=2 noet ts=8 sts=2 tw=100
diff --git a/libstdc++-v3/include/experimental/bits/simd_detail.h b/libstdc++-v3/include/experimental/bits/simd_detail.h

index 638a5b9233d522da0a7e2233ceb9a4c25000c3fc..9135d3e75d346f3365e45218e59c4f1a228eff16 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd_detail.h
+++ b/libstdc++-v3/include/experimental/bits/simd_detail.h
@@ -173,6 +173,46 @@
  #else
  #define _GLIBCXX_SIMD_HAVE_AVX512BW 0
  #endif
+// Additional AVX-512 sub-extension detection. Each _GLIBCXX_SIMD_HAVE_* macro
+// is defined to 1 if the corresponding compiler predefine (__AVX512*__) is
+// set, and to 0 otherwise, so it can be used both in #if and in expressions.
+#ifdef __AVX512BITALG__
+#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1
+#else
+#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0
+#endif
+#ifdef __AVX512VBMI2__
+#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1
+#else
+#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0
+#endif
+#ifdef __AVX512VBMI__
+#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1
+#else
+#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0
+#endif
+#ifdef __AVX512IFMA__
+#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1
+#else
+#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0
+#endif
+#ifdef __AVX512CD__
+#define _GLIBCXX_SIMD_HAVE_AVX512CD 1
+#else
+#define _GLIBCXX_SIMD_HAVE_AVX512CD 0
+#endif
+#ifdef __AVX512VNNI__
+#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1
+#else
+#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0
+#endif
+#ifdef __AVX512VPOPCNTDQ__
+#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1
+#else
+#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0
+#endif
+#ifdef __AVX512VP2INTERSECT__
+#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1
+#else
+#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0
+#endif
  
  #if _GLIBCXX_SIMD_HAVE_SSE
  #define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
diff --git a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h

index f2e77d0d1214cc3401f20d79a44b7333c81bb065..7bb248cf9d0a4acbbf43e5111db0ec32730ba3bd 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
+++ b/libstdc++-v3/include/experimental/bits/simd_fixed_size.h
@@ -201,6 +201,7 @@ template <typename _Tp, typename _Abi, size_t _Offset>
    };
  
  template <size_t _Offset, typename _Tp, typename _Abi, typename... _As>
+  _GLIBCXX_SIMD_INTRINSIC
    __tuple_element_meta<_Tp, _Abi, _Offset>
    __make_meta(const _SimdTuple<_Tp, _Abi, _As...>&)
    { return {}; }
@@ -230,11 +231,13 @@ template <size_t _O0, size_t _O1, typename _Base>
    struct _WithOffset<_O0, _WithOffset<_O1, _Base>> {};
  
  template <size_t _Offset, typename _Tp>
+  _GLIBCXX_SIMD_INTRINSIC
    decltype(auto)
    __add_offset(_Tp& __base)
    { return static_cast<_WithOffset<_Offset, __remove_cvref_t<_Tp>>&>(__base); }
  
  template <size_t _Offset, typename _Tp>
+  _GLIBCXX_SIMD_INTRINSIC
    decltype(auto)
    __add_offset(const _Tp& __base)
    {
@@ -243,6 +246,7 @@ template <size_t _Offset, typename _Tp>
    }
  
  template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
+  _GLIBCXX_SIMD_INTRINSIC
    decltype(auto)
    __add_offset(_WithOffset<_ExistingOffset, _Tp>& __base)
    {
@@ -251,6 +255,7 @@ template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
    }
  
  template <size_t _Offset, size_t _ExistingOffset, typename _Tp>
+  _GLIBCXX_SIMD_INTRINSIC
    decltype(auto)
    __add_offset(const _WithOffset<_ExistingOffset, _Tp>& __base)
    {
@@ -586,6 +591,7 @@ template <typename _Tp, typename _Abi0, typename... _Abis>
           return second[integral_constant<_Up, _I - simd_size_v<_Tp, _Abi0>>()];
        }
  
+    _GLIBCXX_SIMD_INTRINSIC
      _Tp operator[](size_t __i) const noexcept
      {
        if constexpr (_S_tuple_size == 1)
@@ -608,6 +614,7 @@ template <typename _Tp, typename _Abi0, typename... _Abis>
         }
      }
  
+    _GLIBCXX_SIMD_INTRINSIC
      void _M_set(size_t __i, _Tp __val) noexcept
      {
        if constexpr (_S_tuple_size == 1)
@@ -627,6 +634,7 @@ template <typename _Tp, typename _Abi0, typename... _Abis>
  
    private:
      // _M_subscript_read/_write {{{
+    _GLIBCXX_SIMD_INTRINSIC
      _Tp _M_subscript_read([[maybe_unused]] size_t __i) const noexcept
      {
        if constexpr (__is_vectorizable_v<_FirstType>)
@@ -635,6 +643,7 @@ template <typename _Tp, typename _Abi0, typename... _Abis>
         return first[__i];
      }
  
+    _GLIBCXX_SIMD_INTRINSIC
      void _M_subscript_write([[maybe_unused]] size_t __i, _Tp __y) noexcept
      {
        if constexpr (__is_vectorizable_v<_FirstType>)
@@ -1033,9 +1042,11 @@ template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>>
      _Tp _M_data;
      using _TT = __remove_cvref_t<_Tp>;
  
+    _GLIBCXX_SIMD_INTRINSIC
      operator _TT()
      { return _M_data; }
  
+    _GLIBCXX_SIMD_INTRINSIC
      operator _TT&()
      {
        static_assert(is_lvalue_reference<_Tp>::value, "");
@@ -1043,6 +1054,7 @@ template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>>
        return _M_data;
      }
  
+    _GLIBCXX_SIMD_INTRINSIC
      operator _TT*()
      {
        static_assert(is_lvalue_reference<_Tp>::value, "");
@@ -1050,13 +1062,16 @@ template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>>
        return &_M_data;
      }
  
-    constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd) {}
+    _GLIBCXX_SIMD_INTRINSIC
+    constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd) {}
  
      template <typename _Abi>
+      _GLIBCXX_SIMD_INTRINSIC
        operator simd<typename _TT::value_type, _Abi>()
        { return {__private_init, _M_data}; }
  
      template <typename _Abi>
+      _GLIBCXX_SIMD_INTRINSIC
        operator simd<typename _TT::value_type, _Abi>&()
        {
         return *reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(
@@ -1064,6 +1079,7 @@ template <typename _Tp, bool = is_arithmetic_v<__remove_cvref_t<_Tp>>>
        }
  
      template <typename _Abi>
+      _GLIBCXX_SIMD_INTRINSIC
        operator simd<typename _TT::value_type, _Abi>*()
        {
         return reinterpret_cast<simd<typename _TT::value_type, _Abi>*>(
@@ -1081,14 +1097,18 @@ template <typename _Tp>
      _Tp _M_data;
      fixed_size_simd<_TT, 1> _M_fd;
  
-    constexpr inline __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {}
+    _GLIBCXX_SIMD_INTRINSIC
+    constexpr __autocvt_to_simd(_Tp dd) : _M_data(dd), _M_fd(_M_data) {}
  
+    _GLIBCXX_SIMD_INTRINSIC
      ~__autocvt_to_simd()
      { _M_data = __data(_M_fd).first; }
  
+    _GLIBCXX_SIMD_INTRINSIC
      operator fixed_size_simd<_TT, 1>()
      { return _M_fd; }
  
+    _GLIBCXX_SIMD_INTRINSIC
      operator fixed_size_simd<_TT, 1> &()
      {
        static_assert(is_lvalue_reference<_Tp>::value, "");
@@ -1096,6 +1116,7 @@ template <typename _Tp>
        return _M_fd;
      }
  
+    _GLIBCXX_SIMD_INTRINSIC
      operator fixed_size_simd<_TT, 1> *()
      {
        static_assert(is_lvalue_reference<_Tp>::value, "");
@@ -1107,8 +1128,8 @@ template <typename _Tp>
  // }}}
  
  struct _CommonImplFixedSize;
-template <int _Np> struct _SimdImplFixedSize;
-template <int _Np> struct _MaskImplFixedSize;
+template <int _Np, typename = __detail::__odr_helper> struct _SimdImplFixedSize;
+template <int _Np, typename = __detail::__odr_helper> struct _MaskImplFixedSize;
  // simd_abi::_Fixed {{{
  template <int _Np>
    struct simd_abi::_Fixed
@@ -1172,12 +1193,15 @@ template <int _Np>
         {
           // The following ensures, function arguments are passed via the stack.
           // This is important for ABI compatibility across TU boundaries
+         _GLIBCXX_SIMD_ALWAYS_INLINE
           _SimdBase(const _SimdBase&) {}
           _SimdBase() = default;
  
+         _GLIBCXX_SIMD_ALWAYS_INLINE
           explicit operator const _SimdMember &() const
           { return static_cast<const simd<_Tp, _Fixed>*>(this)->_M_data; }
  
+         _GLIBCXX_SIMD_ALWAYS_INLINE
           explicit operator array<_Tp, _Np>() const
           {
             array<_Tp, _Np> __r;
@@ -1198,8 +1222,11 @@ template <int _Np>
         // _SimdCastType {{{
         struct _SimdCastType
         {
+         _GLIBCXX_SIMD_ALWAYS_INLINE
           _SimdCastType(const array<_Tp, _Np>&);
+         _GLIBCXX_SIMD_ALWAYS_INLINE
           _SimdCastType(const _SimdMember& dd) : _M_data(dd) {}
+         _GLIBCXX_SIMD_ALWAYS_INLINE
           explicit operator const _SimdMember &() const { return _M_data; }
  
         private:
@@ -1237,7 +1264,7 @@ struct _CommonImplFixedSize
  // _SimdImplFixedSize {{{1
  // fixed_size should not inherit from _SimdMathFallback in order for
  // specializations in the used _SimdTuple Abis to get used
-template <int _Np>
+template <int _Np, typename>
    struct _SimdImplFixedSize
    {
      // member types {{{2
@@ -1794,7 +1821,7 @@ template <int _Np>
    };
  
  // _MaskImplFixedSize {{{1
-template <int _Np>
+template <int _Np, typename>
    struct _MaskImplFixedSize
    {
      static_assert(
diff --git a/libstdc++-v3/include/experimental/bits/simd_math.h b/libstdc++-v3/include/experimental/bits/simd_math.h

index 6f2ee3941b16648e0e0894fd27778fe8924d2763..5fe44adceb52e3f694dd7775a63dac2dc772fd85 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd_math.h
+++ b/libstdc++-v3/include/experimental/bits/simd_math.h
@@ -60,6 +60,7 @@ template <typename _DoubleR, typename _Tp, typename _Abi>
  template <typename _Tp, typename _Abi, typename...,                            \
           typename _R = _Math_return_type_t<                                   \
             decltype(std::__name(declval<double>())), _Tp, _Abi>>              \
+  _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
    enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
    __name(simd<_Tp, _Abi> __x)                                                  \
    { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
@@ -125,6 +126,7 @@ template <                                                                     \
    typename _Arg2 = _Extra_argument_type<__arg2, _Tp, _Abi>,                    \
    typename _R = _Math_return_type_t<                                           \
      decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>>    \
+  _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
    enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
    __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)          \
    {                                                                            \
@@ -155,6 +157,7 @@ template <typename _Tp, typename _Abi, typename...,                            \
             decltype(std::__name(declval<double>(), _Arg2::declval(),          \
                                  _Arg3::declval())),                           \
             _Tp, _Abi>>                                                        \
+  _GLIBCXX_SIMD_ALWAYS_INLINE                                                  \
    enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
    __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,          \
          const typename _Arg3::type& __z)                                      \
@@ -399,6 +402,7 @@ template <typename _Abi>
  // }}}
  // __extract_exponent_as_int {{{
  template <typename _Tp, typename _Abi>
+  _GLIBCXX_SIMD_INTRINSIC
    rebind_simd_t<int, simd<_Tp, _Abi>>
    __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
    {
@@ -422,7 +426,8 @@ template <typename ImplFun, typename FallbackFun, typename... _Args>
      -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
    { return __impl_fun(static_cast<_Args&&>(__args)...); }
  
-template <typename ImplFun, typename FallbackFun, typename... _Args>
+template <typename ImplFun, typename FallbackFun, typename... _Args,
+         typename = __detail::__odr_helper>
    inline auto
    __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
                               _Args&&... __args)
@@ -458,7 +463,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
   * Fix sign.
   */
  // cos{{{
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    cos(const simd<_Tp, _Abi>& __x)
    {
@@ -504,7 +509,7 @@ template <typename _Tp>
  
  //}}}
  // sin{{{
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    sin(const simd<_Tp, _Abi>& __x)
    {
@@ -566,6 +571,7 @@ _GLIBCXX_SIMD_MATH_CALL_(expm1)
  // frexp {{{
  #if _GLIBCXX_SIMD_X86INTRIN
  template <typename _Tp, size_t _Np>
+  _GLIBCXX_SIMD_INTRINSIC
    _SimdWrapper<_Tp, _Np>
    __getexp(_SimdWrapper<_Tp, _Np> __x)
    {
@@ -594,6 +600,7 @@ template <typename _Tp, size_t _Np>
    }
  
  template <typename _Tp, size_t _Np>
+  _GLIBCXX_SIMD_INTRINSIC
    _SimdWrapper<_Tp, _Np>
    __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
    {
@@ -634,7 +641,7 @@ template <typename _Tp, size_t _Np>
   * The return value will be in the range [0.5, 1.0[
   * The @p __e value will be an integer defining the power-of-two exponent
   */
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
    {
@@ -739,7 +746,7 @@ _GLIBCXX_SIMD_MATH_CALL_(log2)
  
  //}}}
  // logb{{{
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
    logb(const simd<_Tp, _Abi>& __x)
    {
@@ -814,7 +821,7 @@ template <typename _Tp, typename _Abi>
    }
  
  //}}}
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
    {
@@ -848,6 +855,7 @@ _GLIBCXX_SIMD_MATH_CALL_(fabs)
  // [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
  // allow signed integral _Tp
  template <typename _Tp, typename _Abi>
+  _GLIBCXX_SIMD_ALWAYS_INLINE
    enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
    abs(const simd<_Tp, _Abi>& __x)
    { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
@@ -930,7 +938,7 @@ template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
               __data(__args)...)};
    }
  
-template <typename _VV>
+template <typename _VV, typename = __detail::__odr_helper>
    __remove_cvref_t<_VV>
    __hypot(_VV __x, _VV __y)
    {
@@ -1069,7 +1077,7 @@ template <typename _Tp, typename _Abi>
  
  _GLIBCXX_SIMD_CVTING2(hypot)
  
-  template <typename _VV>
+  template <typename _VV, typename = __detail::__odr_helper>
    __remove_cvref_t<_VV>
    __hypot(_VV __x, _VV __y, _VV __z)
    {
@@ -1271,7 +1279,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
  _GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
  _GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
  
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
    {
@@ -1303,12 +1311,14 @@ _GLIBCXX_SIMD_MATH_CALL_(isfinite)
  // `int isinf(double)`.
  template <typename _Tp, typename _Abi, typename...,
           typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
+  _GLIBCXX_SIMD_ALWAYS_INLINE
    enable_if_t<is_floating_point_v<_Tp>, _R>
    isinf(simd<_Tp, _Abi> __x)
    { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }
  
  template <typename _Tp, typename _Abi, typename...,
           typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
+  _GLIBCXX_SIMD_ALWAYS_INLINE
    enable_if_t<is_floating_point_v<_Tp>, _R>
    isnan(simd<_Tp, _Abi> __x)
    { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }
@@ -1316,6 +1326,7 @@ template <typename _Tp, typename _Abi, typename...,
  _GLIBCXX_SIMD_MATH_CALL_(isnormal)
  
  template <typename..., typename _Tp, typename _Abi>
+  _GLIBCXX_SIMD_ALWAYS_INLINE
    simd_mask<_Tp, _Abi>
    signbit(simd<_Tp, _Abi> __x)
    {
@@ -1363,7 +1374,7 @@ simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
  */
  
  // special math {{{
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                  const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
@@ -1374,7 +1385,7 @@ template <typename _Tp, typename _Abi>
      });
    }
  
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
                  const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
@@ -1398,7 +1409,7 @@ _GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
  _GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
  _GLIBCXX_SIMD_MATH_CALL_(expint)
  
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
           const simd<_Tp, _Abi>& __x)
@@ -1407,7 +1418,7 @@ template <typename _Tp, typename _Abi>
        [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
    }
  
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
            const simd<_Tp, _Abi>& __x)
@@ -1416,7 +1427,7 @@ template <typename _Tp, typename _Abi>
        [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
    }
  
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
            const simd<_Tp, _Abi>& __x)
@@ -1427,7 +1438,7 @@ template <typename _Tp, typename _Abi>
  
  _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
  
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
              const simd<_Tp, _Abi>& __x)
@@ -1436,7 +1447,7 @@ template <typename _Tp, typename _Abi>
        [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
    }
  
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
                const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
@@ -1447,7 +1458,7 @@ template <typename _Tp, typename _Abi>
      });
    }
  
-template <typename _Tp, typename _Abi>
+template <typename _Tp, typename _Abi, typename = __detail::__odr_helper>
    enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
    sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
               const simd<_Tp, _Abi>& __x)
diff --git a/libstdc++-v3/include/experimental/bits/simd_neon.h b/libstdc++-v3/include/experimental/bits/simd_neon.h

index 5fa9fea108efbbbe618794308390a44839003f91..0945092940a9c5a024d48f98f38b5294c0856810 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd_neon.h
+++ b/libstdc++-v3/include/experimental/bits/simd_neon.h
@@ -44,7 +44,7 @@ struct _CommonImplNeon : _CommonImplBuiltin
  
  // }}}
  // _SimdImplNeon {{{
-template <typename _Abi>
+template <typename _Abi, typename>
    struct _SimdImplNeon : _SimdImplBuiltin<_Abi>
    {
      using _Base = _SimdImplBuiltin<_Abi>;
@@ -390,7 +390,7 @@ struct _MaskImplNeonMixin
  
  // }}}
  // _MaskImplNeon {{{
-template <typename _Abi>
+template <typename _Abi, typename>
    struct _MaskImplNeon : _MaskImplNeonMixin, _MaskImplBuiltin<_Abi>
    {
      using _MaskImplBuiltinMixin::_S_to_maskvector;
diff --git a/libstdc++-v3/include/experimental/bits/simd_ppc.h b/libstdc++-v3/include/experimental/bits/simd_ppc.h

index 4be3c2dde7cfb1558aece1ce80255488d486a604..a83d970a4575aa7f331bed06f1e19998e7eda22f 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd_ppc.h
+++ b/libstdc++-v3/include/experimental/bits/simd_ppc.h
@@ -35,7 +35,7 @@
  _GLIBCXX_SIMD_BEGIN_NAMESPACE
  
  // _SimdImplPpc {{{
-template <typename _Abi>
+template <typename _Abi, typename>
    struct _SimdImplPpc : _SimdImplBuiltin<_Abi>
    {
      using _Base = _SimdImplBuiltin<_Abi>;
@@ -117,7 +117,7 @@ template <typename _Abi>
  
  // }}}
  // _MaskImplPpc {{{
-template <typename _Abi>
+template <typename _Abi, typename>
    struct _MaskImplPpc : _MaskImplBuiltin<_Abi>
    {
      using _Base = _MaskImplBuiltin<_Abi>;
diff --git a/libstdc++-v3/include/experimental/bits/simd_scalar.h b/libstdc++-v3/include/experimental/bits/simd_scalar.h

index 555143e6ffd861edf057ff3902378a8589118018..33c3cefc88dec9f0cdf0a0807b4318838137a17f 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd_scalar.h
+++ b/libstdc++-v3/include/experimental/bits/simd_scalar.h
@@ -155,7 +155,8 @@ struct _SimdImplScalar
  
    // _S_masked_load {{{2
    template <typename _Tp, typename _Up>
-    static inline _Tp _S_masked_load(_Tp __merge, bool __k,
+    _GLIBCXX_SIMD_INTRINSIC
+    static _Tp _S_masked_load(_Tp __merge, bool __k,
                                      const _Up* __mem) noexcept
      {
        if (__k)
@@ -165,83 +166,97 @@ struct _SimdImplScalar
  
    // _S_store {{{2
    template <typename _Tp, typename _Up>
-    static inline void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept
+    _GLIBCXX_SIMD_INTRINSIC
+    static void _S_store(_Tp __v, _Up* __mem, _TypeTag<_Tp>) noexcept
      { __mem[0] = static_cast<_Up>(__v); }
  
    // _S_masked_store {{{2
    template <typename _Tp, typename _Up>
-    static inline void _S_masked_store(const _Tp __v, _Up* __mem,
+    _GLIBCXX_SIMD_INTRINSIC
+    static void _S_masked_store(const _Tp __v, _Up* __mem,
                                        const bool __k) noexcept
      { if (__k) __mem[0] = __v; }
  
    // _S_negate {{{2
    template <typename _Tp>
-    static constexpr inline bool _S_negate(_Tp __x) noexcept
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr bool _S_negate(_Tp __x) noexcept
      { return !__x; }
  
    // _S_reduce {{{2
    template <typename _Tp, typename _BinaryOperation>
-    static constexpr inline _Tp
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp
      _S_reduce(const simd<_Tp, simd_abi::scalar>& __x, const _BinaryOperation&)
      { return __x._M_data; }
  
    // _S_min, _S_max {{{2
    template <typename _Tp>
-    static constexpr inline _Tp _S_min(const _Tp __a, const _Tp __b)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_min(const _Tp __a, const _Tp __b)
      { return std::min(__a, __b); }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_max(const _Tp __a, const _Tp __b)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_max(const _Tp __a, const _Tp __b)
      { return std::max(__a, __b); }
  
    // _S_complement {{{2
    template <typename _Tp>
-    static constexpr inline _Tp _S_complement(_Tp __x) noexcept
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_complement(_Tp __x) noexcept
      { return static_cast<_Tp>(~__x); }
  
    // _S_unary_minus {{{2
    template <typename _Tp>
-    static constexpr inline _Tp _S_unary_minus(_Tp __x) noexcept
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_unary_minus(_Tp __x) noexcept
      { return static_cast<_Tp>(-__x); }
  
    // arithmetic operators {{{2
    template <typename _Tp>
-    static constexpr inline _Tp _S_plus(_Tp __x, _Tp __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_plus(_Tp __x, _Tp __y)
      {
        return static_cast<_Tp>(__promote_preserving_unsigned(__x)
                               + __promote_preserving_unsigned(__y));
      }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_minus(_Tp __x, _Tp __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_minus(_Tp __x, _Tp __y)
      {
        return static_cast<_Tp>(__promote_preserving_unsigned(__x)
                               - __promote_preserving_unsigned(__y));
      }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_multiplies(_Tp __x, _Tp __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_multiplies(_Tp __x, _Tp __y)
      {
        return static_cast<_Tp>(__promote_preserving_unsigned(__x)
                               * __promote_preserving_unsigned(__y));
      }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_divides(_Tp __x, _Tp __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_divides(_Tp __x, _Tp __y)
      {
        return static_cast<_Tp>(__promote_preserving_unsigned(__x)
                               / __promote_preserving_unsigned(__y));
      }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_modulus(_Tp __x, _Tp __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_modulus(_Tp __x, _Tp __y)
      {
        return static_cast<_Tp>(__promote_preserving_unsigned(__x)
                               % __promote_preserving_unsigned(__y));
      }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_bit_and(_Tp __x, _Tp __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_bit_and(_Tp __x, _Tp __y)
      {
        if constexpr (is_floating_point_v<_Tp>)
         {
@@ -254,7 +269,8 @@ struct _SimdImplScalar
      }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_bit_or(_Tp __x, _Tp __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_bit_or(_Tp __x, _Tp __y)
      {
        if constexpr (is_floating_point_v<_Tp>)
         {
@@ -267,7 +283,8 @@ struct _SimdImplScalar
      }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_bit_xor(_Tp __x, _Tp __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_bit_xor(_Tp __x, _Tp __y)
      {
        if constexpr (is_floating_point_v<_Tp>)
         {
@@ -280,11 +297,13 @@ struct _SimdImplScalar
      }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_bit_shift_left(_Tp __x, int __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_bit_shift_left(_Tp __x, int __y)
      { return static_cast<_Tp>(__promote_preserving_unsigned(__x) << __y); }
  
    template <typename _Tp>
-    static constexpr inline _Tp _S_bit_shift_right(_Tp __x, int __y)
+    _GLIBCXX_SIMD_INTRINSIC
+    static constexpr _Tp _S_bit_shift_right(_Tp __x, int __y)
      { return static_cast<_Tp>(__promote_preserving_unsigned(__x) >> __y); }
  
    // math {{{2
@@ -553,11 +572,13 @@ struct _SimdImplScalar
  
    // _S_increment & _S_decrement{{{2
    template <typename _Tp>
-    constexpr static inline void _S_increment(_Tp& __x)
+    _GLIBCXX_SIMD_INTRINSIC
+    constexpr static void _S_increment(_Tp& __x)
      { ++__x; }
  
    template <typename _Tp>
-    constexpr static inline void _S_decrement(_Tp& __x)
+    _GLIBCXX_SIMD_INTRINSIC
+    constexpr static void _S_decrement(_Tp& __x)
      { --__x; }
  
  
@@ -582,6 +603,7 @@ struct _SimdImplScalar
  
    // smart_reference access {{{2
    template <typename _Tp, typename _Up>
+    _GLIBCXX_SIMD_INTRINSIC
      constexpr static void _S_set(_Tp& __v, [[maybe_unused]] int __i,
                                  _Up&& __x) noexcept
      {
@@ -677,25 +699,32 @@ struct _MaskImplScalar
    }
  
    // logical and bitwise operators {{{2
+  _GLIBCXX_SIMD_INTRINSIC
    static constexpr bool _S_logical_and(bool __x, bool __y)
    { return __x && __y; }
  
+  _GLIBCXX_SIMD_INTRINSIC
    static constexpr bool _S_logical_or(bool __x, bool __y)
    { return __x || __y; }
  
+  _GLIBCXX_SIMD_INTRINSIC
    static constexpr bool _S_bit_not(bool __x)
    { return !__x; }
  
+  _GLIBCXX_SIMD_INTRINSIC
    static constexpr bool _S_bit_and(bool __x, bool __y)
    { return __x && __y; }
  
+  _GLIBCXX_SIMD_INTRINSIC
    static constexpr bool _S_bit_or(bool __x, bool __y)
    { return __x || __y; }
  
+  _GLIBCXX_SIMD_INTRINSIC
    static constexpr bool _S_bit_xor(bool __x, bool __y)
    { return __x != __y; }
  
    // smart_reference access {{{2
+  _GLIBCXX_SIMD_INTRINSIC
    constexpr static void _S_set(bool& __k, [[maybe_unused]] int __i,
                                bool __x) noexcept
    {
diff --git a/libstdc++-v3/include/experimental/bits/simd_x86.h b/libstdc++-v3/include/experimental/bits/simd_x86.h

index 8d493d87aa3b545879eb9e696eb2c5d74aa57888..baa978bffdafc2d378365da979d177ef1a71825c 100644 (file)
--- a/libstdc++-v3/include/experimental/bits/simd_x86.h
+++ b/libstdc++-v3/include/experimental/bits/simd_x86.h
@@ -822,7 +822,7 @@ struct _CommonImplX86 : _CommonImplBuiltin
  
  // }}}
  // _SimdImplX86 {{{
-template <typename _Abi>
+template <typename _Abi, typename>
    struct _SimdImplX86 : _SimdImplBuiltin<_Abi>
    {
      using _Base = _SimdImplBuiltin<_Abi>;
@@ -4241,7 +4241,7 @@ struct _MaskImplX86Mixin
  
  // }}}
  // _MaskImplX86 {{{
-template <typename _Abi>
+template <typename _Abi, typename>
    struct _MaskImplX86 : _MaskImplX86Mixin, _MaskImplBuiltin<_Abi>
    {
      using _MaskImplX86Mixin::_S_to_bits;
author	Matthias Kretz <m.kretz@gsi.de>
	Mon, 1 Feb 2021 09:58:03 +0000 (10:58 +0100)
committer	Matthias Kretz <m.kretz@gsi.de>
	Sat, 15 Jan 2022 20:05:17 +0000 (21:05 +0100)
libstdc++-v3/include/experimental/bits/simd.h		patch \| blob \| blame \| history
libstdc++-v3/include/experimental/bits/simd_builtin.h		patch \| blob \| blame \| history
libstdc++-v3/include/experimental/bits/simd_detail.h		patch \| blob \| blame \| history
libstdc++-v3/include/experimental/bits/simd_fixed_size.h		patch \| blob \| blame \| history
libstdc++-v3/include/experimental/bits/simd_math.h		patch \| blob \| blame \| history
libstdc++-v3/include/experimental/bits/simd_neon.h		patch \| blob \| blame \| history
libstdc++-v3/include/experimental/bits/simd_ppc.h		patch \| blob \| blame \| history
libstdc++-v3/include/experimental/bits/simd_scalar.h		patch \| blob \| blame \| history
libstdc++-v3/include/experimental/bits/simd_x86.h		patch \| blob \| blame \| history