From: Jonathan Wakely <jwakely@redhat.com>
Date: Thu, 10 Oct 2024 12:36:33 +0000 (+0100)
Subject: libstdc++: Enable memcpy optimizations for distinct integral types [PR93059]
X-Git-Tag: basepoints/gcc-16~5233
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=308d19c11e119b2c5abf67778dd0ac8a370e5df7;p=thirdparty%2Fgcc.git

libstdc++: Enable memcpy optimizations for distinct integral types [PR93059]

Currently we only optimize std::copy, std::copy_n etc. to memmove when
the source and destination types are the same. This means that we fail
to optimize copying between distinct 1-byte types, e.g. copying from a
buffer of unsigned char to a buffer of char8_t or vice versa.

This patch adds more partial specializations of the __memcpyable trait
so that we allow memcpy between integers of equal widths. This will
enable memmove for copies between narrow character types and also
between same-width types like int and unsigned.

Enabling the optimization needs to be based on the width of the integer
type, not just the size in bytes. This is because some targets define
non-standard integral types such as __int20 in msp430, which has padding
bits. It would not be safe to memcpy between e.g. __int20 and int32_t,
even though sizeof(__int20) == sizeof(int32_t). A new trait is
introduced to define the width, __memcpyable_integer, and then the
__memcpyable trait compares the widths.

It's safe to copy between signed and unsigned integers of the same
width, because GCC only supports two's complement integers.

I initially though it would be useful to define the specialization
__memcpyable_integer<byte> to enable copying between narrow character
types and std::byte. But that isn't possible with std::copy, because
is_assignable<char&, std::byte> is false. Optimized copies using memmove
will already happen for copying std::byte to std::byte, because
__memcpyable<T*, T*> is true.

libstdc++-v3/ChangeLog:

	PR libstdc++/93059
	* include/bits/cpp_type_traits.h (__memcpyable): Add partial
	specialization for pointers to distinct types.
	(__memcpyable_integer): New trait to control which types can use
	cross-type memcpy optimizations.
---

diff --git a/libstdc++-v3/include/bits/cpp_type_traits.h b/libstdc++-v3/include/bits/cpp_type_traits.h
index 060652afb18..2f9ce75e82c 100644
--- a/libstdc++-v3/include/bits/cpp_type_traits.h
+++ b/libstdc++-v3/include/bits/cpp_type_traits.h
@@ -434,8 +434,6 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
     };
 #endif
 
-  template<typename> struct iterator_traits;
-
   // A type that is safe for use with memcpy, memmove, memcmp etc.
   template<typename _Tp>
     struct __is_nonvolatile_trivially_copyable
@@ -459,16 +457,103 @@ __INT_N(__GLIBCXX_TYPE_INT_N_3)
       enum { __value = 0 };
     };
 
+  // Allow memcpy when source and destination are pointers to the same type.
   template<typename _Tp>
     struct __memcpyable<_Tp*, _Tp*>
     : __is_nonvolatile_trivially_copyable<_Tp>
     { };
 
+  // Source pointer can be const.
   template<typename _Tp>
     struct __memcpyable<_Tp*, const _Tp*>
     : __is_nonvolatile_trivially_copyable<_Tp>
     { };
 
+  template<typename _Tp> struct __memcpyable_integer;
+
+  // For heterogeneous types, allow memcpy between equal-sized integers.
+  template<typename _Tp, typename _Up>
+    struct __memcpyable<_Tp*, _Up*>
+    {
+      enum {
+	__value = __memcpyable_integer<_Tp>::__width != 0
+		    && ((int)__memcpyable_integer<_Tp>::__width
+			  == (int)__memcpyable_integer<_Up>::__width)
+      };
+    };
+
+  // Specialization for const U* because __is_integer<const U> is never true.
+  template<typename _Tp, typename _Up>
+    struct __memcpyable<_Tp*, const _Up*>
+    : __memcpyable<_Tp*, _Up*>
+    { };
+
+  template<typename _Tp>
+    struct __memcpyable_integer
+    {
+      enum {
+	__width = __is_integer<_Tp>::__value ? (sizeof(_Tp) * __CHAR_BIT__) : 0
+      };
+    };
+
+  // Cannot memcpy volatile memory.
+  template<typename _Tp>
+    struct __memcpyable_integer<volatile _Tp>
+    { enum { __width = 0 }; };
+
+  // Specializations for __intNN types with padding bits.
+#if defined __GLIBCXX_TYPE_INT_N_0 && __GLIBCXX_BITSIZE_INT_N_0 % __CHAR_BIT__
+  __extension__
+  template<>
+    struct __memcpyable_integer<__GLIBCXX_TYPE_INT_N_0>
+    { enum { __width = __GLIBCXX_BITSIZE_INT_N_0 }; };
+  __extension__
+  template<>
+    struct __memcpyable_integer<unsigned __GLIBCXX_TYPE_INT_N_0>
+    { enum { __width = __GLIBCXX_BITSIZE_INT_N_0 }; };
+#endif
+#if defined __GLIBCXX_TYPE_INT_N_1 && __GLIBCXX_BITSIZE_INT_N_1 % __CHAR_BIT__
+  __extension__
+  template<>
+    struct __memcpyable_integer<__GLIBCXX_TYPE_INT_N_1>
+    { enum { __width = __GLIBCXX_BITSIZE_INT_N_1 }; };
+  __extension__
+  template<>
+    struct __memcpyable_integer<unsigned __GLIBCXX_TYPE_INT_N_1>
+    { enum { __width = __GLIBCXX_BITSIZE_INT_N_1 }; };
+#endif
+#if defined __GLIBCXX_TYPE_INT_N_2 && __GLIBCXX_BITSIZE_INT_N_2 % __CHAR_BIT__
+  __extension__
+  template<>
+    struct __memcpyable_integer<__GLIBCXX_TYPE_INT_N_2>
+    { enum { __width = __GLIBCXX_BITSIZE_INT_N_2 }; };
+  __extension__
+  template<>
+    struct __memcpyable_integer<unsigned __GLIBCXX_TYPE_INT_N_2>
+    { enum { __width = __GLIBCXX_BITSIZE_INT_N_2 }; };
+#endif
+#if defined __GLIBCXX_TYPE_INT_N_3 && __GLIBCXX_BITSIZE_INT_N_3 % __CHAR_BIT__
+  __extension__
+  template<>
+    struct __memcpyable_integer<__GLIBCXX_TYPE_INT_N_3>
+    { enum { __width = __GLIBCXX_BITSIZE_INT_N_3 }; };
+  __extension__
+  template<>
+    struct __memcpyable_integer<unsigned __GLIBCXX_TYPE_INT_N_3>
+    { enum { __width = __GLIBCXX_BITSIZE_INT_N_3 }; };
+#endif
+
+#if defined __STRICT_ANSI__ && defined __SIZEOF_INT128__
+  // In strict modes __is_integer<__int128> is false,
+  // but we want to allow memcpy between signed/unsigned __int128.
+  __extension__
+  template<>
+    struct __memcpyable_integer<__int128> { enum { __width = 128 }; };
+  __extension__
+  template<>
+    struct __memcpyable_integer<unsigned __int128> { enum { __width = 128 }; };
+#endif
+
   // Whether two iterator types can be used with memcmp.
   // This trait only says it's well-formed to use memcmp, not that it
   // gives the right answer for a given algorithm. So for example, std::equal