From f80a8b42296265bb868a48592a2bd1fdaa2a3d8a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Jannik=20Gl=C3=BCckert?= Date: Mon, 6 Mar 2023 20:52:08 +0100 Subject: [PATCH] libstdc++: Also use sendfile for big files MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We were previously only using sendfile for files smaller than 2GB, as sendfile needs to be called repeatedly for files bigger than that. Some quick numbers, copying a 16GB file, average of 10 repetitions: old: real: 13.4s user: 0.14s sys : 7.43s new: real: 8.90s user: 0.00s sys : 3.68s libstdc++-v3/ChangeLog: * acinclude.m4 (_GLIBCXX_HAVE_LSEEK): Define. * config.h.in: Regenerate. * configure: Regenerate. * src/filesystem/ops-common.h (copy_file_sendfile): Define new function for sendfile logic. Loop to support large files. Skip zero-length files. (do_copy_file): Use it. Signed-off-by: Jannik Glückert --- libstdc++-v3/acinclude.m4 | 51 +++++---- libstdc++-v3/config.h.in | 3 + libstdc++-v3/configure | 127 ++++++++++++++++------- libstdc++-v3/src/filesystem/ops-common.h | 73 +++++++------ 4 files changed, 170 insertions(+), 84 deletions(-) diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 index 6ae141b8c207..1920444e5cd9 100644 --- a/libstdc++-v3/acinclude.m4 +++ b/libstdc++-v3/acinclude.m4 @@ -4956,6 +4956,7 @@ dnl _GLIBCXX_USE_FCHMOD dnl _GLIBCXX_USE_FCHMODAT dnl _GLIBCXX_USE_SENDFILE dnl HAVE_LINK +dnl HAVE_LSEEK dnl HAVE_READLINK dnl HAVE_SYMLINK dnl @@ -5091,25 +5092,6 @@ dnl if test $glibcxx_cv_fchmodat = yes; then AC_DEFINE(_GLIBCXX_USE_FCHMODAT, 1, [Define if fchmodat is available in <sys/stat.h>.]) fi -dnl - AC_CACHE_CHECK([for sendfile that can copy files], - glibcxx_cv_sendfile, [dnl - case "${target_os}" in - gnu* | linux* | solaris* | uclinux*) - GCC_TRY_COMPILE_OR_LINK( - [#include <sys/sendfile.h>], - [sendfile(1, 2, (off_t*)0, sizeof 1);], - [glibcxx_cv_sendfile=yes], - [glibcxx_cv_sendfile=no]) - ;; - *) - glibcxx_cv_sendfile=no - ;; - esac - ]) - if test 
$glibcxx_cv_sendfile = yes; then - AC_DEFINE(_GLIBCXX_USE_SENDFILE, 1, [Define if sendfile is available in <sys/sendfile.h>.]) - fi dnl AC_CACHE_CHECK([for link], glibcxx_cv_link, [dnl @@ -5122,6 +5104,18 @@ dnl if test $glibcxx_cv_link = yes; then AC_DEFINE(HAVE_LINK, 1, [Define if link is available in <unistd.h>.]) fi +dnl + AC_CACHE_CHECK([for lseek], + glibcxx_cv_lseek, [dnl + GCC_TRY_COMPILE_OR_LINK( + [#include <unistd.h>], + [lseek(1, 0, SEEK_SET);], + [glibcxx_cv_lseek=yes], + [glibcxx_cv_lseek=no]) + ]) + if test $glibcxx_cv_lseek = yes; then + AC_DEFINE(HAVE_LSEEK, 1, [Define if lseek is available in <unistd.h>.]) + fi dnl AC_CACHE_CHECK([for readlink], glibcxx_cv_readlink, [dnl @@ -5158,6 +5152,25 @@ dnl if test $glibcxx_cv_truncate = yes; then AC_DEFINE(HAVE_TRUNCATE, 1, [Define if truncate is available in <unistd.h>.]) fi +dnl + AC_CACHE_CHECK([for sendfile that can copy files], + glibcxx_cv_sendfile, [dnl + case "${target_os}" in + gnu* | linux* | solaris* | uclinux*) + GCC_TRY_COMPILE_OR_LINK( + [#include <sys/sendfile.h>], + [sendfile(1, 2, (off_t*)0, sizeof 1);], + [glibcxx_cv_sendfile=yes], + [glibcxx_cv_sendfile=no]) + ;; + *) + glibcxx_cv_sendfile=no + ;; + esac + ]) + if test $glibcxx_cv_sendfile = yes && test $glibcxx_cv_lseek = yes; then + AC_DEFINE(_GLIBCXX_USE_SENDFILE, 1, [Define if sendfile is available in <sys/sendfile.h>.]) + fi dnl AC_CACHE_CHECK([for fdopendir], glibcxx_cv_fdopendir, [dnl diff --git a/libstdc++-v3/config.h.in b/libstdc++-v3/config.h.in index 5a95853cbbe4..99ce682670e9 100644 --- a/libstdc++-v3/config.h.in +++ b/libstdc++-v3/config.h.in @@ -254,6 +254,9 @@ /* Define to 1 if you have the `logl' function. */ #undef HAVE_LOGL +/* Define if lseek is available in <unistd.h>. */ +#undef HAVE_LSEEK + /* Define to 1 if you have the <machine/endian.h> header file. 
*/ #undef HAVE_MACHINE_ENDIAN_H diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure index 70d169cf64bc..50a7c30665b9 100755 --- a/libstdc++-v3/configure +++ b/libstdc++-v3/configure @@ -71005,29 +71005,27 @@ $as_echo "$glibcxx_cv_fchmodat" >&6; } $as_echo "#define _GLIBCXX_USE_FCHMODAT 1" >>confdefs.h fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sendfile that can copy files" >&5 -$as_echo_n "checking for sendfile that can copy files... " >&6; } -if ${glibcxx_cv_sendfile+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for link" >&5 +$as_echo_n "checking for link... " >&6; } +if ${glibcxx_cv_link+:} false; then : $as_echo_n "(cached) " >&6 else - case "${target_os}" in - gnu* | linux* | solaris* | uclinux*) - if test x$gcc_no_link = xyes; then + if test x$gcc_no_link = xyes; then cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include <sys/sendfile.h> +#include <unistd.h> int main () { -sendfile(1, 2, (off_t*)0, sizeof 1); +link("", ""); ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : - glibcxx_cv_sendfile=yes + glibcxx_cv_link=yes else - glibcxx_cv_sendfile=no + glibcxx_cv_link=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext else @@ -71036,40 +71034,35 @@ else fi cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ -#include <sys/sendfile.h> +#include <unistd.h> int main () { -sendfile(1, 2, (off_t*)0, sizeof 1); +link("", ""); ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO"; then : - glibcxx_cv_sendfile=yes + glibcxx_cv_link=yes else - glibcxx_cv_sendfile=no + glibcxx_cv_link=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi - ;; - *) - glibcxx_cv_sendfile=no - ;; - esac fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_sendfile" >&5 -$as_echo "$glibcxx_cv_sendfile" >&6; } - if test $glibcxx_cv_sendfile = yes; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_link" >&5 +$as_echo "$glibcxx_cv_link" >&6; } + if test $glibcxx_cv_link = yes; then -$as_echo "#define _GLIBCXX_USE_SENDFILE 1" >>confdefs.h +$as_echo "#define HAVE_LINK 1" >>confdefs.h fi - { $as_echo "$as_me:${as_lineno-$LINENO}: checking for link" >&5 -$as_echo_n "checking for link... " >&6; } -if ${glibcxx_cv_link+:} false; then : + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for lseek" >&5 +$as_echo_n "checking for lseek... 
" >&6; } +if ${glibcxx_cv_lseek+:} false; then : $as_echo_n "(cached) " >&6 else if test x$gcc_no_link = xyes; then @@ -71079,15 +71072,15 @@ else int main () { -link("", ""); +lseek(1, 0, SEEK_SET); ; return 0; } _ACEOF if ac_fn_cxx_try_compile "$LINENO"; then : - glibcxx_cv_link=yes + glibcxx_cv_lseek=yes else - glibcxx_cv_link=no + glibcxx_cv_lseek=no fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext else @@ -71100,26 +71093,26 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext int main () { -link("", ""); +lseek(1, 0, SEEK_SET); ; return 0; } _ACEOF if ac_fn_cxx_try_link "$LINENO"; then : - glibcxx_cv_link=yes + glibcxx_cv_lseek=yes else - glibcxx_cv_link=no + glibcxx_cv_lseek=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_link" >&5 -$as_echo "$glibcxx_cv_link" >&6; } - if test $glibcxx_cv_link = yes; then +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_lseek" >&5 +$as_echo "$glibcxx_cv_lseek" >&6; } + if test $glibcxx_cv_lseek = yes; then -$as_echo "#define HAVE_LINK 1" >>confdefs.h +$as_echo "#define HAVE_LSEEK 1" >>confdefs.h fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for readlink" >&5 @@ -71286,6 +71279,68 @@ $as_echo "$glibcxx_cv_truncate" >&6; } $as_echo "#define HAVE_TRUNCATE 1" >>confdefs.h + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for sendfile that can copy files" >&5 +$as_echo_n "checking for sendfile that can copy files... " >&6; } +if ${glibcxx_cv_sendfile+:} false; then : + $as_echo_n "(cached) " >&6 +else + case "${target_os}" in + gnu* | linux* | solaris* | uclinux*) + if test x$gcc_no_link = xyes; then + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ +#include <sys/sendfile.h> +int +main () +{ +sendfile(1, 2, (off_t*)0, sizeof 1); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_compile "$LINENO"; then : + glibcxx_cv_sendfile=yes +else + glibcxx_cv_sendfile=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +else + if test x$gcc_no_link = xyes; then + as_fn_error $? "Link tests are not allowed after GCC_NO_EXECUTABLES." "$LINENO" 5 +fi +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +#include <sys/sendfile.h> +int +main () +{ +sendfile(1, 2, (off_t*)0, sizeof 1); + ; + return 0; +} +_ACEOF +if ac_fn_cxx_try_link "$LINENO"; then : + glibcxx_cv_sendfile=yes +else + glibcxx_cv_sendfile=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi + ;; + *) + glibcxx_cv_sendfile=no + ;; + esac + +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $glibcxx_cv_sendfile" >&5 +$as_echo "$glibcxx_cv_sendfile" >&6; } + if test $glibcxx_cv_sendfile = yes && test $glibcxx_cv_lseek = yes; then + +$as_echo "#define _GLIBCXX_USE_SENDFILE 1" >>confdefs.h + fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking for fdopendir" >&5 $as_echo_n "checking for fdopendir... " >&6; } diff --git a/libstdc++-v3/src/filesystem/ops-common.h b/libstdc++-v3/src/filesystem/ops-common.h index c95511b5c959..364443882089 100644 --- a/libstdc++-v3/src/filesystem/ops-common.h +++ b/libstdc++-v3/src/filesystem/ops-common.h @@ -51,6 +51,7 @@ # include <ext/stdio_filebuf.h> # ifdef _GLIBCXX_USE_SENDFILE # include <sys/sendfile.h> // sendfile +# include <unistd.h> // lseek # endif #endif @@ -358,6 +359,34 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM } #ifdef NEED_DO_COPY_FILE +#if defined _GLIBCXX_USE_SENDFILE && ! 
defined _GLIBCXX_FILESYSTEM_IS_WINDOWS + bool + copy_file_sendfile(int fd_in, int fd_out, size_t length) noexcept + { + // a zero-length file is either empty, or not copyable by this syscall + // return early to avoid the syscall cost + if (length == 0) + { + errno = EINVAL; + return false; + } + size_t bytes_left = length; + off_t offset = 0; + ssize_t bytes_copied; + do + { + bytes_copied = ::sendfile(fd_out, fd_in, &offset, bytes_left); + bytes_left -= bytes_copied; + } + while (bytes_left > 0 && bytes_copied > 0); + if (bytes_copied < 0) + { + ::lseek(fd_out, 0, SEEK_SET); + return false; + } + return true; + } +#endif bool do_copy_file(const char_type* from, const char_type* to, std::filesystem::copy_options_existing_file options, @@ -498,16 +527,22 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM return false; } - size_t count = from_st->st_size; + bool has_copied = false; + #if defined _GLIBCXX_USE_SENDFILE && ! defined _GLIBCXX_FILESYSTEM_IS_WINDOWS - off_t offset = 0; - ssize_t n = ::sendfile(out.fd, in.fd, &offset, count); - if (n < 0 && errno != ENOSYS && errno != EINVAL) + if (!has_copied) + has_copied = copy_file_sendfile(in.fd, out.fd, from_st->st_size); + if (!has_copied) { - ec.assign(errno, std::generic_category()); - return false; + if (errno != ENOSYS && errno != EINVAL) + { + ec.assign(errno, std::generic_category()); + return false; + } } - if ((size_t)n == count) +#endif + + if (has_copied) { if (!out.close() || !in.close()) { @@ -517,9 +552,6 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM ec.clear(); return true; } - else if (n > 0) - count -= n; -#endif // _GLIBCXX_USE_SENDFILE using std::ios; __gnu_cxx::stdio_filebuf<char> sbin(in.fd, ios::in|ios::binary); @@ -530,29 +562,12 @@ _GLIBCXX_BEGIN_NAMESPACE_FILESYSTEM if (sbout.is_open()) out.fd = -1; -#ifdef _GLIBCXX_USE_SENDFILE - if (n != 0) - { - if (n < 0) - n = 0; - - const auto p1 = sbin.pubseekoff(n, ios::beg, ios::in); - const auto p2 = sbout.pubseekoff(n, ios::beg, ios::out); - - const std::streampos 
errpos(std::streamoff(-1)); - if (p1 == errpos || p2 == errpos) - { - ec = std::make_error_code(std::errc::io_error); - return false; - } - } -#endif - - if (count && !(std::ostream(&sbout) << &sbin)) + if (from_st->st_size && !(std::ostream(&sbout) << &sbin)) { ec = std::make_error_code(std::errc::io_error); return false; } + if (!sbout.close() || !sbin.close()) { ec.assign(errno, std::generic_category()); -- 2.47.3