From c9f528dd1a14aacadec4638e7ee8ecff69fa0ee5 Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Thu, 12 Nov 2020 21:25:14 +0000 Subject: [PATCH] libstdc++: Optimise std::future::wait_for To poll a std::future to see if it's ready you have to call one of the timed waiting functions. The most obvious way is wait_for(0s) but this was previously very inefficient because it would turn the relative timeout to an absolute one by calling system_clock::now(). When the relative timeout is zero (or less) we're obviously going to get a time that has already passed, but the overhead of obtaining the current time can be dozens of microseconds. This patch makes future::wait_for avoid waiting at all when the relative timeout is zero or less, to avoid the unnecessary overhead of getting the current time. This makes polling with wait_for(0s) take only a few cycles instead of dozens of microseconds. libstdc++-v3/ChangeLog: * include/std/future (future::wait_for): Do not wait for durations less than or equal to zero. * testsuite/30_threads/future/members/poll.cc: New test. (cherry picked from commit 93fc47746815ea9dac413322fcade2931f757e7f) --- libstdc++-v3/include/std/future | 14 ++- .../30_threads/future/members/poll.cc | 106 ++++++++++++++++++ 2 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 libstdc++-v3/testsuite/30_threads/future/members/poll.cc diff --git a/libstdc++-v3/include/std/future b/libstdc++-v3/include/std/future index 97506a27e37c..09f4af6fe7cd 100644 --- a/libstdc++-v3/include/std/future +++ b/libstdc++-v3/include/std/future @@ -346,10 +346,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // to synchronize with the thread that made it ready. if (_M_status._M_load(memory_order_acquire) == _Status::__ready) return future_status::ready; + if (_M_is_deferred_future()) return future_status::deferred; - if (_M_status._M_load_when_equal_for(_Status::__ready, - memory_order_acquire, __rel)) + + // Don't wait unless the relative time is greater than zero. 
+ if (__rel > __rel.zero() + && _M_status._M_load_when_equal_for(_Status::__ready, + memory_order_acquire, + __rel)) { // _GLIBCXX_RESOLVE_LIB_DEFECTS // 2100. timed waiting functions must also join @@ -378,10 +383,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION // to synchronize with the thread that made it ready. if (_M_status._M_load(memory_order_acquire) == _Status::__ready) return future_status::ready; + if (_M_is_deferred_future()) return future_status::deferred; + if (_M_status._M_load_when_equal_until(_Status::__ready, - memory_order_acquire, __abs)) + memory_order_acquire, + __abs)) { // _GLIBCXX_RESOLVE_LIB_DEFECTS // 2100. timed waiting functions must also join diff --git a/libstdc++-v3/testsuite/30_threads/future/members/poll.cc b/libstdc++-v3/testsuite/30_threads/future/members/poll.cc new file mode 100644 index 000000000000..1f54ce7cc3fe --- /dev/null +++ b/libstdc++-v3/testsuite/30_threads/future/members/poll.cc @@ -0,0 +1,106 @@ +// Copyright (C) 2020 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-O3" }
+// { dg-do run { target c++11 } }
+// { dg-additional-options "-pthread" { target pthread } }
+// { dg-require-gthreads "" }
+
+#include <future>
+#include <chrono>
+#include <iostream>
+#include <testsuite_hooks.h>
+
+const int iterations = 200;
+
+using namespace std;
+// Print the total and per-call time for `iterations` calls; return the per-call ns.
+template<typename Duration>
+double
+print(const char* desc, Duration dur)
+{
+  auto ns = chrono::duration_cast<chrono::nanoseconds>(dur).count();
+  double d = double(ns) / iterations;
+  cout << desc << ": " << ns << "ns for " << iterations
+    << " calls, avg " << d << "ns per call\n";
+  return d;
+}
+// Time polling of a future before and after it is made ready, then compare costs.
+int main()
+{
+  promise<int> p;
+  future<int> f = p.get_future();
+
+  auto start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_for(chrono::seconds(0));
+  auto stop = chrono::high_resolution_clock::now();
+  double wait_for_0 = print("wait_for(0s)", stop - start);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_until(chrono::system_clock::time_point::min());
+  stop = chrono::high_resolution_clock::now();
+  double wait_until_sys_min __attribute__((unused))
+    = print("wait_until(system_clock minimum)", stop - start);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_until(chrono::steady_clock::time_point::min());
+  stop = chrono::high_resolution_clock::now();
+  double wait_until_steady_min __attribute__((unused))
+    = print("wait_until(steady_clock minimum)", stop - start);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_until(chrono::system_clock::time_point());
+  stop = chrono::high_resolution_clock::now();
+  double wait_until_sys_epoch __attribute__((unused))
+    = print("wait_until(system_clock epoch)", stop - start);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_until(chrono::steady_clock::time_point());
+  stop = chrono::high_resolution_clock::now();
+  double wait_until_steady_epoch __attribute__((unused))
+    = print("wait_until(steady_clock epoch)", stop - start);
+
+  p.set_value(1);
+
+  start = chrono::high_resolution_clock::now();
+  for(int i = 0; i < iterations; i++)
+    f.wait_for(chrono::seconds(0));
+  stop = chrono::high_resolution_clock::now();
+  double ready = print("wait_for when ready", stop - start);
+
+  // Polling before ready with wait_for(0s) should be almost as fast as
+  // after the result is ready.
+  VERIFY( wait_for_0 < (ready * 10) );
+
+  // polling before ready using wait_until(min) should not be terribly slow.
+  VERIFY( wait_until_sys_min < (ready * 100) );
+  // Converting from steady clock to system clock adds overhead before GCC 11.
+  VERIFY( wait_until_steady_min < (ready * 500) );
+
+  // The following two tests fail with GCC 11, see
+  // https://gcc.gnu.org/pipermail/libstdc++/2020-November/051422.html
+
+  // polling before ready using wait_until(epoch) should not be terribly slow.
+  VERIFY( wait_until_sys_epoch < (ready * 100) );
+  // Converting from steady clock to system clock adds overhead before GCC 11.
+  VERIFY( wait_until_steady_epoch < (ready * 500) );
+}
-- 
2.47.2