/* Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Torvald Riegel <triegel@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
   <http://www.gnu.org/licenses/>. */

#include "libitm_i.h"

using namespace GTM;

namespace {

// This group consists of all TM methods that synchronize via just a single
// global lock (or ownership record).
struct gl_mg : public method_group
{
  static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
  // We can't use the full bitrange because ~0 in gtm_thread::shared_state has
  // special meaning.
  static const gtm_word VERSION_MAX = (~(gtm_word)0 >> 1) - 1;
  static bool is_locked(gtm_word l) { return l & LOCK_BIT; }
  static gtm_word set_locked(gtm_word l) { return l | LOCK_BIT; }
  static gtm_word clear_locked(gtm_word l) { return l & ~LOCK_BIT; }
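
  // Editor's illustration (not part of the original sources): on a target
  // with a 64-bit gtm_word, LOCK_BIT is 1 << 63, so the lower 63 bits hold
  // the version number and versions range from 0 to VERSION_MAX (2^63 - 2);
  // ~0 stays reserved for the serial lock's "inactive" marker. For example,
  // set_locked(5) yields (1 << 63) | 5, and clear_locked() recovers 5.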

  // The global ownership record.
  // No tail-padding necessary (the virtual functions aren't used frequently).
  atomic<gtm_word> orec __attribute__((aligned(HW_CACHELINE_SIZE)));

  virtual void init()
  {
    // This store is only executed while holding the serial lock, so relaxed
    // memory order is sufficient here.
    orec.store(0, memory_order_relaxed);
  }
  virtual void fini() { }
};

static gl_mg o_gl_mg;


// The global lock, write-through TM method.
// Acquires the orec eagerly before the first write, and then writes through.
// Reads abort if the global orec's version number changed or if it is locked.
// Currently, writes require undo-logging to prevent deadlock between the
// serial lock and the global orec (writer txn acquires orec, reader txn
// upgrades to serial and waits for all other txns, writer tries to upgrade to
// serial too but cannot, writer cannot abort either, deadlock). We could
// avoid this if the serial lock allowed us to prevent other threads from
// going to serial mode, but this probably is too much additional complexity
// just to optimize this TM method.
// gtm_thread::shared_state is used to store a transaction's current
// snapshot time (or commit time). The serial lock uses ~0 for inactive
// transactions and 0 for active ones. Thus, we always have a meaningful
// timestamp in shared_state that can be used to implement quiescence-based
// privatization safety. This even holds if a writing transaction has the
// lock bit set in its shared_state because this is fine for both the serial
// lock (the value will be smaller than ~0) and privatization safety (we
// validate that no other update transaction committed before we acquired the
// orec, so we have the most recent timestamp and no other transaction can
// commit until we have committed).
// However, we therefore depend on shared_state not being modified by the
// serial lock during upgrades to serial mode, which is ensured by
// gtm_thread::serialirr_mode by not calling gtm_rwlock::write_upgrade_finish
// before we have committed or rolled back.
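//
// Editor's summary (illustrative, not from the original comments) of the
// values a thread's shared_state can hold under this method:
//   ~0                    inactive (set by the serial lock)
//   0                     just marked active by the serial lock
//   v                     the transaction's snapshot (or commit) time
//   v | gl_mg::LOCK_BIT   the transaction currently owns the global orec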
class gl_wt_dispatch : public abi_dispatch
{
protected:
  static void pre_write(const void *addr, size_t len,
                        gtm_thread *tx = gtm_thr())
  {
    gtm_word v = tx->shared_state.load(memory_order_relaxed);
    if (unlikely(!gl_mg::is_locked(v)))
      {
        // Check for and handle version number overflow.
        if (unlikely(v >= gl_mg::VERSION_MAX))
          tx->restart(RESTART_INIT_METHOD_GROUP);

        // This validates that we have a consistent snapshot, which is also
        // necessary for making privatization safety work (see the class'
        // comments).
        // Note that this check here will be performed by the subsequent CAS
        // again, so relaxed memory order is fine.
        gtm_word now = o_gl_mg.orec.load(memory_order_relaxed);
        if (now != v)
          tx->restart(RESTART_VALIDATE_WRITE);

        // CAS global orec from our snapshot time to the locked state.
        // We need acquire memory order here to synchronize with other
        // (ownership) releases of the orec. We do not need acq_rel order
        // because whenever another thread reads from this CAS'
        // modification, it will abort anyway and does not rely on
        // any further happens-before relation to be established.
        // Also note that unlike in ml_wt's increase of the global time
        // base (remember that the global orec is used as time base), we do
        // not need release memory order here because we do not need to make
        // prior orec acquisitions visible to other threads that try to
        // extend their snapshot time.
        if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now),
                                                   memory_order_acquire))
          tx->restart(RESTART_LOCKED_WRITE);

        // We use an explicit fence here to avoid having to use release
        // memory order for all subsequent data stores. This fence will
        // synchronize with loads of the data with acquire memory order. See
        // validate() for why this is necessary.
        // Adding release memory order to the prior CAS is not sufficient,
        // at least according to the Batty et al. formalization of the
        // memory model.
        atomic_thread_fence(memory_order_release);

        // Set shared_state to new value.
        tx->shared_state.store(gl_mg::set_locked(now), memory_order_release);
      }

    tx->undolog.log(addr, len);
  }

  static void validate(gtm_thread *tx = gtm_thr())
  {
    // Check that snapshot is consistent. We expect the previous data load to
    // have acquire memory order, or be atomic and followed by an acquire
    // fence.
    // As a result, the data load will synchronize with the release fence
    // issued by the transactions whose data updates the data load has read
    // from. This forces the orec load to read from a visible sequence of
    // side effects that starts with the other updating transaction's store
    // that acquired the orec and set it to locked.
    // We therefore either read a value with the locked bit set (and restart)
    // or read an orec value that was written after the data had been written.
    // Either will allow us to detect inconsistent reads because it will have
    // a higher/different value.
    gtm_word l = o_gl_mg.orec.load(memory_order_relaxed);
    if (l != tx->shared_state.load(memory_order_relaxed))
      tx->restart(RESTART_VALIDATE_READ);
  }
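
  // Editor's sketch (illustrative, not from the original comments) of how
  // the release fence in pre_write() pairs with the acquire fence used on
  // the read side, for a writer W and a reader R:
  //   W: CAS the orec from its snapshot value to the locked value
  //   W: atomic_thread_fence(memory_order_release)
  //   W: *addr = value;                       // write-through data store
  //   R: v = *addr;                           // data load in load()
  //   R: atomic_thread_fence(memory_order_acquire)
  //   R: l = orec.load(memory_order_relaxed)  // validate()
  // If R's data load read W's store, the fence pairing forces R's orec load
  // to observe at least the locked orec value, so validate() restarts R.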

  template <typename V> static V load(const V* addr, ls_modifier mod)
  {
    // Read-for-write should be unlikely, but we need to handle it or we will
    // break later WaW optimizations.
    if (unlikely(mod == RfW))
      {
        pre_write(addr, sizeof(V));
        return *addr;
      }
    if (unlikely(mod == RaW))
      return *addr;

    // We have not acquired the orec, so we need to load a value and then
    // validate that this was consistent.
    // This needs to have acquire memory order (see validate()).
    // Alternatively, we can put an acquire fence after the data load but this
    // is probably less efficient.
    // FIXME We would need an atomic load with acquire memory order here but
    // we can't just forge an atomic load for nonatomic data because this
    // might not work on all implementations of atomics. However, we need
    // the acquire memory order and we can only establish this if we link
    // it to the matching release using a reads-from relation between atomic
    // loads. Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the load were moved to
    // after the fence, we potentially would not synchronize properly anymore).
    // Instead of the following, just use an ordinary load followed by an
    // acquire fence, and hope that this is good enough for now:
    // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
    V v = *addr;
    atomic_thread_fence(memory_order_acquire);
    validate();
    return v;
  }
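
  // Editor's note (illustrative; assumes the standard libitm ABI and the
  // dispatch methods generated by the CREATE_DISPATCH_METHODS macros near
  // the end of this class): an instrumented read such as _ITM_RU4(addr)
  // ends up in the uint32_t instantiation of load() with a plain read
  // modifier, whereas _ITM_RfWU4(addr) reaches it with mod == RfW and
  // _ITM_RaWU4(addr) with mod == RaW.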

  template <typename V> static void store(V* addr, const V value,
                                           ls_modifier mod)
  {
    if (likely(mod != WaW))
      pre_write(addr, sizeof(V));
    // FIXME We would need an atomic store here but we can't just forge an
    // atomic store for nonatomic data because this might not work on all
    // implementations of atomics. However, we need this store to link the
    // release fence in pre_write() to the acquire operation in load, which
    // is only guaranteed if we have a reads-from relation between atomic
    // accesses. Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the store were moved
    // to before the release fence in pre_write(), things could go wrong).
    // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
    *addr = value;
  }

public:
  static void memtransfer_static(void *dst, const void* src, size_t size,
      bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
  {
    gtm_thread *tx = gtm_thr();
    if (dst_mod != WaW && dst_mod != NONTXNAL)
      pre_write(dst, size, tx);
    // We need at least undo-logging for an RfW src region because we might
    // subsequently write there with WaW.
    if (src_mod == RfW)
      pre_write(src, size, tx);

    // FIXME We should use atomics here (see store()). Let's just hope that
    // memcpy/memmove are good enough.
    if (!may_overlap)
      ::memcpy(dst, src, size);
    else
      ::memmove(dst, src, size);

    if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL
        && dst_mod != WaW)
      validate(tx);
  }

  static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
  {
    if (mod != WaW)
      pre_write(dst, size);
    // FIXME We should use atomics here (see store()). Let's just hope that
    // memset is good enough.
    ::memset(dst, c, size);
  }

  virtual gtm_restart_reason begin_or_restart()
  {
    // We don't need to do anything for nested transactions.
    gtm_thread *tx = gtm_thr();
    if (tx->parent_txns.size() > 0)
      return NO_RESTART;

    // Spin until global orec is not locked.
    // TODO This is not necessary if there are no pure loads (check txn props).
    unsigned i = 0;
    gtm_word v;
    while (1)
      {
        // We need acquire memory order here so that this load will
        // synchronize with the store that releases the orec in trycommit().
        // In turn, this makes sure that subsequent data loads will read from
        // a visible sequence of side effects that starts with the most recent
        // store to the data right before the release of the orec.
        v = o_gl_mg.orec.load(memory_order_acquire);
        if (!gl_mg::is_locked(v))
          break;
        // TODO need method-specific max spin count
        if (++i > gtm_spin_count_var)
          return RESTART_VALIDATE_READ;
        cpu_relax();
      }

    // Everything is okay, we have a snapshot time.
    // We don't need to enforce any ordering for the following store. There
    // are no earlier data loads in this transaction, so the store cannot
    // become visible before those (which could lead to a violation of
    // privatization safety). The store can become visible after later loads
    // but this does not matter because the previous value will have been
    // smaller or equal (the serial lock will set shared_state to zero when
    // marking the transaction as active, and restarts enforce immediate
    // visibility of a smaller or equal value with a barrier; see rollback()).
    tx->shared_state.store(v, memory_order_relaxed);
    return NO_RESTART;
  }

  virtual bool trycommit(gtm_word& priv_time)
  {
    gtm_thread* tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);

    // Release the orec but do not reset shared_state, which will be modified
    // by the serial lock right after our commit anyway. Also, resetting
    // shared_state here would interfere with the serial lock's use of this
    // location.
    if (gl_mg::is_locked(v))
      {
        // Release the global orec, increasing its version number / timestamp.
        // See begin_or_restart() for why we need release memory order here.
        v = gl_mg::clear_locked(v) + 1;
        o_gl_mg.orec.store(v, memory_order_release);
      }

    // Need to ensure privatization safety. Every other transaction must have
    // a snapshot time that is at least as high as our commit time (i.e., our
    // commit must be visible to them). Because of proxy privatization, we
    // must ensure that even if we are a read-only transaction. See
    // ml_wt_dispatch::trycommit() for details: We can't get quite the same
    // set of problems because we just use one orec and thus, for example,
    // there cannot be concurrent writers -- but we can still get pending
    // loads to privatized data when not ensuring privatization safety, which
    // is problematic if the program unmaps the privatized memory.
    priv_time = v;
    return true;
  }
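
  // Editor's note (illustrative): the caller uses priv_time for
  // quiescence-based privatization safety. For example, if trycommit()
  // returns with priv_time == 42, the committing thread waits until every
  // other transaction's shared_state has reached at least 42, so no thread
  // can still be reading data that this transaction may have privatized.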

  virtual void rollback(gtm_transaction_cp *cp)
  {
    // We don't do anything for rollbacks of nested transactions.
    if (cp != 0)
      return;

    gtm_thread *tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);

    // Release lock and increment version number to prevent dirty reads.
    // Also reset shared_state here, so that begin_or_restart() can expect a
    // value that is correct wrt. privatization safety.
    if (gl_mg::is_locked(v))
      {
        // With our rollback, global time increases.
        v = gl_mg::clear_locked(v) + 1;

        // First reset the timestamp published via shared_state. Release
        // memory order will make this happen after undoing prior data writes.
        // This must also happen before we actually release the global orec
        // next, so that future update transactions in other threads observe
        // a meaningful snapshot time for our transaction; otherwise, they
        // could read a shared_state value with the LOCK_BIT set, which can
        // break privatization safety because it's larger than the actual
        // snapshot time. Note that we only need to consider other update
        // transactions because only those will potentially privatize data.
        tx->shared_state.store(v, memory_order_release);

        // Release the global orec, increasing its version number / timestamp.
        // See begin_or_restart() for why we need release memory order here,
        // and we also need it to make future update transactions read the
        // prior update to shared_state too (update transactions acquire the
        // global orec with acquire memory order).
        o_gl_mg.orec.store(v, memory_order_release);
      }
  }

  virtual bool snapshot_most_recent()
  {
    // This is the same check as in validate() except that we do not restart
    // on failure but simply return the result.
    return o_gl_mg.orec.load(memory_order_relaxed)
           == gtm_thr()->shared_state.load(memory_order_relaxed);
  }

  CREATE_DISPATCH_METHODS(virtual, )
  CREATE_DISPATCH_METHODS_MEM()

  gl_wt_dispatch() : abi_dispatch(false, true, false, false, 0, &o_gl_mg)
  { }
};

} // anon namespace

static const gl_wt_dispatch o_gl_wt_dispatch;

abi_dispatch *
GTM::dispatch_gl_wt ()
{
  return const_cast<gl_wt_dispatch *>(&o_gl_wt_dispatch);
}