/* Copyright (C) 2011-2021 Free Software Foundation, Inc.
   Contributed by Torvald Riegel <triegel@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "libitm_i.h"

using namespace GTM;

namespace {

// This group consists of all TM methods that synchronize via just a single
// global lock (or ownership record).
struct gl_mg : public method_group
{
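  // The lock bit is the most significant bit of gtm_word; the remaining
  // bits hold the orec's version number, which also serves as the global
  // time base (see the comments on gl_wt_dispatch below).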
  static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
  // We can't use the full bit range because ~0 in gtm_thread::shared_state
  // has special meaning.
  static const gtm_word VERSION_MAX = (~(gtm_word)0 >> 1) - 1;
  static bool is_locked(gtm_word l) { return l & LOCK_BIT; }
  static gtm_word set_locked(gtm_word l) { return l | LOCK_BIT; }
  static gtm_word clear_locked(gtm_word l) { return l & ~LOCK_BIT; }

  // The global ownership record.
  // No tail-padding necessary (the virtual functions aren't used frequently).
  atomic<gtm_word> orec __attribute__((aligned(HW_CACHELINE_SIZE)));

  virtual void init()
  {
    // This store is only executed while holding the serial lock, so relaxed
    // memory order is sufficient here.
    orec.store(0, memory_order_relaxed);
  }
  virtual void fini() { }
};

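// The single instance of this method group.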
static gl_mg o_gl_mg;


// The global lock, write-through TM method.
// Acquires the orec eagerly before the first write, and then writes through.
// Reads abort if the global orec's version number changed or if it is locked.
// Currently, writes require undo-logging to prevent deadlock between the
// serial lock and the global orec (writer txn acquires orec, reader txn
// upgrades to serial and waits for all other txns, writer tries to upgrade to
// serial too but cannot, writer cannot abort either, deadlock).  We could
// avoid this if the serial lock would allow us to prevent other threads from
// going to serial mode, but this probably is too much additional complexity
// just to optimize this TM method.
// gtm_thread::shared_state is used to store a transaction's current
// snapshot time (or commit time).  The serial lock uses ~0 for inactive
// transactions and 0 for active ones.  Thus, we always have a meaningful
// timestamp in shared_state that can be used to implement quiescence-based
// privatization safety.  This even holds if a writing transaction has the
// lock bit set in its shared_state because this is fine for both the serial
// lock (the value will be smaller than ~0) and privatization safety (we
// validate that no other update transaction committed before we acquired the
// orec, so we have the most recent timestamp and no other transaction can
// commit until we have committed).
// However, we therefore depend on shared_state not being modified by the
// serial lock during upgrades to serial mode, which is ensured by
// gtm_thread::serialirr_mode not calling gtm_rwlock::write_upgrade_finish
// before we have committed or rolled back.
class gl_wt_dispatch : public abi_dispatch
{
protected:
  static void pre_write(const void *addr, size_t len,
			gtm_thread *tx = gtm_thr())
  {
    gtm_word v = tx->shared_state.load(memory_order_relaxed);
    if (unlikely(!gl_mg::is_locked(v)))
      {
	// Check for and handle version number overflow.
	if (unlikely(v >= gl_mg::VERSION_MAX))
	  tx->restart(RESTART_INIT_METHOD_GROUP);

	// This validates that we have a consistent snapshot, which is also
	// necessary for making privatization safety work (see the class'
	// comments).  Note that this check here will be performed by the
	// subsequent CAS again, so relaxed memory order is fine.
	gtm_word now = o_gl_mg.orec.load(memory_order_relaxed);
	if (now != v)
	  tx->restart(RESTART_VALIDATE_WRITE);

	// CAS global orec from our snapshot time to the locked state.
	// We need acquire memory order here to synchronize with other
	// (ownership) releases of the orec.  We do not need acq_rel order
	// because whenever another thread reads from this CAS'
	// modification, then it will abort anyway and does not rely on
	// any further happens-before relation to be established.
	// Also note that unlike in ml_wt's increase of the global time
	// base (remember that the global orec is used as time base), we do
	// not need release memory order here because we do not need to make
	// prior orec acquisitions visible to other threads that try to
	// extend their snapshot time.
	if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now),
						   memory_order_acquire))
	  tx->restart(RESTART_LOCKED_WRITE);

	// We use an explicit fence here to avoid having to use release
	// memory order for all subsequent data stores.  This fence will
	// synchronize with loads of the data with acquire memory order.  See
	// validate() for why this is necessary.
	// Adding release memory order to the prior CAS is not sufficient,
	// at least according to the Batty et al. formalization of the
	// memory model.
	atomic_thread_fence(memory_order_release);

	// Set shared_state to the new value.
	tx->shared_state.store(gl_mg::set_locked(now), memory_order_release);
      }

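    // Log the old contents of the region so that they can be restored if
    // the transaction aborts (undo logging; see the class' comments).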
    tx->undolog.log(addr, len);
  }

  static void validate(gtm_thread *tx = gtm_thr())
  {
    // Check that the snapshot is consistent.  We expect the previous data
    // load to have acquire memory order, or to be atomic and followed by an
    // acquire fence.
    // As a result, the data load will synchronize with the release fence
    // issued by the transactions whose data updates the data load has read
    // from.  This forces the orec load to read from a visible sequence of
    // side effects that starts with the other updating transaction's store
    // that acquired the orec and set it to locked.
    // We therefore either read a value with the locked bit set (and restart)
    // or read an orec value that was written after the data had been
    // written.  Either will allow us to detect inconsistent reads because it
    // will have a higher/different value.
    gtm_word l = o_gl_mg.orec.load(memory_order_relaxed);
    if (l != tx->shared_state.load(memory_order_relaxed))
      tx->restart(RESTART_VALIDATE_READ);
  }

  template <typename V> static V load(const V* addr, ls_modifier mod)
  {
    // Read-for-write should be unlikely, but we need to handle it or we
    // will break later WaW optimizations.
    if (unlikely(mod == RfW))
      {
	pre_write(addr, sizeof(V));
	return *addr;
      }
    if (unlikely(mod == RaW))
      return *addr;

    // We have not acquired the orec, so we need to load a value and then
    // validate that this was consistent.
    // This needs to have acquire memory order (see validate()).
    // Alternatively, we can put an acquire fence after the data load but
    // this is probably less efficient.
    // FIXME We would need an atomic load with acquire memory order here but
    // we can't just forge an atomic load for nonatomic data because this
    // might not work on all implementations of atomics.  However, we need
    // the acquire memory order and we can only establish this if we link
    // it to the matching release using a reads-from relation between atomic
    // loads.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the load would be moved to
    // after the fence, we potentially don't synchronize properly anymore).
    // Instead of the following, just use an ordinary load followed by an
    // acquire fence, and hope that this is good enough for now:
    // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
    V v = *addr;
    atomic_thread_fence(memory_order_acquire);
    validate();
    return v;
  }

  template <typename V> static void store(V* addr, const V value,
					  ls_modifier mod)
  {
    if (likely(mod != WaW))
      pre_write(addr, sizeof(V));
    // FIXME We would need an atomic store here but we can't just forge an
    // atomic store for nonatomic data because this might not work on all
    // implementations of atomics.  However, we need this store to link the
    // release fence in pre_write() to the acquire operation in load, which
    // is only guaranteed if we have a reads-from relation between atomic
    // accesses.  Also, the compiler is allowed to optimize nonatomic
    // accesses differently than atomic accesses (e.g., if the store would
    // be moved to before the release fence in pre_write(), things could go
    // wrong).
    // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
    *addr = value;
  }

public:
  static void memtransfer_static(void *dst, const void* src, size_t size,
      bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
  {
    gtm_thread *tx = gtm_thr();
    if (dst_mod != WaW && dst_mod != NONTXNAL)
      pre_write(dst, size, tx);
    // We need at least undo-logging for an RfW src region because we might
    // subsequently write there with WaW.
    if (src_mod == RfW)
      pre_write(src, size, tx);

    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memcpy/memmove are good enough.
    if (!may_overlap)
      ::memcpy(dst, src, size);
    else
      ::memmove(dst, src, size);

    if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL
	&& dst_mod != WaW)
      validate(tx);
  }

  static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
  {
    if (mod != WaW)
      pre_write(dst, size);
    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memset is good enough.
    ::memset(dst, c, size);
  }

  virtual gtm_restart_reason begin_or_restart()
  {
    // We don't need to do anything for nested transactions.
    gtm_thread *tx = gtm_thr();
    if (tx->parent_txns.size() > 0)
      return NO_RESTART;

    // Spin until the global orec is not locked.
    // TODO This is not necessary if there are no pure loads (check txn props).
    unsigned i = 0;
    gtm_word v;
    while (1)
      {
	// We need acquire memory order here so that this load will
	// synchronize with the store that releases the orec in trycommit().
	// In turn, this makes sure that subsequent data loads will read from
	// a visible sequence of side effects that starts with the most recent
	// store to the data right before the release of the orec.
	v = o_gl_mg.orec.load(memory_order_acquire);
	if (!gl_mg::is_locked(v))
	  break;
	// TODO need method-specific max spin count
	if (++i > gtm_spin_count_var)
	  return RESTART_VALIDATE_READ;
	cpu_relax();
      }

    // Everything is okay, we have a snapshot time.
    // We don't need to enforce any ordering for the following store.  There
    // are no earlier data loads in this transaction, so the store cannot
    // become visible before those (which could lead to the violation of
    // privatization safety).  The store can become visible after later loads
    // but this does not matter because the previous value will have been
    // smaller or equal (the serial lock will set shared_state to zero when
    // marking the transaction as active, and restarts enforce immediate
    // visibility of a smaller or equal value with a barrier (see
    // rollback())).
    tx->shared_state.store(v, memory_order_relaxed);
    return NO_RESTART;
  }

  virtual bool trycommit(gtm_word& priv_time)
  {
    gtm_thread* tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);

    // Release the orec but do not reset shared_state, which will be modified
    // by the serial lock right after our commit anyway.  Also, resetting
    // shared_state here would interfere with the serial lock's use of this
    // location.
    if (gl_mg::is_locked(v))
      {
	// Release the global orec, increasing its version number / timestamp.
	// See begin_or_restart() for why we need release memory order here.
	v = gl_mg::clear_locked(v) + 1;
	o_gl_mg.orec.store(v, memory_order_release);
      }

    // Need to ensure privatization safety.  Every other transaction must
    // have a snapshot time that is at least as high as our commit time
    // (i.e., our commit must be visible to them).  Because of proxy
    // privatization, we must ensure this even if we are a read-only
    // transaction.  See ml_wt_dispatch::trycommit() for details: We can't
    // get quite the same set of problems because we just use one orec and
    // thus, for example, there cannot be concurrent writers -- but we can
    // still get pending loads to privatized data when not ensuring
    // privatization safety, which is problematic if the program unmaps the
    // privatized memory.
    priv_time = v;
    return true;
  }

  virtual void rollback(gtm_transaction_cp *cp)
  {
    // We don't do anything for rollbacks of nested transactions.
    if (cp != 0)
      return;

    gtm_thread *tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);

    // Release the lock and increment the version number to prevent dirty
    // reads.  Also reset shared_state here, so that begin_or_restart() can
    // expect a value that is correct wrt. privatization safety.
    if (gl_mg::is_locked(v))
      {
	// With our rollback, global time increases.
	v = gl_mg::clear_locked(v) + 1;

	// First reset the timestamp published via shared_state.  Release
	// memory order will make this happen after undoing prior data writes.
	// This must also happen before we actually release the global orec
	// next, so that future update transactions in other threads observe
	// a meaningful snapshot time for our transaction; otherwise, they
	// could read a shared_state value with the LOCK_BIT set, which can
	// break privatization safety because it's larger than the actual
	// snapshot time.  Note that we only need to consider other update
	// transactions because only those will potentially privatize data.
	tx->shared_state.store(v, memory_order_release);

	// Release the global orec, increasing its version number / timestamp.
	// See begin_or_restart() for why we need release memory order here,
	// and we also need it to make future update transactions read the
	// prior update to shared_state too (update transactions acquire the
	// global orec with acquire memory order).
	o_gl_mg.orec.store(v, memory_order_release);
      }
  }

  virtual bool snapshot_most_recent()
  {
    // This is the same check as in validate() except that we do not restart
    // on failure but simply return the result.
    return o_gl_mg.orec.load(memory_order_relaxed)
	== gtm_thr()->shared_state.load(memory_order_relaxed);
  }


  CREATE_DISPATCH_METHODS(virtual, )
  CREATE_DISPATCH_METHODS_MEM()

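  // The constructor arguments below select write-through operation and
  // attach this dispatch to the global-lock method group; see the
  // abi_dispatch constructor in dispatch.h for the full parameter list.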
  gl_wt_dispatch() : abi_dispatch(false, true, false, false, 0, &o_gl_mg)
  { }
};

} // anon namespace

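// The single, statically allocated instance of this dispatch method.  It
// can be const because all mutable state lives in o_gl_mg and the
// per-thread gtm_thread data.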
static const gl_wt_dispatch o_gl_wt_dispatch;

abi_dispatch *
GTM::dispatch_gl_wt ()
{
  return const_cast<gl_wt_dispatch *>(&o_gl_wt_dispatch);
}