]>
Commit | Line | Data |
---|---|---|
0a35513e AH |
1 | /* Copyright (C) 2011 Free Software Foundation, Inc. |
2 | Contributed by Torvald Riegel <triegel@redhat.com>. | |
3 | ||
4 | This file is part of the GNU Transactional Memory Library (libitm). | |
5 | ||
6 | Libitm is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | Libitm is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | |
13 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for | |
14 | more details. | |
15 | ||
16 | Under Section 7 of GPL version 3, you are granted additional | |
17 | permissions described in the GCC Runtime Library Exception, version | |
18 | 3.1, as published by the Free Software Foundation. | |
19 | ||
20 | You should have received a copy of the GNU General Public License and | |
21 | a copy of the GCC Runtime Library Exception along with this program; | |
22 | see the files COPYING3 and COPYING.RUNTIME respectively. If not, see | |
23 | <http://www.gnu.org/licenses/>. */ | |
24 | ||
25 | #include "libitm_i.h" | |
26 | ||
27 | using namespace GTM; | |
28 | ||
29 | namespace { | |
30 | ||
31 | // This group consists of all TM methods that synchronize via just a single | |
32 | // global lock (or ownership record). | |
33 | struct gl_mg : public method_group | |
34 | { | |
35 | static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1; | |
36 | // We can't use the full bitrange because ~0 in gtm_thread::shared_state has | |
37 | // special meaning. | |
38 | static const gtm_word VERSION_MAX = (~(gtm_word)0 >> 1) - 1; | |
39 | static bool is_locked(gtm_word l) { return l & LOCK_BIT; } | |
40 | static gtm_word set_locked(gtm_word l) { return l | LOCK_BIT; } | |
41 | static gtm_word clear_locked(gtm_word l) { return l & ~LOCK_BIT; } | |
42 | ||
43 | // The global ownership record. | |
36cfbee1 RH |
44 | atomic<gtm_word> orec; |
45 | ||
0a35513e AH |
46 | virtual void init() |
47 | { | |
799142bf TR |
48 | // This store is only executed while holding the serial lock, so relaxed |
49 | // memory order is sufficient here. | |
36cfbee1 | 50 | orec.store(0, memory_order_relaxed); |
0a35513e AH |
51 | } |
52 | virtual void fini() { } | |
53 | }; | |
54 | ||
// The single, file-local instance of the global-lock method group.
// TODO cacheline padding
static gl_mg o_gl_mg;
57 | ||
58 | ||
// The global lock, write-through TM method.
// Acquires the orec eagerly before the first write, and then writes through.
// Reads abort if the global orec's version number changed or if it is locked.
// Currently, writes require undo-logging to prevent deadlock between the
// serial lock and the global orec (writer txn acquires orec, reader txn
// upgrades to serial and waits for all other txns, writer tries to upgrade to
// serial too but cannot, writer cannot abort either, deadlock). We could
// avoid this if the serial lock would allow us to prevent other threads from
// going to serial mode, but this probably is too much additional complexity
// just to optimize this TM method.
// gtm_thread::shared_state is used to store a transaction's current
// snapshot time (or commit time). The serial lock uses ~0 for inactive
// transactions and 0 for active ones. Thus, we always have a meaningful
// timestamp in shared_state that can be used to implement quiescence-based
// privatization safety. This even holds if a writing transaction has the
// lock bit set in its shared_state because this is fine for both the serial
// lock (the value will be smaller than ~0) and privatization safety (we
// validate that no other update transaction committed before we acquired the
// orec, so we have the most recent timestamp and no other transaction can
// commit until we have committed).
// However, we therefore cannot use this method for a serial transaction
// (because shared_state needs to remain at ~0) and we have to be careful
// when switching to serial mode (see the special handling in trycommit() and
// rollback()).
// ??? This sharing adds some complexity wrt. serial mode. Just use a separate
// state variable?
// Dispatch class implementing the global-lock, write-through TM method.
// See the comment block above for the overall design and the serial-mode
// special cases.
class gl_wt_dispatch : public abi_dispatch
{
protected:
  // Prepare an in-place (write-through) update of [addr, addr+len): acquire
  // the global orec unless this transaction already holds it, then undo-log
  // the region so it can be restored on abort.  Restarts the transaction
  // (does not return) on version-number overflow, on an inconsistent
  // snapshot, or if another transaction holds the orec.
  static void pre_write(const void *addr, size_t len)
  {
    gtm_thread *tx = gtm_thr();
    // If the lock bit is set in our shared_state, we acquired the orec in an
    // earlier write of this transaction; only the undo-logging below is left.
    gtm_word v = tx->shared_state.load(memory_order_relaxed);
    if (unlikely(!gl_mg::is_locked(v)))
      {
        // Check for and handle version number overflow.
        if (unlikely(v >= gl_mg::VERSION_MAX))
          tx->restart(RESTART_INIT_METHOD_GROUP);

        // This validates that we have a consistent snapshot, which is also
        // necessary for making privatization safety work (see the class'
        // comments).  Note that this check here will be performed by the
        // subsequent CAS again, so relaxed memory order is fine.
        gtm_word now = o_gl_mg.orec.load(memory_order_relaxed);
        if (now != v)
          tx->restart(RESTART_VALIDATE_WRITE);

        // CAS global orec from our snapshot time to the locked state.
        // We need acq_rel memory order here to synchronize with other loads
        // and modifications of orec.
        if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now),
                                                   memory_order_acq_rel))
          tx->restart(RESTART_LOCKED_WRITE);

        // We use an explicit fence here to avoid having to use release
        // memory order for all subsequent data stores.  This fence will
        // synchronize with loads of the data with acquire memory order.  See
        // validate() for why this is necessary.
        atomic_thread_fence(memory_order_release);

        // Set shared_state to new value (snapshot time with the lock bit
        // set), marking us as the current update transaction.
        tx->shared_state.store(gl_mg::set_locked(now), memory_order_release);
      }

    // TODO Ensure that this gets inlined: Use internal log interface and LTO.
    GTM_LB(addr, len);
  }

  // Check that the data read so far is consistent with our snapshot time;
  // restarts the transaction (does not return) otherwise.
  static void validate()
  {
    // Check that snapshot is consistent.  We expect the previous data load to
    // have acquire memory order, or be atomic and followed by an acquire
    // fence.
    // As a result, the data load will synchronize with the release fence
    // issued by the transactions whose data updates the data load has read
    // from.  This forces the orec load to read from a visible sequence of
    // side effects that starts with the other updating transaction's store
    // that acquired the orec and set it to locked.
    // We therefore either read a value with the locked bit set (and restart)
    // or read an orec value that was written after the data had been written.
    // Either will allow us to detect inconsistent reads because it will have
    // a higher/different value.
    gtm_thread *tx = gtm_thr();
    gtm_word l = o_gl_mg.orec.load(memory_order_relaxed);
    if (l != tx->shared_state.load(memory_order_relaxed))
      tx->restart(RESTART_VALIDATE_READ);
  }

  // Transactional load of *ADDR.  MOD is the load/store modifier, e.g.
  // RfW = read-for-write, RaW = read-after-write.
  template <typename V> static V load(const V* addr, ls_modifier mod)
  {
    // Read-for-write should be unlikely, but we need to handle it or will
    // break later WaW optimizations.
    if (unlikely(mod == RfW))
      {
        pre_write(addr, sizeof(V));
        return *addr;
      }
    // Read-after-write: we already hold the orec, so a plain load suffices.
    if (unlikely(mod == RaW))
      return *addr;

    // We have not acquired the orec, so we need to load a value and then
    // validate that this was consistent.
    // This needs to have acquire memory order (see validate()).
    // Alternatively, we can put an acquire fence after the data load but this
    // is probably less efficient.
    // FIXME We would need an atomic load with acquire memory order here but
    // we can't just forge an atomic load for nonatomic data because this
    // might not work on all implementations of atomics.  However, we need
    // the acquire memory order and we can only establish this if we link
    // it to the matching release using a reads-from relation between atomic
    // loads.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the load would be moved to
    // after the fence, we potentially don't synchronize properly anymore).
    // Instead of the following, just use an ordinary load followed by an
    // acquire fence, and hope that this is good enough for now:
    // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
    V v = *addr;
    atomic_thread_fence(memory_order_acquire);
    validate();
    return v;
  }

  // Transactional store of VALUE to *ADDR.  Unless this is a
  // write-after-write (orec already held, old value already undo-logged),
  // pre_write() acquires the orec and logs the old value first.
  template <typename V> static void store(V* addr, const V value,
      ls_modifier mod)
  {
    if (unlikely(mod != WaW))
      pre_write(addr, sizeof(V));
    // FIXME We would need an atomic store here but we can't just forge an
    // atomic load for nonatomic data because this might not work on all
    // implementations of atomics.  However, we need this store to link the
    // release fence in pre_write() to the acquire operation in load, which
    // is only guaranteed if we have a reads-from relation between atomic
    // accesses.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the store would be moved
    // to before the release fence in pre_write(), things could go wrong).
    // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
    *addr = value;
  }

public:
  // Transactional memcpy/memmove of SIZE bytes from SRC to DST.
  static void memtransfer_static(void *dst, const void* src, size_t size,
      bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
  {
    // Skip orec acquisition and undo-logging only if the destination was
    // already written (WaW), the source was already written by us (RaW), or
    // the destination is nontransactional while the source is not
    // read-for-write.
    if ((dst_mod != WaW && src_mod != RaW)
        && (dst_mod != NONTXNAL || src_mod == RfW))
      pre_write(dst, size);

    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memcpy/memmove are good enough.
    if (!may_overlap)
      ::memcpy(dst, src, size);
    else
      ::memmove(dst, src, size);

    // Validate the source read unless the source access needed no validation
    // (RfW/RaW/nontransactional) or we hold the orec due to a WaW
    // destination.
    if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL
        && dst_mod != WaW)
      validate();
  }

  // Transactional memset of SIZE bytes at DST to byte value C.
  static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
  {
    if (mod != WaW)
      pre_write(dst, size);
    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memset is good enough.
    ::memset(dst, c, size);
  }

  // Begin or restart a transaction: obtain a consistent (unlocked) snapshot
  // of the orec and publish it via shared_state.  Returns NO_RESTART on
  // success, or a restart reason if we spun too long on a locked orec.
  virtual gtm_restart_reason begin_or_restart()
  {
    // We don't need to do anything for nested transactions.
    gtm_thread *tx = gtm_thr();
    if (tx->parent_txns.size() > 0)
      return NO_RESTART;

    // Spin until global orec is not locked.
    // TODO This is not necessary if there are no pure loads (check txn
    // props).
    unsigned i = 0;
    gtm_word v;
    while (1)
      {
        // We need acquire memory order here so that this load will
        // synchronize with the store that releases the orec in trycommit().
        // In turn, this makes sure that subsequent data loads will read from
        // a visible sequence of side effects that starts with the most
        // recent store to the data right before the release of the orec.
        v = o_gl_mg.orec.load(memory_order_acquire);
        if (!gl_mg::is_locked(v))
          break;
        // TODO need method-specific max spin count
        if (++i > gtm_spin_count_var)
          return RESTART_VALIDATE_READ;
        cpu_relax();
      }

    // Everything is okay, we have a snapshot time.
    // We don't need to enforce any ordering for the following store.  There
    // are no earlier data loads in this transaction, so the store cannot
    // become visible before those (which could lead to the violation of
    // privatization safety).  The store can become visible after later loads
    // but this does not matter because the previous value will have been
    // smaller or equal (the serial lock will set shared_state to zero when
    // marking the transaction as active, and restarts enforce immediate
    // visibility of a smaller or equal value with a barrier (see
    // rollback()).
    tx->shared_state.store(v, memory_order_relaxed);
    return NO_RESTART;
  }

  // Try to commit.  Always succeeds (returns true); if we were an update
  // transaction, releases the orec with an incremented version number and
  // reports that commit time in PRIV_TIME for privatization safety.
  virtual bool trycommit(gtm_word& priv_time)
  {
    gtm_thread* tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);

    // Special case: If shared_state is ~0, then we have acquired the
    // serial lock (tx->state is not updated yet).  In this case, the previous
    // value isn't available anymore, so grab it from the global lock, which
    // must have a meaningful value because no other transactions are active
    // anymore.  In particular, if it is locked, then we are an update
    // transaction, which is all we care about for commit.
    if (v == ~(typeof v)0)
      v = o_gl_mg.orec.load(memory_order_relaxed);

    // Release the orec but do not reset shared_state, which will be modified
    // by the serial lock right after our commit anyway.  Also, resetting
    // shared state here would interfere with the serial lock's use of this
    // location.
    if (gl_mg::is_locked(v))
      {
        // Release the global orec, increasing its version number / timestamp.
        // See begin_or_restart() for why we need release memory order here.
        v = gl_mg::clear_locked(v) + 1;
        o_gl_mg.orec.store(v, memory_order_release);

        // Need to ensure privatization safety.  Every other transaction must
        // have a snapshot time that is at least as high as our commit time
        // (i.e., our commit must be visible to them).
        priv_time = v;
      }
    return true;
  }

  // Roll back an aborted transaction.  CP != 0 denotes a nested-transaction
  // checkpoint, for which nothing needs to be done here (the undo log is
  // replayed elsewhere).
  virtual void rollback(gtm_transaction_cp *cp)
  {
    // We don't do anything for rollbacks of nested transactions.
    if (cp != 0)
      return;

    gtm_thread *tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);
    // Special case: If shared_state is ~0, then we have acquired the
    // serial lock (tx->state is not updated yet).  In this case, the previous
    // value isn't available anymore, so grab it from the global lock, which
    // must have a meaningful value because no other transactions are active
    // anymore.  In particular, if it is locked, then we are an update
    // transaction, which is all we care about for rollback.
    bool is_serial = v == ~(typeof v)0;
    if (is_serial)
      v = o_gl_mg.orec.load(memory_order_relaxed);

    // Release lock and increment version number to prevent dirty reads.
    // Also reset shared state here, so that begin_or_restart() can expect a
    // value that is correct wrt. privatization safety.
    if (gl_mg::is_locked(v))
      {
        // Release the global orec, increasing its version number / timestamp.
        // See begin_or_restart() for why we need release memory order here.
        v = gl_mg::clear_locked(v) + 1;
        o_gl_mg.orec.store(v, memory_order_release);

        // Also reset the timestamp published via shared_state.
        // Special case: Only do this if we are not a serial transaction
        // because otherwise, we would interfere with the serial lock.
        if (!is_serial)
          tx->shared_state.store(v, memory_order_release);

        // We need a store-load barrier after this store to prevent it
        // from becoming visible after later data loads because the
        // previous value of shared_state has been higher than the actual
        // snapshot time (the lock bit had been set), which could break
        // privatization safety.  We do not need a barrier before this
        // store (see pre_write() for an explanation).
        // ??? What is the precise reasoning in the C++11 model?
        atomic_thread_fence(memory_order_seq_cst);
      }

  }

  // NOTE(review): these macros presumably expand the templated load/store
  // and mem* helpers above into the virtual ABI dispatch methods — their
  // definition is not visible in this file.
  CREATE_DISPATCH_METHODS(virtual, )
  CREATE_DISPATCH_METHODS_MEM()

  // Register this dispatch with the global-lock method group.  See the
  // abi_dispatch declaration for the meaning of the four boolean flags.
  gl_wt_dispatch() : abi_dispatch(false, true, false, false, &o_gl_mg)
  { }
};
353 | ||
354 | } // anon namespace | |
355 | ||
356 | static const gl_wt_dispatch o_gl_wt_dispatch; | |
357 | ||
358 | abi_dispatch * | |
359 | GTM::dispatch_gl_wt () | |
360 | { | |
361 | return const_cast<gl_wt_dispatch *>(&o_gl_wt_dispatch); | |
362 | } |