/* Copyright (C) 2011-2024 Free Software Foundation, Inc.
   Contributed by Torvald Riegel <triegel@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE. See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
   <http://www.gnu.org/licenses/>. */

#include "libitm_i.h"

using namespace GTM;

namespace {

// This group consists of all TM methods that synchronize via just a single
// global lock (or ownership record).
struct gl_mg : public method_group
{
  static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
  // We can't use the full bitrange because ~0 in gtm_thread::shared_state has
  // special meaning.
  static const gtm_word VERSION_MAX = (~(gtm_word)0 >> 1) - 1;
  static bool is_locked(gtm_word l) { return l & LOCK_BIT; }
  static gtm_word set_locked(gtm_word l) { return l | LOCK_BIT; }
  static gtm_word clear_locked(gtm_word l) { return l & ~LOCK_BIT; }
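
  // Editor's illustration (not part of the original sources): on a target
  // with a 64-bit gtm_word, LOCK_BIT is 1 << 63, so the lower 63 bits hold
  // the version number and versions range from 0 to VERSION_MAX (2^63 - 2);
  // ~0 stays reserved for the serial lock's "inactive" marker. For example,
  // set_locked(5) yields (1 << 63) | 5, and clear_locked() recovers 5.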

  // The global ownership record.
  // No tail-padding necessary (the virtual functions aren't used frequently).
  atomic<gtm_word> orec __attribute__((aligned(HW_CACHELINE_SIZE)));

  virtual void init()
  {
    // This store is only executed while holding the serial lock, so relaxed
    // memory order is sufficient here.
    orec.store(0, memory_order_relaxed);
  }
  virtual void fini() { }
};

static gl_mg o_gl_mg;


// The global lock, write-through TM method.
// Acquires the orec eagerly before the first write, and then writes through.
// Reads abort if the global orec's version number changed or if it is locked.
// Currently, writes require undo-logging to prevent deadlock between the
// serial lock and the global orec (writer txn acquires orec, reader txn
// upgrades to serial and waits for all other txns, writer tries to upgrade to
// serial too but cannot, writer cannot abort either, deadlock). We could
// avoid this if the serial lock allowed us to prevent other threads from
// going to serial mode, but this probably is too much additional complexity
// just to optimize this TM method.
// gtm_thread::shared_state is used to store a transaction's current
// snapshot time (or commit time). The serial lock uses ~0 for inactive
// transactions and 0 for active ones. Thus, we always have a meaningful
// timestamp in shared_state that can be used to implement quiescence-based
// privatization safety. This even holds if a writing transaction has the
// lock bit set in its shared_state because this is fine for both the serial
// lock (the value will be smaller than ~0) and privatization safety (we
// validate that no other update transaction committed before we acquired the
// orec, so we have the most recent timestamp and no other transaction can
// commit until we have committed).
// However, we therefore depend on shared_state not being modified by the
// serial lock during upgrades to serial mode, which is ensured by
// gtm_thread::serialirr_mode by not calling gtm_rwlock::write_upgrade_finish
// before we have committed or rolled back.
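//
// Editor's summary (illustrative, not from the original comments) of the
// values a thread's shared_state can hold under this method:
//   ~0                    inactive (set by the serial lock)
//   0                     just marked active by the serial lock
//   v                     the transaction's snapshot (or commit) time
//   v | gl_mg::LOCK_BIT   the transaction currently owns the global orec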
class gl_wt_dispatch : public abi_dispatch
{
protected:
  static void pre_write(const void *addr, size_t len,
                        gtm_thread *tx = gtm_thr())
  {
    gtm_word v = tx->shared_state.load(memory_order_relaxed);
    if (unlikely(!gl_mg::is_locked(v)))
      {
        // Check for and handle version number overflow.
        if (unlikely(v >= gl_mg::VERSION_MAX))
          tx->restart(RESTART_INIT_METHOD_GROUP);

        // This validates that we have a consistent snapshot, which is also
        // necessary for making privatization safety work (see the class'
        // comments).
        // Note that this check here will be performed by the subsequent CAS
        // again, so relaxed memory order is fine.
        gtm_word now = o_gl_mg.orec.load(memory_order_relaxed);
        if (now != v)
          tx->restart(RESTART_VALIDATE_WRITE);

        // CAS global orec from our snapshot time to the locked state.
        // We need acquire memory order here to synchronize with other
        // (ownership) releases of the orec. We do not need acq_rel order
        // because whenever another thread reads from this CAS'
        // modification, it will abort anyway and does not rely on
        // any further happens-before relation to be established.
        // Also note that unlike in ml_wt's increase of the global time
        // base (remember that the global orec is used as time base), we do
        // not need release memory order here because we do not need to make
        // prior orec acquisitions visible to other threads that try to
        // extend their snapshot time.
        if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now),
                                                   memory_order_acquire))
          tx->restart(RESTART_LOCKED_WRITE);

        // We use an explicit fence here to avoid having to use release
        // memory order for all subsequent data stores. This fence will
        // synchronize with loads of the data with acquire memory order. See
        // validate() for why this is necessary.
        // Adding release memory order to the prior CAS is not sufficient,
        // at least according to the Batty et al. formalization of the
        // memory model.
        atomic_thread_fence(memory_order_release);

        // Set shared_state to new value.
        tx->shared_state.store(gl_mg::set_locked(now), memory_order_release);
      }

    tx->undolog.log(addr, len);
  }

  static void validate(gtm_thread *tx = gtm_thr())
  {
    // Check that snapshot is consistent. We expect the previous data load to
    // have acquire memory order, or be atomic and followed by an acquire
    // fence.
    // As a result, the data load will synchronize with the release fence
    // issued by the transactions whose data updates the data load has read
    // from. This forces the orec load to read from a visible sequence of
    // side effects that starts with the other updating transaction's store
    // that acquired the orec and set it to locked.
    // We therefore either read a value with the locked bit set (and restart)
    // or read an orec value that was written after the data had been written.
    // Either will allow us to detect inconsistent reads because it will have
    // a higher/different value.
    gtm_word l = o_gl_mg.orec.load(memory_order_relaxed);
    if (l != tx->shared_state.load(memory_order_relaxed))
      tx->restart(RESTART_VALIDATE_READ);
  }
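
  // Editor's sketch (illustrative, not from the original comments) of how
  // the release fence in pre_write() pairs with the acquire fence used on
  // the read side, for a writer W and a reader R:
  //   W: CAS the orec from its snapshot value to the locked value
  //   W: atomic_thread_fence(memory_order_release)
  //   W: *addr = value;                       // write-through data store
  //   R: v = *addr;                           // data load in load()
  //   R: atomic_thread_fence(memory_order_acquire)
  //   R: l = orec.load(memory_order_relaxed)  // validate()
  // If R's data load read W's store, the fence pairing forces R's orec load
  // to observe at least the locked orec value, so validate() restarts R.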

  template <typename V> static V load(const V* addr, ls_modifier mod)
  {
    // Read-for-write should be unlikely, but we need to handle it or we will
    // break later WaW optimizations.
    if (unlikely(mod == RfW))
      {
        pre_write(addr, sizeof(V));
        return *addr;
      }
    if (unlikely(mod == RaW))
      return *addr;

    // We have not acquired the orec, so we need to load a value and then
    // validate that this was consistent.
    // This needs to have acquire memory order (see validate()).
    // Alternatively, we can put an acquire fence after the data load but this
    // is probably less efficient.
    // FIXME We would need an atomic load with acquire memory order here but
    // we can't just forge an atomic load for nonatomic data because this
    // might not work on all implementations of atomics. However, we need
    // the acquire memory order and we can only establish this if we link
    // it to the matching release using a reads-from relation between atomic
    // loads. Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the load were moved to
    // after the fence, we potentially would not synchronize properly anymore).
    // Instead of the following, just use an ordinary load followed by an
    // acquire fence, and hope that this is good enough for now:
    // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
    V v = *addr;
    atomic_thread_fence(memory_order_acquire);
    validate();
    return v;
  }
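
  // Editor's note (illustrative; assumes the standard libitm ABI and the
  // dispatch methods generated by the CREATE_DISPATCH_METHODS macros near
  // the end of this class): an instrumented read such as _ITM_RU4(addr)
  // ends up in the uint32_t instantiation of load() with a plain read
  // modifier, whereas _ITM_RfWU4(addr) reaches it with mod == RfW and
  // _ITM_RaWU4(addr) with mod == RaW.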

  template <typename V> static void store(V* addr, const V value,
                                           ls_modifier mod)
  {
    if (likely(mod != WaW))
      pre_write(addr, sizeof(V));
    // FIXME We would need an atomic store here but we can't just forge an
    // atomic store for nonatomic data because this might not work on all
    // implementations of atomics. However, we need this store to link the
    // release fence in pre_write() to the acquire operation in load, which
    // is only guaranteed if we have a reads-from relation between atomic
    // accesses. Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the store were moved
    // to before the release fence in pre_write(), things could go wrong).
    // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
    *addr = value;
  }

public:
  static void memtransfer_static(void *dst, const void* src, size_t size,
      bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
  {
    gtm_thread *tx = gtm_thr();
    if (dst_mod != WaW && dst_mod != NONTXNAL)
      pre_write(dst, size, tx);
    // We need at least undo-logging for an RfW src region because we might
    // subsequently write there with WaW.
    if (src_mod == RfW)
      pre_write(src, size, tx);

    // FIXME We should use atomics here (see store()). Let's just hope that
    // memcpy/memmove are good enough.
    if (!may_overlap)
      ::memcpy(dst, src, size);
    else
      ::memmove(dst, src, size);

    if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL
        && dst_mod != WaW)
      validate(tx);
  }

  static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
  {
    if (mod != WaW)
      pre_write(dst, size);
    // FIXME We should use atomics here (see store()). Let's just hope that
    // memset is good enough.
    ::memset(dst, c, size);
  }

  virtual gtm_restart_reason begin_or_restart()
  {
    // We don't need to do anything for nested transactions.
    gtm_thread *tx = gtm_thr();
    if (tx->parent_txns.size() > 0)
      return NO_RESTART;

    // Spin until global orec is not locked.
    // TODO This is not necessary if there are no pure loads (check txn props).
    unsigned i = 0;
    gtm_word v;
    while (1)
      {
        // We need acquire memory order here so that this load will
        // synchronize with the store that releases the orec in trycommit().
        // In turn, this makes sure that subsequent data loads will read from
        // a visible sequence of side effects that starts with the most recent
        // store to the data right before the release of the orec.
        v = o_gl_mg.orec.load(memory_order_acquire);
        if (!gl_mg::is_locked(v))
          break;
        // TODO need method-specific max spin count
        if (++i > gtm_spin_count_var)
          return RESTART_VALIDATE_READ;
        cpu_relax();
      }

    // Everything is okay, we have a snapshot time.
    // We don't need to enforce any ordering for the following store. There
    // are no earlier data loads in this transaction, so the store cannot
    // become visible before those (which could lead to a violation of
    // privatization safety). The store can become visible after later loads
    // but this does not matter because the previous value will have been
    // smaller or equal (the serial lock will set shared_state to zero when
    // marking the transaction as active, and restarts enforce immediate
    // visibility of a smaller or equal value with a barrier; see rollback()).
    tx->shared_state.store(v, memory_order_relaxed);
    return NO_RESTART;
  }

  virtual bool trycommit(gtm_word& priv_time)
  {
    gtm_thread* tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);

    // Release the orec but do not reset shared_state, which will be modified
    // by the serial lock right after our commit anyway. Also, resetting
    // shared_state here would interfere with the serial lock's use of this
    // location.
    if (gl_mg::is_locked(v))
      {
        // Release the global orec, increasing its version number / timestamp.
        // See begin_or_restart() for why we need release memory order here.
        v = gl_mg::clear_locked(v) + 1;
        o_gl_mg.orec.store(v, memory_order_release);
      }

    // Need to ensure privatization safety. Every other transaction must have
    // a snapshot time that is at least as high as our commit time (i.e., our
    // commit must be visible to them). Because of proxy privatization, we
    // must ensure that even if we are a read-only transaction. See
    // ml_wt_dispatch::trycommit() for details: We can't get quite the same
    // set of problems because we just use one orec and thus, for example,
    // there cannot be concurrent writers -- but we can still get pending
    // loads to privatized data when not ensuring privatization safety, which
    // is problematic if the program unmaps the privatized memory.
    priv_time = v;
    return true;
  }
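
  // Editor's note (illustrative): the caller uses priv_time for
  // quiescence-based privatization safety. For example, if trycommit()
  // returns with priv_time == 42, the committing thread waits until every
  // other transaction's shared_state has reached at least 42, so no thread
  // can still be reading data that this transaction may have privatized.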

  virtual void rollback(gtm_transaction_cp *cp)
  {
    // We don't do anything for rollbacks of nested transactions.
    if (cp != 0)
      return;

    gtm_thread *tx = gtm_thr();
    gtm_word v = tx->shared_state.load(memory_order_relaxed);

    // Release lock and increment version number to prevent dirty reads.
    // Also reset shared_state here, so that begin_or_restart() can expect a
    // value that is correct wrt. privatization safety.
    if (gl_mg::is_locked(v))
      {
        // With our rollback, global time increases.
        v = gl_mg::clear_locked(v) + 1;

        // First reset the timestamp published via shared_state. Release
        // memory order will make this happen after undoing prior data writes.
        // This must also happen before we actually release the global orec
        // next, so that future update transactions in other threads observe
        // a meaningful snapshot time for our transaction; otherwise, they
        // could read a shared_state value with the LOCK_BIT set, which can
        // break privatization safety because it's larger than the actual
        // snapshot time. Note that we only need to consider other update
        // transactions because only those will potentially privatize data.
        tx->shared_state.store(v, memory_order_release);

        // Release the global orec, increasing its version number / timestamp.
        // See begin_or_restart() for why we need release memory order here,
        // and we also need it to make future update transactions read the
        // prior update to shared_state too (update transactions acquire the
        // global orec with acquire memory order).
        o_gl_mg.orec.store(v, memory_order_release);
      }
  }

  virtual bool snapshot_most_recent()
  {
    // This is the same check as in validate() except that we do not restart
    // on failure but simply return the result.
    return o_gl_mg.orec.load(memory_order_relaxed)
           == gtm_thr()->shared_state.load(memory_order_relaxed);
  }

  CREATE_DISPATCH_METHODS(virtual, )
  CREATE_DISPATCH_METHODS_MEM()

  gl_wt_dispatch() : abi_dispatch(false, true, false, false, 0, &o_gl_mg)
  { }
};

} // anon namespace

static const gl_wt_dispatch o_gl_wt_dispatch;

abi_dispatch *
GTM::dispatch_gl_wt ()
{
  return const_cast<gl_wt_dispatch *>(&o_gl_wt_dispatch);
}