method-gl.cc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371
  1. /* Copyright (C) 2011-2022 Free Software Foundation, Inc.
  2. Contributed by Torvald Riegel <triegel@redhat.com>.
  3. This file is part of the GNU Transactional Memory Library (libitm).
  4. Libitm is free software; you can redistribute it and/or modify it
  5. under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 3 of the License, or
  7. (at your option) any later version.
  8. Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
  9. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  10. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  11. more details.
  12. Under Section 7 of GPL version 3, you are granted additional
  13. permissions described in the GCC Runtime Library Exception, version
  14. 3.1, as published by the Free Software Foundation.
  15. You should have received a copy of the GNU General Public License and
  16. a copy of the GCC Runtime Library Exception along with this program;
  17. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  18. <http://www.gnu.org/licenses/>. */
  19. #include "libitm_i.h"
  20. using namespace GTM;
  21. namespace {
  22. // This group consists of all TM methods that synchronize via just a single
  23. // global lock (or ownership record).
  24. struct gl_mg : public method_group
  25. {
  26. static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
  27. // We can't use the full bitrange because ~0 in gtm_thread::shared_state has
  28. // special meaning.
  29. static const gtm_word VERSION_MAX = (~(gtm_word)0 >> 1) - 1;
  30. static bool is_locked(gtm_word l) { return l & LOCK_BIT; }
  31. static gtm_word set_locked(gtm_word l) { return l | LOCK_BIT; }
  32. static gtm_word clear_locked(gtm_word l) { return l & ~LOCK_BIT; }
  33. // The global ownership record.
  34. // No tail-padding necessary (the virtual functions aren't used frequently).
  35. atomic<gtm_word> orec __attribute__((aligned(HW_CACHELINE_SIZE)));
  36. virtual void init()
  37. {
  38. // This store is only executed while holding the serial lock, so relaxed
  39. // memory order is sufficient here.
  40. orec.store(0, memory_order_relaxed);
  41. }
  42. virtual void fini() { }
  43. };
  44. static gl_mg o_gl_mg;
  45. // The global lock, write-through TM method.
  46. // Acquires the orec eagerly before the first write, and then writes through.
  47. // Reads abort if the global orec's version number changed or if it is locked.
  48. // Currently, writes require undo-logging to prevent deadlock between the
  49. // serial lock and the global orec (writer txn acquires orec, reader txn
  50. // upgrades to serial and waits for all other txns, writer tries to upgrade to
  51. // serial too but cannot, writer cannot abort either, deadlock). We could
  52. // avoid this if the serial lock would allow us to prevent other threads from
  53. // going to serial mode, but this probably is too much additional complexity
  54. // just to optimize this TM method.
  55. // gtm_thread::shared_state is used to store a transaction's current
  56. // snapshot time (or commit time). The serial lock uses ~0 for inactive
  57. // transactions and 0 for active ones. Thus, we always have a meaningful
  58. // timestamp in shared_state that can be used to implement quiescence-based
  59. // privatization safety. This even holds if a writing transaction has the
  60. // lock bit set in its shared_state because this is fine for both the serial
  61. // lock (the value will be smaller than ~0) and privatization safety (we
  62. // validate that no other update transaction comitted before we acquired the
  63. // orec, so we have the most recent timestamp and no other transaction can
  64. // commit until we have committed).
  65. // However, we therefore depend on shared_state not being modified by the
  66. // serial lock during upgrades to serial mode, which is ensured by
  67. // gtm_thread::serialirr_mode by not calling gtm_rwlock::write_upgrade_finish
  68. // before we have committed or rolled back.
  69. class gl_wt_dispatch : public abi_dispatch
  70. {
  71. protected:
  72. static void pre_write(const void *addr, size_t len,
  73. gtm_thread *tx = gtm_thr())
  74. {
  75. gtm_word v = tx->shared_state.load(memory_order_relaxed);
  76. if (unlikely(!gl_mg::is_locked(v)))
  77. {
  78. // Check for and handle version number overflow.
  79. if (unlikely(v >= gl_mg::VERSION_MAX))
  80. tx->restart(RESTART_INIT_METHOD_GROUP);
  81. // This validates that we have a consistent snapshot, which is also
  82. // for making privatization safety work (see the class' comments).
  83. // Note that this check here will be performed by the subsequent CAS
  84. // again, so relaxed memory order is fine.
  85. gtm_word now = o_gl_mg.orec.load(memory_order_relaxed);
  86. if (now != v)
  87. tx->restart(RESTART_VALIDATE_WRITE);
  88. // CAS global orec from our snapshot time to the locked state.
  89. // We need acquire memory order here to synchronize with other
  90. // (ownership) releases of the orec. We do not need acq_rel order
  91. // because whenever another thread reads from this CAS'
  92. // modification, then it will abort anyway and does not rely on
  93. // any further happens-before relation to be established.
  94. // Also note that unlike in ml_wt's increase of the global time
  95. // base (remember that the global orec is used as time base), we do
  96. // not need require memory order here because we do not need to make
  97. // prior orec acquisitions visible to other threads that try to
  98. // extend their snapshot time.
  99. if (!o_gl_mg.orec.compare_exchange_strong (now, gl_mg::set_locked(now),
  100. memory_order_acquire))
  101. tx->restart(RESTART_LOCKED_WRITE);
  102. // We use an explicit fence here to avoid having to use release
  103. // memory order for all subsequent data stores. This fence will
  104. // synchronize with loads of the data with acquire memory order. See
  105. // validate() for why this is necessary.
  106. // Adding require memory order to the prior CAS is not sufficient,
  107. // at least according to the Batty et al. formalization of the
  108. // memory model.
  109. atomic_thread_fence(memory_order_release);
  110. // Set shared_state to new value.
  111. tx->shared_state.store(gl_mg::set_locked(now), memory_order_release);
  112. }
  113. tx->undolog.log(addr, len);
  114. }
  115. static void validate(gtm_thread *tx = gtm_thr())
  116. {
  117. // Check that snapshot is consistent. We expect the previous data load to
  118. // have acquire memory order, or be atomic and followed by an acquire
  119. // fence.
  120. // As a result, the data load will synchronize with the release fence
  121. // issued by the transactions whose data updates the data load has read
  122. // from. This forces the orec load to read from a visible sequence of side
  123. // effects that starts with the other updating transaction's store that
  124. // acquired the orec and set it to locked.
  125. // We therefore either read a value with the locked bit set (and restart)
  126. // or read an orec value that was written after the data had been written.
  127. // Either will allow us to detect inconsistent reads because it will have
  128. // a higher/different value.
  129. gtm_word l = o_gl_mg.orec.load(memory_order_relaxed);
  130. if (l != tx->shared_state.load(memory_order_relaxed))
  131. tx->restart(RESTART_VALIDATE_READ);
  132. }
  133. template <typename V> static V load(const V* addr, ls_modifier mod)
  134. {
  135. // Read-for-write should be unlikely, but we need to handle it or will
  136. // break later WaW optimizations.
  137. if (unlikely(mod == RfW))
  138. {
  139. pre_write(addr, sizeof(V));
  140. return *addr;
  141. }
  142. if (unlikely(mod == RaW))
  143. return *addr;
  144. // We do not have acquired the orec, so we need to load a value and then
  145. // validate that this was consistent.
  146. // This needs to have acquire memory order (see validate()).
  147. // Alternatively, we can put an acquire fence after the data load but this
  148. // is probably less efficient.
  149. // FIXME We would need an atomic load with acquire memory order here but
  150. // we can't just forge an atomic load for nonatomic data because this
  151. // might not work on all implementations of atomics. However, we need
  152. // the acquire memory order and we can only establish this if we link
  153. // it to the matching release using a reads-from relation between atomic
  154. // loads. Also, the compiler is allowed to optimize nonatomic accesses
  155. // differently than atomic accesses (e.g., if the load would be moved to
  156. // after the fence, we potentially don't synchronize properly anymore).
  157. // Instead of the following, just use an ordinary load followed by an
  158. // acquire fence, and hope that this is good enough for now:
  159. // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
  160. V v = *addr;
  161. atomic_thread_fence(memory_order_acquire);
  162. validate();
  163. return v;
  164. }
  165. template <typename V> static void store(V* addr, const V value,
  166. ls_modifier mod)
  167. {
  168. if (likely(mod != WaW))
  169. pre_write(addr, sizeof(V));
  170. // FIXME We would need an atomic store here but we can't just forge an
  171. // atomic load for nonatomic data because this might not work on all
  172. // implementations of atomics. However, we need this store to link the
  173. // release fence in pre_write() to the acquire operation in load, which
  174. // is only guaranteed if we have a reads-from relation between atomic
  175. // accesses. Also, the compiler is allowed to optimize nonatomic accesses
  176. // differently than atomic accesses (e.g., if the store would be moved
  177. // to before the release fence in pre_write(), things could go wrong).
  178. // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
  179. *addr = value;
  180. }
  181. public:
  182. static void memtransfer_static(void *dst, const void* src, size_t size,
  183. bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
  184. {
  185. gtm_thread *tx = gtm_thr();
  186. if (dst_mod != WaW && dst_mod != NONTXNAL)
  187. pre_write(dst, size, tx);
  188. // We need at least undo-logging for an RfW src region because we might
  189. // subsequently write there with WaW.
  190. if (src_mod == RfW)
  191. pre_write(src, size, tx);
  192. // FIXME We should use atomics here (see store()). Let's just hope that
  193. // memcpy/memmove are good enough.
  194. if (!may_overlap)
  195. ::memcpy(dst, src, size);
  196. else
  197. ::memmove(dst, src, size);
  198. if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL
  199. && dst_mod != WaW)
  200. validate(tx);
  201. }
  202. static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
  203. {
  204. if (mod != WaW)
  205. pre_write(dst, size);
  206. // FIXME We should use atomics here (see store()). Let's just hope that
  207. // memset is good enough.
  208. ::memset(dst, c, size);
  209. }
  210. virtual gtm_restart_reason begin_or_restart()
  211. {
  212. // We don't need to do anything for nested transactions.
  213. gtm_thread *tx = gtm_thr();
  214. if (tx->parent_txns.size() > 0)
  215. return NO_RESTART;
  216. // Spin until global orec is not locked.
  217. // TODO This is not necessary if there are no pure loads (check txn props).
  218. unsigned i = 0;
  219. gtm_word v;
  220. while (1)
  221. {
  222. // We need acquire memory order here so that this load will
  223. // synchronize with the store that releases the orec in trycommit().
  224. // In turn, this makes sure that subsequent data loads will read from
  225. // a visible sequence of side effects that starts with the most recent
  226. // store to the data right before the release of the orec.
  227. v = o_gl_mg.orec.load(memory_order_acquire);
  228. if (!gl_mg::is_locked(v))
  229. break;
  230. // TODO need method-specific max spin count
  231. if (++i > gtm_spin_count_var)
  232. return RESTART_VALIDATE_READ;
  233. cpu_relax();
  234. }
  235. // Everything is okay, we have a snapshot time.
  236. // We don't need to enforce any ordering for the following store. There
  237. // are no earlier data loads in this transaction, so the store cannot
  238. // become visible before those (which could lead to the violation of
  239. // privatization safety). The store can become visible after later loads
  240. // but this does not matter because the previous value will have been
  241. // smaller or equal (the serial lock will set shared_state to zero when
  242. // marking the transaction as active, and restarts enforce immediate
  243. // visibility of a smaller or equal value with a barrier (see
  244. // rollback()).
  245. tx->shared_state.store(v, memory_order_relaxed);
  246. return NO_RESTART;
  247. }
  248. virtual bool trycommit(gtm_word& priv_time)
  249. {
  250. gtm_thread* tx = gtm_thr();
  251. gtm_word v = tx->shared_state.load(memory_order_relaxed);
  252. // Release the orec but do not reset shared_state, which will be modified
  253. // by the serial lock right after our commit anyway. Also, resetting
  254. // shared state here would interfere with the serial lock's use of this
  255. // location.
  256. if (gl_mg::is_locked(v))
  257. {
  258. // Release the global orec, increasing its version number / timestamp.
  259. // See begin_or_restart() for why we need release memory order here.
  260. v = gl_mg::clear_locked(v) + 1;
  261. o_gl_mg.orec.store(v, memory_order_release);
  262. }
  263. // Need to ensure privatization safety. Every other transaction must have
  264. // a snapshot time that is at least as high as our commit time (i.e., our
  265. // commit must be visible to them). Because of proxy privatization, we
  266. // must ensure that even if we are a read-only transaction. See
  267. // ml_wt_dispatch::trycommit() for details: We can't get quite the same
  268. // set of problems because we just use one orec and thus, for example,
  269. // there cannot be concurrent writers -- but we can still get pending
  270. // loads to privatized data when not ensuring privatization safety, which
  271. // is problematic if the program unmaps the privatized memory.
  272. priv_time = v;
  273. return true;
  274. }
  275. virtual void rollback(gtm_transaction_cp *cp)
  276. {
  277. // We don't do anything for rollbacks of nested transactions.
  278. if (cp != 0)
  279. return;
  280. gtm_thread *tx = gtm_thr();
  281. gtm_word v = tx->shared_state.load(memory_order_relaxed);
  282. // Release lock and increment version number to prevent dirty reads.
  283. // Also reset shared state here, so that begin_or_restart() can expect a
  284. // value that is correct wrt. privatization safety.
  285. if (gl_mg::is_locked(v))
  286. {
  287. // With our rollback, global time increases.
  288. v = gl_mg::clear_locked(v) + 1;
  289. // First reset the timestamp published via shared_state. Release
  290. // memory order will make this happen after undoing prior data writes.
  291. // This must also happen before we actually release the global orec
  292. // next, so that future update transactions in other threads observe
  293. // a meaningful snapshot time for our transaction; otherwise, they
  294. // could read a shared_store value with the LOCK_BIT set, which can
  295. // break privatization safety because it's larger than the actual
  296. // snapshot time. Note that we only need to consider other update
  297. // transactions because only those will potentially privatize data.
  298. tx->shared_state.store(v, memory_order_release);
  299. // Release the global orec, increasing its version number / timestamp.
  300. // See begin_or_restart() for why we need release memory order here,
  301. // and we also need it to make future update transactions read the
  302. // prior update to shared_state too (update transactions acquire the
  303. // global orec with acquire memory order).
  304. o_gl_mg.orec.store(v, memory_order_release);
  305. }
  306. }
  307. virtual bool snapshot_most_recent()
  308. {
  309. // This is the same check as in validate() except that we do not restart
  310. // on failure but simply return the result.
  311. return o_gl_mg.orec.load(memory_order_relaxed)
  312. == gtm_thr()->shared_state.load(memory_order_relaxed);
  313. }
  314. CREATE_DISPATCH_METHODS(virtual, )
  315. CREATE_DISPATCH_METHODS_MEM()
  316. gl_wt_dispatch() : abi_dispatch(false, true, false, false, 0, &o_gl_mg)
  317. { }
  318. };
  319. } // anon namespace
  320. static const gl_wt_dispatch o_gl_wt_dispatch;
  321. abi_dispatch *
  322. GTM::dispatch_gl_wt ()
  323. {
  324. return const_cast<gl_wt_dispatch *>(&o_gl_wt_dispatch);
  325. }