//===-- tsan_clock.cc -----------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer (TSan), a race detector.
//
//===----------------------------------------------------------------------===//
#include "tsan_clock.h"
#include "tsan_rtl.h"
#include "sanitizer_common/sanitizer_placement_new.h"

// SyncClock and ThreadClock implement vector clocks for sync variables
// (mutexes, atomic variables, file descriptors, etc.) and threads, respectively.
// ThreadClock contains a fixed-size vector clock for the maximum number of
// threads. SyncClock contains a growable vector clock for the currently
// necessary number of threads.
// Together they implement a very simple model of operations, namely:
//
// void ThreadClock::acquire(const SyncClock *src) {
//   for (int i = 0; i < kMaxThreads; i++)
//     clock[i] = max(clock[i], src->clock[i]);
// }
//
// void ThreadClock::release(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = max(dst->clock[i], clock[i]);
// }
//
// void ThreadClock::ReleaseStore(SyncClock *dst) const {
//   for (int i = 0; i < kMaxThreads; i++)
//     dst->clock[i] = clock[i];
// }
//
// void ThreadClock::acq_rel(SyncClock *dst) {
//   acquire(dst);
//   release(dst);
// }
//
// Conformance to this model is extensively verified in tsan_clock_test.cc.
// However, the implementation is significantly more complex. The complexity
// allows us to implement important classes of use cases in O(1) instead of
// O(N).
//
// The key use cases are:
// 1. Singleton/once atomic that has a single release-store operation followed
//    by zillions of acquire-loads (the acquire-load is O(1)).
// 2. Thread-local mutex (both lock and unlock can be O(1)).
// 3. Leaf mutex (unlock is O(1)).
// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
// 5. An atomic with a single writer (writes can be O(1)).
// The implementation dynamically adapts to the workload. So if an atomic is in
// a read-only phase, these reads will be O(1); if it later switches to a
// read/write phase, the implementation will correctly handle that by switching
// to O(N).
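//
// For example, an illustrative trace of use case 1 (names are hypothetical,
// this is not code from this file):
//   T1: x.store(v, memory_order_release);  // one O(N) release-store into x's SyncClock
//   T2: x.load(memory_order_acquire);      // T2's first acquire: O(N) merge; the SyncClock
//                                          // remembers elem(T2).reused == T2.reused_
//   T2: x.load(memory_order_acquire);      // later acquires by T2: O(1), only the "reused"
//                                          // mark and at most two dirty entries are checked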
//
// Thread-safety note: all const operations on SyncClock's are conducted under
// a shared lock; all non-const operations on SyncClock's are conducted under
// an exclusive lock; ThreadClock's are private to respective threads and so
// do not need any protection.
//
// Description of SyncClock state:
// clk_ - variable size vector clock, low kClkBits hold timestamp,
//   the remaining bits hold the "acquired" flag (the actual value is the
//   thread's reused counter);
//   if acquired == thr->reused_, then the respective thread has already
//   acquired this clock (except possibly for dirty elements).
// dirty_ - holds up to two indices in the vector clock that other threads
//   need to acquire regardless of the "acquired" flag value;
// release_store_tid_ - denotes that the clock state is a result of
//   release-store operation by the thread with release_store_tid_ index.
// release_store_reused_ - reuse count of release_store_tid_.
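//
// For illustration only, a single clock element is conceptually packed as
// follows (the concrete kClkBits value is defined in the headers; the layout
// is the one described above):
//
//    bit 63 ... kClkBits | kClkBits-1 ... 0
//   +---------------------+--------------------+
//   | reused ("acquired") | epoch (timestamp)  |
//   +---------------------+--------------------+
//
// i.e. roughly packed = (reused << kClkBits) | epoch, which is why the
// ThreadClock constructor below checks that reused_ fits into the bits
// above kClkBits.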

// We don't have ThreadState in these methods, so this is an ugly hack that
// works only in C++.
#if !SANITIZER_GO
# define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
#else
# define CPP_STAT_INC(typ) (void)0
#endif

namespace __tsan {

static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
  return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
}

// Drop reference to the first level block idx.
static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  atomic_uint32_t *ref = ref_ptr(cb);
  u32 v = atomic_load(ref, memory_order_acquire);
  for (;;) {
    CHECK_GT(v, 0);
    if (v == 1)
      break;
    if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
      return;
  }
  // First level block owns second level blocks, so free them as well.
  for (uptr i = 0; i < blocks; i++)
    ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
  ctx->clock_alloc.Free(c, idx);
}

ThreadClock::ThreadClock(unsigned tid, unsigned reused)
    : tid_(tid)
    , reused_(reused + 1)  // 0 has special meaning
    , cached_idx_()
    , cached_size_()
    , cached_blocks_() {
  CHECK_LT(tid, kMaxTidInClock);
  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
  nclk_ = tid_ + 1;
  last_acquire_ = 0;
  internal_memset(clk_, 0, sizeof(clk_));
}

void ThreadClock::ResetCached(ClockCache *c) {
  if (cached_idx_) {
    UnrefClockBlock(c, cached_idx_, cached_blocks_);
    cached_idx_ = 0;
    cached_size_ = 0;
    cached_blocks_ = 0;
  }
}

void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(src->size_, kMaxTid);
  CPP_STAT_INC(StatClockAcquire);

  // Check if it's empty -> no need to do anything.
  const uptr nclk = src->size_;
  if (nclk == 0) {
    CPP_STAT_INC(StatClockAcquireEmpty);
    return;
  }

  bool acquired = false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    unsigned tid = dirty.tid;
    if (tid != kInvalidTid) {
      if (clk_[tid] < dirty.epoch) {
        clk_[tid] = dirty.epoch;
        acquired = true;
      }
    }
  }

  // Check if we've already acquired src after the last release operation on src.
  if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
    // O(N) acquire.
    CPP_STAT_INC(StatClockAcquireFull);
    nclk_ = max(nclk_, nclk);
    u64 *dst_pos = &clk_[0];
    for (ClockElem &src_elem : *src) {
      u64 epoch = src_elem.epoch;
      if (*dst_pos < epoch) {
        *dst_pos = epoch;
        acquired = true;
      }
      dst_pos++;
    }
    // Remember that this thread has acquired this clock.
    if (nclk > tid_)
      src->elem(tid_).reused = reused_;
  }

  if (acquired) {
    CPP_STAT_INC(StatClockAcquiredSomething);
    last_acquire_ = clk_[tid_];
    ResetCached(c);
  }
}

void ThreadClock::release(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);

  if (dst->size_ == 0) {
    // ReleaseStore will correctly set release_store_tid_,
    // which can be important for future operations.
    ReleaseStore(c, dst);
    return;
  }

  CPP_STAT_INC(StatClockRelease);
  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  // Check if we had not acquired anything from other threads
  // since the last release on dst. If so, we need to update
  // only dst->elem(tid_).
  if (dst->elem(tid_).epoch > last_acquire_) {
    UpdateCurrentThread(c, dst);
    if (dst->release_store_tid_ != tid_ ||
        dst->release_store_reused_ != reused_)
      dst->release_store_tid_ = kInvalidTid;
    return;
  }

  // O(N) release.
  CPP_STAT_INC(StatClockReleaseFull);
  dst->Unshare(c);
  // First, remember whether we've acquired dst.
  bool acquired = IsAlreadyAcquired(dst);
  if (acquired)
    CPP_STAT_INC(StatClockReleaseAcquired);
  // Update dst->clk_.
  dst->FlushDirty();
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = max(ce.epoch, clk_[i]);
    ce.reused = 0;
    i++;
  }
  // Clear 'acquired' flag in the remaining elements.
  if (nclk_ < dst->size_)
    CPP_STAT_INC(StatClockReleaseClearTail);
  for (uptr i = nclk_; i < dst->size_; i++)
    dst->elem(i).reused = 0;
  dst->release_store_tid_ = kInvalidTid;
  dst->release_store_reused_ = 0;
  // If we've acquired dst, remember this fact,
  // so that we don't need to acquire it on next acquire.
  if (acquired)
    dst->elem(tid_).reused = reused_;
}

void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
  DCHECK_LE(nclk_, kMaxTid);
  DCHECK_LE(dst->size_, kMaxTid);
  CPP_STAT_INC(StatClockStore);

  if (dst->size_ == 0 && cached_idx_ != 0) {
    // Reuse the cached clock.
    // Note: we could reuse/cache the cached clock in more cases:
    // we could update the existing clock and cache it, or replace it with the
    // currently cached clock and release the old one. And for a shared
    // existing clock, we could replace it with the currently cached;
    // or unshare, update and cache. But, for simplicity, we currently reuse
    // the cached clock only when the target clock is empty.
    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
    dst->tab_idx_ = cached_idx_;
    dst->size_ = cached_size_;
    dst->blocks_ = cached_blocks_;
    CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);
    // The cached clock is shared (immutable),
    // so this is where we store the current clock.
    dst->dirty_[0].tid = tid_;
    dst->dirty_[0].epoch = clk_[tid_];
    dst->release_store_tid_ = tid_;
    dst->release_store_reused_ = reused_;
    // Remember that we don't need to acquire it in the future.
    dst->elem(tid_).reused = reused_;
    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    return;
  }

  // Check if we need to resize dst.
  if (dst->size_ < nclk_)
    dst->Resize(c, nclk_);

  if (dst->release_store_tid_ == tid_ &&
      dst->release_store_reused_ == reused_ &&
      dst->elem(tid_).epoch > last_acquire_) {
    CPP_STAT_INC(StatClockStoreFast);
    UpdateCurrentThread(c, dst);
    return;
  }

  // O(N) release-store.
  CPP_STAT_INC(StatClockStoreFull);
  dst->Unshare(c);
  // Note: dst can be larger than this ThreadClock.
  // This is fine since clk_ beyond size is all zeros.
  uptr i = 0;
  for (ClockElem &ce : *dst) {
    ce.epoch = clk_[i];
    ce.reused = 0;
    i++;
  }
  for (uptr i = 0; i < kDirtyTids; i++)
    dst->dirty_[i].tid = kInvalidTid;
  dst->release_store_tid_ = tid_;
  dst->release_store_reused_ = reused_;
  // Remember that we don't need to acquire it in the future.
  dst->elem(tid_).reused = reused_;

  // If the resulting clock is cachable, cache it for future release operations.
  // The clock is always cachable if we released to an empty sync object.
  if (cached_idx_ == 0 && dst->Cachable()) {
    // Grab a reference to the ClockBlock.
    atomic_uint32_t *ref = ref_ptr(dst->tab_);
    if (atomic_load(ref, memory_order_acquire) == 1)
      atomic_store_relaxed(ref, 2);
    else
      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
    cached_idx_ = dst->tab_idx_;
    cached_size_ = dst->size_;
    cached_blocks_ = dst->blocks_;
  }
}
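
// Illustrative life cycle of a cached clock block (a sketch of the intended
// behavior described above, not an exact trace):
//   1. A thread T release-stores into an empty SyncClock S1: the O(N) path
//      fills a fresh ClockBlock (ref count 1); S1 is Cachable(), so T bumps
//      the ref count to 2 and remembers {cached_idx_, cached_size_,
//      cached_blocks_}.
//   2. T release-stores into another empty SyncClock S2: the cached block is
//      attached to S2 directly (ref count incremented), and T's current epoch
//      is published via S2->dirty_[0] because the shared block is immutable.
//   3. Whoever later needs to modify S1/S2 elements first calls Unshare(),
//      which copies the block; UnrefClockBlock() drops references and frees
//      the block once the last owner (sync object or caching thread) lets go.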

void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
  CPP_STAT_INC(StatClockAcquireRelease);
  acquire(c, dst);
  ReleaseStore(c, dst);
}

// Updates only a single element related to the current thread in dst->clk_.
void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
  // Update the thread's time, but preserve the 'acquired' flag.
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty *dirty = &dst->dirty_[i];
    const unsigned tid = dirty->tid;
    if (tid == tid_ || tid == kInvalidTid) {
      CPP_STAT_INC(StatClockReleaseFast);
      dirty->tid = tid_;
      dirty->epoch = clk_[tid_];
      return;
    }
  }
  // Reset all 'acquired' flags, O(N).
  // We are going to touch dst elements, so we need to unshare it.
  dst->Unshare(c);
  CPP_STAT_INC(StatClockReleaseSlow);
  dst->elem(tid_).epoch = clk_[tid_];
  for (uptr i = 0; i < dst->size_; i++)
    dst->elem(i).reused = 0;
}

// Checks whether the current thread has already acquired src.
bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
  if (src->elem(tid_).reused != reused_)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    SyncClock::Dirty dirty = src->dirty_[i];
    if (dirty.tid != kInvalidTid) {
      if (clk_[dirty.tid] < dirty.epoch)
        return false;
    }
  }
  return true;
}

// Sets a single element in the vector clock.
// This function is called only from weird places like AcquireGlobal.
void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
  DCHECK_LT(tid, kMaxTid);
  DCHECK_GE(v, clk_[tid]);
  clk_[tid] = v;
  if (nclk_ <= tid)
    nclk_ = tid + 1;
  last_acquire_ = clk_[tid_];
  ResetCached(c);
}

void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < nclk_; i++)
    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
}

SyncClock::SyncClock() {
  ResetImpl();
}

SyncClock::~SyncClock() {
  // Reset must be called before dtor.
  CHECK_EQ(size_, 0);
  CHECK_EQ(blocks_, 0);
  CHECK_EQ(tab_, 0);
  CHECK_EQ(tab_idx_, 0);
}

void SyncClock::Reset(ClockCache *c) {
  if (size_)
    UnrefClockBlock(c, tab_idx_, blocks_);
  ResetImpl();
}

void SyncClock::ResetImpl() {
  tab_ = 0;
  tab_idx_ = 0;
  size_ = 0;
  blocks_ = 0;
  release_store_tid_ = kInvalidTid;
  release_store_reused_ = 0;
  for (uptr i = 0; i < kDirtyTids; i++)
    dirty_[i].tid = kInvalidTid;
}

void SyncClock::Resize(ClockCache *c, uptr nclk) {
  CPP_STAT_INC(StatClockReleaseResize);
  if (nclk <= capacity()) {
    // Memory is already allocated, just increase the size.
    size_ = nclk;
    return;
  }
  if (size_ == 0) {
    // Grow from 0 to one-level table.
    CHECK_EQ(blocks_, 0);
    CHECK_EQ(tab_, 0);
    CHECK_EQ(tab_idx_, 0);
    tab_idx_ = ctx->clock_alloc.Alloc(c);
    tab_ = ctx->clock_alloc.Map(tab_idx_);
    internal_memset(tab_, 0, sizeof(*tab_));
    atomic_store_relaxed(ref_ptr(tab_), 1);
    size_ = 1;
  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
    CHECK_LT(top, ClockBlock::kClockCount);
    const uptr move = top * sizeof(tab_->clock[0]);
    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
    internal_memset(tab_->clock, 0, move);
    append_block(idx);
  }
  // At this point we have the first level table allocated and all clock
  // elements are evacuated from it to a second level block.
  // Add second level tables as necessary.
  while (nclk > capacity()) {
    u32 idx = ctx->clock_alloc.Alloc(c);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    internal_memset(cb, 0, sizeof(*cb));
    append_block(idx);
  }
  size_ = nclk;
}

// Flushes all dirty elements into the main clock array.
void SyncClock::FlushDirty() {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty *dirty = &dirty_[i];
    if (dirty->tid != kInvalidTid) {
      CHECK_LT(dirty->tid, size_);
      elem(dirty->tid).epoch = dirty->epoch;
      dirty->tid = kInvalidTid;
    }
  }
}

bool SyncClock::IsShared() const {
  if (size_ == 0)
    return false;
  atomic_uint32_t *ref = ref_ptr(tab_);
  u32 v = atomic_load(ref, memory_order_acquire);
  CHECK_GT(v, 0);
  return v > 1;
}

// Unshares the current clock if it's shared.
// Shared clocks are immutable, so they need to be unshared before any updates.
// Note: this does not apply to dirty entries as they are not shared.
void SyncClock::Unshare(ClockCache *c) {
  if (!IsShared())
    return;
  // First, copy current state into old.
  SyncClock old;
  old.tab_ = tab_;
  old.tab_idx_ = tab_idx_;
  old.size_ = size_;
  old.blocks_ = blocks_;
  old.release_store_tid_ = release_store_tid_;
  old.release_store_reused_ = release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    old.dirty_[i] = dirty_[i];
  // Then, clear current object.
  ResetImpl();
  // Allocate brand new clock in the current object.
  Resize(c, old.size_);
  // Now copy state back into this object.
  Iter old_iter(&old);
  for (ClockElem &ce : *this) {
    ce = *old_iter;
    ++old_iter;
  }
  release_store_tid_ = old.release_store_tid_;
  release_store_reused_ = old.release_store_reused_;
  for (unsigned i = 0; i < kDirtyTids; i++)
    dirty_[i] = old.dirty_[i];
  // Drop the reference to the old clock and delete it if necessary.
  old.Reset(c);
}

// Can we cache this clock for future release operations?
ALWAYS_INLINE bool SyncClock::Cachable() const {
  if (size_ == 0)
    return false;
  for (unsigned i = 0; i < kDirtyTids; i++) {
    if (dirty_[i].tid != kInvalidTid)
      return false;
  }
  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
}

// elem linearizes the two-level structure into a linear array.
// Note: this is used only for one-time accesses; vector operations use
// the iterator as it is much faster.
ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
  DCHECK_LT(tid, size_);
  const uptr block = tid / ClockBlock::kClockCount;
  DCHECK_LE(block, blocks_);
  tid %= ClockBlock::kClockCount;
  if (block == blocks_)
    return tab_->clock[tid];
  u32 idx = get_block(block);
  ClockBlock *cb = ctx->clock_alloc.Map(idx);
  return cb->clock[tid];
}

ALWAYS_INLINE uptr SyncClock::capacity() const {
  if (size_ == 0)
    return 0;
  uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
  // How many clock elements we can fit into the first level block.
  // +1 for the ref counter.
  uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
  return blocks_ * ClockBlock::kClockCount + top;
}
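
// Worked example for capacity() (assuming ClockElem is 8 bytes and table
// entries are u32, so ratio == 2; ClockBlock::kClockCount is left symbolic):
//   blocks_ == 0: RoundUpTo(1, 2) / 2 == 1, so top == kClockCount - 1
//                 (one element-sized slot of the first level block is taken
//                 by the ref counter); capacity == kClockCount - 1.
//   blocks_ == 2: RoundUpTo(3, 2) / 2 == 2, so top == kClockCount - 2
//                 (the ref counter plus two second level block indices occupy
//                 two element-sized slots); capacity == 3 * kClockCount - 2.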

ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
  DCHECK_LT(bi, blocks_);
  return tab_->table[ClockBlock::kBlockIdx - bi];
}

ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
  uptr bi = blocks_++;
  CHECK_EQ(get_block(bi), 0);
  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
}

// Used only by tests.
u64 SyncClock::get(unsigned tid) const {
  for (unsigned i = 0; i < kDirtyTids; i++) {
    Dirty dirty = dirty_[i];
    if (dirty.tid == tid)
      return dirty.epoch;
  }
  return elem(tid).epoch;
}

// Used only by the Iter test.
u64 SyncClock::get_clean(unsigned tid) const {
  return elem(tid).epoch;
}

void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
  printf("clock=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
  printf("] reused=[");
  for (uptr i = 0; i < size_; i++)
    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
      release_store_tid_, release_store_reused_,
      dirty_[0].tid, dirty_[0].epoch,
      dirty_[1].tid, dirty_[1].epoch);
}

void SyncClock::Iter::Next() {
  // Finished with the current block, move on to the next one.
  block_++;
  if (block_ < parent_->blocks_) {
    // Iterate over the next second level block.
    u32 idx = parent_->get_block(block_);
    ClockBlock *cb = ctx->clock_alloc.Map(idx);
    pos_ = &cb->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  if (block_ == parent_->blocks_ &&
      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
    // Iterate over elements in the first level block.
    pos_ = &parent_->tab_->clock[0];
    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
        ClockBlock::kClockCount);
    return;
  }
  parent_ = nullptr;  // denotes end
}

}  // namespace __tsan