contrib/compiler-rt/lib/tsan/rtl/tsan_clock.cc

   1 //===-- tsan_clock.cc -----------------------------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file is a part of ThreadSanitizer (TSan), a race detector.
  10 //
  11 //===----------------------------------------------------------------------===//
  12 #include "tsan_clock.h"
  13 #include "tsan_rtl.h"
  14 #include "sanitizer_common/sanitizer_placement_new.h"
  15
  16 // SyncClock and ThreadClock implement vector clocks for sync variables
  17 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
  18 // ThreadClock contains fixed-size vector clock for maximum number of threads.
  19 // SyncClock contains growable vector clock for currently necessary number of
  20 // threads.
  21 // Together they implement very simple model of operations, namely:
  22 //
  23 //   void ThreadClock::acquire(const SyncClock *src) {
  24 //     for (int i = 0; i < kMaxThreads; i++)
  25 //       clock[i] = max(clock[i], src->clock[i]);
  26 //   }
  27 //
  28 //   void ThreadClock::release(SyncClock *dst) const {
  29 //     for (int i = 0; i < kMaxThreads; i++)
  30 //       dst->clock[i] = max(dst->clock[i], clock[i]);
  31 //   }
  32 //
  33 //   void ThreadClock::ReleaseStore(SyncClock *dst) const {
  34 //     for (int i = 0; i < kMaxThreads; i++)
  35 //       dst->clock[i] = clock[i];
  36 //   }
  37 //
  38 //   void ThreadClock::acq_rel(SyncClock *dst) {
  39 //     acquire(dst);
  40 //     release(dst);
  41 //   }
  42 //
  43 // Conformance to this model is extensively verified in tsan_clock_test.cc.
  44 // However, the implementation is significantly more complex. The complexity
  45 // allows to implement important classes of use cases in O(1) instead of O(N).
  46 //
  47 // The use cases are:
  48 // 1. Singleton/once atomic that has a single release-store operation followed
  49 //    by zillions of acquire-loads (the acquire-load is O(1)).
  50 // 2. Thread-local mutex (both lock and unlock can be O(1)).
  51 // 3. Leaf mutex (unlock is O(1)).
  52 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
  53 // 5. An atomic with a single writer (writes can be O(1)).
  54 // The implementation dynamically adopts to workload. So if an atomic is in
  55 // read-only phase, these reads will be O(1); if it later switches to read/write
  56 // phase, the implementation will correctly handle that by switching to O(N).
  57 //
  58 // Thread-safety note: all const operations on SyncClock's are conducted under
  59 // a shared lock; all non-const operations on SyncClock's are conducted under
  60 // an exclusive lock; ThreadClock's are private to respective threads and so
  61 // do not need any protection.
  62 //
  63 // Description of SyncClock state:
  64 // clk_ - variable size vector clock, low kClkBits hold timestamp,
  65 //   the remaining bits hold "acquired" flag (the actual value is thread's
  66 //   reused counter);
  67 //   if acquried == thr->reused_, then the respective thread has already
  68 //   acquired this clock (except possibly for dirty elements).
  69 // dirty_ - holds up to two indeces in the vector clock that other threads
  70 //   need to acquire regardless of "acquired" flag value;
  71 // release_store_tid_ - denotes that the clock state is a result of
  72 //   release-store operation by the thread with release_store_tid_ index.
  73 // release_store_reused_ - reuse count of release_store_tid_.
  74
  75 // We don't have ThreadState in these methods, so this is an ugly hack that
  76 // works only in C++.
  77 #if !SANITIZER_GO
  78 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
  79 #else
  80 # define CPP_STAT_INC(typ) (void)0
  81 #endif
  82
  83 namespace __tsan {
  84
  85 static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
  86   return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
  87 }
  88
  89 // Drop reference to the first level block idx.
  90 static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
  91   ClockBlock *cb = ctx->clock_alloc.Map(idx);
  92   atomic_uint32_t *ref = ref_ptr(cb);
  93   u32 v = atomic_load(ref, memory_order_acquire);
  94   for (;;) {
  95     CHECK_GT(v, 0);
  96     if (v == 1)
  97       break;
  98     if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
  99       return;
 100   }
 101   // First level block owns second level blocks, so them as well.
 102   for (uptr i = 0; i < blocks; i++)
 103     ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
 104   ctx->clock_alloc.Free(c, idx);
 105 }
 106
 107 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
 108     : tid_(tid)
 109     , reused_(reused + 1)  // 0 has special meaning
 110     , cached_idx_()
 111     , cached_size_()
 112     , cached_blocks_() {
 113   CHECK_LT(tid, kMaxTidInClock);
 114   CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
 115   nclk_ = tid_ + 1;
 116   last_acquire_ = 0;
 117   internal_memset(clk_, 0, sizeof(clk_));
 118 }
 119
 120 void ThreadClock::ResetCached(ClockCache *c) {
 121   if (cached_idx_) {
 122     UnrefClockBlock(c, cached_idx_, cached_blocks_);
 123     cached_idx_ = 0;
 124     cached_size_ = 0;
 125     cached_blocks_ = 0;
 126   }
 127 }
 128
 129 void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
 130   DCHECK_LE(nclk_, kMaxTid);
 131   DCHECK_LE(src->size_, kMaxTid);
 132   CPP_STAT_INC(StatClockAcquire);
 133
 134   // Check if it's empty -> no need to do anything.
 135   const uptr nclk = src->size_;
 136   if (nclk == 0) {
 137     CPP_STAT_INC(StatClockAcquireEmpty);
 138     return;
 139   }
 140
 141   bool acquired = false;
 142   for (unsigned i = 0; i < kDirtyTids; i++) {
 143     SyncClock::Dirty dirty = src->dirty_[i];
 144     unsigned tid = dirty.tid;
 145     if (tid != kInvalidTid) {
 146       if (clk_[tid] < dirty.epoch) {
 147         clk_[tid] = dirty.epoch;
 148         acquired = true;
 149       }
 150     }
 151   }
 152
 153   // Check if we've already acquired src after the last release operation on src
 154   if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
 155     // O(N) acquire.
 156     CPP_STAT_INC(StatClockAcquireFull);
 157     nclk_ = max(nclk_, nclk);
 158     u64 *dst_pos = &clk_[0];
 159     for (ClockElem &src_elem : *src) {
 160       u64 epoch = src_elem.epoch;
 161       if (*dst_pos < epoch) {
 162         *dst_pos = epoch;
 163         acquired = true;
 164       }
 165       dst_pos++;
 166     }
 167
 168     // Remember that this thread has acquired this clock.
 169     if (nclk > tid_)
 170       src->elem(tid_).reused = reused_;
 171   }
 172
 173   if (acquired) {
 174     CPP_STAT_INC(StatClockAcquiredSomething);
 175     last_acquire_ = clk_[tid_];
 176     ResetCached(c);
 177   }
 178 }
 179
 180 void ThreadClock::release(ClockCache *c, SyncClock *dst) {
 181   DCHECK_LE(nclk_, kMaxTid);
 182   DCHECK_LE(dst->size_, kMaxTid);
 183
 184   if (dst->size_ == 0) {
 185     // ReleaseStore will correctly set release_store_tid_,
 186     // which can be important for future operations.
 187     ReleaseStore(c, dst);
 188     return;
 189   }
 190
 191   CPP_STAT_INC(StatClockRelease);
 192   // Check if we need to resize dst.
 193   if (dst->size_ < nclk_)
 194     dst->Resize(c, nclk_);
 195
 196   // Check if we had not acquired anything from other threads
 197   // since the last release on dst. If so, we need to update
 198   // only dst->elem(tid_).
 199   if (dst->elem(tid_).epoch > last_acquire_) {
 200     UpdateCurrentThread(c, dst);
 201     if (dst->release_store_tid_ != tid_ ||
 202         dst->release_store_reused_ != reused_)
 203       dst->release_store_tid_ = kInvalidTid;
 204     return;
 205   }
 206
 207   // O(N) release.
 208   CPP_STAT_INC(StatClockReleaseFull);
 209   dst->Unshare(c);
 210   // First, remember whether we've acquired dst.
 211   bool acquired = IsAlreadyAcquired(dst);
 212   if (acquired)
 213     CPP_STAT_INC(StatClockReleaseAcquired);
 214   // Update dst->clk_.
 215   dst->FlushDirty();
 216   uptr i = 0;
 217   for (ClockElem &ce : *dst) {
 218     ce.epoch = max(ce.epoch, clk_[i]);
 219     ce.reused = 0;
 220     i++;
 221   }
 222   // Clear 'acquired' flag in the remaining elements.
 223   if (nclk_ < dst->size_)
 224     CPP_STAT_INC(StatClockReleaseClearTail);
 225   for (uptr i = nclk_; i < dst->size_; i++)
 226     dst->elem(i).reused = 0;
 227   dst->release_store_tid_ = kInvalidTid;
 228   dst->release_store_reused_ = 0;
 229   // If we've acquired dst, remember this fact,
 230   // so that we don't need to acquire it on next acquire.
 231   if (acquired)
 232     dst->elem(tid_).reused = reused_;
 233 }
 234
 235 void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
 236   DCHECK_LE(nclk_, kMaxTid);
 237   DCHECK_LE(dst->size_, kMaxTid);
 238   CPP_STAT_INC(StatClockStore);
 239
 240   if (dst->size_ == 0 && cached_idx_ != 0) {
 241     // Reuse the cached clock.
 242     // Note: we could reuse/cache the cached clock in more cases:
 243     // we could update the existing clock and cache it, or replace it with the
 244     // currently cached clock and release the old one. And for a shared
 245     // existing clock, we could replace it with the currently cached;
 246     // or unshare, update and cache. But, for simplicity, we currnetly reuse
 247     // cached clock only when the target clock is empty.
 248     dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
 249     dst->tab_idx_ = cached_idx_;
 250     dst->size_ = cached_size_;
 251     dst->blocks_ = cached_blocks_;
 252     CHECK_EQ(dst->dirty_[0].tid, kInvalidTid);
 253     // The cached clock is shared (immutable),
 254     // so this is where we store the current clock.
 255     dst->dirty_[0].tid = tid_;
 256     dst->dirty_[0].epoch = clk_[tid_];
 257     dst->release_store_tid_ = tid_;
 258     dst->release_store_reused_ = reused_;
 259     // Rememeber that we don't need to acquire it in future.
 260     dst->elem(tid_).reused = reused_;
 261     // Grab a reference.
 262     atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
 263     return;
 264   }
 265
 266   // Check if we need to resize dst.
 267   if (dst->size_ < nclk_)
 268     dst->Resize(c, nclk_);
 269
 270   if (dst->release_store_tid_ == tid_ &&
 271       dst->release_store_reused_ == reused_ &&
 272       dst->elem(tid_).epoch > last_acquire_) {
 273     CPP_STAT_INC(StatClockStoreFast);
 274     UpdateCurrentThread(c, dst);
 275     return;
 276   }
 277
 278   // O(N) release-store.
 279   CPP_STAT_INC(StatClockStoreFull);
 280   dst->Unshare(c);
 281   // Note: dst can be larger than this ThreadClock.
 282   // This is fine since clk_ beyond size is all zeros.
 283   uptr i = 0;
 284   for (ClockElem &ce : *dst) {
 285     ce.epoch = clk_[i];
 286     ce.reused = 0;
 287     i++;
 288   }
 289   for (uptr i = 0; i < kDirtyTids; i++)
 290     dst->dirty_[i].tid = kInvalidTid;
 291   dst->release_store_tid_ = tid_;
 292   dst->release_store_reused_ = reused_;
 293   // Rememeber that we don't need to acquire it in future.
 294   dst->elem(tid_).reused = reused_;
 295
 296   // If the resulting clock is cachable, cache it for future release operations.
 297   // The clock is always cachable if we released to an empty sync object.
 298   if (cached_idx_ == 0 && dst->Cachable()) {
 299     // Grab a reference to the ClockBlock.
 300     atomic_uint32_t *ref = ref_ptr(dst->tab_);
 301     if (atomic_load(ref, memory_order_acquire) == 1)
 302       atomic_store_relaxed(ref, 2);
 303     else
 304       atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
 305     cached_idx_ = dst->tab_idx_;
 306     cached_size_ = dst->size_;
 307     cached_blocks_ = dst->blocks_;
 308   }
 309 }
 310
 311 void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
 312   CPP_STAT_INC(StatClockAcquireRelease);
 313   acquire(c, dst);
 314   ReleaseStore(c, dst);
 315 }
 316
 317 // Updates only single element related to the current thread in dst->clk_.
 318 void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
 319   // Update the threads time, but preserve 'acquired' flag.
 320   for (unsigned i = 0; i < kDirtyTids; i++) {
 321     SyncClock::Dirty *dirty = &dst->dirty_[i];
 322     const unsigned tid = dirty->tid;
 323     if (tid == tid_ || tid == kInvalidTid) {
 324       CPP_STAT_INC(StatClockReleaseFast);
 325       dirty->tid = tid_;
 326       dirty->epoch = clk_[tid_];
 327       return;
 328     }
 329   }
 330   // Reset all 'acquired' flags, O(N).
 331   // We are going to touch dst elements, so we need to unshare it.
 332   dst->Unshare(c);
 333   CPP_STAT_INC(StatClockReleaseSlow);
 334   dst->elem(tid_).epoch = clk_[tid_];
 335   for (uptr i = 0; i < dst->size_; i++)
 336     dst->elem(i).reused = 0;
 337   dst->FlushDirty();
 338 }
 339
 340 // Checks whether the current thread has already acquired src.
 341 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
 342   if (src->elem(tid_).reused != reused_)
 343     return false;
 344   for (unsigned i = 0; i < kDirtyTids; i++) {
 345     SyncClock::Dirty dirty = src->dirty_[i];
 346     if (dirty.tid != kInvalidTid) {
 347       if (clk_[dirty.tid] < dirty.epoch)
 348         return false;
 349     }
 350   }
 351   return true;
 352 }
 353
 354 // Sets a single element in the vector clock.
 355 // This function is called only from weird places like AcquireGlobal.
 356 void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
 357   DCHECK_LT(tid, kMaxTid);
 358   DCHECK_GE(v, clk_[tid]);
 359   clk_[tid] = v;
 360   if (nclk_ <= tid)
 361     nclk_ = tid + 1;
 362   last_acquire_ = clk_[tid_];
 363   ResetCached(c);
 364 }
 365
 366 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
 367   printf("clock=[");
 368   for (uptr i = 0; i < nclk_; i++)
 369     printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
 370   printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
 371 }
 372
 373 SyncClock::SyncClock() {
 374   ResetImpl();
 375 }
 376
 377 SyncClock::~SyncClock() {
 378   // Reset must be called before dtor.
 379   CHECK_EQ(size_, 0);
 380   CHECK_EQ(blocks_, 0);
 381   CHECK_EQ(tab_, 0);
 382   CHECK_EQ(tab_idx_, 0);
 383 }
 384
 385 void SyncClock::Reset(ClockCache *c) {
 386   if (size_)
 387     UnrefClockBlock(c, tab_idx_, blocks_);
 388   ResetImpl();
 389 }
 390
 391 void SyncClock::ResetImpl() {
 392   tab_ = 0;
 393   tab_idx_ = 0;
 394   size_ = 0;
 395   blocks_ = 0;
 396   release_store_tid_ = kInvalidTid;
 397   release_store_reused_ = 0;
 398   for (uptr i = 0; i < kDirtyTids; i++)
 399     dirty_[i].tid = kInvalidTid;
 400 }
 401
 402 void SyncClock::Resize(ClockCache *c, uptr nclk) {
 403   CPP_STAT_INC(StatClockReleaseResize);
 404   Unshare(c);
 405   if (nclk <= capacity()) {
 406     // Memory is already allocated, just increase the size.
 407     size_ = nclk;
 408     return;
 409   }
 410   if (size_ == 0) {
 411     // Grow from 0 to one-level table.
 412     CHECK_EQ(size_, 0);
 413     CHECK_EQ(blocks_, 0);
 414     CHECK_EQ(tab_, 0);
 415     CHECK_EQ(tab_idx_, 0);
 416     tab_idx_ = ctx->clock_alloc.Alloc(c);
 417     tab_ = ctx->clock_alloc.Map(tab_idx_);
 418     internal_memset(tab_, 0, sizeof(*tab_));
 419     atomic_store_relaxed(ref_ptr(tab_), 1);
 420     size_ = 1;
 421   } else if (size_ > blocks_ * ClockBlock::kClockCount) {
 422     u32 idx = ctx->clock_alloc.Alloc(c);
 423     ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
 424     uptr top = size_ - blocks_ * ClockBlock::kClockCount;
 425     CHECK_LT(top, ClockBlock::kClockCount);
 426     const uptr move = top * sizeof(tab_->clock[0]);
 427     internal_memcpy(&new_cb->clock[0], tab_->clock, move);
 428     internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
 429     internal_memset(tab_->clock, 0, move);
 430     append_block(idx);
 431   }
 432   // At this point we have first level table allocated and all clock elements
 433   // are evacuated from it to a second level block.
 434   // Add second level tables as necessary.
 435   while (nclk > capacity()) {
 436     u32 idx = ctx->clock_alloc.Alloc(c);
 437     ClockBlock *cb = ctx->clock_alloc.Map(idx);
 438     internal_memset(cb, 0, sizeof(*cb));
 439     append_block(idx);
 440   }
 441   size_ = nclk;
 442 }
 443
 444 // Flushes all dirty elements into the main clock array.
 445 void SyncClock::FlushDirty() {
 446   for (unsigned i = 0; i < kDirtyTids; i++) {
 447     Dirty *dirty = &dirty_[i];
 448     if (dirty->tid != kInvalidTid) {
 449       CHECK_LT(dirty->tid, size_);
 450       elem(dirty->tid).epoch = dirty->epoch;
 451       dirty->tid = kInvalidTid;
 452     }
 453   }
 454 }
 455
 456 bool SyncClock::IsShared() const {
 457   if (size_ == 0)
 458     return false;
 459   atomic_uint32_t *ref = ref_ptr(tab_);
 460   u32 v = atomic_load(ref, memory_order_acquire);
 461   CHECK_GT(v, 0);
 462   return v > 1;
 463 }
 464
 465 // Unshares the current clock if it's shared.
 466 // Shared clocks are immutable, so they need to be unshared before any updates.
 467 // Note: this does not apply to dirty entries as they are not shared.
 468 void SyncClock::Unshare(ClockCache *c) {
 469   if (!IsShared())
 470     return;
 471   // First, copy current state into old.
 472   SyncClock old;
 473   old.tab_ = tab_;
 474   old.tab_idx_ = tab_idx_;
 475   old.size_ = size_;
 476   old.blocks_ = blocks_;
 477   old.release_store_tid_ = release_store_tid_;
 478   old.release_store_reused_ = release_store_reused_;
 479   for (unsigned i = 0; i < kDirtyTids; i++)
 480     old.dirty_[i] = dirty_[i];
 481   // Then, clear current object.
 482   ResetImpl();
 483   // Allocate brand new clock in the current object.
 484   Resize(c, old.size_);
 485   // Now copy state back into this object.
 486   Iter old_iter(&old);
 487   for (ClockElem &ce : *this) {
 488     ce = *old_iter;
 489     ++old_iter;
 490   }
 491   release_store_tid_ = old.release_store_tid_;
 492   release_store_reused_ = old.release_store_reused_;
 493   for (unsigned i = 0; i < kDirtyTids; i++)
 494     dirty_[i] = old.dirty_[i];
 495   // Drop reference to old and delete if necessary.
 496   old.Reset(c);
 497 }
 498
 499 // Can we cache this clock for future release operations?
 500 ALWAYS_INLINE bool SyncClock::Cachable() const {
 501   if (size_ == 0)
 502     return false;
 503   for (unsigned i = 0; i < kDirtyTids; i++) {
 504     if (dirty_[i].tid != kInvalidTid)
 505       return false;
 506   }
 507   return atomic_load_relaxed(ref_ptr(tab_)) == 1;
 508 }
 509
 510 // elem linearizes the two-level structure into linear array.
 511 // Note: this is used only for one time accesses, vector operations use
 512 // the iterator as it is much faster.
 513 ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
 514   DCHECK_LT(tid, size_);
 515   const uptr block = tid / ClockBlock::kClockCount;
 516   DCHECK_LE(block, blocks_);
 517   tid %= ClockBlock::kClockCount;
 518   if (block == blocks_)
 519     return tab_->clock[tid];
 520   u32 idx = get_block(block);
 521   ClockBlock *cb = ctx->clock_alloc.Map(idx);
 522   return cb->clock[tid];
 523 }
 524
 525 ALWAYS_INLINE uptr SyncClock::capacity() const {
 526   if (size_ == 0)
 527     return 0;
 528   uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
 529   // How many clock elements we can fit into the first level block.
 530   // +1 for ref counter.
 531   uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
 532   return blocks_ * ClockBlock::kClockCount + top;
 533 }
 534
 535 ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
 536   DCHECK(size_);
 537   DCHECK_LT(bi, blocks_);
 538   return tab_->table[ClockBlock::kBlockIdx - bi];
 539 }
 540
 541 ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
 542   uptr bi = blocks_++;
 543   CHECK_EQ(get_block(bi), 0);
 544   tab_->table[ClockBlock::kBlockIdx - bi] = idx;
 545 }
 546
 547 // Used only by tests.
 548 u64 SyncClock::get(unsigned tid) const {
 549   for (unsigned i = 0; i < kDirtyTids; i++) {
 550     Dirty dirty = dirty_[i];
 551     if (dirty.tid == tid)
 552       return dirty.epoch;
 553   }
 554   return elem(tid).epoch;
 555 }
 556
 557 // Used only by Iter test.
 558 u64 SyncClock::get_clean(unsigned tid) const {
 559   return elem(tid).epoch;
 560 }
 561
 562 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
 563   printf("clock=[");
 564   for (uptr i = 0; i < size_; i++)
 565     printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
 566   printf("] reused=[");
 567   for (uptr i = 0; i < size_; i++)
 568     printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
 569   printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
 570       release_store_tid_, release_store_reused_,
 571       dirty_[0].tid, dirty_[0].epoch,
 572       dirty_[1].tid, dirty_[1].epoch);
 573 }
 574
 575 void SyncClock::Iter::Next() {
 576   // Finished with the current block, move on to the next one.
 577   block_++;
 578   if (block_ < parent_->blocks_) {
 579     // Iterate over the next second level block.
 580     u32 idx = parent_->get_block(block_);
 581     ClockBlock *cb = ctx->clock_alloc.Map(idx);
 582     pos_ = &cb->clock[0];
 583     end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
 584         ClockBlock::kClockCount);
 585     return;
 586   }
 587   if (block_ == parent_->blocks_ &&
 588       parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
 589     // Iterate over elements in the first level block.
 590     pos_ = &parent_->tab_->clock[0];
 591     end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
 592         ClockBlock::kClockCount);
 593     return;
 594   }
 595   parent_ = nullptr;  // denotes end
 596 }
 597 }  // namespace __tsan