contrib/compiler-rt/lib/tsan/rtl/tsan_clock.cc

   1 //===-- tsan_clock.cc -----------------------------------------------------===//
   2 //
   3 //                     The LLVM Compiler Infrastructure
   4 //
   5 // This file is distributed under the University of Illinois Open Source
   6 // License. See LICENSE.TXT for details.
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file is a part of ThreadSanitizer (TSan), a race detector.
  11 //
  12 //===----------------------------------------------------------------------===//
  13 #include "tsan_clock.h"
  14 #include "tsan_rtl.h"
  15 #include "sanitizer_common/sanitizer_placement_new.h"
  16
  17 // SyncClock and ThreadClock implement vector clocks for sync variables
  18 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
  19 // ThreadClock contains fixed-size vector clock for maximum number of threads.
  20 // SyncClock contains growable vector clock for currently necessary number of
  21 // threads.
  22 // Together they implement very simple model of operations, namely:
  23 //
  24 //   void ThreadClock::acquire(const SyncClock *src) {
  25 //     for (int i = 0; i < kMaxThreads; i++)
  26 //       clock[i] = max(clock[i], src->clock[i]);
  27 //   }
  28 //
  29 //   void ThreadClock::release(SyncClock *dst) const {
  30 //     for (int i = 0; i < kMaxThreads; i++)
  31 //       dst->clock[i] = max(dst->clock[i], clock[i]);
  32 //   }
  33 //
  34 //   void ThreadClock::ReleaseStore(SyncClock *dst) const {
  35 //     for (int i = 0; i < kMaxThreads; i++)
  36 //       dst->clock[i] = clock[i];
  37 //   }
  38 //
  39 //   void ThreadClock::acq_rel(SyncClock *dst) {
  40 //     acquire(dst);
  41 //     release(dst);
  42 //   }
  43 //
  44 // Conformance to this model is extensively verified in tsan_clock_test.cc.
  45 // However, the implementation is significantly more complex. The complexity
  46 // allows to implement important classes of use cases in O(1) instead of O(N).
  47 //
  48 // The use cases are:
  49 // 1. Singleton/once atomic that has a single release-store operation followed
  50 //    by zillions of acquire-loads (the acquire-load is O(1)).
  51 // 2. Thread-local mutex (both lock and unlock can be O(1)).
  52 // 3. Leaf mutex (unlock is O(1)).
  53 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
  54 // 5. An atomic with a single writer (writes can be O(1)).
  55 // The implementation dynamically adopts to workload. So if an atomic is in
  56 // read-only phase, these reads will be O(1); if it later switches to read/write
  57 // phase, the implementation will correctly handle that by switching to O(N).
  58 //
  59 // Thread-safety note: all const operations on SyncClock's are conducted under
  60 // a shared lock; all non-const operations on SyncClock's are conducted under
  61 // an exclusive lock; ThreadClock's are private to respective threads and so
  62 // do not need any protection.
  63 //
  64 // Description of ThreadClock state:
  65 // clk_ - fixed size vector clock.
  66 // nclk_ - effective size of the vector clock (the rest is zeros).
  67 // tid_ - index of the thread associated with he clock ("current thread").
  68 // last_acquire_ - current thread time when it acquired something from
  69 //   other threads.
  70 //
  71 // Description of SyncClock state:
  72 // clk_ - variable size vector clock, low kClkBits hold timestamp,
  73 //   the remaining bits hold "acquired" flag (the actual value is thread's
  74 //   reused counter);
  75 //   if acquried == thr->reused_, then the respective thread has already
  76 //   acquired this clock (except possibly dirty_tids_).
  77 // dirty_tids_ - holds up to two indeces in the vector clock that other threads
  78 //   need to acquire regardless of "acquired" flag value;
  79 // release_store_tid_ - denotes that the clock state is a result of
  80 //   release-store operation by the thread with release_store_tid_ index.
  81 // release_store_reused_ - reuse count of release_store_tid_.
  82
  83 // We don't have ThreadState in these methods, so this is an ugly hack that
  84 // works only in C++.
  85 #ifndef SANITIZER_GO
  86 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
  87 #else
  88 # define CPP_STAT_INC(typ) (void)0
  89 #endif
  90
  91 namespace __tsan {
  92
  93 const unsigned kInvalidTid = (unsigned)-1;
  94
  95 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
  96     : tid_(tid)
  97     , reused_(reused + 1) {  // 0 has special meaning
  98   CHECK_LT(tid, kMaxTidInClock);
  99   CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
 100   nclk_ = tid_ + 1;
 101   last_acquire_ = 0;
 102   internal_memset(clk_, 0, sizeof(clk_));
 103   clk_[tid_].reused = reused_;
 104 }
 105
 106 void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
 107   DCHECK_LE(nclk_, kMaxTid);
 108   DCHECK_LE(src->size_, kMaxTid);
 109   CPP_STAT_INC(StatClockAcquire);
 110
 111   // Check if it's empty -> no need to do anything.
 112   const uptr nclk = src->size_;
 113   if (nclk == 0) {
 114     CPP_STAT_INC(StatClockAcquireEmpty);
 115     return;
 116   }
 117
 118   // Check if we've already acquired src after the last release operation on src
 119   bool acquired = false;
 120   if (nclk > tid_) {
 121     CPP_STAT_INC(StatClockAcquireLarge);
 122     if (src->elem(tid_).reused == reused_) {
 123       CPP_STAT_INC(StatClockAcquireRepeat);
 124       for (unsigned i = 0; i < kDirtyTids; i++) {
 125         unsigned tid = src->dirty_tids_[i];
 126         if (tid != kInvalidTid) {
 127           u64 epoch = src->elem(tid).epoch;
 128           if (clk_[tid].epoch < epoch) {
 129             clk_[tid].epoch = epoch;
 130             acquired = true;
 131           }
 132         }
 133       }
 134       if (acquired) {
 135         CPP_STAT_INC(StatClockAcquiredSomething);
 136         last_acquire_ = clk_[tid_].epoch;
 137       }
 138       return;
 139     }
 140   }
 141
 142   // O(N) acquire.
 143   CPP_STAT_INC(StatClockAcquireFull);
 144   nclk_ = max(nclk_, nclk);
 145   for (uptr i = 0; i < nclk; i++) {
 146     u64 epoch = src->elem(i).epoch;
 147     if (clk_[i].epoch < epoch) {
 148       clk_[i].epoch = epoch;
 149       acquired = true;
 150     }
 151   }
 152
 153   // Remember that this thread has acquired this clock.
 154   if (nclk > tid_)
 155     src->elem(tid_).reused = reused_;
 156
 157   if (acquired) {
 158     CPP_STAT_INC(StatClockAcquiredSomething);
 159     last_acquire_ = clk_[tid_].epoch;
 160   }
 161 }
 162
 163 void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
 164   DCHECK_LE(nclk_, kMaxTid);
 165   DCHECK_LE(dst->size_, kMaxTid);
 166
 167   if (dst->size_ == 0) {
 168     // ReleaseStore will correctly set release_store_tid_,
 169     // which can be important for future operations.
 170     ReleaseStore(c, dst);
 171     return;
 172   }
 173
 174   CPP_STAT_INC(StatClockRelease);
 175   // Check if we need to resize dst.
 176   if (dst->size_ < nclk_)
 177     dst->Resize(c, nclk_);
 178
 179   // Check if we had not acquired anything from other threads
 180   // since the last release on dst. If so, we need to update
 181   // only dst->elem(tid_).
 182   if (dst->elem(tid_).epoch > last_acquire_) {
 183     UpdateCurrentThread(dst);
 184     if (dst->release_store_tid_ != tid_ ||
 185         dst->release_store_reused_ != reused_)
 186       dst->release_store_tid_ = kInvalidTid;
 187     return;
 188   }
 189
 190   // O(N) release.
 191   CPP_STAT_INC(StatClockReleaseFull);
 192   // First, remember whether we've acquired dst.
 193   bool acquired = IsAlreadyAcquired(dst);
 194   if (acquired)
 195     CPP_STAT_INC(StatClockReleaseAcquired);
 196   // Update dst->clk_.
 197   for (uptr i = 0; i < nclk_; i++) {
 198     ClockElem &ce = dst->elem(i);
 199     ce.epoch = max(ce.epoch, clk_[i].epoch);
 200     ce.reused = 0;
 201   }
 202   // Clear 'acquired' flag in the remaining elements.
 203   if (nclk_ < dst->size_)
 204     CPP_STAT_INC(StatClockReleaseClearTail);
 205   for (uptr i = nclk_; i < dst->size_; i++)
 206     dst->elem(i).reused = 0;
 207   for (unsigned i = 0; i < kDirtyTids; i++)
 208     dst->dirty_tids_[i] = kInvalidTid;
 209   dst->release_store_tid_ = kInvalidTid;
 210   dst->release_store_reused_ = 0;
 211   // If we've acquired dst, remember this fact,
 212   // so that we don't need to acquire it on next acquire.
 213   if (acquired)
 214     dst->elem(tid_).reused = reused_;
 215 }
 216
 217 void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
 218   DCHECK_LE(nclk_, kMaxTid);
 219   DCHECK_LE(dst->size_, kMaxTid);
 220   CPP_STAT_INC(StatClockStore);
 221
 222   // Check if we need to resize dst.
 223   if (dst->size_ < nclk_)
 224     dst->Resize(c, nclk_);
 225
 226   if (dst->release_store_tid_ == tid_ &&
 227       dst->release_store_reused_ == reused_ &&
 228       dst->elem(tid_).epoch > last_acquire_) {
 229     CPP_STAT_INC(StatClockStoreFast);
 230     UpdateCurrentThread(dst);
 231     return;
 232   }
 233
 234   // O(N) release-store.
 235   CPP_STAT_INC(StatClockStoreFull);
 236   for (uptr i = 0; i < nclk_; i++) {
 237     ClockElem &ce = dst->elem(i);
 238     ce.epoch = clk_[i].epoch;
 239     ce.reused = 0;
 240   }
 241   // Clear the tail of dst->clk_.
 242   if (nclk_ < dst->size_) {
 243     for (uptr i = nclk_; i < dst->size_; i++) {
 244       ClockElem &ce = dst->elem(i);
 245       ce.epoch = 0;
 246       ce.reused = 0;
 247     }
 248     CPP_STAT_INC(StatClockStoreTail);
 249   }
 250   for (unsigned i = 0; i < kDirtyTids; i++)
 251     dst->dirty_tids_[i] = kInvalidTid;
 252   dst->release_store_tid_ = tid_;
 253   dst->release_store_reused_ = reused_;
 254   // Rememeber that we don't need to acquire it in future.
 255   dst->elem(tid_).reused = reused_;
 256 }
 257
 258 void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
 259   CPP_STAT_INC(StatClockAcquireRelease);
 260   acquire(c, dst);
 261   ReleaseStore(c, dst);
 262 }
 263
 264 // Updates only single element related to the current thread in dst->clk_.
 265 void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
 266   // Update the threads time, but preserve 'acquired' flag.
 267   dst->elem(tid_).epoch = clk_[tid_].epoch;
 268
 269   for (unsigned i = 0; i < kDirtyTids; i++) {
 270     if (dst->dirty_tids_[i] == tid_) {
 271       CPP_STAT_INC(StatClockReleaseFast1);
 272       return;
 273     }
 274     if (dst->dirty_tids_[i] == kInvalidTid) {
 275       CPP_STAT_INC(StatClockReleaseFast2);
 276       dst->dirty_tids_[i] = tid_;
 277       return;
 278     }
 279   }
 280   // Reset all 'acquired' flags, O(N).
 281   CPP_STAT_INC(StatClockReleaseSlow);
 282   for (uptr i = 0; i < dst->size_; i++)
 283     dst->elem(i).reused = 0;
 284   for (unsigned i = 0; i < kDirtyTids; i++)
 285     dst->dirty_tids_[i] = kInvalidTid;
 286 }
 287
 288 // Checks whether the current threads has already acquired src.
 289 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
 290   if (src->elem(tid_).reused != reused_)
 291     return false;
 292   for (unsigned i = 0; i < kDirtyTids; i++) {
 293     unsigned tid = src->dirty_tids_[i];
 294     if (tid != kInvalidTid) {
 295       if (clk_[tid].epoch < src->elem(tid).epoch)
 296         return false;
 297     }
 298   }
 299   return true;
 300 }
 301
 302 void SyncClock::Resize(ClockCache *c, uptr nclk) {
 303   CPP_STAT_INC(StatClockReleaseResize);
 304   if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
 305       RoundUpTo(size_, ClockBlock::kClockCount)) {
 306     // Growing within the same block.
 307     // Memory is already allocated, just increase the size.
 308     size_ = nclk;
 309     return;
 310   }
 311   if (nclk <= ClockBlock::kClockCount) {
 312     // Grow from 0 to one-level table.
 313     CHECK_EQ(size_, 0);
 314     CHECK_EQ(tab_, 0);
 315     CHECK_EQ(tab_idx_, 0);
 316     size_ = nclk;
 317     tab_idx_ = ctx->clock_alloc.Alloc(c);
 318     tab_ = ctx->clock_alloc.Map(tab_idx_);
 319     internal_memset(tab_, 0, sizeof(*tab_));
 320     return;
 321   }
 322   // Growing two-level table.
 323   if (size_ == 0) {
 324     // Allocate first level table.
 325     tab_idx_ = ctx->clock_alloc.Alloc(c);
 326     tab_ = ctx->clock_alloc.Map(tab_idx_);
 327     internal_memset(tab_, 0, sizeof(*tab_));
 328   } else if (size_ <= ClockBlock::kClockCount) {
 329     // Transform one-level table to two-level table.
 330     u32 old = tab_idx_;
 331     tab_idx_ = ctx->clock_alloc.Alloc(c);
 332     tab_ = ctx->clock_alloc.Map(tab_idx_);
 333     internal_memset(tab_, 0, sizeof(*tab_));
 334     tab_->table[0] = old;
 335   }
 336   // At this point we have first level table allocated.
 337   // Add second level tables as necessary.
 338   for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
 339       i < nclk; i += ClockBlock::kClockCount) {
 340     u32 idx = ctx->clock_alloc.Alloc(c);
 341     ClockBlock *cb = ctx->clock_alloc.Map(idx);
 342     internal_memset(cb, 0, sizeof(*cb));
 343     CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
 344     tab_->table[i/ClockBlock::kClockCount] = idx;
 345   }
 346   size_ = nclk;
 347 }
 348
 349 // Sets a single element in the vector clock.
 350 // This function is called only from weird places like AcquireGlobal.
 351 void ThreadClock::set(unsigned tid, u64 v) {
 352   DCHECK_LT(tid, kMaxTid);
 353   DCHECK_GE(v, clk_[tid].epoch);
 354   clk_[tid].epoch = v;
 355   if (nclk_ <= tid)
 356     nclk_ = tid + 1;
 357   last_acquire_ = clk_[tid_].epoch;
 358 }
 359
 360 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
 361   printf("clock=[");
 362   for (uptr i = 0; i < nclk_; i++)
 363     printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
 364   printf("] reused=[");
 365   for (uptr i = 0; i < nclk_; i++)
 366     printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
 367   printf("] tid=%u/%u last_acq=%llu",
 368       tid_, reused_, last_acquire_);
 369 }
 370
 371 SyncClock::SyncClock()
 372     : release_store_tid_(kInvalidTid)
 373     , release_store_reused_()
 374     , tab_()
 375     , tab_idx_()
 376     , size_() {
 377   for (uptr i = 0; i < kDirtyTids; i++)
 378     dirty_tids_[i] = kInvalidTid;
 379 }
 380
 381 SyncClock::~SyncClock() {
 382   // Reset must be called before dtor.
 383   CHECK_EQ(size_, 0);
 384   CHECK_EQ(tab_, 0);
 385   CHECK_EQ(tab_idx_, 0);
 386 }
 387
 388 void SyncClock::Reset(ClockCache *c) {
 389   if (size_ == 0) {
 390     // nothing
 391   } else if (size_ <= ClockBlock::kClockCount) {
 392     // One-level table.
 393     ctx->clock_alloc.Free(c, tab_idx_);
 394   } else {
 395     // Two-level table.
 396     for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
 397       ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
 398     ctx->clock_alloc.Free(c, tab_idx_);
 399   }
 400   tab_ = 0;
 401   tab_idx_ = 0;
 402   size_ = 0;
 403   release_store_tid_ = kInvalidTid;
 404   release_store_reused_ = 0;
 405   for (uptr i = 0; i < kDirtyTids; i++)
 406     dirty_tids_[i] = kInvalidTid;
 407 }
 408
 409 ClockElem &SyncClock::elem(unsigned tid) const {
 410   DCHECK_LT(tid, size_);
 411   if (size_ <= ClockBlock::kClockCount)
 412     return tab_->clock[tid];
 413   u32 idx = tab_->table[tid / ClockBlock::kClockCount];
 414   ClockBlock *cb = ctx->clock_alloc.Map(idx);
 415   return cb->clock[tid % ClockBlock::kClockCount];
 416 }
 417
 418 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
 419   printf("clock=[");
 420   for (uptr i = 0; i < size_; i++)
 421     printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
 422   printf("] reused=[");
 423   for (uptr i = 0; i < size_; i++)
 424     printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
 425   printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
 426       release_store_tid_, release_store_reused_,
 427       dirty_tids_[0], dirty_tids_[1]);
 428 }
 429 }  // namespace __tsan