1 //===-- tsan_clock.cc -----------------------------------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file is a part of ThreadSanitizer (TSan), a race detector.
12 //===----------------------------------------------------------------------===//
13 #include "tsan_clock.h"
15 #include "sanitizer_common/sanitizer_placement_new.h"
17 // SyncClock and ThreadClock implement vector clocks for sync variables
18 // (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
19 // ThreadClock contains fixed-size vector clock for maximum number of threads.
20 // SyncClock contains growable vector clock for currently necessary number of
22 // Together they implement very simple model of operations, namely:
24 // void ThreadClock::acquire(const SyncClock *src) {
25 // for (int i = 0; i < kMaxThreads; i++)
26 // clock[i] = max(clock[i], src->clock[i]);
29 // void ThreadClock::release(SyncClock *dst) const {
30 // for (int i = 0; i < kMaxThreads; i++)
31 // dst->clock[i] = max(dst->clock[i], clock[i]);
34 // void ThreadClock::ReleaseStore(SyncClock *dst) const {
35 // for (int i = 0; i < kMaxThreads; i++)
36 // dst->clock[i] = clock[i];
39 // void ThreadClock::acq_rel(SyncClock *dst) {
44 // Conformance to this model is extensively verified in tsan_clock_test.cc.
45 // However, the implementation is significantly more complex. The complexity
46 // makes it possible to implement important classes of use cases in O(1) instead of O(N).
49 // 1. Singleton/once atomic that has a single release-store operation followed
50 // by zillions of acquire-loads (the acquire-load is O(1)).
51 // 2. Thread-local mutex (both lock and unlock can be O(1)).
52 // 3. Leaf mutex (unlock is O(1)).
53 // 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
54 // 5. An atomic with a single writer (writes can be O(1)).
55 // The implementation dynamically adapts to the workload. So if an atomic is in
56 // read-only phase, these reads will be O(1); if it later switches to read/write
57 // phase, the implementation will correctly handle that by switching to O(N).
59 // Thread-safety note: all const operations on SyncClock's are conducted under
60 // a shared lock; all non-const operations on SyncClock's are conducted under
61 // an exclusive lock; ThreadClock's are private to respective threads and so
62 // do not need any protection.
64 // Description of ThreadClock state:
65 // clk_ - fixed size vector clock.
66 // nclk_ - effective size of the vector clock (the rest is zeros).
67 // tid_ - index of the thread associated with the clock ("current thread").
68 // last_acquire_ - current thread time when it acquired something from
71 // Description of SyncClock state:
72 // clk_ - variable size vector clock, low kClkBits hold timestamp,
73 // the remaining bits hold "acquired" flag (the actual value is thread's
75 // if acquired == thr->reused_, then the respective thread has already
76 // acquired this clock (except possibly dirty_tids_).
77 // dirty_tids_ - holds up to two indices in the vector clock that other threads
78 // need to acquire regardless of "acquired" flag value;
79 // release_store_tid_ - denotes that the clock state is a result of
80 // release-store operation by the thread with release_store_tid_ index.
81 // release_store_reused_ - reuse count of release_store_tid_.
83 // We don't have ThreadState in these methods, so this is an ugly hack that
// NOTE(review): the remainder of the sentence above and the preprocessor
// conditional that chooses between the two definitions below are not present
// in this listing.
// This form bumps statistics counter `typ` on the state returned by
// cur_thread() (presumably the stats-enabled build -- confirm).
86 # define CPP_STAT_INC(typ) StatInc(cur_thread(), typ)
// This form expands to a no-op expression, so call sites stay valid statements.
88 # define CPP_STAT_INC(typ) (void)0
// Sentinel tid meaning "no thread" (the largest unsigned value). In this file
// it marks unused dirty_tids_ slots and an unset release_store_tid_.
93 const unsigned kInvalidTid = (unsigned)-1;
// Constructs the vector clock of the thread with index `tid`.
//   tid    - index of the owning thread; CHECKed to be below kMaxTidInClock.
//   reused - reuse count supplied by the caller; it is stored incremented by
//            one so that the value 0 keeps its special meaning.
// The whole clk_ array is zeroed and the thread's own slot is then tagged
// with reused_.
// NOTE(review): part of the member-initializer list and the closing brace are
// not present in this listing.
95 ThreadClock::ThreadClock(unsigned tid, unsigned reused)
97 , reused_(reused + 1) { // 0 has special meaning
98 CHECK_LT(tid, kMaxTidInClock);
// reused_ must round-trip unchanged through a left and then right shift by
// kClkBits as a u64, i.e. the shift must not drop any of its bits.
99 CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
102 internal_memset(clk_, 0, sizeof(clk_));
103 clk_[tid_].reused = reused_;
// Acquire operation: folds the timestamps stored in sync clock `src` into
// this thread's clock, raising every local epoch that is smaller than the
// corresponding epoch in `src` (see the model at the top of the file).
// Paths visible in this listing:
//   - repeat path: src's slot for this thread already carries our reuse
//     counter, so only the tids listed in src->dirty_tids_ are examined;
//   - full path: all src->size_ elements are scanned and our reuse counter is
//     then written into src's slot for this thread.
// Next to each StatClockAcquiredSomething counter, last_acquire_ is set to
// this thread's current epoch (the guarding condition is not shown here).
// NOTE(review): branch heads, returns and closing braces of this function are
// missing from this listing; `c` is not referenced in the lines shown.
106 void ThreadClock::acquire(ClockCache *c, const SyncClock *src) {
107 DCHECK_LE(nclk_, kMaxTid);
108 DCHECK_LE(src->size_, kMaxTid);
109 CPP_STAT_INC(StatClockAcquire);
111 // Check if it's empty -> no need to do anything.
112 const uptr nclk = src->size_;
114 CPP_STAT_INC(StatClockAcquireEmpty);
118 // Check if we've already acquired src after the last release operation on src
119 bool acquired = false;
121 CPP_STAT_INC(StatClockAcquireLarge);
122 if (src->elem(tid_).reused == reused_) {
123 CPP_STAT_INC(StatClockAcquireRepeat);
// Only the dirty entries can be newer than what we already hold.
124 for (unsigned i = 0; i < kDirtyTids; i++) {
125 unsigned tid = src->dirty_tids_[i];
126 if (tid != kInvalidTid) {
127 u64 epoch = src->elem(tid).epoch;
128 if (clk_[tid].epoch < epoch) {
129 clk_[tid].epoch = epoch;
135 CPP_STAT_INC(StatClockAcquiredSomething);
136 last_acquire_ = clk_[tid_].epoch;
143 CPP_STAT_INC(StatClockAcquireFull);
// The effective size of our clock never shrinks.
144 nclk_ = max(nclk_, nclk);
145 for (uptr i = 0; i < nclk; i++) {
146 u64 epoch = src->elem(i).epoch;
147 if (clk_[i].epoch < epoch) {
148 clk_[i].epoch = epoch;
153 // Remember that this thread has acquired this clock.
// elem() is a const member that returns a mutable reference, which is why
// this write through the const `src` pointer compiles.
155 src->elem(tid_).reused = reused_;
158 CPP_STAT_INC(StatClockAcquiredSomething);
159 last_acquire_ = clk_[tid_].epoch;
// Release operation: merges this thread's clock into sync clock `dst`, so
// that each dst epoch becomes the maximum of its old value and ours.
//   c   - clock cache handed on to ReleaseStore() / SyncClock::Resize().
//   dst - sync clock to update; it is grown to nclk_ elements when smaller.
// Paths visible in this listing:
//   - empty dst: delegated to ReleaseStore();
//   - fast path: dst's epoch for this thread is newer than last_acquire_, so
//     only this thread's element is refreshed via UpdateCurrentThread();
//   - full path: element-wise max over the first nclk_ entries, followed by a
//     reset of the reuse tags in the tail, of dirty_tids_ and of the
//     release-store marker.
// NOTE(review): returns, some branch heads and closing braces are missing
// from this listing.
163 void ThreadClock::release(ClockCache *c, SyncClock *dst) const {
164 DCHECK_LE(nclk_, kMaxTid);
165 DCHECK_LE(dst->size_, kMaxTid);
167 if (dst->size_ == 0) {
168 // ReleaseStore will correctly set release_store_tid_,
169 // which can be important for future operations.
170 ReleaseStore(c, dst);
174 CPP_STAT_INC(StatClockRelease);
175 // Check if we need to resize dst.
176 if (dst->size_ < nclk_)
177 dst->Resize(c, nclk_);
179 // Check if we had not acquired anything from other threads
180 // since the last release on dst. If so, we need to update
181 // only dst->elem(tid_).
182 if (dst->elem(tid_).epoch > last_acquire_) {
183 UpdateCurrentThread(dst);
// The release-store marker is kept only if it names this very thread with
// the same reuse count; otherwise it is invalidated.
184 if (dst->release_store_tid_ != tid_ ||
185 dst->release_store_reused_ != reused_)
186 dst->release_store_tid_ = kInvalidTid;
191 CPP_STAT_INC(StatClockReleaseFull);
192 // First, remember whether we've acquired dst.
193 bool acquired = IsAlreadyAcquired(dst);
195 CPP_STAT_INC(StatClockReleaseAcquired);
197 for (uptr i = 0; i < nclk_; i++) {
198 ClockElem &ce = dst->elem(i);
199 ce.epoch = max(ce.epoch, clk_[i].epoch);
202 // Clear 'acquired' flag in the remaining elements.
// The brace-less `if` below guards only the statistics counter; the loop
// after it simply runs zero times when nclk_ >= dst->size_.
203 if (nclk_ < dst->size_)
204 CPP_STAT_INC(StatClockReleaseClearTail);
205 for (uptr i = nclk_; i < dst->size_; i++)
206 dst->elem(i).reused = 0;
207 for (unsigned i = 0; i < kDirtyTids; i++)
208 dst->dirty_tids_[i] = kInvalidTid;
209 dst->release_store_tid_ = kInvalidTid;
210 dst->release_store_reused_ = 0;
211 // If we've acquired dst, remember this fact,
212 // so that we don't need to acquire it on next acquire.
214 dst->elem(tid_).reused = reused_;
// Release-store operation: overwrites sync clock `dst` with this thread's
// clock (dst epochs are assigned, not max-merged).
//   c   - clock cache handed on to SyncClock::Resize().
//   dst - sync clock to overwrite; it is grown to nclk_ elements when smaller.
// Paths visible in this listing:
//   - fast path: dst's release-store marker already names this thread with
//     the same reuse count and dst's epoch for this thread is newer than
//     last_acquire_, so only UpdateCurrentThread() is needed;
//   - full path: copies the first nclk_ epochs, processes the tail of dst,
//     resets dirty_tids_, records this thread in the release-store marker and
//     tags dst's slot for this thread with reused_.
// NOTE(review): the return after the fast path, the body of the tail loop
// and closing braces are missing from this listing.
217 void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) const {
218 DCHECK_LE(nclk_, kMaxTid);
219 DCHECK_LE(dst->size_, kMaxTid);
220 CPP_STAT_INC(StatClockStore);
222 // Check if we need to resize dst.
223 if (dst->size_ < nclk_)
224 dst->Resize(c, nclk_);
226 if (dst->release_store_tid_ == tid_ &&
227 dst->release_store_reused_ == reused_ &&
228 dst->elem(tid_).epoch > last_acquire_) {
229 CPP_STAT_INC(StatClockStoreFast);
230 UpdateCurrentThread(dst);
234 // O(N) release-store.
235 CPP_STAT_INC(StatClockStoreFull);
236 for (uptr i = 0; i < nclk_; i++) {
237 ClockElem &ce = dst->elem(i);
238 ce.epoch = clk_[i].epoch;
241 // Clear the tail of dst->clk_.
242 if (nclk_ < dst->size_) {
243 for (uptr i = nclk_; i < dst->size_; i++) {
244 ClockElem &ce = dst->elem(i);
248 CPP_STAT_INC(StatClockStoreTail);
250 for (unsigned i = 0; i < kDirtyTids; i++)
251 dst->dirty_tids_[i] = kInvalidTid;
252 dst->release_store_tid_ = tid_;
253 dst->release_store_reused_ = reused_;
254 // Remember that we don't need to acquire it in future.
255 dst->elem(tid_).reused = reused_;
// Combined acquire/release operation on sync clock `dst`.
// In the lines shown it bumps StatClockAcquireRelease and finishes with
// ReleaseStore(c, dst).
// NOTE(review): a statement between these two lines is missing from this
// listing (presumably the acquire step -- confirm against the full file).
258 void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
259 CPP_STAT_INC(StatClockAcquireRelease);
261 ReleaseStore(c, dst);
264 // Updates only single element related to the current thread in dst->clk_.
// After copying the epoch, the function makes sure other threads will still
// pick the new value up: it looks for this thread in dst->dirty_tids_, or
// claims a free slot there. The code after the scan (labelled StatClockReleaseSlow)
// clears every reuse tag in dst and empties dirty_tids_.
// NOTE(review): the early exits after the two fast cases and the closing
// braces are missing from this listing.
265 void ThreadClock::UpdateCurrentThread(SyncClock *dst) const {
266 // Update the thread's time, but preserve 'acquired' flag.
267 dst->elem(tid_).epoch = clk_[tid_].epoch;
269 for (unsigned i = 0; i < kDirtyTids; i++) {
// Case 1: this thread is already listed as dirty.
270 if (dst->dirty_tids_[i] == tid_) {
271 CPP_STAT_INC(StatClockReleaseFast1);
// Case 2: a free dirty slot exists; take it.
274 if (dst->dirty_tids_[i] == kInvalidTid) {
275 CPP_STAT_INC(StatClockReleaseFast2);
276 dst->dirty_tids_[i] = tid_;
280 // Reset all 'acquired' flags, O(N).
281 CPP_STAT_INC(StatClockReleaseSlow);
282 for (uptr i = 0; i < dst->size_; i++)
283 dst->elem(i).reused = 0;
284 for (unsigned i = 0; i < kDirtyTids; i++)
285 dst->dirty_tids_[i] = kInvalidTid;
288 // Checks whether the current thread has already acquired src.
// Two conditions are tested in the lines shown:
//   1. src's slot for this thread carries our reuse counter;
//   2. for every valid entry of src->dirty_tids_, our epoch for that tid is
//      not older than the epoch stored in src.
// NOTE(review): the return statements are missing from this listing
// (presumably false when a test fails and true otherwise -- confirm).
289 bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
290 if (src->elem(tid_).reused != reused_)
292 for (unsigned i = 0; i < kDirtyTids; i++) {
293 unsigned tid = src->dirty_tids_[i];
294 if (tid != kInvalidTid) {
295 if (clk_[tid].epoch < src->elem(tid).epoch)
// Grows the sync clock so that it can hold `nclk` elements.
//   c    - clock cache passed to ctx->clock_alloc.Alloc() for new blocks.
//   nclk - requested number of elements.
// Cases visible in this listing:
//   - the rounded-up block count does not change: no allocation is needed;
//   - nclk fits into one ClockBlock: a single block is allocated (one-level
//     table);
//   - otherwise a first-level table is allocated (or the existing single
//     block is moved under a new first-level table as entry 0) and
//     second-level blocks are added for the new range.
// Every newly allocated block is zero-filled with internal_memset.
// NOTE(review): the size_ updates, returns, some branch heads and the
// declaration of `old` are missing from this listing.
302 void SyncClock::Resize(ClockCache *c, uptr nclk) {
303 CPP_STAT_INC(StatClockReleaseResize);
304 if (RoundUpTo(nclk, ClockBlock::kClockCount) <=
305 RoundUpTo(size_, ClockBlock::kClockCount)) {
306 // Growing within the same block.
307 // Memory is already allocated, just increase the size.
311 if (nclk <= ClockBlock::kClockCount) {
312 // Grow from 0 to one-level table.
315 CHECK_EQ(tab_idx_, 0);
317 tab_idx_ = ctx->clock_alloc.Alloc(c);
318 tab_ = ctx->clock_alloc.Map(tab_idx_);
319 internal_memset(tab_, 0, sizeof(*tab_));
322 // Growing two-level table.
324 // Allocate first level table.
325 tab_idx_ = ctx->clock_alloc.Alloc(c);
326 tab_ = ctx->clock_alloc.Map(tab_idx_);
327 internal_memset(tab_, 0, sizeof(*tab_));
328 } else if (size_ <= ClockBlock::kClockCount) {
329 // Transform one-level table to two-level table.
331 tab_idx_ = ctx->clock_alloc.Alloc(c);
332 tab_ = ctx->clock_alloc.Map(tab_idx_);
333 internal_memset(tab_, 0, sizeof(*tab_));
// `old` is declared on a line not shown here; presumably it holds the index
// of the former single block -- confirm.
334 tab_->table[0] = old;
336 // At this point we have first level table allocated.
337 // Add second level tables as necessary.
338 for (uptr i = RoundUpTo(size_, ClockBlock::kClockCount);
339 i < nclk; i += ClockBlock::kClockCount) {
340 u32 idx = ctx->clock_alloc.Alloc(c);
341 ClockBlock *cb = ctx->clock_alloc.Map(idx);
342 internal_memset(cb, 0, sizeof(*cb));
// The slot must still be empty (zero) before the new block is linked in.
343 CHECK_EQ(tab_->table[i/ClockBlock::kClockCount], 0);
344 tab_->table[i/ClockBlock::kClockCount] = idx;
349 // Sets a single element in the vector clock.
350 // This function is called only from weird places like AcquireGlobal.
//   tid - index of the element to set; debug-checked to be below kMaxTid.
//   v   - new epoch; debug-checked not to be smaller than the current epoch
//         of that element, so the clock never moves backwards.
// last_acquire_ is then set to this thread's own current epoch.
// NOTE(review): the lines that perform the actual store are missing from
// this listing.
351 void ThreadClock::set(unsigned tid, u64 v) {
352 DCHECK_LT(tid, kMaxTid);
353 DCHECK_GE(v, clk_[tid].epoch);
357 last_acquire_ = clk_[tid_].epoch;
// Writes a textual dump of this thread clock through the supplied
// printf-style callback. The parameter is itself named `printf`, so every
// call below goes to the callback.
// Output in the lines shown: the comma-separated epochs of the first nclk_
// elements, then their reuse tags, then tid_/reused_ and last_acquire_.
// NOTE(review): the line that prints the opening part of the dump and the
// closing brace are missing from this listing.
360 void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
362 for (uptr i = 0; i < nclk_; i++)
363 printf("%s%llu", i == 0 ? "" : ",", clk_[i].epoch);
364 printf("] reused=[");
365 for (uptr i = 0; i < nclk_; i++)
366 printf("%s%llu", i == 0 ? "" : ",", clk_[i].reused);
367 printf("] tid=%u/%u last_acq=%llu",
368 tid_, reused_, last_acquire_);
// Constructs a sync clock with no release-store owner: release_store_tid_
// is kInvalidTid, release_store_reused_ is value-initialized and every
// dirty_tids_ slot is marked unused.
// NOTE(review): the remaining member initializers and the braces are missing
// from this listing.
371 SyncClock::SyncClock()
372 : release_store_tid_(kInvalidTid)
373 , release_store_reused_()
377 for (uptr i = 0; i < kDirtyTids; i++)
378 dirty_tids_[i] = kInvalidTid;
// Destructor. It frees nothing in the lines shown; it only CHECKs that the
// clock no longer refers to an allocated table (tab_idx_ == 0).
// NOTE(review): further checks may sit on lines missing from this listing.
381 SyncClock::~SyncClock() {
382 // Reset must be called before dtor.
385 CHECK_EQ(tab_idx_, 0);
// Returns the clock's storage to ctx->clock_alloc and restores the
// release-store marker and dirty_tids_ to their "unset" values.
//   c - clock cache passed to ctx->clock_alloc.Free().
// One-level table (size_ <= ClockBlock::kClockCount): the single block is
// freed. Two-level table: every second-level block is freed first, then the
// first-level table itself.
// NOTE(review): the first branch, the resets of size_/tab_/tab_idx_ and the
// closing braces are missing from this listing.
388 void SyncClock::Reset(ClockCache *c) {
391 } else if (size_ <= ClockBlock::kClockCount) {
393 ctx->clock_alloc.Free(c, tab_idx_);
396 for (uptr i = 0; i < size_; i += ClockBlock::kClockCount)
397 ctx->clock_alloc.Free(c, tab_->table[i / ClockBlock::kClockCount]);
398 ctx->clock_alloc.Free(c, tab_idx_);
403 release_store_tid_ = kInvalidTid;
404 release_store_reused_ = 0;
405 for (uptr i = 0; i < kDirtyTids; i++)
406 dirty_tids_[i] = kInvalidTid;
// Returns a reference to the clock element of thread `tid`.
//   tid - element index; debug-checked to be below size_.
// With a one-level table (size_ <= ClockBlock::kClockCount) the element sits
// directly in tab_->clock. Otherwise tab_->table holds block indices: the
// block for `tid` is mapped through ctx->clock_alloc and the element is
// taken at offset tid % kClockCount inside it.
// The method is const but returns a non-const reference, so callers can
// modify elements through a const SyncClock.
409 ClockElem &SyncClock::elem(unsigned tid) const {
410 DCHECK_LT(tid, size_);
411 if (size_ <= ClockBlock::kClockCount)
412 return tab_->clock[tid];
413 u32 idx = tab_->table[tid / ClockBlock::kClockCount];
414 ClockBlock *cb = ctx->clock_alloc.Map(idx);
415 return cb->clock[tid % ClockBlock::kClockCount];
// Writes a textual dump of this sync clock through the supplied printf-style
// callback. The parameter is itself named `printf`, so every call below goes
// to the callback.
// Output in the lines shown: the comma-separated epochs of all size_
// elements, then their reuse tags, then the release-store marker and the
// first two dirty_tids_ entries.
// NOTE(review): the line that prints the opening part of the dump and the
// closing brace are missing from this listing.
418 void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
420 for (uptr i = 0; i < size_; i++)
421 printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
422 printf("] reused=[");
423 for (uptr i = 0; i < size_; i++)
424 printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
425 printf("] release_store_tid=%d/%d dirty_tids=%d/%d",
426 release_store_tid_, release_store_reused_,
427 dirty_tids_[0], dirty_tids_[1]);
429 } // namespace __tsan