284 lines
9.3 KiB
C++
284 lines
9.3 KiB
C++
//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file is a part of ThreadSanitizer (TSan), a race detector.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#ifndef TSAN_CLOCK_H
|
|
#define TSAN_CLOCK_H
|
|
|
|
#include "tsan_defs.h"
|
|
#include "tsan_dense_alloc.h"
|
|
|
|
namespace __tsan {
|
|
|
|
typedef DenseSlabAlloc<ClockBlock, 1<<16, 1<<10> ClockAlloc;
|
|
typedef DenseSlabAllocCache ClockCache;
|
|
|
|
// The clock that lives in sync variables (mutexes, atomics, etc).
|
|
class SyncClock {
|
|
public:
|
|
SyncClock();
|
|
~SyncClock();
|
|
|
|
uptr size() const;
|
|
|
|
// These are used only in tests.
|
|
u64 get(unsigned tid) const;
|
|
u64 get_clean(unsigned tid) const;
|
|
|
|
void Resize(ClockCache *c, uptr nclk);
|
|
void Reset(ClockCache *c);
|
|
|
|
void DebugDump(int(*printf)(const char *s, ...));
|
|
|
|
// Clock element iterator.
|
|
// Note: it iterates only over the table without regard to dirty entries.
|
|
class Iter {
|
|
public:
|
|
explicit Iter(SyncClock* parent);
|
|
Iter& operator++();
|
|
bool operator!=(const Iter& other);
|
|
ClockElem &operator*();
|
|
|
|
private:
|
|
SyncClock *parent_;
|
|
// [pos_, end_) is the current continuous range of clock elements.
|
|
ClockElem *pos_;
|
|
ClockElem *end_;
|
|
int block_; // Current number of second level block.
|
|
|
|
NOINLINE void Next();
|
|
};
|
|
|
|
Iter begin();
|
|
Iter end();
|
|
|
|
private:
|
|
friend class ThreadClock;
|
|
friend class Iter;
|
|
static const uptr kDirtyTids = 2;
|
|
|
|
struct Dirty {
|
|
u64 epoch : kClkBits;
|
|
u64 tid : 64 - kClkBits; // kInvalidId if not active
|
|
};
|
|
|
|
unsigned release_store_tid_;
|
|
unsigned release_store_reused_;
|
|
Dirty dirty_[kDirtyTids];
|
|
// If size_ is 0, tab_ is nullptr.
|
|
// If size <= 64 (kClockCount), tab_ contains pointer to an array with
|
|
// 64 ClockElem's (ClockBlock::clock).
|
|
// Otherwise, tab_ points to an array with up to 127 u32 elements,
|
|
// each pointing to the second-level 512b block with 64 ClockElem's.
|
|
// Unused space in the first level ClockBlock is used to store additional
|
|
// clock elements.
|
|
// The last u32 element in the first level ClockBlock is always used as
|
|
// reference counter.
|
|
//
|
|
// See the following scheme for details.
|
|
// All memory blocks are 512 bytes (allocated from ClockAlloc).
|
|
// Clock (clk) elements are 64 bits.
|
|
// Idx and ref are 32 bits.
|
|
//
|
|
// tab_
|
|
// |
|
|
// \/
|
|
// +----------------------------------------------------+
|
|
// | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
|
|
// +----------------------------------------------------+
|
|
// | |
|
|
// | \/
|
|
// | +----------------+
|
|
// | | clk0 ... clk63 |
|
|
// | +----------------+
|
|
// \/
|
|
// +------------------+
|
|
// | clk64 ... clk127 |
|
|
// +------------------+
|
|
//
|
|
// Note: dirty entries, if active, always override what's stored in the clock.
|
|
ClockBlock *tab_;
|
|
u32 tab_idx_;
|
|
u16 size_;
|
|
u16 blocks_; // Number of second level blocks.
|
|
|
|
void Unshare(ClockCache *c);
|
|
bool IsShared() const;
|
|
bool Cachable() const;
|
|
void ResetImpl();
|
|
void FlushDirty();
|
|
uptr capacity() const;
|
|
u32 get_block(uptr bi) const;
|
|
void append_block(u32 idx);
|
|
ClockElem &elem(unsigned tid) const;
|
|
};
|
|
|
|
// The clock that lives in threads.
|
|
class ThreadClock {
|
|
public:
|
|
typedef DenseSlabAllocCache Cache;
|
|
|
|
explicit ThreadClock(unsigned tid, unsigned reused = 0);
|
|
|
|
u64 get(unsigned tid) const;
|
|
void set(ClockCache *c, unsigned tid, u64 v);
|
|
void set(u64 v);
|
|
void tick();
|
|
uptr size() const;
|
|
|
|
void acquire(ClockCache *c, SyncClock *src);
|
|
void releaseStoreAcquire(ClockCache *c, SyncClock *src);
|
|
void release(ClockCache *c, SyncClock *dst);
|
|
void acq_rel(ClockCache *c, SyncClock *dst);
|
|
void ReleaseStore(ClockCache *c, SyncClock *dst);
|
|
void ResetCached(ClockCache *c);
|
|
void NoteGlobalAcquire(u64 v);
|
|
|
|
void DebugReset();
|
|
void DebugDump(int(*printf)(const char *s, ...));
|
|
|
|
private:
|
|
static const uptr kDirtyTids = SyncClock::kDirtyTids;
|
|
// Index of the thread associated with he clock ("current thread").
|
|
const unsigned tid_;
|
|
const unsigned reused_; // tid_ reuse count.
|
|
// Current thread time when it acquired something from other threads.
|
|
u64 last_acquire_;
|
|
|
|
// Last time another thread has done a global acquire of this thread's clock.
|
|
// It helps to avoid problem described in:
|
|
// https://github.com/golang/go/issues/39186
|
|
// See test/tsan/java_finalizer2.cpp for a regression test.
|
|
// Note the failuire is _extremely_ hard to hit, so if you are trying
|
|
// to reproduce it, you may want to run something like:
|
|
// $ go get golang.org/x/tools/cmd/stress
|
|
// $ stress -p=64 ./a.out
|
|
//
|
|
// The crux of the problem is roughly as follows.
|
|
// A number of O(1) optimizations in the clocks algorithm assume proper
|
|
// transitive cumulative propagation of clock values. The AcquireGlobal
|
|
// operation may produce an inconsistent non-linearazable view of
|
|
// thread clocks. Namely, it may acquire a later value from a thread
|
|
// with a higher ID, but fail to acquire an earlier value from a thread
|
|
// with a lower ID. If a thread that executed AcquireGlobal then releases
|
|
// to a sync clock, it will spoil the sync clock with the inconsistent
|
|
// values. If another thread later releases to the sync clock, the optimized
|
|
// algorithm may break.
|
|
//
|
|
// The exact sequence of events that leads to the failure.
|
|
// - thread 1 executes AcquireGlobal
|
|
// - thread 1 acquires value 1 for thread 2
|
|
// - thread 2 increments clock to 2
|
|
// - thread 2 releases to sync object 1
|
|
// - thread 3 at time 1
|
|
// - thread 3 acquires from sync object 1
|
|
// - thread 3 increments clock to 2
|
|
// - thread 1 acquires value 2 for thread 3
|
|
// - thread 1 releases to sync object 2
|
|
// - sync object 2 clock has 1 for thread 2 and 2 for thread 3
|
|
// - thread 3 releases to sync object 2
|
|
// - thread 3 sees value 2 in the clock for itself
|
|
// and decides that it has already released to the clock
|
|
// and did not acquire anything from other threads after that
|
|
// (the last_acquire_ check in release operation)
|
|
// - thread 3 does not update the value for thread 2 in the clock from 1 to 2
|
|
// - thread 4 acquires from sync object 2
|
|
// - thread 4 detects a false race with thread 2
|
|
// as it should have been synchronized with thread 2 up to time 2,
|
|
// but because of the broken clock it is now synchronized only up to time 1
|
|
//
|
|
// The global_acquire_ value helps to prevent this scenario.
|
|
// Namely, thread 3 will not trust any own clock values up to global_acquire_
|
|
// for the purposes of the last_acquire_ optimization.
|
|
atomic_uint64_t global_acquire_;
|
|
|
|
// Cached SyncClock (without dirty entries and release_store_tid_).
|
|
// We reuse it for subsequent store-release operations without intervening
|
|
// acquire operations. Since it is shared (and thus constant), clock value
|
|
// for the current thread is then stored in dirty entries in the SyncClock.
|
|
// We host a refernece to the table while it is cached here.
|
|
u32 cached_idx_;
|
|
u16 cached_size_;
|
|
u16 cached_blocks_;
|
|
|
|
// Number of active elements in the clk_ table (the rest is zeros).
|
|
uptr nclk_;
|
|
u64 clk_[kMaxTidInClock]; // Fixed size vector clock.
|
|
|
|
bool IsAlreadyAcquired(const SyncClock *src) const;
|
|
bool HasAcquiredAfterRelease(const SyncClock *dst) const;
|
|
void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
|
|
};
|
|
|
|
// Returns the clock value stored for thread `tid`.
// `tid` is range-checked against kMaxTidInClock by a DCHECK.
ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
  DCHECK_LT(tid, kMaxTidInClock);
  const u64 value = clk_[tid];
  return value;
}
|
|
|
|
// Overwrites the element belonging to this clock's own thread (tid_) with `v`.
// A DCHECK verifies that `v` is not smaller than the value being replaced.
ALWAYS_INLINE void ThreadClock::set(u64 v) {
  u64 &own = clk_[tid_];
  DCHECK_GE(v, own);
  own = v;
}
|
|
|
|
// Advances the element belonging to this clock's own thread (tid_) by one.
ALWAYS_INLINE void ThreadClock::tick() {
  ++clk_[tid_];
}
|
|
|
|
// Returns nclk_, the number of active elements in the clk_ table.
ALWAYS_INLINE uptr ThreadClock::size() const {
  const uptr active = nclk_;
  return active;
}
|
|
|
|
// Records `v` as the latest global-acquire time in global_acquire_.
ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
  // AcquireGlobal runs under ThreadRegistryLock, so only one thread at a time
  // gets here and the values passed in are expected never to go backwards.
  // The CHECK below enforces that expectation before the new value is stored.
  const u64 previous = atomic_load_relaxed(&global_acquire_);
  CHECK_LE(previous, v);
  atomic_store_relaxed(&global_acquire_, v);
}
|
|
|
|
// Returns an iterator constructed over this clock.
ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
  Iter first(this);
  return first;
}
|
|
|
|
// Returns the past-the-end iterator, which is an Iter with a null parent.
ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
  Iter sentinel(nullptr);
  return sentinel;
}
|
|
|
|
// Returns size_ widened to uptr.
ALWAYS_INLINE uptr SyncClock::size() const {
  const uptr count = size_;
  return count;
}
|
|
|
|
// Builds an iterator for `parent`. The element range starts out empty and the
// block counter at -1; when `parent` is non-null, Next() is called once to
// position the iterator. A null `parent` leaves the iterator untouched, which
// is the state SyncClock::end() returns.
ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock *parent)
    : parent_(parent), pos_(nullptr), end_(nullptr), block_(-1) {
  if (parent != nullptr) {
    Next();
  }
}
|
|
|
|
// Pre-increment: steps to the next element. When the current [pos_, end_)
// range is exhausted, Next() is called to move on.
ALWAYS_INLINE SyncClock::Iter &SyncClock::Iter::operator++() {
  if (UNLIKELY(++pos_ >= end_)) {
    Next();
  }
  return *this;
}
|
|
|
|
// Two iterators differ exactly when their parent_ pointers differ; pos_, end_
// and block_ are not compared.
// NOTE(review): presumably Next() clears parent_ once iteration is finished so
// that the iterator compares equal to end() -- Next() is not visible here,
// confirm against its definition.
ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter &other) {
  const bool same_parent = (parent_ == other.parent_);
  return !same_parent;
}
|
|
|
|
// Returns a reference to the element the iterator currently points at (pos_).
ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
  ClockElem &current = *pos_;
  return current;
}
|
|
} // namespace __tsan
|
|
|
|
#endif // TSAN_CLOCK_H
|