contrib/jemalloc/include/jemalloc/internal/atomic_gcc_sync.h

   1 #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
   2 #define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H
   3
   4 #define ATOMIC_INIT(...) {__VA_ARGS__}
   5
   6 typedef enum {
   7         atomic_memory_order_relaxed,
   8         atomic_memory_order_acquire,
   9         atomic_memory_order_release,
  10         atomic_memory_order_acq_rel,
  11         atomic_memory_order_seq_cst
  12 } atomic_memory_order_t;
  13
  14 ATOMIC_INLINE void
  15 atomic_fence(atomic_memory_order_t mo) {
  16         /* Easy cases first: no barrier, and full barrier. */
  17         if (mo == atomic_memory_order_relaxed) {
  18                 asm volatile("" ::: "memory");
  19                 return;
  20         }
  21         if (mo == atomic_memory_order_seq_cst) {
  22                 asm volatile("" ::: "memory");
  23                 __sync_synchronize();
  24                 asm volatile("" ::: "memory");
  25                 return;
  26         }
  27         asm volatile("" ::: "memory");
  28 #  if defined(__i386__) || defined(__x86_64__)
  29         /* This is implicit on x86. */
  30 #  elif defined(__ppc__)
  31         asm volatile("lwsync");
  32 #  elif defined(__sparc__) && defined(__arch64__)
  33         if (mo == atomic_memory_order_acquire) {
  34                 asm volatile("membar #LoadLoad | #LoadStore");
  35         } else if (mo == atomic_memory_order_release) {
  36                 asm volatile("membar #LoadStore | #StoreStore");
  37         } else {
  38                 asm volatile("membar #LoadLoad | #LoadStore | #StoreStore");
  39         }
  40 #  else
  41         __sync_synchronize();
  42 #  endif
  43         asm volatile("" ::: "memory");
  44 }
  45
  46 /*
  47  * A correct implementation of seq_cst loads and stores on weakly ordered
  48  * architectures could do either of the following:
  49  *   1. store() is weak-fence -> store -> strong fence, load() is load ->
  50  *      strong-fence.
  51  *   2. store() is strong-fence -> store, load() is strong-fence -> load ->
  52  *      weak-fence.
  53  * The tricky thing is, load() and store() above can be the load or store
  54  * portions of a gcc __sync builtin, so we have to follow GCC's lead, which
  55  * means going with strategy 2.
  56  * On strongly ordered architectures, the natural strategy is to stick a strong
  57  * fence after seq_cst stores, and have naked loads.  So we want the strong
  58  * fences in different places on different architectures.
  59  * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to
  60  * accomplish this.
  61  */
  62
  63 ATOMIC_INLINE void
  64 atomic_pre_sc_load_fence() {
  65 #  if defined(__i386__) || defined(__x86_64__) ||                       \
  66     (defined(__sparc__) && defined(__arch64__))
  67         atomic_fence(atomic_memory_order_relaxed);
  68 #  else
  69         atomic_fence(atomic_memory_order_seq_cst);
  70 #  endif
  71 }
  72
  73 ATOMIC_INLINE void
  74 atomic_post_sc_store_fence() {
  75 #  if defined(__i386__) || defined(__x86_64__) ||                       \
  76     (defined(__sparc__) && defined(__arch64__))
  77         atomic_fence(atomic_memory_order_seq_cst);
  78 #  else
  79         atomic_fence(atomic_memory_order_relaxed);
  80 #  endif
  81
  82 }
  83
  84 #define JEMALLOC_GENERATE_ATOMICS(type, short_type,                     \
  85     /* unused */ lg_size)                                               \
  86 typedef struct {                                                        \
  87         type volatile repr;                                             \
  88 } atomic_##short_type##_t;                                              \
  89                                                                         \
  90 ATOMIC_INLINE type                                                      \
  91 atomic_load_##short_type(const atomic_##short_type##_t *a,              \
  92     atomic_memory_order_t mo) {                                         \
  93         if (mo == atomic_memory_order_seq_cst) {                        \
  94                 atomic_pre_sc_load_fence();                             \
  95         }                                                               \
  96         type result = a->repr;                                          \
  97         if (mo != atomic_memory_order_relaxed) {                        \
  98                 atomic_fence(atomic_memory_order_acquire);              \
  99         }                                                               \
 100         return result;                                                  \
 101 }                                                                       \
 102                                                                         \
 103 ATOMIC_INLINE void                                                      \
 104 atomic_store_##short_type(atomic_##short_type##_t *a,                   \
 105     type val, atomic_memory_order_t mo) {                               \
 106         if (mo != atomic_memory_order_relaxed) {                        \
 107                 atomic_fence(atomic_memory_order_release);              \
 108         }                                                               \
 109         a->repr = val;                                                  \
 110         if (mo == atomic_memory_order_seq_cst) {                        \
 111                 atomic_post_sc_store_fence();                           \
 112         }                                                               \
 113 }                                                                       \
 114                                                                         \
 115 ATOMIC_INLINE type                                                      \
 116 atomic_exchange_##short_type(atomic_##short_type##_t *a, type val,      \
 117     atomic_memory_order_t mo) {                                         \
 118         /*                                                              \
 119          * Because of FreeBSD, we care about gcc 4.2, which doesn't have\
 120          * an atomic exchange builtin.  We fake it with a CAS loop.     \
 121          */                                                             \
 122         while (true) {                                                  \
 123                 type old = a->repr;                                     \
 124                 if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \
 125                         return old;                                     \
 126                 }                                                       \
 127         }                                                               \
 128 }                                                                       \
 129                                                                         \
 130 ATOMIC_INLINE bool                                                      \
 131 atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a,   \
 132     type *expected, type desired, atomic_memory_order_t success_mo,     \
 133     atomic_memory_order_t failure_mo) {                                 \
 134         type prev = __sync_val_compare_and_swap(&a->repr, *expected,    \
 135             desired);                                                   \
 136         if (prev == *expected) {                                        \
 137                 return true;                                            \
 138         } else {                                                        \
 139                 *expected = prev;                                       \
 140                 return false;                                           \
 141         }                                                               \
 142 }                                                                       \
 143 ATOMIC_INLINE bool                                                      \
 144 atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \
 145     type *expected, type desired, atomic_memory_order_t success_mo,     \
 146     atomic_memory_order_t failure_mo) {                                 \
 147         type prev = __sync_val_compare_and_swap(&a->repr, *expected,    \
 148             desired);                                                   \
 149         if (prev == *expected) {                                        \
 150                 return true;                                            \
 151         } else {                                                        \
 152                 *expected = prev;                                       \
 153                 return false;                                           \
 154         }                                                               \
 155 }
 156
 157 #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type,                 \
 158     /* unused */ lg_size)                                               \
 159 JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size)       \
 160                                                                         \
 161 ATOMIC_INLINE type                                                      \
 162 atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val,     \
 163     atomic_memory_order_t mo) {                                         \
 164         return __sync_fetch_and_add(&a->repr, val);                     \
 165 }                                                                       \
 166                                                                         \
 167 ATOMIC_INLINE type                                                      \
 168 atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val,     \
 169     atomic_memory_order_t mo) {                                         \
 170         return __sync_fetch_and_sub(&a->repr, val);                     \
 171 }                                                                       \
 172                                                                         \
 173 ATOMIC_INLINE type                                                      \
 174 atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val,     \
 175     atomic_memory_order_t mo) {                                         \
 176         return __sync_fetch_and_and(&a->repr, val);                     \
 177 }                                                                       \
 178                                                                         \
 179 ATOMIC_INLINE type                                                      \
 180 atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val,      \
 181     atomic_memory_order_t mo) {                                         \
 182         return __sync_fetch_and_or(&a->repr, val);                      \
 183 }                                                                       \
 184                                                                         \
 185 ATOMIC_INLINE type                                                      \
 186 atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val,     \
 187     atomic_memory_order_t mo) {                                         \
 188         return __sync_fetch_and_xor(&a->repr, val);                     \
 189 }
 190
 191 #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */