From 449aa8c6415a21a51841ebb8de79339701bcc295 Mon Sep 17 00:00:00 2001 From: Olivier Houchard Date: Thu, 9 Aug 2018 12:07:37 +0000 Subject: [PATCH] Import CK as of commit 08813496570879fbcc2adcdd9ddc0a054361bfde, mostly to avoid using lwsync on ppc32. --- include/gcc/ppc/ck_pr.h | 32 ++++++++++++++++++++------------ include/spinlock/hclh.h | 12 ++++++++---- src/ck_barrier_combining.c | 8 ++++---- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/include/gcc/ppc/ck_pr.h b/include/gcc/ppc/ck_pr.h index cd7935dd725..73f0cb78ee7 100644 --- a/include/gcc/ppc/ck_pr.h +++ b/include/gcc/ppc/ck_pr.h @@ -67,21 +67,29 @@ ck_pr_stall(void) __asm__ __volatile__(I ::: "memory"); \ } -CK_PR_FENCE(atomic, "lwsync") -CK_PR_FENCE(atomic_store, "lwsync") +#ifdef CK_MD_PPC32_LWSYNC +#define CK_PR_LWSYNCOP "lwsync" +#else /* CK_MD_PPC32_LWSYNC_DISABLE */ +#define CK_PR_LWSYNCOP "sync" +#endif + +CK_PR_FENCE(atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(atomic_store, CK_PR_LWSYNCOP) CK_PR_FENCE(atomic_load, "sync") -CK_PR_FENCE(store_atomic, "lwsync") -CK_PR_FENCE(load_atomic, "lwsync") -CK_PR_FENCE(store, "lwsync") +CK_PR_FENCE(store_atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(load_atomic, CK_PR_LWSYNCOP) +CK_PR_FENCE(store, CK_PR_LWSYNCOP) CK_PR_FENCE(store_load, "sync") -CK_PR_FENCE(load, "lwsync") -CK_PR_FENCE(load_store, "lwsync") +CK_PR_FENCE(load, CK_PR_LWSYNCOP) +CK_PR_FENCE(load_store, CK_PR_LWSYNCOP) CK_PR_FENCE(memory, "sync") -CK_PR_FENCE(acquire, "lwsync") -CK_PR_FENCE(release, "lwsync") -CK_PR_FENCE(acqrel, "lwsync") -CK_PR_FENCE(lock, "lwsync") -CK_PR_FENCE(unlock, "lwsync") +CK_PR_FENCE(acquire, CK_PR_LWSYNCOP) +CK_PR_FENCE(release, CK_PR_LWSYNCOP) +CK_PR_FENCE(acqrel, CK_PR_LWSYNCOP) +CK_PR_FENCE(lock, CK_PR_LWSYNCOP) +CK_PR_FENCE(unlock, CK_PR_LWSYNCOP) + +#undef CK_PR_LWSYNCOP #undef CK_PR_FENCE diff --git a/include/spinlock/hclh.h b/include/spinlock/hclh.h index 296448b4e20..ece56c602ad 100644 --- a/include/spinlock/hclh.h +++ b/include/spinlock/hclh.h @@ -81,6 +81,8 @@ ck_spinlock_hclh_lock(struct ck_spinlock_hclh **glob_queue, thread->wait = true; thread->splice = false; thread->cluster_id = (*local_queue)->cluster_id; + /* Make sure previous->previous doesn't appear to be NULL */ + thread->previous = *local_queue; /* Serialize with respect to update of local queue. */ ck_pr_fence_store_atomic(); @@ -91,13 +93,15 @@ ck_spinlock_hclh_lock(struct ck_spinlock_hclh **glob_queue, /* Wait until previous thread from the local queue is done with lock. */ ck_pr_fence_load(); - if (previous->previous != NULL && - previous->cluster_id == thread->cluster_id) { - while (ck_pr_load_uint(&previous->wait) == true) + if (previous->previous != NULL) { + while (ck_pr_load_uint(&previous->wait) == true && + ck_pr_load_int(&previous->cluster_id) == thread->cluster_id && + ck_pr_load_uint(&previous->splice) == false) ck_pr_stall(); /* We're head of the global queue, we're done */ - if (ck_pr_load_uint(&previous->splice) == false) + if (ck_pr_load_int(&previous->cluster_id) == thread->cluster_id && + ck_pr_load_uint(&previous->splice) == false) return; } diff --git a/src/ck_barrier_combining.c b/src/ck_barrier_combining.c index 3ee72fdb086..ed1960c0ed9 100644 --- a/src/ck_barrier_combining.c +++ b/src/ck_barrier_combining.c @@ -35,7 +35,7 @@ struct ck_barrier_combining_queue { struct ck_barrier_combining_group *tail; }; -CK_CC_INLINE static struct ck_barrier_combining_group * +static struct ck_barrier_combining_group * ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue) { struct ck_barrier_combining_group *front = NULL; @@ -48,7 +48,7 @@ ck_barrier_combining_queue_dequeue(struct ck_barrier_combining_queue *queue) return front; } -CK_CC_INLINE static void +static void ck_barrier_combining_insert(struct ck_barrier_combining_group *parent, struct ck_barrier_combining_group *tnode, struct ck_barrier_combining_group **child) @@ -72,7 +72,7 @@ ck_barrier_combining_insert(struct ck_barrier_combining_group *parent, * into the barrier's tree. We use a queue to implement this * traversal. */ -CK_CC_INLINE static void +static void ck_barrier_combining_queue_enqueue(struct ck_barrier_combining_queue *queue, struct ck_barrier_combining_group *node_value) { @@ -185,10 +185,10 @@ ck_barrier_combining_aux(struct ck_barrier_combining *barrier, ck_pr_fence_store(); ck_pr_store_uint(&tnode->sense, ~tnode->sense); } else { - ck_pr_fence_memory(); while (sense != ck_pr_load_uint(&tnode->sense)) ck_pr_stall(); } + ck_pr_fence_memory(); return; } -- 2.45.0