Diffstat (limited to 'libexec/rtld-elf/alpha/lockdflt.c')
-rw-r--r--   libexec/rtld-elf/alpha/lockdflt.c   151
1 file changed, 110 insertions(+), 41 deletions(-)
diff --git a/libexec/rtld-elf/alpha/lockdflt.c b/libexec/rtld-elf/alpha/lockdflt.c
index 4233b36..65900a6 100644
--- a/libexec/rtld-elf/alpha/lockdflt.c
+++ b/libexec/rtld-elf/alpha/lockdflt.c
@@ -26,64 +26,133 @@
*/
/*
- * Default thread locking implementation for the dynamic linker. It
- * is used until the client registers a different implementation with
- * dllockinit(). The default implementation does mutual exclusion by
- * blocking almost all signals. This is based on the observation that
- * most userland thread packages use signals to support preemption.
+ * Thread locking implementation for the dynamic linker.
+ *
+ * We use the "simple, non-scalable reader-preference lock" from:
+ *
+ * J. M. Mellor-Crummey and M. L. Scott. "Scalable Reader-Writer
+ * Synchronization for Shared-Memory Multiprocessors." 3rd ACM Symp. on
+ * Principles and Practice of Parallel Programming, April 1991.
+ *
+ * In this algorithm the lock is a single word. Its low-order bit is
+ * set when a writer holds the lock. The remaining high-order bits
+ * contain a count of readers desiring the lock. The algorithm requires
+ * atomic "compare_and_store" and "add" operations, which we implement
+ * using assembly language sequences in "rtld_start.S".
+ *
+ * These are spinlocks. When spinning we call nanosleep() for 1
+ * microsecond each time around the loop. This will most likely yield
+ * the CPU to other threads (including, we hope, the lockholder) allowing
+ * them to make some progress.
*/
-#include <dlfcn.h>
-#include <signal.h>
#include <stdlib.h>
+#include <time.h>
#include "debug.h"
#include "rtld.h"
-typedef struct Struct_LockDflt {
- sigset_t lock_mask;
- sigset_t old_mask;
- int depth;
-} LockDflt;
+/*
+ * This value of CACHE_LINE_SIZE is conservative. The actual size
+ * is 32 on the 21064, 21064A, 21066, 21066A, and 21164. It is 64
+ * on the 21264. Compaq recommends sequestering each lock in its own
+ * 128-byte block to allow for future implementations with larger
+ * cache lines.
+ */
+#define CACHE_LINE_SIZE 128
-void
-lockdflt_acquire(void *lock)
+#define WAFLAG 0x1 /* A writer holds the lock */
+#define RC_INCR 0x2 /* Adjusts count of readers desiring lock */
+
+typedef struct Struct_Lock {
+ volatile int lock;
+ void *base;
+} Lock;
+
+static const struct timespec usec = { 0, 1000 }; /* 1 usec. */
+
+static void *
+lock_create(void *context)
{
- LockDflt *l = (LockDflt *)lock;
- sigprocmask(SIG_BLOCK, &l->lock_mask, &l->old_mask);
- assert(l->depth == 0);
- l->depth++;
+ void *base;
+ char *p;
+ uintptr_t r;
+ Lock *l;
+
+ /*
+ * Arrange for the lock to occupy its own cache line. First, we
+ * optimistically allocate just a cache line, hoping that malloc
+ * will give us a well-aligned block of memory. If that doesn't
+ * work, we allocate a larger block and take a well-aligned cache
+ * line from it.
+ */
+ base = xmalloc(CACHE_LINE_SIZE);
+ p = (char *)base;
+ if ((uintptr_t)p % CACHE_LINE_SIZE != 0) {
+ free(base);
+ base = xmalloc(2 * CACHE_LINE_SIZE);
+ p = (char *)base;
+ if ((r = (uintptr_t)p % CACHE_LINE_SIZE) != 0)
+ p += CACHE_LINE_SIZE - r;
+ }
+ l = (Lock *)p;
+ l->base = base;
+ l->lock = 0;
+ return l;
}
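One step worth spelling out in lock_create(): the fallback allocation of 2 * CACHE_LINE_SIZE always suffices, because rounding p up advances it by at most CACHE_LINE_SIZE - 1 bytes, which still leaves at least CACHE_LINE_SIZE + 1 usable bytes at the aligned address. The round-up itself, as a standalone sketch (the helper align_to_line() is hypothetical, not part of the change):

    #include <stdint.h>

    /*
     * Advance p to the next CACHE_LINE_SIZE boundary; leave it
     * untouched if it is already aligned.  Mirrors the inline
     * logic in lock_create() above.
     */
    static char *
    align_to_line(char *p)
    {
            uintptr_t r = (uintptr_t)p % CACHE_LINE_SIZE;

            return (r == 0) ? p : p + (CACHE_LINE_SIZE - r);
    }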
-void *
-lockdflt_create(void *context)
+static void
+lock_destroy(void *lock)
{
- LockDflt *l;
-
- l = NEW(LockDflt);
- l->depth = 0;
- sigfillset(&l->lock_mask);
- sigdelset(&l->lock_mask, SIGTRAP);
- sigdelset(&l->lock_mask, SIGABRT);
- sigdelset(&l->lock_mask, SIGBUS);
- sigdelset(&l->lock_mask, SIGSEGV);
- sigdelset(&l->lock_mask, SIGKILL);
- sigdelset(&l->lock_mask, SIGSTOP);
- return l;
+ Lock *l = (Lock *)lock;
+
+ free(l->base);
}
-void
-lockdflt_destroy(void *lock)
+static void
+rlock_acquire(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ atomic_add_int(&l->lock, RC_INCR);
+ while (l->lock & WAFLAG)
+ nanosleep(&usec, NULL);
+}
+
+static void
+wlock_acquire(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ while (cmp0_and_store_int(&l->lock, WAFLAG) != 0)
+ nanosleep(&usec, NULL);
+}
+
+static void
+rlock_release(void *lock)
{
- LockDflt *l = (LockDflt *)lock;
- free(l);
+ Lock *l = (Lock *)lock;
+
+ atomic_add_int(&l->lock, -RC_INCR);
+}
+
+static void
+wlock_release(void *lock)
+{
+ Lock *l = (Lock *)lock;
+
+ atomic_add_int(&l->lock, -WAFLAG);
}
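The primitives cmp0_and_store_int() and atomic_add_int() used above are assembly-language sequences in rtld_start.S, as the header comment notes. As a sketch of their contracts only, they behave like the following C11 equivalents (an illustrative assumption; the dynamic linker itself cannot rely on <stdatomic.h>, and the real versions operate on the volatile int lock word directly):

    #include <stdatomic.h>

    /*
     * Store val only if the word is currently 0; return the old
     * value, i.e. 0 on success and nonzero if the lock was busy.
     */
    static int
    cmp0_and_store_int(atomic_int *p, int val)
    {
            int expected = 0;

            if (atomic_compare_exchange_strong(p, &expected, val))
                    return 0;       /* word was 0; val is now stored */
            return expected;        /* nonzero: the lock was busy */
    }

    /* Atomically add incr (which may be negative) to the word. */
    static void
    atomic_add_int(atomic_int *p, int incr)
    {
            atomic_fetch_add(p, incr);
    }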
void
-lockdflt_release(void *lock)
+lockdflt_init(LockInfo *li)
{
- LockDflt *l = (LockDflt *)lock;
- assert(l->depth == 1);
- l->depth--;
- sigprocmask(SIG_SETMASK, &l->old_mask, NULL);
+ li->context = NULL;
+ li->lock_create = lock_create;
+ li->rlock_acquire = rlock_acquire;
+ li->wlock_acquire = wlock_acquire;
+ li->rlock_release = rlock_release;
+ li->wlock_release = wlock_release;
+ li->lock_destroy = lock_destroy;
+ li->context_destroy = NULL;
}
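Putting the table to use: clients (rtld itself, or a thread library overriding the defaults via dllockinit()) go through the LockInfo callbacks rather than touching the Lock structure directly. A hypothetical minimal caller, assuming the LockInfo declaration provided by rtld.h:

    LockInfo li;
    void *l;

    lockdflt_init(&li);                     /* install the default methods */
    l = li.lock_create(li.context);         /* lock in its own cache line */

    li.rlock_acquire(l);                    /* shared, reader-side section */
    /* ... read-only access to linker data ... */
    li.rlock_release(l);

    li.wlock_acquire(l);                    /* exclusive, writer-side section */
    /* ... modify linker data ... */
    li.wlock_release(l);

    li.lock_destroy(l);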