summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: kib <kib@FreeBSD.org> 2012-03-10 08:49:44 +0000
committer: kib <kib@FreeBSD.org> 2012-03-10 08:49:44 +0000
commit: fb48ba5d907bec4322e4292fd840d33a0bb662ac (patch)
tree: a8f58ba276201ae6ec5ed36f99021f82c9135db4
parent: b186de9a2407a944cc309e501c75e8e9afe37ab5 (diff)
downloadFreeBSD-src-fb48ba5d907bec4322e4292fd840d33a0bb662ac.zip
FreeBSD-src-fb48ba5d907bec4322e4292fd840d33a0bb662ac.tar.gz
Optimize tls_get_addr_common(). The change provides a speedup of around 30%
for a TLS microbenchmark using the global-dynamic TLS model on amd64 (which is the default for PIC DSOs). Split the slow path into tls_get_addr_slow(), for which inlining is disabled. This prevents register spills on entry to tls_get_addr_common(). Provide a static branch hint to the compiler, indicating that the slow path is unlikely to be taken. While there, make some minimal style adjustments. Reported and tested by: davidxu. MFC after: 1 week.
-rw-r--r-- libexec/rtld-elf/rtld.c 31
1 files changed, 22 insertions, 9 deletions
diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c
index 568d10b..b8524da 100644
--- a/libexec/rtld-elf/rtld.c
+++ b/libexec/rtld-elf/rtld.c
@@ -3507,17 +3507,17 @@ unref_dag(Obj_Entry *root)
/*
* Common code for MD __tls_get_addr().
*/
-void *
-tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset)
+static void *tls_get_addr_slow(Elf_Addr **, int, size_t) __noinline;
+static void *
+tls_get_addr_slow(Elf_Addr **dtvp, int index, size_t offset)
{
- Elf_Addr* dtv = *dtvp;
+ Elf_Addr *newdtv, *dtv;
RtldLockState lockstate;
+ int to_copy;
+ dtv = *dtvp;
/* Check dtv generation in case new modules have arrived */
if (dtv[0] != tls_dtv_generation) {
- Elf_Addr* newdtv;
- int to_copy;
-
wlock_acquire(rtld_bind_lock, &lockstate);
newdtv = calloc(1, (tls_max_index + 2) * sizeof(Elf_Addr));
to_copy = dtv[1];
@@ -3532,14 +3532,27 @@ tls_get_addr_common(Elf_Addr** dtvp, int index, size_t offset)
}
/* Dynamically allocate module TLS if necessary */
- if (!dtv[index + 1]) {
+ if (dtv[index + 1] == 0) {
/* Signal safe, wlock will block out signals. */
- wlock_acquire(rtld_bind_lock, &lockstate);
+ wlock_acquire(rtld_bind_lock, &lockstate);
if (!dtv[index + 1])
dtv[index + 1] = (Elf_Addr)allocate_module_tls(index);
lock_release(rtld_bind_lock, &lockstate);
}
- return (void*) (dtv[index + 1] + offset);
+ return ((void *)(dtv[index + 1] + offset));
+}
+
+void *
+tls_get_addr_common(Elf_Addr **dtvp, int index, size_t offset)
+{
+ Elf_Addr *dtv;
+
+ dtv = *dtvp;
+ /* Check dtv generation in case new modules have arrived */
+ if (__predict_true(dtv[0] == tls_dtv_generation &&
+ dtv[index + 1] != 0))
+ return ((void *)(dtv[index + 1] + offset));
+ return (tls_get_addr_slow(dtvp, index, offset));
}
#if defined(__arm__) || defined(__ia64__) || defined(__mips__) || defined(__powerpc__)
OpenPOWER on IntegriCloud