summaryrefslogtreecommitdiffstats
path: root/arch/x86/xen
diff options
context:
space:
mode:
authorDavid Vrabel <david.vrabel@citrix.com>2012-07-09 11:39:08 +0100
committerKonrad Rzeszutek Wilk <konrad.wilk@oracle.com>2012-07-19 15:51:57 -0400
commit1c32cdc633c96a14cca35de9db19338354c25111 (patch)
tree6a1e9df672d3aa8794bf197037be01ef1b34383c /arch/x86/xen
parent59290362da587ce2e6d2f4a7f85e362fa2d7fd39 (diff)
downloadop-kernel-dev-1c32cdc633c96a14cca35de9db19338354c25111.zip
op-kernel-dev-1c32cdc633c96a14cca35de9db19338354c25111.tar.gz
xen/x86: avoid updating TLS descriptors if they haven't changed
When switching tasks in a Xen PV guest, avoid updating the TLS descriptors if they haven't changed. This improves the speed of context switches by almost 10% as much of the time the descriptors are the same or only one is different. The descriptors written into the GDT by Xen are modified from the values passed in the update_descriptor hypercall so we keep shadow copies of the three TLS descriptors to compare against. lmbench3 test Before After Improvement -------------------------------------------- lat_ctx -s 32 24 7.19 6.52 9% lat_pipe 12.56 11.66 7% Signed-off-by: David Vrabel <david.vrabel@citrix.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Diffstat (limited to 'arch/x86/xen')
-rw-r--r--arch/x86/xen/enlighten.c29
1 files changed, 26 insertions, 3 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 225ffdc..9c80869 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -125,6 +125,19 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
*/
static int have_vcpu_info_placement = 1;
+struct tls_descs {
+ struct desc_struct desc[3];
+};
+
+/*
+ * Updating the 3 TLS descriptors in the GDT on every task switch is
+ * surprisingly expensive so we avoid updating them if they haven't
+ * changed. Since Xen writes different descriptors than the one
+ * passed in the update_descriptor hypercall we keep shadow copies to
+ * compare against.
+ */
+static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc);
+
static void clamp_max_cpus(void)
{
#ifdef CONFIG_SMP
@@ -548,9 +561,19 @@ static inline bool desc_equal(const struct desc_struct *d1,
static void load_TLS_descriptor(struct thread_struct *t,
unsigned int cpu, unsigned int i)
{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
- struct multicall_space mc = __xen_mc_entry(0);
+ struct desc_struct *shadow = &per_cpu(shadow_tls_desc, cpu).desc[i];
+ struct desc_struct *gdt;
+ xmaddr_t maddr;
+ struct multicall_space mc;
+
+ if (desc_equal(shadow, &t->tls_array[i]))
+ return;
+
+ *shadow = t->tls_array[i];
+
+ gdt = get_cpu_gdt_table(cpu);
+ maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
+ mc = __xen_mc_entry(0);
MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]);
}
OpenPOWER on IntegriCloud