summaryrefslogtreecommitdiffstats
path: root/sys/amd64/amd64/cpu_switch.S
diff options
context:
space:
mode:
authorkib <kib@FreeBSD.org>2013-08-30 07:59:49 +0000
committerkib <kib@FreeBSD.org>2013-08-30 07:59:49 +0000
commita2b5da0090b331918b7db2ece8b9ca5d545d4a6c (patch)
tree04770c540ba8145f9288bad14ac5d4d59ac30c95 /sys/amd64/amd64/cpu_switch.S
parent748f95c68727abdadaf3ea8816cc19784d05411d (diff)
downloadFreeBSD-src-a2b5da0090b331918b7db2ece8b9ca5d545d4a6c.zip
FreeBSD-src-a2b5da0090b331918b7db2ece8b9ca5d545d4a6c.tar.gz
Implement support for process-context identifiers ('PCID') on
Intel CPUs. The feature tags TLB entries with the ID of the address space, which makes it possible to avoid TLB invalidation on a context switch; it is available only in long mode. In microbenchmarks, using PCID decreased the latency of context switches by ~30% on SandyBridge-class desktop CPUs, as measured with the lat_ctx program from lmbench. If available, the INVPCID instruction is used when a TLB entry in a non-current address space needs to be invalidated. The instruction is typically available on Haswell CPUs. If needed, the use of PCID can be turned off by setting the vm.pmap.pcid_enabled loader tunable to 0. The state of the feature is reported by the vm.pmap.pcid_enabled sysctl. The sysctl vm.pmap.pcid_save_cnt reports the number of context switches that avoided invalidating the TLB; compare it with the total number of context switches, available as the sysctl vm.stats.sys.v_swtch. Sponsored by: The FreeBSD Foundation Reviewed by: alc Tested by: pho, bf
Diffstat (limited to 'sys/amd64/amd64/cpu_switch.S')
-rw-r--r--sys/amd64/amd64/cpu_switch.S34
1 files changed, 27 insertions, 7 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S
index ed1ccb5..ac30990 100644
--- a/sys/amd64/amd64/cpu_switch.S
+++ b/sys/amd64/amd64/cpu_switch.S
@@ -77,8 +77,7 @@ ENTRY(cpu_throw)
LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */
1:
movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */
- movq PCB_CR3(%r8),%rdx
- movq %rdx,%cr3 /* new address space */
+ movq PCB_CR3(%r8),%rcx /* new address space */
jmp swact
END(cpu_throw)
@@ -145,20 +144,41 @@ ctx_switch_xsave:
SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
jmp sw1
swinact:
- movq %rcx,%cr3 /* new address space */
- movl PCPU(CPUID), %eax
+ movl PCPU(CPUID),%eax
/* Release bit from old pmap->pm_active */
- movq PCPU(CURPMAP),%rcx
- LK btrl %eax,PM_ACTIVE(%rcx) /* clear old */
- SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */
+ movq PCPU(CURPMAP),%r12
+ LK btrl %eax,PM_ACTIVE(%r12) /* clear old */
+ SETLK %rdx,TD_LOCK(%rdi) /* Release the old thread */
swact:
/* Set bit in new pmap->pm_active */
movq TD_PROC(%rsi),%rdx /* newproc */
movq P_VMSPACE(%rdx), %rdx
addq $VM_PMAP,%rdx
+ cmpl $-1,PM_PCID(%rdx)
+ je 1f
+ LK btsl %eax,PM_SAVE(%rdx)
+ jnc 1f
+ btsq $63,%rcx /* CR3_PCID_SAVE */
+ incq PCPU(PM_SAVE_CNT)
+1:
+ movq %rcx,%cr3 /* new address space */
LK btsl %eax,PM_ACTIVE(%rdx) /* set new */
movq %rdx,PCPU(CURPMAP)
+ /*
+ * We might lose the race, and another CPU might have changed
+ * the pmap after we set our bit in pmap->pm_save. Recheck.
+ * Reload %cr3 with the CR3_PCID_SAVE bit cleared if the pmap was
+ * modified, causing a TLB flush for this pcid.
+ */
+ btrq $63,%rcx
+ jnc 1f
+ LK btsl %eax,PM_SAVE(%rdx)
+ jc 1f
+ decq PCPU(PM_SAVE_CNT)
+ movq %rcx,%cr3
+1:
+
sw1:
#if defined(SCHED_ULE) && defined(SMP)
/* Wait for the new thread to become unblocked */
OpenPOWER on IntegriCloud