diff options
author | kib <kib@FreeBSD.org> | 2013-08-30 07:59:49 +0000 |
---|---|---|
committer | kib <kib@FreeBSD.org> | 2013-08-30 07:59:49 +0000 |
commit | a2b5da0090b331918b7db2ece8b9ca5d545d4a6c (patch) | |
tree | 04770c540ba8145f9288bad14ac5d4d59ac30c95 /sys/amd64/amd64/cpu_switch.S | |
parent | 748f95c68727abdadaf3ea8816cc19784d05411d (diff) | |
download | FreeBSD-src-a2b5da0090b331918b7db2ece8b9ca5d545d4a6c.zip FreeBSD-src-a2b5da0090b331918b7db2ece8b9ca5d545d4a6c.tar.gz |
Implement support for process-context identifiers ('PCID') on
Intel CPUs. The feature tags TLB entries with the ID of the address
space, which makes it possible to avoid TLB invalidation on a context
switch; it is available only in long mode. In microbenchmarks, using
PCID decreased the latency of context switches by ~30% on SandyBridge
class desktop CPUs, as measured with the lat_ctx program from lmbench.
If available, use the INVPCID instruction when a TLB entry in a
non-current address space needs to be invalidated. The instruction is
typically available on Haswell CPUs.
If needed, the use of PCID can be turned off with the
vm.pmap.pcid_enabled loader tunable set to 0. The state of the
feature is reported by the vm.pmap.pcid_enabled sysctl. The sysctl
vm.pmap.pcid_save_cnt reports the number of context switches which
avoided invalidating the TLB; compare with the total number of context
switches, available as sysctl vm.stats.sys.v_swtch.
Sponsored by: The FreeBSD Foundation
Reviewed by: alc
Tested by: pho, bf
Diffstat (limited to 'sys/amd64/amd64/cpu_switch.S')
-rw-r--r-- | sys/amd64/amd64/cpu_switch.S | 34 |
1 files changed, 27 insertions, 7 deletions
diff --git a/sys/amd64/amd64/cpu_switch.S b/sys/amd64/amd64/cpu_switch.S index ed1ccb5..ac30990 100644 --- a/sys/amd64/amd64/cpu_switch.S +++ b/sys/amd64/amd64/cpu_switch.S @@ -77,8 +77,7 @@ ENTRY(cpu_throw) LK btrl %eax,PM_ACTIVE(%rdx) /* clear old */ 1: movq TD_PCB(%rsi),%r8 /* newtd->td_pcb */ - movq PCB_CR3(%r8),%rdx - movq %rdx,%cr3 /* new address space */ + movq PCB_CR3(%r8),%rcx /* new address space */ jmp swact END(cpu_throw) @@ -145,20 +144,41 @@ ctx_switch_xsave: SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */ jmp sw1 swinact: - movq %rcx,%cr3 /* new address space */ - movl PCPU(CPUID), %eax + movl PCPU(CPUID),%eax /* Release bit from old pmap->pm_active */ - movq PCPU(CURPMAP),%rcx - LK btrl %eax,PM_ACTIVE(%rcx) /* clear old */ - SETLK %rdx, TD_LOCK(%rdi) /* Release the old thread */ + movq PCPU(CURPMAP),%r12 + LK btrl %eax,PM_ACTIVE(%r12) /* clear old */ + SETLK %rdx,TD_LOCK(%rdi) /* Release the old thread */ swact: /* Set bit in new pmap->pm_active */ movq TD_PROC(%rsi),%rdx /* newproc */ movq P_VMSPACE(%rdx), %rdx addq $VM_PMAP,%rdx + cmpl $-1,PM_PCID(%rdx) + je 1f + LK btsl %eax,PM_SAVE(%rdx) + jnc 1f + btsq $63,%rcx /* CR3_PCID_SAVE */ + incq PCPU(PM_SAVE_CNT) +1: + movq %rcx,%cr3 /* new address space */ LK btsl %eax,PM_ACTIVE(%rdx) /* set new */ movq %rdx,PCPU(CURPMAP) + /* + * We might lose the race and other CPU might have changed + * the pmap after we set our bit in pmap->pm_save. Recheck. + * Reload %cr3 with CR3_PCID_SAVE bit cleared if pmap was + * modified, causing TLB flush for this pcid. + */ + btrq $63,%rcx + jnc 1f + LK btsl %eax,PM_SAVE(%rdx) + jc 1f + decq PCPU(PM_SAVE_CNT) + movq %rcx,%cr3 +1: + sw1: #if defined(SCHED_ULE) && defined(SMP) /* Wait for the new thread to become unblocked */ |