summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorgordon <gordon@FreeBSD.org>2019-05-14 23:20:16 +0000
committergordon <gordon@FreeBSD.org>2019-05-14 23:20:16 +0000
commitc40b7fa0e31952611197da59c5103250fb4e1d70 (patch)
tree1e21d0ab55f4a9bcd6246de023ff51ab5415246f
parent0e01c3bb0b9c163317d5a53c8f768356ad9304dd (diff)
downloadFreeBSD-src-c40b7fa0e31952611197da59c5103250fb4e1d70.zip
FreeBSD-src-c40b7fa0e31952611197da59c5103250fb4e1d70.tar.gz
Mitigations for Microarchitectural Data Sampling.
Approved by: so Security: FreeBSD-SA-19:07.mds Security: CVE-2018-12126 Security: CVE-2018-12127 Security: CVE-2018-12130 Security: CVE-2019-11091
-rw-r--r--sys/amd64/amd64/exception.S2
-rw-r--r--sys/amd64/amd64/genassym.c3
-rw-r--r--sys/amd64/amd64/initcpu.c1
-rw-r--r--sys/amd64/amd64/machdep.c1
-rw-r--r--sys/amd64/amd64/support.S248
-rw-r--r--sys/amd64/include/pcpu.h6
-rw-r--r--sys/dev/cpuctl/cpuctl.c1
-rw-r--r--sys/i386/i386/exception.s1
-rw-r--r--sys/i386/i386/genassym.c3
-rw-r--r--sys/i386/i386/initcpu.c1
-rw-r--r--sys/i386/i386/support.s191
-rw-r--r--sys/i386/include/pcpu.h6
-rw-r--r--sys/x86/include/specialreg.h2
-rw-r--r--sys/x86/include/x86_var.h2
-rw-r--r--sys/x86/x86/cpu_machdep.c195
15 files changed, 661 insertions, 2 deletions
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index c35cea9..8e281d9 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -487,6 +487,7 @@ fast_syscall_common:
testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
jne 3f
call handle_ibrs_exit
+ callq *mds_handler
/* Restore preserved registers. */
MEXITCOUNT
movq TF_RDI(%rsp),%rdi /* bonus; preserve arg 1 */
@@ -1121,6 +1122,7 @@ ld_regs:
jz 2f /* keep running with kernel GS.base */
cli
call handle_ibrs_exit_rs
+ callq *mds_handler
cmpb $0,pti
je 1f
pushq %rdx
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 28f584f..fd45d22 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -230,6 +230,9 @@ ASSYM(PC_SAVED_UCR3, offsetof(struct pcpu, pc_saved_ucr3));
ASSYM(PC_PTI_STACK, offsetof(struct pcpu, pc_pti_stack));
ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);
ASSYM(PC_IBPB_SET, offsetof(struct pcpu, pc_ibpb_set));
+ASSYM(PC_MDS_TMP, offsetof(struct pcpu, pc_mds_tmp));
+ASSYM(PC_MDS_BUF, offsetof(struct pcpu, pc_mds_buf));
+ASSYM(PC_MDS_BUF64, offsetof(struct pcpu, pc_mds_buf64));
ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL);
diff --git a/sys/amd64/amd64/initcpu.c b/sys/amd64/amd64/initcpu.c
index abe9d63..9df6477 100644
--- a/sys/amd64/amd64/initcpu.c
+++ b/sys/amd64/amd64/initcpu.c
@@ -223,6 +223,7 @@ initializecpu(void)
}
hw_ibrs_recalculate();
hw_ssb_recalculate(false);
+ hw_mds_recalculate();
switch (cpu_vendor_id) {
case CPU_VENDOR_AMD:
init_amd();
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index cf7ccb2..c0e8f3f 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1851,6 +1851,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
+ TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
/* Location of kernel stack for locore */
return ((u_int64_t)thread0.td_pcb);
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index b092ef0..d045168 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -1,8 +1,13 @@
/*-
+ * Copyright (c) 2018-2019 The FreeBSD Foundation
* Copyright (c) 2003 Peter Wemm.
* Copyright (c) 1993 The Regents of the University of California.
* All rights reserved.
*
+ * Portions of this software were developed by
+ * Konstantin Belousov <kib@FreeBSD.org> under sponsorship from
+ * the FreeBSD Foundation.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -892,3 +897,246 @@ ENTRY(handle_ibrs_exit_rs)
END(handle_ibrs_exit_rs)
.noaltmacro
+
+ENTRY(mds_handler_void) /* handler used when no MDS mitigation is active: returns immediately */
+ retq
+END(mds_handler_void)
+
+ENTRY(mds_handler_verw) /* executes VERW on a stack copy of the %ds selector */
+ subq $8, %rsp /* reserve an 8-byte scratch slot on the stack */
+ movw %ds, (%rsp) /* store the current %ds selector into the slot */
+ verw (%rsp) /* VERW with that selector as its memory operand */
+ addq $8, %rsp /* release the scratch slot */
+ retq
+END(mds_handler_verw)
+
+ENTRY(mds_handler_ivb) /* software sequence; installed by hw_mds_recalculate() for the Nehalem/SandyBridge/IvyBridge model list */
+ pushq %rax /* %rax, %rdx and %rcx are used below and restored before return */
+ pushq %rdx
+ pushq %rcx
+
+ movq %cr0, %rax /* %rax keeps the entry value of %cr0 until the end */
+ testb $CR0_TS, %al
+ je 1f
+ clts /* clear CR0.TS before %xmm0 is touched */
+1: movq PCPU(MDS_BUF), %rdx /* %rdx = this CPU's pc_mds_buf */
+ movdqa %xmm0, PCPU(MDS_TMP) /* save %xmm0 in the per-CPU pc_mds_tmp area */
+ pxor %xmm0, %xmm0 /* %xmm0 = 0 */
+
+ lfence
+ orpd (%rdx), %xmm0 /* two 16-byte loads from the start of the buffer */
+ orpd (%rdx), %xmm0
+ mfence
+ movl $40, %ecx /* 40 iterations of 16 bytes = 640 bytes */
+ addq $16, %rdx /* stores start 16 bytes into the buffer */
+2: movntdq %xmm0, (%rdx) /* non-temporal 16-byte store */
+ addq $16, %rdx
+ decl %ecx
+ jnz 2b
+ mfence
+
+ movdqa PCPU(MDS_TMP),%xmm0 /* restore the caller's %xmm0 */
+ testb $CR0_TS, %al /* %al still holds the entry %cr0 low byte */
+ je 3f
+ movq %rax, %cr0 /* TS was set on entry: put the original %cr0 back */
+3: popq %rcx
+ popq %rdx
+ popq %rax
+ retq
+END(mds_handler_ivb)
+
+ENTRY(mds_handler_bdw) /* software sequence; installed by hw_mds_recalculate() for the Haswell/Broadwell model list */
+ pushq %rax /* all five registers used below are saved and restored */
+ pushq %rbx
+ pushq %rcx
+ pushq %rdi
+ pushq %rsi
+
+ movq %cr0, %rax /* %rax keeps the entry value of %cr0 until the end */
+ testb $CR0_TS, %al
+ je 1f
+ clts /* clear CR0.TS before %xmm0 is touched */
+1: movq PCPU(MDS_BUF), %rbx /* %rbx = this CPU's pc_mds_buf */
+ movdqa %xmm0, PCPU(MDS_TMP) /* save %xmm0 in the per-CPU pc_mds_tmp area */
+ pxor %xmm0, %xmm0 /* %xmm0 = 0 */
+
+ movq %rbx, %rdi /* destination and source of the copy below are both the buffer start */
+ movq %rbx, %rsi
+ movl $40, %ecx /* 40 iterations of 16 bytes = 640 bytes */
+2: movntdq %xmm0, (%rbx) /* non-temporal 16-byte zero store */
+ addq $16, %rbx
+ decl %ecx
+ jnz 2b
+ mfence
+ movl $1536, %ecx /* byte count for the copy */
+ rep; movsb /* copy 1536 bytes of the buffer onto itself */
+ lfence
+
+ movdqa PCPU(MDS_TMP),%xmm0 /* restore the caller's %xmm0 */
+ testb $CR0_TS, %al /* %al still holds the entry %cr0 low byte */
+ je 3f
+ movq %rax, %cr0 /* TS was set on entry: put the original %cr0 back */
+3: popq %rsi
+ popq %rdi
+ popq %rcx
+ popq %rbx
+ popq %rax
+ retq
+END(mds_handler_bdw)
+
+/*
+ * MDS software sequence for the Skylake-class model list, SSE variant.
+ * All general-purpose registers used are saved and restored, %xmm0 is
+ * preserved through the per-CPU pc_mds_tmp area, and CR0.TS is put back
+ * if it was set on entry.
+ */
+ENTRY(mds_handler_skl_sse)
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+	pushq	%rdi
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts				/* clear CR0.TS before %xmm0 is touched */
+1:	movq	PCPU(MDS_BUF), %rdi	/* 6144-byte per-CPU buffer */
+	movq	PCPU(MDS_BUF64), %rdx	/* 64-byte aligned per-CPU line */
+	movdqa	%xmm0, PCPU(MDS_TMP)	/* save the caller's %xmm0 */
+	pxor	%xmm0, %xmm0
+
+	lfence
+	orpd	(%rdx), %xmm0
+	orpd	(%rdx), %xmm0
+	/*
+	 * %rax is reused as loop index and as the stosb fill value below,
+	 * so move the saved %cr0 into %rdx, which is free from here on.
+	 */
+	movq	%rax, %rdx
+	xorl	%eax, %eax
+2:	clflushopt	5376(%rdi, %rax, 8)	/* 12 lines at offsets 5376..6143 */
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb			/* zero-fill the whole buffer */
+	mfence
+
+	movdqa	PCPU(MDS_TMP), %xmm0	/* restore the caller's %xmm0 */
+	testb	$CR0_TS, %dl		/* was TS set on entry? */
+	je	3f
+	movq	%rdx, %cr0		/* yes: restore the original %cr0 */
+3:	popq	%rdi
+	popq	%rcx
+	popq	%rdx
+	popq	%rax
+	retq
+END(mds_handler_skl_sse)
+
+/*
+ * MDS software sequence for the Skylake-class model list, AVX variant.
+ * All general-purpose registers used are saved and restored, %ymm0 is
+ * preserved through the per-CPU pc_mds_tmp area, and CR0.TS is put back
+ * if it was set on entry.
+ */
+ENTRY(mds_handler_skl_avx)
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+	pushq	%rdi
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts				/* clear CR0.TS before %ymm0 is touched */
+1:	movq	PCPU(MDS_BUF), %rdi	/* 6144-byte per-CPU buffer */
+	movq	PCPU(MDS_BUF64), %rdx	/* 64-byte aligned per-CPU line */
+	vmovdqa	%ymm0, PCPU(MDS_TMP)	/* save the caller's %ymm0 */
+	vpxor	%ymm0, %ymm0, %ymm0
+
+	lfence
+	vorpd	(%rdx), %ymm0, %ymm0
+	vorpd	(%rdx), %ymm0, %ymm0
+	/*
+	 * %rax is reused as loop index and as the stosb fill value below,
+	 * so move the saved %cr0 into %rdx, which is free from here on.
+	 */
+	movq	%rax, %rdx
+	xorl	%eax, %eax
+2:	clflushopt	5376(%rdi, %rax, 8)	/* 12 lines at offsets 5376..6143 */
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb			/* zero-fill the whole buffer */
+	mfence
+
+	vmovdqa	PCPU(MDS_TMP), %ymm0	/* restore the caller's %ymm0 */
+	testb	$CR0_TS, %dl		/* was TS set on entry? */
+	je	3f
+	movq	%rdx, %cr0		/* yes: restore the original %cr0 */
+3:	popq	%rdi
+	popq	%rcx
+	popq	%rdx
+	popq	%rax
+	retq
+END(mds_handler_skl_avx)
+
+/*
+ * MDS software sequence for the Skylake-class model list, AVX-512
+ * variant.  The AVX-512 instructions are emitted as raw bytes; the
+ * intended mnemonic precedes each encoding.  All general-purpose
+ * registers used are saved and restored, %zmm0 is preserved through the
+ * per-CPU pc_mds_tmp area, and CR0.TS is put back if it was set on
+ * entry.
+ */
+ENTRY(mds_handler_skl_avx512)
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+	pushq	%rdi
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts				/* clear CR0.TS before %zmm0 is touched */
+1:	movq	PCPU(MDS_BUF), %rdi	/* 6144-byte per-CPU buffer */
+	movq	PCPU(MDS_BUF64), %rdx	/* 64-byte aligned per-CPU line */
+/*	vmovdqa64 %zmm0, PCPU(MDS_TMP) */
+	.byte	0x65, 0x62, 0xf1, 0xfd, 0x48, 0x7f, 0x04, 0x25
+	.long	PC_MDS_TMP
+/*	vpxor	%zmm0, %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0xef, 0xc0
+
+	lfence
+/*	vorpd	(%rdx), %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
+/*	vorpd	(%rdx), %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
+	/*
+	 * %rax is reused as loop index and as the stosb fill value below,
+	 * so move the saved %cr0 into %rdx, which is free from here on.
+	 */
+	movq	%rax, %rdx
+	xorl	%eax, %eax
+2:	clflushopt	5376(%rdi, %rax, 8)	/* 12 lines at offsets 5376..6143 */
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb			/* zero-fill the whole buffer */
+	mfence
+
+/*	vmovdqa64 PCPU(MDS_TMP), %zmm0 */
+	.byte	0x65, 0x62, 0xf1, 0xfd, 0x48, 0x6f, 0x04, 0x25
+	.long	PC_MDS_TMP
+	testb	$CR0_TS, %dl		/* was TS set on entry? */
+	je	3f
+	movq	%rdx, %cr0		/* yes: restore the original %cr0 */
+3:	popq	%rdi
+	popq	%rcx
+	popq	%rdx
+	popq	%rax
+	retq
+END(mds_handler_skl_avx512)
+
+ENTRY(mds_handler_silvermont) /* software sequence; installed by hw_mds_recalculate() for the Silvermont/Airmont model list */
+ pushq %rax /* %rax, %rdx and %rcx are used below and restored before return */
+ pushq %rdx
+ pushq %rcx
+
+ movq %cr0, %rax /* %rax keeps the entry value of %cr0 until the end */
+ testb $CR0_TS, %al
+ je 1f
+ clts /* clear CR0.TS before %xmm0 is touched */
+1: movq PCPU(MDS_BUF), %rdx /* %rdx = this CPU's pc_mds_buf */
+ movdqa %xmm0, PCPU(MDS_TMP) /* save %xmm0 in the per-CPU pc_mds_tmp area */
+ pxor %xmm0, %xmm0 /* %xmm0 = 0 */
+
+ movl $16, %ecx /* 16 iterations of 16 bytes = 256 bytes */
+2: movntdq %xmm0, (%rdx) /* non-temporal 16-byte zero store */
+ addq $16, %rdx
+ decl %ecx
+ jnz 2b
+ mfence
+
+ movdqa PCPU(MDS_TMP),%xmm0 /* restore the caller's %xmm0 */
+ testb $CR0_TS, %al /* %al still holds the entry %cr0 low byte */
+ je 3f
+ movq %rax, %cr0 /* TS was set on entry: put the original %cr0 back */
+3: popq %rcx
+ popq %rdx
+ popq %rax
+ retq
+END(mds_handler_silvermont)
diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h
index e40c521..c7fa379 100644
--- a/sys/amd64/include/pcpu.h
+++ b/sys/amd64/include/pcpu.h
@@ -73,7 +73,11 @@
uint32_t pc_pcid_gen; \
uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
uint32_t pc_ibpb_set; \
- char __pad[96] /* be divisor of PAGE_SIZE \
+ void *pc_mds_buf; \
+ void *pc_mds_buf64; \
+ uint32_t pc_pad[20]; \
+ uint8_t pc_mds_tmp[64]; \
+ char __pad[960] /* be divisor of PAGE_SIZE \
after cache alignment */
#define PC_DBREG_CMD_NONE 0
diff --git a/sys/dev/cpuctl/cpuctl.c b/sys/dev/cpuctl/cpuctl.c
index 76163ca..bafd861 100644
--- a/sys/dev/cpuctl/cpuctl.c
+++ b/sys/dev/cpuctl/cpuctl.c
@@ -530,6 +530,7 @@ cpuctl_do_eval_cpu_features(int cpu, struct thread *td)
hw_ibrs_recalculate();
restore_cpu(oldcpu, is_bound, td);
hw_ssb_recalculate(true);
+ hw_mds_recalculate();
printcpuinfo();
return (0);
}
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s
index 53b846a..dddb9d8 100644
--- a/sys/i386/i386/exception.s
+++ b/sys/i386/i386/exception.s
@@ -406,6 +406,7 @@ doreti_ast:
*/
doreti_exit:
MEXITCOUNT
+ call *mds_handler
.globl doreti_popl_fs
doreti_popl_fs:
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index c4c61ed..9445767 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -217,6 +217,9 @@ ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt));
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss));
+ASSYM(PC_MDS_TMP, offsetof(struct pcpu, pc_mds_tmp));
+ASSYM(PC_MDS_BUF, offsetof(struct pcpu, pc_mds_buf));
+ASSYM(PC_MDS_BUF64, offsetof(struct pcpu, pc_mds_buf64));
#ifdef DEV_APIC
ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
diff --git a/sys/i386/i386/initcpu.c b/sys/i386/i386/initcpu.c
index ce86133..9637dc4 100644
--- a/sys/i386/i386/initcpu.c
+++ b/sys/i386/i386/initcpu.c
@@ -768,6 +768,7 @@ initializecpu(void)
elf32_nxstack = 1;
}
#endif
+ hw_mds_recalculate();
}
void
diff --git a/sys/i386/i386/support.s b/sys/i386/i386/support.s
index 3535c7a..1125143 100644
--- a/sys/i386/i386/support.s
+++ b/sys/i386/i386/support.s
@@ -826,3 +826,194 @@ END(handle_ibrs_entry)
ENTRY(handle_ibrs_exit)
ret
END(handle_ibrs_exit)
+
+ENTRY(mds_handler_void) /* handler used when no MDS mitigation is active: returns immediately */
+ ret
+END(mds_handler_void)
+
+ENTRY(mds_handler_verw) /* executes VERW on a stack copy of the %ds selector */
+ subl $4, %esp /* reserve a 4-byte scratch slot on the stack */
+ movw %ds, (%esp) /* store the current %ds selector into the slot */
+ verw (%esp) /* VERW with that selector as its memory operand */
+ addl $4, %esp /* release the scratch slot */
+ ret
+END(mds_handler_verw)
+
+ENTRY(mds_handler_ivb) /* software sequence for the Nehalem/SandyBridge/IvyBridge model list; %eax, %ecx and %edx are not preserved */
+ movl %cr0, %eax /* %eax keeps the entry value of %cr0 until the end */
+ testb $CR0_TS, %al
+ je 1f
+ clts /* clear CR0.TS before %xmm0 is touched */
+1: movl PCPU(MDS_BUF), %edx /* %edx = this CPU's pc_mds_buf */
+ movdqa %xmm0, PCPU(MDS_TMP) /* save %xmm0 in the per-CPU pc_mds_tmp area */
+ pxor %xmm0, %xmm0 /* %xmm0 = 0 */
+
+ lfence
+ orpd (%edx), %xmm0 /* two 16-byte loads from the start of the buffer */
+ orpd (%edx), %xmm0
+ mfence
+ movl $40, %ecx /* 40 iterations of 16 bytes = 640 bytes */
+ addl $16, %edx /* stores start 16 bytes into the buffer */
+2: movntdq %xmm0, (%edx) /* non-temporal 16-byte store */
+ addl $16, %edx
+ decl %ecx
+ jnz 2b
+ mfence
+
+ movdqa PCPU(MDS_TMP),%xmm0 /* restore the caller's %xmm0 */
+ testb $CR0_TS, %al /* %al still holds the entry %cr0 low byte */
+ je 3f
+ movl %eax, %cr0 /* TS was set on entry: put the original %cr0 back */
+3: ret
+END(mds_handler_ivb)
+
+ENTRY(mds_handler_bdw) /* software sequence for the Haswell/Broadwell model list; %eax, %ebx, %ecx, %edi and %esi are not preserved */
+ movl %cr0, %eax /* %eax keeps the entry value of %cr0 until the end */
+ testb $CR0_TS, %al
+ je 1f
+ clts /* clear CR0.TS before %xmm0 is touched */
+1: movl PCPU(MDS_BUF), %ebx /* %ebx = this CPU's pc_mds_buf */
+ movdqa %xmm0, PCPU(MDS_TMP) /* save %xmm0 in the per-CPU pc_mds_tmp area */
+ pxor %xmm0, %xmm0 /* %xmm0 = 0 */
+
+ movl %ebx, %edi /* destination and source of the copy below are both the buffer start */
+ movl %ebx, %esi
+ movl $40, %ecx /* 40 iterations of 16 bytes = 640 bytes */
+2: movntdq %xmm0, (%ebx) /* non-temporal 16-byte zero store */
+ addl $16, %ebx
+ decl %ecx
+ jnz 2b
+ mfence
+ movl $1536, %ecx /* byte count for the copy */
+ rep; movsb /* copy 1536 bytes of the buffer onto itself */
+ lfence
+
+ movdqa PCPU(MDS_TMP),%xmm0 /* restore the caller's %xmm0 */
+ testb $CR0_TS, %al /* %al still holds the entry %cr0 low byte */
+ je 3f
+ movl %eax, %cr0 /* TS was set on entry: put the original %cr0 back */
+3: ret
+END(mds_handler_bdw)
+
+/*
+ * MDS software sequence for the Skylake-class model list, SSE variant.
+ * %eax, %ecx, %edx and %edi are not preserved.  %xmm0 is preserved
+ * through the per-CPU pc_mds_tmp area, and CR0.TS is put back if it was
+ * set on entry.
+ */
+ENTRY(mds_handler_skl_sse)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts				/* clear CR0.TS before %xmm0 is touched */
+1:	movl	PCPU(MDS_BUF), %edi	/* 6144-byte per-CPU buffer */
+	movl	PCPU(MDS_BUF64), %edx	/* 64-byte aligned per-CPU line */
+	movdqa	%xmm0, PCPU(MDS_TMP)	/* save the caller's %xmm0 */
+	pxor	%xmm0, %xmm0
+
+	lfence
+	orpd	(%edx), %xmm0
+	orpd	(%edx), %xmm0
+	/*
+	 * %eax is reused as loop index and as the stosb fill value below,
+	 * so move the saved %cr0 into %edx, which is free from here on.
+	 */
+	movl	%eax, %edx
+	xorl	%eax, %eax
+2:	clflushopt	5376(%edi, %eax, 8)	/* 12 lines at offsets 5376..6143 */
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb			/* zero-fill the whole buffer */
+	mfence
+
+	movdqa	PCPU(MDS_TMP), %xmm0	/* restore the caller's %xmm0 */
+	testb	$CR0_TS, %dl		/* was TS set on entry? */
+	je	3f
+	movl	%edx, %cr0		/* yes: restore the original %cr0 */
+3:	ret
+END(mds_handler_skl_sse)
+
+/*
+ * MDS software sequence for the Skylake-class model list, AVX variant.
+ * %eax, %ecx, %edx and %edi are not preserved.  %ymm0 is preserved
+ * through the per-CPU pc_mds_tmp area, and CR0.TS is put back if it was
+ * set on entry.
+ */
+ENTRY(mds_handler_skl_avx)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts				/* clear CR0.TS before %ymm0 is touched */
+1:	movl	PCPU(MDS_BUF), %edi	/* 6144-byte per-CPU buffer */
+	movl	PCPU(MDS_BUF64), %edx	/* 64-byte aligned per-CPU line */
+	vmovdqa	%ymm0, PCPU(MDS_TMP)	/* save the caller's %ymm0 */
+	vpxor	%ymm0, %ymm0, %ymm0
+
+	lfence
+	vorpd	(%edx), %ymm0, %ymm0
+	vorpd	(%edx), %ymm0, %ymm0
+	/*
+	 * %eax is reused as loop index and as the stosb fill value below,
+	 * so move the saved %cr0 into %edx, which is free from here on.
+	 */
+	movl	%eax, %edx
+	xorl	%eax, %eax
+2:	clflushopt	5376(%edi, %eax, 8)	/* 12 lines at offsets 5376..6143 */
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb			/* zero-fill the whole buffer */
+	mfence
+
+	vmovdqa	PCPU(MDS_TMP), %ymm0	/* restore the caller's %ymm0 */
+	testb	$CR0_TS, %dl		/* was TS set on entry? */
+	je	3f
+	movl	%edx, %cr0		/* yes: restore the original %cr0 */
+3:	ret
+END(mds_handler_skl_avx)
+
+/*
+ * MDS software sequence for the Skylake-class model list, AVX-512
+ * variant.  The AVX-512 instructions are emitted as raw bytes; the
+ * intended mnemonic precedes each encoding.  %eax, %ecx, %edx and %edi
+ * are not preserved.  %zmm0 is preserved through the per-CPU pc_mds_tmp
+ * area, and CR0.TS is put back if it was set on entry.
+ */
+ENTRY(mds_handler_skl_avx512)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts				/* clear CR0.TS before %zmm0 is touched */
+1:	movl	PCPU(MDS_BUF), %edi	/* 6144-byte per-CPU buffer */
+	movl	PCPU(MDS_BUF64), %edx	/* 64-byte aligned per-CPU line */
+/*	vmovdqa64 %zmm0, PCPU(MDS_TMP) */
+	.byte	0x64, 0x62, 0xf1, 0xfd, 0x48, 0x7f, 0x05
+	.long	PC_MDS_TMP
+/*	vpxor	%zmm0, %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0xef, 0xc0
+
+	lfence
+/*	vorpd	(%edx), %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
+/*	vorpd	(%edx), %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
+	/*
+	 * %eax is reused as loop index and as the stosb fill value below,
+	 * so move the saved %cr0 into %edx, which is free from here on.
+	 */
+	movl	%eax, %edx
+	xorl	%eax, %eax
+2:	clflushopt	5376(%edi, %eax, 8)	/* 12 lines at offsets 5376..6143 */
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb			/* zero-fill the whole buffer */
+	mfence
+
+/*	vmovdqa64 PCPU(MDS_TMP), %zmm0 */
+	.byte	0x64, 0x62, 0xf1, 0xfd, 0x48, 0x6f, 0x05
+	.long	PC_MDS_TMP
+	testb	$CR0_TS, %dl		/* was TS set on entry? */
+	je	3f
+	movl	%edx, %cr0		/* yes: restore the original %cr0 */
+3:	ret
+END(mds_handler_skl_avx512)
+
+ENTRY(mds_handler_silvermont) /* software sequence for the Silvermont/Airmont model list; %eax, %ecx and %edx are not preserved */
+ movl %cr0, %eax /* %eax keeps the entry value of %cr0 until the end */
+ testb $CR0_TS, %al
+ je 1f
+ clts /* clear CR0.TS before %xmm0 is touched */
+1: movl PCPU(MDS_BUF), %edx /* %edx = this CPU's pc_mds_buf */
+ movdqa %xmm0, PCPU(MDS_TMP) /* save %xmm0 in the per-CPU pc_mds_tmp area */
+ pxor %xmm0, %xmm0 /* %xmm0 = 0 */
+
+ movl $16, %ecx /* 16 iterations of 16 bytes = 256 bytes */
+2: movntdq %xmm0, (%edx) /* non-temporal 16-byte zero store */
+ addl $16, %edx
+ decl %ecx
+ jnz 2b
+ mfence
+
+ movdqa PCPU(MDS_TMP),%xmm0 /* restore the caller's %xmm0 */
+ testb $CR0_TS, %al /* %al still holds the entry %cr0 low byte */
+ je 3f
+ movl %eax, %cr0 /* TS was set on entry: put the original %cr0 back */
+3: ret
+END(mds_handler_silvermont)
diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h
index a74cf9f..df809f7 100644
--- a/sys/i386/include/pcpu.h
+++ b/sys/i386/include/pcpu.h
@@ -69,7 +69,11 @@
vm_offset_t pc_qmap_addr; /* KVA for temporary mappings */\
uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
uint32_t pc_ibpb_set; \
- char __pad[185]
+ void *pc_mds_buf; \
+ void *pc_mds_buf64; \
+ uint32_t pc_pad[12]; \
+ uint8_t pc_mds_tmp[64]; \
+ char __pad[153]
#ifdef _KERNEL
diff --git a/sys/x86/include/specialreg.h b/sys/x86/include/specialreg.h
index bdd87ef..e17068f 100644
--- a/sys/x86/include/specialreg.h
+++ b/sys/x86/include/specialreg.h
@@ -385,6 +385,7 @@
/*
* CPUID instruction 7 Structured Extended Features, leaf 0 edx info
*/
+#define CPUID_STDEXT3_MD_CLEAR 0x00000400
#define CPUID_STDEXT3_IBPB 0x04000000
#define CPUID_STDEXT3_STIBP 0x08000000
#define CPUID_STDEXT3_L1D_FLUSH 0x10000000
@@ -395,6 +396,7 @@
#define IA32_ARCH_CAP_RDCL_NO 0x00000001
#define IA32_ARCH_CAP_IBRS_ALL 0x00000002
#define IA32_ARCH_CAP_SSBD_NO 0x00000004
+#define IA32_ARCH_CAP_MDS_NO 0x00000020
/*
* CPUID manufacturers identifiers
diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h
index 8dd28f3..c17fe5c 100644
--- a/sys/x86/include/x86_var.h
+++ b/sys/x86/include/x86_var.h
@@ -83,6 +83,7 @@ extern int use_xsave;
extern uint64_t xsave_mask;
extern int pti;
extern int hw_ibrs_active;
+extern int hw_mds_disable;
extern int hw_ssb_active;
struct pcb;
@@ -134,6 +135,7 @@ int isa_nmi(int cd);
void handle_ibrs_entry(void);
void handle_ibrs_exit(void);
void hw_ibrs_recalculate(void);
+void hw_mds_recalculate(void);
void hw_ssb_recalculate(bool all_cpus);
void nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame);
void nmi_call_kdb_smp(u_int type, struct trapframe *frame);
diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c
index 47d1d67..0604054 100644
--- a/sys/x86/x86/cpu_machdep.c
+++ b/sys/x86/x86/cpu_machdep.c
@@ -938,3 +938,198 @@ SYSCTL_PROC(_hw, OID_AUTO, spec_store_bypass_disable, CTLTYPE_INT |
hw_ssb_disable_handler, "I",
"Speculative Store Bypass Disable (0 - off, 1 - on, 2 - auto");
+int hw_mds_disable; /* mitigation mode: 0 - off, 1 - VERW, 2 - software sequence, 3 - auto */
+
+/*
+ * Handler for Microarchitectural Data Sampling issues. Really not a
+ * pointer to C function: on amd64 the code must not change any CPU
+ * architectural state except possibly %rflags. Also, it is always
+ * called with interrupts disabled.
+ */
+void (*mds_handler)(void); /* currently selected routine; assigned by hw_mds_recalculate() */
+void mds_handler_void(void); /* the mds_handler_* routines are implemented in assembly (support.S / support.s) */
+void mds_handler_verw(void);
+void mds_handler_ivb(void);
+void mds_handler_bdw(void);
+void mds_handler_skl_sse(void);
+void mds_handler_skl_avx(void);
+void mds_handler_skl_avx512(void);
+void mds_handler_silvermont(void);
+
+/*
+ * Sysctl handler that reports, as a string, which mitigation routine is
+ * currently installed in mds_handler.  A handler that is not one of the
+ * known routines is reported as "unknown".
+ */
+static int
+sysctl_hw_mds_disable_state_handler(SYSCTL_HANDLER_ARGS)
+{
+	static const struct {
+		void		(*routine)(void);
+		const char	*label;
+	} known[] = {
+		{ mds_handler_void,		"inactive" },
+		{ mds_handler_verw,		"VERW" },
+		{ mds_handler_ivb,		"software IvyBridge" },
+		{ mds_handler_bdw,		"software Broadwell" },
+		{ mds_handler_skl_sse,		"software Skylake SSE" },
+		{ mds_handler_skl_avx,		"software Skylake AVX" },
+		{ mds_handler_skl_avx512,	"software Skylake AVX512" },
+		{ mds_handler_silvermont,	"software Silvermont" },
+	};
+	const char *label;
+	size_t idx;
+
+	label = "unknown";
+	for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
+		if (mds_handler == known[idx].routine) {
+			label = known[idx].label;
+			break;
+		}
+	}
+	/* The string is copied out without its terminating NUL. */
+	return (SYSCTL_OUT(req, label, strlen(label)));
+}
+
+SYSCTL_PROC(_hw, OID_AUTO, mds_disable_state, /* read-only string node under _hw */
+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+ sysctl_hw_mds_disable_state_handler, "A", /* value comes from the handler above */
+ "Microarchitectural Data Sampling Mitigation state");
+
+_Static_assert(__offsetof(struct pcpu, pc_mds_tmp) % 64 == 0, "MDS AVX512");
+
+/*
+ * Select the MDS mitigation routine and publish it in mds_handler.
+ *
+ * The choice depends on hw_mds_disable (0 - off, 1 - VERW, 2 - software
+ * sequence, 3 - auto), the CPU vendor, the MD_CLEAR CPUID bit, the
+ * RDCL_NO/MDS_NO capability bits and the CPU family/model/stepping.
+ * The software sequences need per-CPU scratch buffers; they are
+ * allocated here with M_WAITOK the first time a sequence is selected
+ * and are not freed by this function.  When no routine applies,
+ * hw_mds_disable is reset to 0 and the no-op handler is installed.
+ *
+ * Every software-sequence branch requires mode 2 or 3, so a value
+ * outside 0..3 always ends in the final "off" branch.
+ */
+void
+hw_mds_recalculate(void)
+{
+	struct pcpu *pc;
+	vm_offset_t b64;
+	u_long xcr0;
+	int i;
+
+	/*
+	 * Allow user to force VERW variant even if MD_CLEAR is not
+	 * reported.  For instance, hypervisor might unknowingly
+	 * filter the cap out.
+	 * For the similar reasons, and for testing, allow to enable
+	 * mitigation even for RDCL_NO or MDS_NO caps.
+	 */
+	if (cpu_vendor_id != CPU_VENDOR_INTEL || hw_mds_disable == 0 ||
+	    ((cpu_ia32_arch_caps & (IA32_ARCH_CAP_RDCL_NO |
+	    IA32_ARCH_CAP_MDS_NO)) != 0 && hw_mds_disable == 3)) {
+		mds_handler = mds_handler_void;
+	} else if (((cpu_stdext_feature3 & CPUID_STDEXT3_MD_CLEAR) != 0 &&
+	    hw_mds_disable == 3) || hw_mds_disable == 1) {
+		mds_handler = mds_handler_verw;
+	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    (CPUID_TO_MODEL(cpu_id) == 0x2e || CPUID_TO_MODEL(cpu_id) == 0x1e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x1f || CPUID_TO_MODEL(cpu_id) == 0x1a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x2f || CPUID_TO_MODEL(cpu_id) == 0x25 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x2c || CPUID_TO_MODEL(cpu_id) == 0x2d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x2a || CPUID_TO_MODEL(cpu_id) == 0x3e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x3a) &&
+	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
+		/*
+		 * Nehalem, SandyBridge, IvyBridge
+		 */
+		CPU_FOREACH(i) {
+			pc = pcpu_find(i);
+			if (pc->pc_mds_buf == NULL) {
+				pc->pc_mds_buf = malloc(672, M_TEMP,
+				    M_WAITOK);
+				/* Only the first 16 bytes are loaded. */
+				bzero(pc->pc_mds_buf, 16);
+			}
+		}
+		mds_handler = mds_handler_ivb;
+	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    (CPUID_TO_MODEL(cpu_id) == 0x3f || CPUID_TO_MODEL(cpu_id) == 0x3c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x45 || CPUID_TO_MODEL(cpu_id) == 0x46 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x56 || CPUID_TO_MODEL(cpu_id) == 0x4f ||
+	    CPUID_TO_MODEL(cpu_id) == 0x47 || CPUID_TO_MODEL(cpu_id) == 0x3d) &&
+	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
+		/*
+		 * Haswell, Broadwell
+		 */
+		CPU_FOREACH(i) {
+			pc = pcpu_find(i);
+			if (pc->pc_mds_buf == NULL) {
+				pc->pc_mds_buf = malloc(1536, M_TEMP,
+				    M_WAITOK);
+				bzero(pc->pc_mds_buf, 16);
+			}
+		}
+		mds_handler = mds_handler_bdw;
+	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    ((CPUID_TO_MODEL(cpu_id) == 0x55 && (cpu_id &
+	    CPUID_STEPPING) <= 5) ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4e || CPUID_TO_MODEL(cpu_id) == 0x5e ||
+	    (CPUID_TO_MODEL(cpu_id) == 0x8e && (cpu_id &
+	    CPUID_STEPPING) <= 0xb) ||
+	    (CPUID_TO_MODEL(cpu_id) == 0x9e && (cpu_id &
+	    CPUID_STEPPING) <= 0xc)) &&
+	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
+		/*
+		 * Skylake, KabyLake, CoffeeLake, WhiskeyLake,
+		 * CascadeLake
+		 */
+		CPU_FOREACH(i) {
+			pc = pcpu_find(i);
+			if (pc->pc_mds_buf == NULL) {
+				pc->pc_mds_buf = malloc(6 * 1024,
+				    M_TEMP, M_WAITOK);
+				/*
+				 * Over-allocate by 63 bytes so that a
+				 * 64-byte aligned, 64-byte long region
+				 * can be carved out of the allocation.
+				 */
+				b64 = (vm_offset_t)malloc(64 + 63,
+				    M_TEMP, M_WAITOK);
+				pc->pc_mds_buf64 = (void *)roundup2(b64, 64);
+				bzero(pc->pc_mds_buf64, 64);
+			}
+		}
+		/* Use the widest vector variant enabled in XCR0. */
+		xcr0 = rxcr(0);
+		if ((xcr0 & XFEATURE_ENABLED_ZMM_HI256) != 0 &&
+		    (cpu_stdext_feature2 & CPUID_STDEXT_AVX512DQ) != 0)
+			mds_handler = mds_handler_skl_avx512;
+		else if ((xcr0 & XFEATURE_ENABLED_AVX) != 0 &&
+		    (cpu_feature2 & CPUID2_AVX) != 0)
+			mds_handler = mds_handler_skl_avx;
+		else
+			mds_handler = mds_handler_skl_sse;
+	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    (CPUID_TO_MODEL(cpu_id) == 0x37 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x6e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x65 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x75 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x1c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x26 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x27 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x35 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x36 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x7a) &&
+	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
+		/* Silvermont, Airmont */
+		CPU_FOREACH(i) {
+			pc = pcpu_find(i);
+			if (pc->pc_mds_buf == NULL)
+				pc->pc_mds_buf = malloc(256, M_TEMP, M_WAITOK);
+		}
+		mds_handler = mds_handler_silvermont;
+	} else {
+		/* Nothing applicable: record the mitigation as off. */
+		hw_mds_disable = 0;
+		mds_handler = mds_handler_void;
+	}
+}
+
+/*
+ * Sysctl handler for the MDS mitigation mode.  Reads report the current
+ * hw_mds_disable value.  Writes accept 0..3 only (EINVAL otherwise),
+ * store the new mode and re-run hw_mds_recalculate().
+ */
+static int
+sysctl_mds_disable_handler(SYSCTL_HANDLER_ARGS)
+{
+	int mode, rc;
+
+	mode = hw_mds_disable;
+	rc = sysctl_handle_int(oidp, &mode, 0, req);
+	if (rc == 0 && req->newptr != NULL) {
+		/* A new value was supplied: validate before applying. */
+		if (mode < 0 || mode > 3)
+			return (EINVAL);
+		hw_mds_disable = mode;
+		hw_mds_recalculate();
+	}
+	return (rc);
+}
+
+/*
+ * Read/write integer node for the MDS mitigation mode.  The handler
+ * validates the value and re-selects the mitigation routine.  The
+ * description now closes its parenthesised list of modes.
+ */
+SYSCTL_PROC(_hw, OID_AUTO, mds_disable, CTLTYPE_INT |
+    CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
+    sysctl_mds_disable_handler, "I",
+    "Microarchitectural Data Sampling Mitigation "
+    "(0 - off, 1 - on VERW, 2 - on SW, 3 - on AUTO)");
+
OpenPOWER on IntegriCloud