author     neel <neel@FreeBSD.org>    2014-09-17 18:46:51 +0000
committer  neel <neel@FreeBSD.org>    2014-09-17 18:46:51 +0000
commit     c9b7ad126a69df6d747aa31e822ad6ae3ea9c900 (patch)
tree       c99380d1a0386a2472762670318cd92cfcb5b65a /sys/amd64/vmm
parent     eefb10a1843c72b46fee9c11a6cde4146031ea4d (diff)
parent     65bccd5b546490ed3e9ef43ce93d5a573d366801 (diff)
IFC @r271694
Diffstat (limited to 'sys/amd64/vmm')
-rw-r--r--   sys/amd64/vmm/amd/svm.c                  30
-rw-r--r--   sys/amd64/vmm/intel/vmx.c                46
-rw-r--r--   sys/amd64/vmm/io/vatpic.c                27
-rw-r--r--   sys/amd64/vmm/vmm.c                      18
-rw-r--r--   sys/amd64/vmm/vmm_instruction_emul.c    206
-rw-r--r--   sys/amd64/vmm/x86.c                     103
6 files changed, 337 insertions, 93 deletions
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index c57c232..32ac257 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -1055,8 +1055,7 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
}
static int
-svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, int running,
- uint64_t val)
+svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val)
{
struct vmcb_ctrl *ctrl;
int oldval, newval;
@@ -1071,6 +1070,16 @@ svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, int running,
return (0);
}
+static int
+svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val)
+{
+ struct vmcb_ctrl *ctrl;
+
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ *val = ctrl->intr_shadow;
+ return (0);
+}
+
/*
* Once an NMI is injected it blocks delivery of further NMIs until the handler
* executes an IRET. The IRET intercept is enabled when an NMI is injected to
@@ -1096,7 +1105,7 @@ enable_nmi_blocking(struct svm_softc *sc, int vcpu)
}
static void
-clear_nmi_blocking(struct svm_softc *sc, int vcpu, int running)
+clear_nmi_blocking(struct svm_softc *sc, int vcpu)
{
int error;
@@ -1119,7 +1128,7 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu, int running)
* Set 'intr_shadow' to prevent an NMI from being injected on the
* immediate VMRUN.
*/
- error = svm_modify_intr_shadow(sc, vcpu, running, 1);
+ error = svm_modify_intr_shadow(sc, vcpu, 1);
KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error));
}
@@ -1256,7 +1265,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
* Restart execution at "iret" but with the intercept cleared.
*/
vmexit->inst_length = 0;
- clear_nmi_blocking(svm_sc, vcpu, 1);
+ clear_nmi_blocking(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_VINTR: /* interrupt window exiting */
@@ -1967,10 +1976,14 @@ svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
struct svm_softc *svm_sc;
struct vmcb *vmcb;
register_t *reg;
-
+
svm_sc = arg;
vmcb = svm_get_vmcb(svm_sc, vcpu);
+ if (ident == VM_REG_GUEST_INTR_SHADOW) {
+ return (svm_get_intr_shadow(svm_sc, vcpu, val));
+ }
+
if (vmcb_read(vmcb, ident, val) == 0) {
return (0);
}
@@ -1999,6 +2012,11 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
svm_sc = arg;
vmcb = svm_get_vmcb(svm_sc, vcpu);
+
+ if (ident == VM_REG_GUEST_INTR_SHADOW) {
+ return (svm_modify_intr_shadow(svm_sc, vcpu, val));
+ }
+
if (vmcb_write(vmcb, ident, val) == 0) {
return (0);
}
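
Taken together, the svm.c hunks make VM_REG_GUEST_INTR_SHADOW a synthetic register backed by the VMCB control area: svm_getreg() and svm_setreg() special-case it before falling through to the generic vmcb_read()/vmcb_write() path. A minimal user-space sketch of that dispatch shape, using made-up stand-in types rather than the kernel's:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the VMCB control area and register IDs. */
struct vmcb_ctrl { uint64_t intr_shadow; };
enum { REG_RAX, REG_INTR_SHADOW };
struct vcpu_state { struct vmcb_ctrl ctrl; uint64_t rax; };

/* Check the synthetic register before the generic path, mirroring
 * the svm_getreg() hunk above. */
static int
getreg(struct vcpu_state *st, int ident, uint64_t *val)
{
        if (ident == REG_INTR_SHADOW) {
                *val = st->ctrl.intr_shadow;
                return (0);
        }
        if (ident == REG_RAX) {         /* the "everything else" path */
                *val = st->rax;
                return (0);
        }
        return (EINVAL);
}

int
main(void)
{
        struct vcpu_state st = { .ctrl = { .intr_shadow = 1 } };
        uint64_t v;

        if (getreg(&st, REG_INTR_SHADOW, &v) == 0)
                printf("intr_shadow = %ju\n", (uintmax_t)v);
        return (0);
}
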
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 8e35781..5e42679 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -2711,6 +2711,46 @@ vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
}
static int
+vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval)
+{
+ uint64_t gi;
+ int error;
+
+ error = vmcs_getreg(&vmx->vmcs[vcpu], running,
+ VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi);
+ *retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
+ return (error);
+}
+
+static int
+vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val)
+{
+ struct vmcs *vmcs;
+ uint64_t gi;
+ int error, ident;
+
+ /*
+ * Forcing the vcpu into an interrupt shadow is not supported.
+ */
+ if (val) {
+ error = EINVAL;
+ goto done;
+ }
+
+ vmcs = &vmx->vmcs[vcpu];
+ ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY);
+ error = vmcs_getreg(vmcs, running, ident, &gi);
+ if (error == 0) {
+ gi &= ~HWINTR_BLOCKING;
+ error = vmcs_setreg(vmcs, running, ident, gi);
+ }
+done:
+ VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val,
+ error ? "failed" : "succeeded");
+ return (error);
+}
+
+static int
vmx_shadow_reg(int reg)
{
int shreg;
@@ -2741,6 +2781,9 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
if (running && hostcpu != curcpu)
panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
+ if (reg == VM_REG_GUEST_INTR_SHADOW)
+ return (vmx_get_intr_shadow(vmx, vcpu, running, retval));
+
if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0)
return (0);
@@ -2759,6 +2802,9 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
if (running && hostcpu != curcpu)
panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
+ if (reg == VM_REG_GUEST_INTR_SHADOW)
+ return (vmx_modify_intr_shadow(vmx, vcpu, running, val));
+
if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
return (0);
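
On the Intel side the shadow is not stored anywhere; it is derived from the VMCS guest-interruptibility field. A read collapses the hardware blocking bits to 0 or 1, and a write supports only clearing, since forcing a vcpu into an interrupt shadow is not supported (nonzero values fail with EINVAL). A sketch of the bit manipulation; the STI/MOV-SS bit positions are assumptions taken from the Intel SDM, not from this diff:

#include <stdint.h>
#include <stdio.h>

/* Assumed: bits 0 and 1 of the guest-interruptibility field are
 * "blocking by STI" and "blocking by MOV SS" (Intel SDM 24.4.2). */
#define STI_BLOCKING    0x0001
#define MOVSS_BLOCKING  0x0002
#define HWINTR_BLOCKING (STI_BLOCKING | MOVSS_BLOCKING)

int
main(void)
{
        uint64_t gi = STI_BLOCKING;     /* guest ran STI last instruction */

        /* Read: collapse the blocking bits to a 0/1 shadow value. */
        printf("shadow = %d\n", (gi & HWINTR_BLOCKING) ? 1 : 0);

        /* Write 0: clear both blocking bits, as in vmx_modify_intr_shadow(). */
        gi &= ~(uint64_t)HWINTR_BLOCKING;
        printf("gi = %#jx\n", (uintmax_t)gi);
        return (0);
}
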
diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c
index ee6fc84..d8ccebd 100644
--- a/sys/amd64/vmm/io/vatpic.c
+++ b/sys/amd64/vmm/io/vatpic.c
@@ -195,26 +195,29 @@ vatpic_notify_intr(struct vatpic *vatpic)
atpic->mask, atpic->request, atpic->service);
/*
+ * From Section 3.6.2, "Interrupt Modes", in the
+ * MPtable Specification, Version 1.4
+ *
* PIC interrupts are routed to both the Local APIC
* and the I/O APIC to support operation in 1 of 3
* modes.
*
* 1. Legacy PIC Mode: the PIC effectively bypasses
- * all APIC components. In mode '1' the local APIC is
+ * all APIC components. In this mode the local APIC is
* disabled and LINT0 is reconfigured as INTR to
* deliver the PIC interrupt directly to the CPU.
*
* 2. Virtual Wire Mode: the APIC is treated as a
* virtual wire which delivers interrupts from the PIC
- * to the CPU. In mode '2' LINT0 is programmed as
+ * to the CPU. In this mode LINT0 is programmed as
* ExtINT to indicate that the PIC is the source of
* the interrupt.
*
- * 3. Symmetric I/O Mode: PIC interrupts are fielded
- * by the I/O APIC and delivered to the appropriate
- * CPU. In mode '3' the I/O APIC input 0 is
- * programmed as ExtINT to indicate that the PIC is
- * the source of the interrupt.
+ * 3. Virtual Wire Mode via I/O APIC: PIC interrupts are
+ * fielded by the I/O APIC and delivered to the appropriate
+ * CPU. In this mode the I/O APIC input 0 is programmed
+ * as ExtINT to indicate that the PIC is the source of the
+ * interrupt.
*/
atpic->intr_raised = true;
lapic_set_local_intr(vatpic->vm, -1, APIC_LVT_LINT0);
@@ -497,13 +500,19 @@ vatpic_pending_intr(struct vm *vm, int *vecptr)
VATPIC_LOCK(vatpic);
pin = vatpic_get_highest_irrpin(atpic);
- if (pin == -1)
- pin = 7;
if (pin == 2) {
atpic = &vatpic->atpic[1];
pin = vatpic_get_highest_irrpin(atpic);
}
+ /*
+ * If there are no pins active at this moment then return the spurious
+ * interrupt vector instead.
+ */
+ if (pin == -1)
+ pin = 7;
+
+ KASSERT(pin >= 0 && pin <= 7, ("%s: invalid pin %d", __func__, pin));
*vecptr = atpic->irq_base + pin;
VATPIC_UNLOCK(vatpic);
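
The vatpic.c hunk also reorders vatpic_pending_intr() so the spurious-vector fallback runs only after the cascade is resolved. With the old order, a raised cascade pin (pin 2) whose slave IRR had meanwhile gone empty would compute a vector from pin -1; now that case returns the slave's spurious vector, irq_base + 7. A compact model of the fixed resolution order, with a hypothetical highest_pin() standing in for vatpic_get_highest_irrpin():

#include <stdio.h>

/* 8259 fixed priority: pin 0 is highest; -1 means no pin pending. */
static int
highest_pin(unsigned char irr)
{
        for (int pin = 0; pin < 8; pin++)
                if (irr & (1 << pin))
                        return (pin);
        return (-1);
}

int
main(void)
{
        unsigned char master_irr = 1 << 2;      /* only the cascade pin */
        unsigned char slave_irr = 0;            /* slave went idle */
        int base = 0x20, pin;

        pin = highest_pin(master_irr);
        if (pin == 2) {                         /* chase the cascade */
                base = 0x28;
                pin = highest_pin(slave_irr);
        }
        if (pin == -1)                          /* spurious interrupt */
                pin = 7;
        printf("vector = %#x\n", base + pin);   /* 0x2f, slave spurious */
        return (0);
}
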
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 55c9886..7a3ccc8 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1090,7 +1090,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vcpu *vcpu;
const char *wmesg;
- int t, vcpu_halted, vm_halted;
+ int error, t, vcpu_halted, vm_halted;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
@@ -1098,6 +1098,22 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
vcpu_halted = 0;
vm_halted = 0;
+ /*
+ * The typical way to halt a cpu is to execute: "sti; hlt"
+ *
+ * STI sets RFLAGS.IF to enable interrupts. However, the processor
+ * remains in an "interrupt shadow" for an additional instruction
+ * following the STI. This guarantees that "sti; hlt" sequence is
+ * atomic and a pending interrupt will be recognized after the HLT.
+ *
+ * After the HLT emulation is done the vcpu is no longer in an
+ * interrupt shadow and a pending interrupt can be injected on
+ * the next entry into the guest.
+ */
+ error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
+ KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
+ __func__, error));
+
vcpu_lock(vcpu);
while (1) {
/*
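
For reference, the guest-side idiom the new comment describes looks like the sketch below. It compiles in any amd64 translation unit but only executes at CPL 0; it is shown to illustrate the one-instruction shadow, not to run in a user process.

/*
 * STI enables interrupts but blocks their delivery for one more
 * instruction, so nothing can slip in between STI and HLT; a
 * pending interrupt is recognized only once HLT has executed,
 * and then wakes the cpu.
 */
static inline void
sti_hlt_idle(void)
{
        __asm __volatile("sti; hlt");
}
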
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 60e7a57..ef19792 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -65,6 +65,7 @@ enum {
VIE_OP_TYPE_MOVZX,
VIE_OP_TYPE_AND,
VIE_OP_TYPE_OR,
+ VIE_OP_TYPE_SUB,
VIE_OP_TYPE_TWO_BYTE,
VIE_OP_TYPE_PUSH,
VIE_OP_TYPE_CMP,
@@ -97,6 +98,10 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_byte = 0x0F,
.op_type = VIE_OP_TYPE_TWO_BYTE
},
+ [0x2B] = {
+ .op_byte = 0x2B,
+ .op_type = VIE_OP_TYPE_SUB,
+ },
[0x3B] = {
.op_byte = 0x3B,
.op_type = VIE_OP_TYPE_CMP,
@@ -311,46 +316,36 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
return (error);
}
+#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
+
/*
* Return the status flags that would result from doing (x - y).
*/
-static u_long
-getcc16(uint16_t x, uint16_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
-
-static u_long
-getcc32(uint32_t x, uint32_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
-
-static u_long
-getcc64(uint64_t x, uint64_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
+#define GETCC(sz) \
+static u_long \
+getcc##sz(uint##sz##_t x, uint##sz##_t y) \
+{ \
+ u_long rflags; \
+ \
+ __asm __volatile("sub %2,%1; pushfq; popq %0" : \
+ "=r" (rflags), "+r" (x) : "m" (y)); \
+ return (rflags); \
+} struct __hack
+
+GETCC(8);
+GETCC(16);
+GETCC(32);
+GETCC(64);
static u_long
getcc(int opsize, uint64_t x, uint64_t y)
{
- KASSERT(opsize == 2 || opsize == 4 || opsize == 8,
+ KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
("getcc: invalid operand size %d", opsize));
- if (opsize == 2)
+ if (opsize == 1)
+ return (getcc8(x, y));
+ else if (opsize == 2)
return (getcc16(x, y));
else if (opsize == 4)
return (getcc32(x, y));
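
Besides collapsing three near-identical functions into one macro (plus a new getcc8() for byte-sized operands), the rewrite fixes the inline-asm constraints: the minuend is now a proper read-write operand ("+r" (x)), where the old code modified a register it had declared input-only. A standalone, amd64-only sketch of the same flag-capture trick at a single width:

#include <stdint.h>
#include <stdio.h>

/* Do the subtraction on the host and capture the resulting RFLAGS,
 * exactly the GETCC() pattern above, reduced to 8 bits. */
static unsigned long
getcc8_demo(uint8_t x, uint8_t y)
{
        unsigned long rflags;

        __asm __volatile("sub %2,%1; pushfq; popq %0" :
            "=r" (rflags), "+r" (x) : "m" (y));
        return (rflags);
}

int
main(void)
{
        /* 0x10 - 0x20 borrows and goes negative: CF=1, ZF=0, SF=1. */
        unsigned long fl = getcc8_demo(0x10, 0x20);

        printf("CF=%lu ZF=%lu SF=%lu\n",
            fl & 1, (fl >> 6) & 1, (fl >> 7) & 1);
        return (0);
}
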
@@ -564,7 +559,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
{
int error, size;
enum vm_reg_name reg;
- uint64_t val1, val2;
+ uint64_t result, rflags, rflags2, val1, val2;
size = vie->opsize;
error = EINVAL;
@@ -592,23 +587,21 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
break;
/* perform the operation and write the result */
- val1 &= val2;
- error = vie_update_register(vm, vcpuid, reg, val1, size);
+ result = val1 & val2;
+ error = vie_update_register(vm, vcpuid, reg, result, size);
break;
case 0x81:
/*
- * AND mem (ModRM:r/m) with immediate and store the
+ * AND/OR mem (ModRM:r/m) with immediate and store the
* result in mem.
*
- * 81 /4 and r/m16, imm16
- * 81 /4 and r/m32, imm32
- * REX.W + 81 /4 and r/m64, imm32 sign-extended to 64
+ * AND: i = 4
+ * OR: i = 1
+ * 81 /i op r/m16, imm16
+ * 81 /i op r/m32, imm32
+ * REX.W + 81 /i op r/m64, imm32 sign-extended to 64
*
- * Currently, only the AND operation of the 0x81 opcode
- * is implemented (ModRM:reg = b100).
*/
- if ((vie->reg & 7) != 4)
- break;
/* get the first operand */
error = memread(vm, vcpuid, gpa, &val1, size, arg);
@@ -616,15 +609,48 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
break;
/*
- * perform the operation with the pre-fetched immediate
- * operand and write the result
- */
- val1 &= vie->immediate;
- error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ * perform the operation with the pre-fetched immediate
+ * operand and write the result
+ */
+ switch (vie->reg & 7) {
+ case 0x4:
+ /* modrm:reg == b100, AND */
+ result = val1 & vie->immediate;
+ break;
+ case 0x1:
+ /* modrm:reg == b001, OR */
+ result = val1 | vie->immediate;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ if (error)
+ break;
+
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
}
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ *
+ * The updated status flags are obtained by subtracting 0 from 'result'.
+ */
+ rflags2 = getcc(size, result, 0);
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
return (error);
}
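
The flag fixup appended to emulate_and() (and, below, emulate_or()) encodes the SDM rule for the logical instructions: OF and CF are cleared, SF/ZF/PF follow the result, AF is undefined. A small C model of that masking for getcc(8, result, 0) (opsize 8, i.e. 64-bit); the PSL_* bit values are assumptions from amd64's machine/psl.h:

#include <stdint.h>
#include <stdio.h>

/* Assumed amd64 RFLAGS bits (machine/psl.h). */
#define PSL_C   0x0001
#define PSL_PF  0x0004
#define PSL_AF  0x0010
#define PSL_Z   0x0040
#define PSL_N   0x0080
#define PSL_V   0x0800
#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)

/* C model of "subtract 0 from result", restricted to the three
 * flags that survive the mask below. */
static unsigned long
flags_of_result(uint64_t result)
{
        unsigned long fl = 0;

        if (result == 0)
                fl |= PSL_Z;
        if (result >> 63)
                fl |= PSL_N;
        if ((__builtin_popcountll(result & 0xff) & 1) == 0)
                fl |= PSL_PF;           /* PF: low byte has even parity */
        return (fl);
}

int
main(void)
{
        uint64_t result = 0;                    /* e.g. val1 & val2 == 0 */
        unsigned long rflags = PSL_C | PSL_V;   /* stale guest CF/OF */

        rflags &= ~RFLAGS_STATUS_BITS;          /* OF and CF drop out */
        rflags |= flags_of_result(result) & (PSL_PF | PSL_Z | PSL_N);
        printf("rflags = %#lx (expect 0x44 = ZF|PF)\n", rflags);
        return (0);
}
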
@@ -633,7 +659,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
int error, size;
- uint64_t val1;
+ uint64_t val1, result, rflags, rflags2;
size = vie->opsize;
error = EINVAL;
@@ -663,17 +689,33 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* perform the operation with the pre-fetched immediate
* operand and write the result
*/
- val1 |= vie->immediate;
- error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ result = val1 | vie->immediate;
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
}
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ *
+ * The updated status flags are obtained by subtracting 0 from 'result'.
+ */
+ rflags2 = getcc(size, result, 0);
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
return (error);
}
-#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
-
static int
emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -723,6 +765,62 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
+emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ uint64_t nval, rflags, rflags2, val1, val2;
+ enum vm_reg_name reg;
+
+ size = vie->opsize;
+ error = EINVAL;
+
+ switch (vie->op.op_byte) {
+ case 0x2B:
+ /*
+ * SUB r/m from r and store the result in r
+ *
+ * 2B/r SUB r16, r/m16
+ * 2B/r SUB r32, r/m32
+ * REX.W + 2B/r SUB r64, r/m64
+ */
+
+ /* get the first operand */
+ reg = gpr_map[vie->reg];
+ error = vie_read_register(vm, vcpuid, reg, &val1);
+ if (error)
+ break;
+
+ /* get the second operand */
+ error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ if (error)
+ break;
+
+ /* perform the operation and write the result */
+ nval = val1 - val2;
+ error = vie_update_register(vm, vcpuid, reg, nval, size);
+ break;
+ default:
+ break;
+ }
+
+ if (!error) {
+ rflags2 = getcc(size, val1, val2);
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ &rflags);
+ if (error)
+ return (error);
+
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & RFLAGS_STATUS_BITS;
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ rflags, 8);
+ }
+
+ return (error);
+}
+
+static int
emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
mem_region_write_t memwrite, void *arg)
@@ -865,6 +963,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_or(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
break;
+ case VIE_OP_TYPE_SUB:
+ error = emulate_sub(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
default:
error = EINVAL;
break;
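
Stepping back, the SUB support shows the fixed recipe for teaching the emulator a new instruction: a one_byte_opcodes[] entry keyed by the opcode byte, a handler, and a case in vmm_emulate_instruction()'s op_type switch. A toy model of that table-plus-switch dispatch, with the struct simplified from the real struct vie_op:

#include <stdint.h>
#include <stdio.h>

enum op_type { OP_NONE, OP_AND, OP_OR, OP_SUB };

struct vie_op_demo {
        uint8_t         op_byte;
        enum op_type    op_type;
};

/* Sparse 256-entry table indexed by the first opcode byte. */
static const struct vie_op_demo one_byte_opcodes[256] = {
        [0x2B] = { .op_byte = 0x2B, .op_type = OP_SUB },
};

static uint64_t
emulate(enum op_type t, uint64_t a, uint64_t b)
{
        switch (t) {
        case OP_SUB:    return (a - b);
        case OP_AND:    return (a & b);
        case OP_OR:     return (a | b);
        default:        return (0);
        }
}

int
main(void)
{
        const struct vie_op_demo *op = &one_byte_opcodes[0x2B];

        printf("opcode %#x -> %ju\n", op->op_byte,
            (uintmax_t)emulate(op->op_type, 7, 5));
        return (0);
}
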
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index 9f38bf3..be4b5af 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
+#include <sys/sysctl.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
@@ -45,20 +46,49 @@ __FBSDID("$FreeBSD$");
#include "vmm_host.h"
#include "x86.h"
+SYSCTL_DECL(_hw_vmm);
+static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL);
+
#define CPUID_VM_HIGH 0x40000000
static const char bhyve_id[12] = "bhyve bhyve ";
static uint64_t bhyve_xcpuids;
+/*
+ * The default CPU topology is a single thread per package.
+ */
+static u_int threads_per_core = 1;
+SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
+ &threads_per_core, 0, NULL);
+
+static u_int cores_per_package = 1;
+SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
+ &cores_per_package, 0, NULL);
+
+static int cpuid_leaf_b = 1;
+SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
+ &cpuid_leaf_b, 0, NULL);
+
+/*
+ * Round up to the next power of two, if necessary, and then take log2.
+ * Returns -1 if argument is zero.
+ */
+static __inline int
+log2(u_int x)
+{
+
+ return (fls(x << (1 - powerof2(x))) - 1);
+}
+
int
x86_emulate_cpuid(struct vm *vm, int vcpu_id,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
const struct xsave_limits *limits;
uint64_t cr4;
- int error, enable_invpcid;
- unsigned int func, regs[4];
+ int error, enable_invpcid, level, width, x2apic_id;
+ unsigned int func, regs[4], logical_cpus;
enum x2apic_state x2apic_state;
/*
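
The three knobs are loader tunables (CTLFLAG_RDTUN), e.g. hw.vmm.topology.threads_per_core="2" in loader.conf. The log2() helper computes the APIC-ID field widths used below as ceil(log2(x)); a standalone check of its edge cases, with powerof2() assumed from sys/param.h:

#include <stdio.h>
#include <strings.h>                    /* fls() */

#define powerof2(x)     ((((x) - 1) & (x)) == 0)

/* Round up to the next power of two, then take log2; -1 for x == 0
 * (same expression as the hunk above). */
static inline int
log2_roundup(unsigned int x)
{
        return (fls(x << (1 - powerof2(x))) - 1);
}

int
main(void)
{
        /* Expect: -1 0 1 2 2 3 3 3 3 */
        for (unsigned int x = 0; x <= 8; x++)
                printf("log2(%u) = %d\n", x, log2_roundup(x));
        return (0);
}
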
@@ -209,30 +239,31 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
*/
regs[3] &= ~CPUID_DS;
- /*
- * Disable multi-core.
- */
+ logical_cpus = threads_per_core * cores_per_package;
regs[1] &= ~CPUID_HTT_CORES;
- regs[3] &= ~CPUID_HTT;
+ regs[1] |= (logical_cpus & 0xff) << 16;
+ regs[3] |= CPUID_HTT;
break;
case CPUID_0000_0004:
- do_cpuid(4, regs);
+ cpuid_count(*eax, *ecx, regs);
- /*
- * Do not expose topology.
- *
- * The maximum number of processor cores in
- * this physical processor package and the
- * maximum number of threads sharing this
- * cache are encoded with "plus 1" encoding.
- * Adding one to the value in this register
- * field to obtains the actual value.
- *
- * Therefore 0 for both indicates 1 core per
- * package and no cache sharing.
- */
- regs[0] &= 0xffff8000;
+ if (regs[0] || regs[1] || regs[2] || regs[3]) {
+ regs[0] &= 0x3ff;
+ regs[0] |= (cores_per_package - 1) << 26;
+ /*
+ * Cache topology:
+ * - L1 and L2 are shared only by the logical
+ * processors in a single core.
+ * - L3 and above are shared by all logical
+ * processors in the package.
+ */
+ logical_cpus = threads_per_core;
+ level = (regs[0] >> 5) & 0x7;
+ if (level >= 3)
+ logical_cpus *= cores_per_package;
+ regs[0] |= (logical_cpus - 1) << 14;
+ }
break;
case CPUID_0000_0007:
@@ -286,10 +317,32 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/*
* Processor topology enumeration
*/
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = *ecx & 0xff;
- regs[3] = vcpu_id;
+ if (*ecx == 0) {
+ logical_cpus = threads_per_core;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_SMT;
+ x2apic_id = vcpu_id;
+ }
+
+ if (*ecx == 1) {
+ logical_cpus = threads_per_core *
+ cores_per_package;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_CORE;
+ x2apic_id = vcpu_id;
+ }
+
+ if (!cpuid_leaf_b || *ecx >= 2) {
+ width = 0;
+ logical_cpus = 0;
+ level = 0;
+ x2apic_id = 0;
+ }
+
+ regs[0] = width & 0x1f;
+ regs[1] = logical_cpus & 0xffff;
+ regs[2] = (level << 8) | (*ecx & 0xff);
+ regs[3] = x2apic_id;
break;
case CPUID_0000_000D:
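
Leaf 0xB now enumerates two topology levels, SMT (ECX=0) and core (ECX=1), and reports an invalid level for ECX >= 2 or when the hw.vmm.topology.cpuid_leaf_b tunable is off. A small model of the register encoding; the CPUID_TYPE_* values are assumed from the SDM's level-type encoding (1 = SMT, 2 = Core):

#include <stdio.h>

#define CPUID_TYPE_SMT  1       /* assumed SDM level-type values */
#define CPUID_TYPE_CORE 2

int
main(void)
{
        unsigned int threads = 2, cores = 4, vcpu_id = 5;

        for (unsigned int ecx = 0; ecx < 3; ecx++) {
                unsigned int width = 0, count = 0, level = 0, id = 0;

                if (ecx == 0) {                 /* SMT level */
                        count = threads;
                        width = 1;              /* log2(2) */
                        level = CPUID_TYPE_SMT;
                        id = vcpu_id;
                } else if (ecx == 1) {          /* core level */
                        count = threads * cores;
                        width = 3;              /* log2(8) */
                        level = CPUID_TYPE_CORE;
                        id = vcpu_id;
                }                               /* ecx >= 2: invalid level */

                printf("ecx=%u: eax=%#x ebx=%#x ecx=%#x edx=%#x\n", ecx,
                    width & 0x1f, count & 0xffff, (level << 8) | ecx, id);
        }
        return (0);
}
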