author     neel <neel@FreeBSD.org>    2014-09-17 18:46:51 +0000
committer  neel <neel@FreeBSD.org>    2014-09-17 18:46:51 +0000
commit     c9b7ad126a69df6d747aa31e822ad6ae3ea9c900 (patch)
tree       c99380d1a0386a2472762670318cd92cfcb5b65a /sys/amd64/vmm
parent     eefb10a1843c72b46fee9c11a6cde4146031ea4d (diff)
parent     65bccd5b546490ed3e9ef43ce93d5a573d366801 (diff)
IFC @r271694
Diffstat (limited to 'sys/amd64/vmm')
-rw-r--r--   sys/amd64/vmm/amd/svm.c                  30
-rw-r--r--   sys/amd64/vmm/intel/vmx.c                46
-rw-r--r--   sys/amd64/vmm/io/vatpic.c                27
-rw-r--r--   sys/amd64/vmm/vmm.c                      18
-rw-r--r--   sys/amd64/vmm/vmm_instruction_emul.c    206
-rw-r--r--   sys/amd64/vmm/x86.c                     103
6 files changed, 337 insertions, 93 deletions
diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c
index c57c232..32ac257 100644
--- a/sys/amd64/vmm/amd/svm.c
+++ b/sys/amd64/vmm/amd/svm.c
@@ -1055,8 +1055,7 @@ disable_intr_window_exiting(struct svm_softc *sc, int vcpu)
}
static int
-svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, int running,
- uint64_t val)
+svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t val)
{
struct vmcb_ctrl *ctrl;
int oldval, newval;
@@ -1071,6 +1070,16 @@ svm_modify_intr_shadow(struct svm_softc *sc, int vcpu, int running,
return (0);
}
+static int
+svm_get_intr_shadow(struct svm_softc *sc, int vcpu, uint64_t *val)
+{
+ struct vmcb_ctrl *ctrl;
+
+ ctrl = svm_get_vmcb_ctrl(sc, vcpu);
+ *val = ctrl->intr_shadow;
+ return (0);
+}
+
/*
* Once an NMI is injected it blocks delivery of further NMIs until the handler
* executes an IRET. The IRET intercept is enabled when an NMI is injected to
@@ -1096,7 +1105,7 @@ enable_nmi_blocking(struct svm_softc *sc, int vcpu)
}
static void
-clear_nmi_blocking(struct svm_softc *sc, int vcpu, int running)
+clear_nmi_blocking(struct svm_softc *sc, int vcpu)
{
int error;
@@ -1119,7 +1128,7 @@ clear_nmi_blocking(struct svm_softc *sc, int vcpu, int running)
* Set 'intr_shadow' to prevent an NMI from being injected on the
* immediate VMRUN.
*/
- error = svm_modify_intr_shadow(sc, vcpu, running, 1);
+ error = svm_modify_intr_shadow(sc, vcpu, 1);
KASSERT(!error, ("%s: error %d setting intr_shadow", __func__, error));
}
@@ -1256,7 +1265,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
* Restart execution at "iret" but with the intercept cleared.
*/
vmexit->inst_length = 0;
- clear_nmi_blocking(svm_sc, vcpu, 1);
+ clear_nmi_blocking(svm_sc, vcpu);
handled = 1;
break;
case VMCB_EXIT_VINTR: /* interrupt window exiting */
@@ -1967,10 +1976,14 @@ svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
struct svm_softc *svm_sc;
struct vmcb *vmcb;
register_t *reg;
-
+
svm_sc = arg;
vmcb = svm_get_vmcb(svm_sc, vcpu);
+ if (ident == VM_REG_GUEST_INTR_SHADOW) {
+ return (svm_get_intr_shadow(svm_sc, vcpu, val));
+ }
+
if (vmcb_read(vmcb, ident, val) == 0) {
return (0);
}
@@ -1999,6 +2012,11 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
svm_sc = arg;
vmcb = svm_get_vmcb(svm_sc, vcpu);
+
+ if (ident == VM_REG_GUEST_INTR_SHADOW) {
+ return (svm_modify_intr_shadow(svm_sc, vcpu, val));
+ }
+
if (vmcb_write(vmcb, ident, val) == 0) {
return (0);
}
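
Taken together, the svm.c hunks make VM_REG_GUEST_INTR_SHADOW a synthetic register backed by the VMCB control area: svm_getreg() and svm_setreg() special-case it before falling through to the generic vmcb_read()/vmcb_write() path. A minimal user-space sketch of that dispatch shape, using made-up stand-in types rather than the kernel's:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the VMCB control area and register IDs. */
struct vmcb_ctrl { uint64_t intr_shadow; };
enum { REG_RAX, REG_INTR_SHADOW };
struct vcpu_state { struct vmcb_ctrl ctrl; uint64_t rax; };

/* Check the synthetic register before the generic path, mirroring
 * the svm_getreg() hunk above. */
static int
getreg(struct vcpu_state *st, int ident, uint64_t *val)
{
        if (ident == REG_INTR_SHADOW) {
                *val = st->ctrl.intr_shadow;
                return (0);
        }
        if (ident == REG_RAX) {         /* the "everything else" path */
                *val = st->rax;
                return (0);
        }
        return (EINVAL);
}

int
main(void)
{
        struct vcpu_state st = { .ctrl = { .intr_shadow = 1 } };
        uint64_t v;

        if (getreg(&st, REG_INTR_SHADOW, &v) == 0)
                printf("intr_shadow = %ju\n", (uintmax_t)v);
        return (0);
}
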
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index 8e35781..5e42679 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -2711,6 +2711,46 @@ vmxctx_setreg(struct vmxctx *vmxctx, int reg, uint64_t val)
}
static int
+vmx_get_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t *retval)
+{
+ uint64_t gi;
+ int error;
+
+ error = vmcs_getreg(&vmx->vmcs[vcpu], running,
+ VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY), &gi);
+ *retval = (gi & HWINTR_BLOCKING) ? 1 : 0;
+ return (error);
+}
+
+static int
+vmx_modify_intr_shadow(struct vmx *vmx, int vcpu, int running, uint64_t val)
+{
+ struct vmcs *vmcs;
+ uint64_t gi;
+ int error, ident;
+
+ /*
+ * Forcing the vcpu into an interrupt shadow is not supported.
+ */
+ if (val) {
+ error = EINVAL;
+ goto done;
+ }
+
+ vmcs = &vmx->vmcs[vcpu];
+ ident = VMCS_IDENT(VMCS_GUEST_INTERRUPTIBILITY);
+ error = vmcs_getreg(vmcs, running, ident, &gi);
+ if (error == 0) {
+ gi &= ~HWINTR_BLOCKING;
+ error = vmcs_setreg(vmcs, running, ident, gi);
+ }
+done:
+ VCPU_CTR2(vmx->vm, vcpu, "Setting intr_shadow to %#lx %s", val,
+ error ? "failed" : "succeeded");
+ return (error);
+}
+
+static int
vmx_shadow_reg(int reg)
{
int shreg;
@@ -2741,6 +2781,9 @@ vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval)
if (running && hostcpu != curcpu)
panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu);
+ if (reg == VM_REG_GUEST_INTR_SHADOW)
+ return (vmx_get_intr_shadow(vmx, vcpu, running, retval));
+
if (vmxctx_getreg(&vmx->ctx[vcpu], reg, retval) == 0)
return (0);
@@ -2759,6 +2802,9 @@ vmx_setreg(void *arg, int vcpu, int reg, uint64_t val)
if (running && hostcpu != curcpu)
panic("vmx_setreg: %s%d is running", vm_name(vmx->vm), vcpu);
+ if (reg == VM_REG_GUEST_INTR_SHADOW)
+ return (vmx_modify_intr_shadow(vmx, vcpu, running, val));
+
if (vmxctx_setreg(&vmx->ctx[vcpu], reg, val) == 0)
return (0);
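
On the Intel side the shadow is not stored anywhere; it is derived from the VMCS guest-interruptibility field. A read collapses the hardware blocking bits to 0 or 1, and a write supports only clearing, since forcing a vcpu into an interrupt shadow is not supported (nonzero values fail with EINVAL). A sketch of the bit manipulation; the STI/MOV-SS bit positions are assumptions taken from the Intel SDM, not from this diff:

#include <stdint.h>
#include <stdio.h>

/* Assumed: bits 0 and 1 of the guest-interruptibility field are
 * "blocking by STI" and "blocking by MOV SS" (Intel SDM 24.4.2). */
#define STI_BLOCKING    0x0001
#define MOVSS_BLOCKING  0x0002
#define HWINTR_BLOCKING (STI_BLOCKING | MOVSS_BLOCKING)

int
main(void)
{
        uint64_t gi = STI_BLOCKING;     /* guest ran STI last instruction */

        /* Read: collapse the blocking bits to a 0/1 shadow value. */
        printf("shadow = %d\n", (gi & HWINTR_BLOCKING) ? 1 : 0);

        /* Write 0: clear both blocking bits, as in vmx_modify_intr_shadow(). */
        gi &= ~(uint64_t)HWINTR_BLOCKING;
        printf("gi = %#jx\n", (uintmax_t)gi);
        return (0);
}
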
diff --git a/sys/amd64/vmm/io/vatpic.c b/sys/amd64/vmm/io/vatpic.c
index ee6fc84..d8ccebd 100644
--- a/sys/amd64/vmm/io/vatpic.c
+++ b/sys/amd64/vmm/io/vatpic.c
@@ -195,26 +195,29 @@ vatpic_notify_intr(struct vatpic *vatpic)
atpic->mask, atpic->request, atpic->service);
/*
+ * From Section 3.6.2, "Interrupt Modes", in the
+ * MPtable Specification, Version 1.4
+ *
* PIC interrupts are routed to both the Local APIC
* and the I/O APIC to support operation in 1 of 3
* modes.
*
* 1. Legacy PIC Mode: the PIC effectively bypasses
- * all APIC components. In mode '1' the local APIC is
+ * all APIC components. In this mode the local APIC is
* disabled and LINT0 is reconfigured as INTR to
* deliver the PIC interrupt directly to the CPU.
*
* 2. Virtual Wire Mode: the APIC is treated as a
* virtual wire which delivers interrupts from the PIC
- * to the CPU. In mode '2' LINT0 is programmed as
+ * to the CPU. In this mode LINT0 is programmed as
* ExtINT to indicate that the PIC is the source of
* the interrupt.
*
- * 3. Symmetric I/O Mode: PIC interrupts are fielded
- * by the I/O APIC and delivered to the appropriate
- * CPU. In mode '3' the I/O APIC input 0 is
- * programmed as ExtINT to indicate that the PIC is
- * the source of the interrupt.
+ * 3. Virtual Wire Mode via I/O APIC: PIC interrupts are
+ * fielded by the I/O APIC and delivered to the appropriate
+ * CPU. In this mode the I/O APIC input 0 is programmed
+ * as ExtINT to indicate that the PIC is the source of the
+ * interrupt.
*/
atpic->intr_raised = true;
lapic_set_local_intr(vatpic->vm, -1, APIC_LVT_LINT0);
@@ -497,13 +500,19 @@ vatpic_pending_intr(struct vm *vm, int *vecptr)
VATPIC_LOCK(vatpic);
pin = vatpic_get_highest_irrpin(atpic);
- if (pin == -1)
- pin = 7;
if (pin == 2) {
atpic = &vatpic->atpic[1];
pin = vatpic_get_highest_irrpin(atpic);
}
+ /*
+ * If there are no pins active at this moment then return the spurious
+ * interrupt vector instead.
+ */
+ if (pin == -1)
+ pin = 7;
+
+ KASSERT(pin >= 0 && pin <= 7, ("%s: invalid pin %d", __func__, pin));
*vecptr = atpic->irq_base + pin;
VATPIC_UNLOCK(vatpic);
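
The vatpic.c hunk also reorders vatpic_pending_intr() so the spurious-vector fallback runs only after the cascade is resolved. With the old order, a raised cascade pin (pin 2) whose slave IRR had meanwhile gone empty would compute a vector from pin -1; now that case returns the slave's spurious vector, irq_base + 7. A compact model of the fixed resolution order, with a hypothetical highest_pin() standing in for vatpic_get_highest_irrpin():

#include <stdio.h>

/* 8259 fixed priority: pin 0 is highest; -1 means no pin pending. */
static int
highest_pin(unsigned char irr)
{
        for (int pin = 0; pin < 8; pin++)
                if (irr & (1 << pin))
                        return (pin);
        return (-1);
}

int
main(void)
{
        unsigned char master_irr = 1 << 2;      /* only the cascade pin */
        unsigned char slave_irr = 0;            /* slave went idle */
        int base = 0x20, pin;

        pin = highest_pin(master_irr);
        if (pin == 2) {                         /* chase the cascade */
                base = 0x28;
                pin = highest_pin(slave_irr);
        }
        if (pin == -1)                          /* spurious interrupt */
                pin = 7;
        printf("vector = %#x\n", base + pin);   /* 0x2f, slave spurious */
        return (0);
}
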
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 55c9886..7a3ccc8 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -1090,7 +1090,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vcpu *vcpu;
const char *wmesg;
- int t, vcpu_halted, vm_halted;
+ int error, t, vcpu_halted, vm_halted;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
@@ -1098,6 +1098,22 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
vcpu_halted = 0;
vm_halted = 0;
+ /*
+ * The typical way to halt a cpu is to execute: "sti; hlt"
+ *
+ * STI sets RFLAGS.IF to enable interrupts. However, the processor
+ * remains in an "interrupt shadow" for an additional instruction
+ * following the STI. This guarantees that "sti; hlt" sequence is
+ * atomic and a pending interrupt will be recognized after the HLT.
+ *
+ * After the HLT emulation is done the vcpu is no longer in an
+ * interrupt shadow and a pending interrupt can be injected on
+ * the next entry into the guest.
+ */
+ error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
+ KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
+ __func__, error));
+
vcpu_lock(vcpu);
while (1) {
/*
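
For reference, the guest-side idiom the new comment describes looks like the sketch below. It compiles in any amd64 translation unit but only executes at CPL 0; it is shown to illustrate the one-instruction shadow, not to run in a user process.

/*
 * STI enables interrupts but blocks their delivery for one more
 * instruction, so nothing can slip in between STI and HLT; a
 * pending interrupt is recognized only once HLT has executed,
 * and then wakes the cpu.
 */
static inline void
sti_hlt_idle(void)
{
        __asm __volatile("sti; hlt");
}
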
diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c
index 60e7a57..ef19792 100644
--- a/sys/amd64/vmm/vmm_instruction_emul.c
+++ b/sys/amd64/vmm/vmm_instruction_emul.c
@@ -65,6 +65,7 @@ enum {
VIE_OP_TYPE_MOVZX,
VIE_OP_TYPE_AND,
VIE_OP_TYPE_OR,
+ VIE_OP_TYPE_SUB,
VIE_OP_TYPE_TWO_BYTE,
VIE_OP_TYPE_PUSH,
VIE_OP_TYPE_CMP,
@@ -97,6 +98,10 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_byte = 0x0F,
.op_type = VIE_OP_TYPE_TWO_BYTE
},
+ [0x2B] = {
+ .op_byte = 0x2B,
+ .op_type = VIE_OP_TYPE_SUB,
+ },
[0x3B] = {
.op_byte = 0x3B,
.op_type = VIE_OP_TYPE_CMP,
@@ -311,46 +316,36 @@ vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
return (error);
}
+#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
+
/*
* Return the status flags that would result from doing (x - y).
*/
-static u_long
-getcc16(uint16_t x, uint16_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
-
-static u_long
-getcc32(uint32_t x, uint32_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
-
-static u_long
-getcc64(uint64_t x, uint64_t y)
-{
- u_long rflags;
-
- __asm __volatile("sub %1,%2; pushfq; popq %0" :
- "=r" (rflags) : "m" (y), "r" (x));
- return (rflags);
-}
+#define GETCC(sz) \
+static u_long \
+getcc##sz(uint##sz##_t x, uint##sz##_t y) \
+{ \
+ u_long rflags; \
+ \
+ __asm __volatile("sub %2,%1; pushfq; popq %0" : \
+ "=r" (rflags), "+r" (x) : "m" (y)); \
+ return (rflags); \
+} struct __hack
+
+GETCC(8);
+GETCC(16);
+GETCC(32);
+GETCC(64);
static u_long
getcc(int opsize, uint64_t x, uint64_t y)
{
- KASSERT(opsize == 2 || opsize == 4 || opsize == 8,
+ KASSERT(opsize == 1 || opsize == 2 || opsize == 4 || opsize == 8,
("getcc: invalid operand size %d", opsize));
- if (opsize == 2)
+ if (opsize == 1)
+ return (getcc8(x, y));
+ else if (opsize == 2)
return (getcc16(x, y));
else if (opsize == 4)
return (getcc32(x, y));
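
Besides collapsing three near-identical functions into one macro (plus a new getcc8() for byte-sized operands), the rewrite fixes the inline-asm constraints: the minuend is now a proper read-write operand ("+r" (x)), where the old code modified a register it had declared input-only. A standalone, amd64-only sketch of the same flag-capture trick at a single width:

#include <stdint.h>
#include <stdio.h>

/* Do the subtraction on the host and capture the resulting RFLAGS,
 * exactly the GETCC() pattern above, reduced to 8 bits. */
static unsigned long
getcc8_demo(uint8_t x, uint8_t y)
{
        unsigned long rflags;

        __asm __volatile("sub %2,%1; pushfq; popq %0" :
            "=r" (rflags), "+r" (x) : "m" (y));
        return (rflags);
}

int
main(void)
{
        /* 0x10 - 0x20 borrows and goes negative: CF=1, ZF=0, SF=1. */
        unsigned long fl = getcc8_demo(0x10, 0x20);

        printf("CF=%lu ZF=%lu SF=%lu\n",
            fl & 1, (fl >> 6) & 1, (fl >> 7) & 1);
        return (0);
}
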
@@ -564,7 +559,7 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
{
int error, size;
enum vm_reg_name reg;
- uint64_t val1, val2;
+ uint64_t result, rflags, rflags2, val1, val2;
size = vie->opsize;
error = EINVAL;
@@ -592,23 +587,21 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
break;
/* perform the operation and write the result */
- val1 &= val2;
- error = vie_update_register(vm, vcpuid, reg, val1, size);
+ result = val1 & val2;
+ error = vie_update_register(vm, vcpuid, reg, result, size);
break;
case 0x81:
/*
- * AND mem (ModRM:r/m) with immediate and store the
+ * AND/OR mem (ModRM:r/m) with immediate and store the
* result in mem.
*
- * 81 /4 and r/m16, imm16
- * 81 /4 and r/m32, imm32
- * REX.W + 81 /4 and r/m64, imm32 sign-extended to 64
+ * AND: i = 4
+ * OR: i = 1
+ * 81 /i op r/m16, imm16
+ * 81 /i op r/m32, imm32
+ * REX.W + 81 /i op r/m64, imm32 sign-extended to 64
*
- * Currently, only the AND operation of the 0x81 opcode
- * is implemented (ModRM:reg = b100).
*/
- if ((vie->reg & 7) != 4)
- break;
/* get the first operand */
error = memread(vm, vcpuid, gpa, &val1, size, arg);
@@ -616,15 +609,48 @@ emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
break;
/*
- * perform the operation with the pre-fetched immediate
- * operand and write the result
- */
- val1 &= vie->immediate;
- error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ * perform the operation with the pre-fetched immediate
+ * operand and write the result
+ */
+ switch (vie->reg & 7) {
+ case 0x4:
+ /* modrm:reg == b100, AND */
+ result = val1 & vie->immediate;
+ break;
+ case 0x1:
+ /* modrm:reg == b001, OR */
+ result = val1 | vie->immediate;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ if (error)
+ break;
+
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
}
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ *
+ * The updated status flags are obtained by subtracting 0 from 'result'.
+ */
+ rflags2 = getcc(size, result, 0);
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
return (error);
}
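
The flag fixup appended to emulate_and() (and, below, emulate_or()) encodes the SDM rule for the logical instructions: OF and CF are cleared, SF/ZF/PF follow the result, AF is undefined. A small C model of that masking for getcc(8, result, 0) (opsize 8, i.e. 64-bit); the PSL_* bit values are assumptions from amd64's machine/psl.h:

#include <stdint.h>
#include <stdio.h>

/* Assumed amd64 RFLAGS bits (machine/psl.h). */
#define PSL_C   0x0001
#define PSL_PF  0x0004
#define PSL_AF  0x0010
#define PSL_Z   0x0040
#define PSL_N   0x0080
#define PSL_V   0x0800
#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)

/* C model of "subtract 0 from result", restricted to the three
 * flags that survive the mask below. */
static unsigned long
flags_of_result(uint64_t result)
{
        unsigned long fl = 0;

        if (result == 0)
                fl |= PSL_Z;
        if (result >> 63)
                fl |= PSL_N;
        if ((__builtin_popcountll(result & 0xff) & 1) == 0)
                fl |= PSL_PF;           /* PF: low byte has even parity */
        return (fl);
}

int
main(void)
{
        uint64_t result = 0;                    /* e.g. val1 & val2 == 0 */
        unsigned long rflags = PSL_C | PSL_V;   /* stale guest CF/OF */

        rflags &= ~RFLAGS_STATUS_BITS;          /* OF and CF drop out */
        rflags |= flags_of_result(result) & (PSL_PF | PSL_Z | PSL_N);
        printf("rflags = %#lx (expect 0x44 = ZF|PF)\n", rflags);
        return (0);
}
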
@@ -633,7 +659,7 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
{
int error, size;
- uint64_t val1;
+ uint64_t val1, result, rflags, rflags2;
size = vie->opsize;
error = EINVAL;
@@ -663,17 +689,33 @@ emulate_or(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
* perform the operation with the pre-fetched immediate
* operand and write the result
*/
- val1 |= vie->immediate;
- error = memwrite(vm, vcpuid, gpa, val1, size, arg);
+ result = val1 | vie->immediate;
+ error = memwrite(vm, vcpuid, gpa, result, size, arg);
break;
default:
break;
}
+ if (error)
+ return (error);
+
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
+ if (error)
+ return (error);
+
+ /*
+ * OF and CF are cleared; the SF, ZF and PF flags are set according
+ * to the result; AF is undefined.
+ *
+ * The updated status flags are obtained by subtracting 0 from 'result'.
+ */
+ rflags2 = getcc(size, result, 0);
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & (PSL_PF | PSL_Z | PSL_N);
+
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, rflags, 8);
return (error);
}
-#define RFLAGS_STATUS_BITS (PSL_C | PSL_PF | PSL_AF | PSL_Z | PSL_N | PSL_V)
-
static int
emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@@ -723,6 +765,62 @@ emulate_cmp(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
}
static int
+emulate_sub(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
+ mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
+{
+ int error, size;
+ uint64_t nval, rflags, rflags2, val1, val2;
+ enum vm_reg_name reg;
+
+ size = vie->opsize;
+ error = EINVAL;
+
+ switch (vie->op.op_byte) {
+ case 0x2B:
+ /*
+ * SUB r/m from r and store the result in r
+ *
+ * 2B/r SUB r16, r/m16
+ * 2B/r SUB r32, r/m32
+ * REX.W + 2B/r SUB r64, r/m64
+ */
+
+ /* get the first operand */
+ reg = gpr_map[vie->reg];
+ error = vie_read_register(vm, vcpuid, reg, &val1);
+ if (error)
+ break;
+
+ /* get the second operand */
+ error = memread(vm, vcpuid, gpa, &val2, size, arg);
+ if (error)
+ break;
+
+ /* perform the operation and write the result */
+ nval = val1 - val2;
+ error = vie_update_register(vm, vcpuid, reg, nval, size);
+ break;
+ default:
+ break;
+ }
+
+ if (!error) {
+ rflags2 = getcc(size, val1, val2);
+ error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ &rflags);
+ if (error)
+ return (error);
+
+ rflags &= ~RFLAGS_STATUS_BITS;
+ rflags |= rflags2 & RFLAGS_STATUS_BITS;
+ error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RFLAGS,
+ rflags, 8);
+ }
+
+ return (error);
+}
+
+static int
emulate_push(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
mem_region_write_t memwrite, void *arg)
@@ -865,6 +963,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_or(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
break;
+ case VIE_OP_TYPE_SUB:
+ error = emulate_sub(vm, vcpuid, gpa, vie,
+ memread, memwrite, memarg);
+ break;
default:
error = EINVAL;
break;
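
Stepping back, the SUB support shows the fixed recipe for teaching the emulator a new instruction: a one_byte_opcodes[] entry keyed by the opcode byte, a handler, and a case in vmm_emulate_instruction()'s op_type switch. A toy model of that table-plus-switch dispatch, with the struct simplified from the real struct vie_op:

#include <stdint.h>
#include <stdio.h>

enum op_type { OP_NONE, OP_AND, OP_OR, OP_SUB };

struct vie_op_demo {
        uint8_t         op_byte;
        enum op_type    op_type;
};

/* Sparse 256-entry table indexed by the first opcode byte. */
static const struct vie_op_demo one_byte_opcodes[256] = {
        [0x2B] = { .op_byte = 0x2B, .op_type = OP_SUB },
};

static uint64_t
emulate(enum op_type t, uint64_t a, uint64_t b)
{
        switch (t) {
        case OP_SUB:    return (a - b);
        case OP_AND:    return (a & b);
        case OP_OR:     return (a | b);
        default:        return (0);
        }
}

int
main(void)
{
        const struct vie_op_demo *op = &one_byte_opcodes[0x2B];

        printf("opcode %#x -> %ju\n", op->op_byte,
            (uintmax_t)emulate(op->op_type, 7, 5));
        return (0);
}
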
diff --git a/sys/amd64/vmm/x86.c b/sys/amd64/vmm/x86.c
index 9f38bf3..be4b5af 100644
--- a/sys/amd64/vmm/x86.c
+++ b/sys/amd64/vmm/x86.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/pcpu.h>
#include <sys/systm.h>
#include <sys/cpuset.h>
+#include <sys/sysctl.h>
#include <machine/clock.h>
#include <machine/cpufunc.h>
@@ -45,20 +46,49 @@ __FBSDID("$FreeBSD$");
#include "vmm_host.h"
#include "x86.h"
+SYSCTL_DECL(_hw_vmm);
+static SYSCTL_NODE(_hw_vmm, OID_AUTO, topology, CTLFLAG_RD, 0, NULL);
+
#define CPUID_VM_HIGH 0x40000000
static const char bhyve_id[12] = "bhyve bhyve ";
static uint64_t bhyve_xcpuids;
+/*
+ * The default CPU topology is a single thread per package.
+ */
+static u_int threads_per_core = 1;
+SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, threads_per_core, CTLFLAG_RDTUN,
+ &threads_per_core, 0, NULL);
+
+static u_int cores_per_package = 1;
+SYSCTL_UINT(_hw_vmm_topology, OID_AUTO, cores_per_package, CTLFLAG_RDTUN,
+ &cores_per_package, 0, NULL);
+
+static int cpuid_leaf_b = 1;
+SYSCTL_INT(_hw_vmm_topology, OID_AUTO, cpuid_leaf_b, CTLFLAG_RDTUN,
+ &cpuid_leaf_b, 0, NULL);
+
+/*
+ * Round up to the next power of two, if necessary, and then take log2.
+ * Returns -1 if argument is zero.
+ */
+static __inline int
+log2(u_int x)
+{
+
+ return (fls(x << (1 - powerof2(x))) - 1);
+}
+
int
x86_emulate_cpuid(struct vm *vm, int vcpu_id,
uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
const struct xsave_limits *limits;
uint64_t cr4;
- int error, enable_invpcid;
- unsigned int func, regs[4];
+ int error, enable_invpcid, level, width, x2apic_id;
+ unsigned int func, regs[4], logical_cpus;
enum x2apic_state x2apic_state;
/*
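
The three knobs are loader tunables (CTLFLAG_RDTUN), e.g. hw.vmm.topology.threads_per_core="2" in loader.conf. The log2() helper computes the APIC-ID field widths used below as ceil(log2(x)); a standalone check of its edge cases, with powerof2() assumed from sys/param.h:

#include <stdio.h>
#include <strings.h>                    /* fls() */

#define powerof2(x)     ((((x) - 1) & (x)) == 0)

/* Round up to the next power of two, then take log2; -1 for x == 0
 * (same expression as the hunk above). */
static inline int
log2_roundup(unsigned int x)
{
        return (fls(x << (1 - powerof2(x))) - 1);
}

int
main(void)
{
        /* Expect: -1 0 1 2 2 3 3 3 3 */
        for (unsigned int x = 0; x <= 8; x++)
                printf("log2(%u) = %d\n", x, log2_roundup(x));
        return (0);
}
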
@@ -209,30 +239,31 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
*/
regs[3] &= ~CPUID_DS;
- /*
- * Disable multi-core.
- */
+ logical_cpus = threads_per_core * cores_per_package;
regs[1] &= ~CPUID_HTT_CORES;
- regs[3] &= ~CPUID_HTT;
+ regs[1] |= (logical_cpus & 0xff) << 16;
+ regs[3] |= CPUID_HTT;
break;
case CPUID_0000_0004:
- do_cpuid(4, regs);
+ cpuid_count(*eax, *ecx, regs);
- /*
- * Do not expose topology.
- *
- * The maximum number of processor cores in
- * this physical processor package and the
- * maximum number of threads sharing this
- * cache are encoded with "plus 1" encoding.
- * Adding one to the value in this register
- * field to obtains the actual value.
- *
- * Therefore 0 for both indicates 1 core per
- * package and no cache sharing.
- */
- regs[0] &= 0xffff8000;
+ if (regs[0] || regs[1] || regs[2] || regs[3]) {
+ regs[0] &= 0x3ff;
+ regs[0] |= (cores_per_package - 1) << 26;
+ /*
+ * Cache topology:
+ * - L1 and L2 are shared only by the logical
+ * processors in a single core.
+ * - L3 and above are shared by all logical
+ * processors in the package.
+ */
+ logical_cpus = threads_per_core;
+ level = (regs[0] >> 5) & 0x7;
+ if (level >= 3)
+ logical_cpus *= cores_per_package;
+ regs[0] |= (logical_cpus - 1) << 14;
+ }
break;
case CPUID_0000_0007:
@@ -286,10 +317,32 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
/*
* Processor topology enumeration
*/
- regs[0] = 0;
- regs[1] = 0;
- regs[2] = *ecx & 0xff;
- regs[3] = vcpu_id;
+ if (*ecx == 0) {
+ logical_cpus = threads_per_core;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_SMT;
+ x2apic_id = vcpu_id;
+ }
+
+ if (*ecx == 1) {
+ logical_cpus = threads_per_core *
+ cores_per_package;
+ width = log2(logical_cpus);
+ level = CPUID_TYPE_CORE;
+ x2apic_id = vcpu_id;
+ }
+
+ if (!cpuid_leaf_b || *ecx >= 2) {
+ width = 0;
+ logical_cpus = 0;
+ level = 0;
+ x2apic_id = 0;
+ }
+
+ regs[0] = width & 0x1f;
+ regs[1] = logical_cpus & 0xffff;
+ regs[2] = (level << 8) | (*ecx & 0xff);
+ regs[3] = x2apic_id;
break;
case CPUID_0000_000D:
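
Leaf 0xB now enumerates two topology levels, SMT (ECX=0) and core (ECX=1), and reports an invalid level for ECX >= 2 or when the hw.vmm.topology.cpuid_leaf_b tunable is off. A small model of the register encoding; the CPUID_TYPE_* values are assumed from the SDM's level-type encoding (1 = SMT, 2 = Core):

#include <stdio.h>

#define CPUID_TYPE_SMT  1       /* assumed SDM level-type values */
#define CPUID_TYPE_CORE 2

int
main(void)
{
        unsigned int threads = 2, cores = 4, vcpu_id = 5;

        for (unsigned int ecx = 0; ecx < 3; ecx++) {
                unsigned int width = 0, count = 0, level = 0, id = 0;

                if (ecx == 0) {                 /* SMT level */
                        count = threads;
                        width = 1;              /* log2(2) */
                        level = CPUID_TYPE_SMT;
                        id = vcpu_id;
                } else if (ecx == 1) {          /* core level */
                        count = threads * cores;
                        width = 3;              /* log2(8) */
                        level = CPUID_TYPE_CORE;
                        id = vcpu_id;
                }                               /* ecx >= 2: invalid level */

                printf("ecx=%u: eax=%#x ebx=%#x ecx=%#x edx=%#x\n", ecx,
                    width & 0x1f, count & 0xffff, (level << 8) | ecx, id);
        }
        return (0);
}
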