diff options
Diffstat (limited to 'arch')
156 files changed, 7146 insertions, 3068 deletions
diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 0f23009..6ab6b33 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -172,7 +172,7 @@ not_angel: adr r0, LC0 ARM( ldmia r0, {r1, r2, r3, r4, r5, r6, r11, ip, sp}) THUMB( ldmia r0, {r1, r2, r3, r4, r5, r6, r11, ip} ) - THUMB( ldr sp, [r0, #28] ) + THUMB( ldr sp, [r0, #32] ) subs r0, r0, r1 @ calculate the delta offset @ if delta is zero, we are diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 7f36d00..feb988a 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -11,7 +11,11 @@ #define kmap_prot PAGE_KERNEL -#define flush_cache_kmaps() flush_cache_all() +#define flush_cache_kmaps() \ + do { \ + if (cache_is_vivt()) \ + flush_cache_all(); \ + } while (0) extern pte_t *pkmap_page_table; @@ -21,11 +25,20 @@ extern void *kmap_high(struct page *page); extern void *kmap_high_get(struct page *page); extern void kunmap_high(struct page *page); +extern void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte); +extern void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte); + +/* + * The following functions are already defined by <linux/highmem.h> + * when CONFIG_HIGHMEM is not set. + */ +#ifdef CONFIG_HIGHMEM extern void *kmap(struct page *page); extern void kunmap(struct page *page); extern void *kmap_atomic(struct page *page, enum km_type type); extern void kunmap_atomic(void *kvaddr, enum km_type type); extern void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); extern struct page *kmap_atomic_to_page(const void *ptr); +#endif #endif diff --git a/arch/arm/include/asm/kmap_types.h b/arch/arm/include/asm/kmap_types.h index c019949..c4b2ea3 100644 --- a/arch/arm/include/asm/kmap_types.h +++ b/arch/arm/include/asm/kmap_types.h @@ -18,6 +18,7 @@ enum km_type { KM_IRQ1, KM_SOFTIRQ0, KM_SOFTIRQ1, + KM_L1_CACHE, KM_L2_CACHE, KM_TYPE_NR }; diff --git a/arch/arm/include/asm/ucontext.h b/arch/arm/include/asm/ucontext.h index bf65e9f..47f023a 100644 --- a/arch/arm/include/asm/ucontext.h +++ b/arch/arm/include/asm/ucontext.h @@ -59,23 +59,22 @@ struct iwmmxt_sigframe { #endif /* CONFIG_IWMMXT */ #ifdef CONFIG_VFP -#if __LINUX_ARM_ARCH__ < 6 -/* For ARM pre-v6, we use fstmiax and fldmiax. This adds one extra - * word after the registers, and a word of padding at the end for - * alignment. */ #define VFP_MAGIC 0x56465001 -#define VFP_STORAGE_SIZE 152 -#else -#define VFP_MAGIC 0x56465002 -#define VFP_STORAGE_SIZE 144 -#endif struct vfp_sigframe { unsigned long magic; unsigned long size; - union vfp_state storage; -}; + struct user_vfp ufp; + struct user_vfp_exc ufp_exc; +} __attribute__((__aligned__(8))); + +/* + * 8 byte for magic and size, 264 byte for ufp, 12 bytes for ufp_exc, + * 4 bytes padding. + */ +#define VFP_STORAGE_SIZE sizeof(struct vfp_sigframe) + #endif /* CONFIG_VFP */ /* @@ -91,7 +90,7 @@ struct aux_sigframe { #ifdef CONFIG_IWMMXT struct iwmmxt_sigframe iwmmxt; #endif -#if 0 && defined CONFIG_VFP /* Not yet saved. */ +#ifdef CONFIG_VFP struct vfp_sigframe vfp; #endif /* Something that isn't a valid magic number for any coprocessor. */ diff --git a/arch/arm/include/asm/user.h b/arch/arm/include/asm/user.h index df95e05..05ac4b0 100644 --- a/arch/arm/include/asm/user.h +++ b/arch/arm/include/asm/user.h @@ -83,11 +83,21 @@ struct user{ /* * User specific VFP registers. If only VFPv2 is present, registers 16 to 31 - * are ignored by the ptrace system call. + * are ignored by the ptrace system call and the signal handler. */ struct user_vfp { unsigned long long fpregs[32]; unsigned long fpscr; }; +/* + * VFP exception registers exposed to user space during signal delivery. + * Fields not relavant to the current VFP architecture are ignored. + */ +struct user_vfp_exc { + unsigned long fpexc; + unsigned long fpinst; + unsigned long fpinst2; +}; + #endif /* _ARM_USER_H */ diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index e7714f3..907d5a6 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -18,6 +18,7 @@ #include <asm/cacheflush.h> #include <asm/ucontext.h> #include <asm/unistd.h> +#include <asm/vfp.h> #include "ptrace.h" #include "signal.h" @@ -175,6 +176,90 @@ static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) #endif +#ifdef CONFIG_VFP + +static int preserve_vfp_context(struct vfp_sigframe __user *frame) +{ + struct thread_info *thread = current_thread_info(); + struct vfp_hard_struct *h = &thread->vfpstate.hard; + const unsigned long magic = VFP_MAGIC; + const unsigned long size = VFP_STORAGE_SIZE; + int err = 0; + + vfp_sync_hwstate(thread); + __put_user_error(magic, &frame->magic, err); + __put_user_error(size, &frame->size, err); + + /* + * Copy the floating point registers. There can be unused + * registers see asm/hwcap.h for details. + */ + err |= __copy_to_user(&frame->ufp.fpregs, &h->fpregs, + sizeof(h->fpregs)); + /* + * Copy the status and control register. + */ + __put_user_error(h->fpscr, &frame->ufp.fpscr, err); + + /* + * Copy the exception registers. + */ + __put_user_error(h->fpexc, &frame->ufp_exc.fpexc, err); + __put_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); + __put_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); + + return err ? -EFAULT : 0; +} + +static int restore_vfp_context(struct vfp_sigframe __user *frame) +{ + struct thread_info *thread = current_thread_info(); + struct vfp_hard_struct *h = &thread->vfpstate.hard; + unsigned long magic; + unsigned long size; + unsigned long fpexc; + int err = 0; + + __get_user_error(magic, &frame->magic, err); + __get_user_error(size, &frame->size, err); + + if (err) + return -EFAULT; + if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) + return -EINVAL; + + /* + * Copy the floating point registers. There can be unused + * registers see asm/hwcap.h for details. + */ + err |= __copy_from_user(&h->fpregs, &frame->ufp.fpregs, + sizeof(h->fpregs)); + /* + * Copy the status and control register. + */ + __get_user_error(h->fpscr, &frame->ufp.fpscr, err); + + /* + * Sanitise and restore the exception registers. + */ + __get_user_error(fpexc, &frame->ufp_exc.fpexc, err); + /* Ensure the VFP is enabled. */ + fpexc |= FPEXC_EN; + /* Ensure FPINST2 is invalid and the exception flag is cleared. */ + fpexc &= ~(FPEXC_EX | FPEXC_FP2V); + h->fpexc = fpexc; + + __get_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); + __get_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); + + if (!err) + vfp_flush_hwstate(thread); + + return err ? -EFAULT : 0; +} + +#endif + /* * Do a signal return; undo the signal stack. These are aligned to 64-bit. */ @@ -233,8 +318,8 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) err |= restore_iwmmxt_context(&aux->iwmmxt); #endif #ifdef CONFIG_VFP -// if (err == 0) -// err |= vfp_restore_state(&sf->aux.vfp); + if (err == 0) + err |= restore_vfp_context(&aux->vfp); #endif return err; @@ -348,8 +433,8 @@ setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) err |= preserve_iwmmxt_context(&aux->iwmmxt); #endif #ifdef CONFIG_VFP -// if (err == 0) -// err |= vfp_save_state(&sf->aux.vfp); + if (err == 0) + err |= preserve_vfp_context(&aux->vfp); #endif __put_user_error(0, &aux->end_magic, err); diff --git a/arch/arm/mach-at91/Makefile b/arch/arm/mach-at91/Makefile index 027dd57..d400455 100644 --- a/arch/arm/mach-at91/Makefile +++ b/arch/arm/mach-at91/Makefile @@ -16,8 +16,8 @@ obj-$(CONFIG_ARCH_AT91SAM9261) += at91sam9261.o at91sam926x_time.o at91sam9261_d obj-$(CONFIG_ARCH_AT91SAM9G10) += at91sam9261.o at91sam926x_time.o at91sam9261_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91SAM9263) += at91sam9263.o at91sam926x_time.o at91sam9263_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91SAM9RL) += at91sam9rl.o at91sam926x_time.o at91sam9rl_devices.o sam9_smc.o -obj-$(CONFIG_ARCH_AT91SAM9G20) += at91sam9260.o at91sam926x_time.o at91sam9260_devices.o sam9_smc.o - obj-$(CONFIG_ARCH_AT91SAM9G45) += at91sam9g45.o at91sam926x_time.o at91sam9g45_devices.o sam9_smc.o +obj-$(CONFIG_ARCH_AT91SAM9G20) += at91sam9260.o at91sam926x_time.o at91sam9260_devices.o sam9_smc.o +obj-$(CONFIG_ARCH_AT91SAM9G45) += at91sam9g45.o at91sam926x_time.o at91sam9g45_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91CAP9) += at91cap9.o at91sam926x_time.o at91cap9_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT572D940HF) += at572d940hf.o at91sam926x_time.o at572d940hf_devices.o sam9_smc.o obj-$(CONFIG_ARCH_AT91X40) += at91x40.o at91x40_time.o diff --git a/arch/arm/mach-at91/pm_slowclock.S b/arch/arm/mach-at91/pm_slowclock.S index 987fab3..9c5b48e 100644 --- a/arch/arm/mach-at91/pm_slowclock.S +++ b/arch/arm/mach-at91/pm_slowclock.S @@ -175,8 +175,6 @@ ENTRY(at91_slow_clock) orr r3, r3, #(1 << 29) /* bit 29 always set */ str r3, [r1, #(AT91_CKGR_PLLAR - AT91_PMC)] - wait_pllalock - /* Save PLLB setting and disable it */ ldr r3, [r1, #(AT91_CKGR_PLLBR - AT91_PMC)] str r3, .saved_pllbr @@ -184,8 +182,6 @@ ENTRY(at91_slow_clock) mov r3, #AT91_PMC_PLLCOUNT str r3, [r1, #(AT91_CKGR_PLLBR - AT91_PMC)] - wait_pllblock - /* Turn off the main oscillator */ ldr r3, [r1, #(AT91_CKGR_MOR - AT91_PMC)] bic r3, r3, #AT91_PMC_MOSCEN @@ -205,13 +201,25 @@ ENTRY(at91_slow_clock) ldr r3, .saved_pllbr str r3, [r1, #(AT91_CKGR_PLLBR - AT91_PMC)] + tst r3, #(AT91_PMC_MUL & 0xff0000) + bne 1f + tst r3, #(AT91_PMC_MUL & ~0xff0000) + beq 2f +1: wait_pllblock +2: /* Restore PLLA setting */ ldr r3, .saved_pllar str r3, [r1, #(AT91_CKGR_PLLAR - AT91_PMC)] + tst r3, #(AT91_PMC_MUL & 0xff0000) + bne 3f + tst r3, #(AT91_PMC_MUL & ~0xff0000) + beq 4f +3: wait_pllalock +4: #ifdef SLOWDOWN_MASTER_CLOCK /* diff --git a/arch/arm/mach-bcmring/dma.c b/arch/arm/mach-bcmring/dma.c index 2ccf670..29c0a91 100644 --- a/arch/arm/mach-bcmring/dma.c +++ b/arch/arm/mach-bcmring/dma.c @@ -2221,11 +2221,15 @@ EXPORT_SYMBOL(dma_map_create_descriptor_ring); int dma_unmap(DMA_MemMap_t *memMap, /* Stores state information about the map */ int dirtied /* non-zero if any of the pages were modified */ ) { + + int rc = 0; int regionIdx; int segmentIdx; DMA_Region_t *region; DMA_Segment_t *segment; + down(&memMap->lock); + for (regionIdx = 0; regionIdx < memMap->numRegionsUsed; regionIdx++) { region = &memMap->region[regionIdx]; @@ -2239,7 +2243,8 @@ int dma_unmap(DMA_MemMap_t *memMap, /* Stores state information about the map */ printk(KERN_ERR "%s: vmalloc'd pages are not yet supported\n", __func__); - return -EINVAL; + rc = -EINVAL; + goto out; } case DMA_MEM_TYPE_KMALLOC: @@ -2276,7 +2281,8 @@ int dma_unmap(DMA_MemMap_t *memMap, /* Stores state information about the map */ printk(KERN_ERR "%s: Unsupported memory type: %d\n", __func__, region->memType); - return -EINVAL; + rc = -EINVAL; + goto out; } } @@ -2314,9 +2320,10 @@ int dma_unmap(DMA_MemMap_t *memMap, /* Stores state information about the map */ memMap->numRegionsUsed = 0; memMap->inUse = 0; +out: up(&memMap->lock); - return 0; + return rc; } EXPORT_SYMBOL(dma_unmap); diff --git a/arch/arm/mach-ep93xx/gpio.c b/arch/arm/mach-ep93xx/gpio.c index cc377ae..cf547ad 100644 --- a/arch/arm/mach-ep93xx/gpio.c +++ b/arch/arm/mach-ep93xx/gpio.c @@ -25,7 +25,7 @@ #include <mach/hardware.h> /************************************************************************* - * GPIO handling for EP93xx + * Interrupt handling for EP93xx on-chip GPIOs *************************************************************************/ static unsigned char gpio_int_unmasked[3]; static unsigned char gpio_int_enabled[3]; @@ -40,7 +40,7 @@ static const u8 eoi_register_offset[3] = { 0x98, 0xb4, 0x54 }; static const u8 int_en_register_offset[3] = { 0x9c, 0xb8, 0x58 }; static const u8 int_debounce_register_offset[3] = { 0xa8, 0xc4, 0x64 }; -void ep93xx_gpio_update_int_params(unsigned port) +static void ep93xx_gpio_update_int_params(unsigned port) { BUG_ON(port > 2); @@ -56,7 +56,7 @@ void ep93xx_gpio_update_int_params(unsigned port) EP93XX_GPIO_REG(int_en_register_offset[port])); } -void ep93xx_gpio_int_mask(unsigned line) +static inline void ep93xx_gpio_int_mask(unsigned line) { gpio_int_unmasked[line >> 3] &= ~(1 << (line & 7)); } diff --git a/arch/arm/mach-mx3/Kconfig b/arch/arm/mach-mx3/Kconfig index 3872af1..170f68e 100644 --- a/arch/arm/mach-mx3/Kconfig +++ b/arch/arm/mach-mx3/Kconfig @@ -62,6 +62,15 @@ config MACH_MX31_3DS Include support for MX31PDK (3DS) platform. This includes specific configurations for the board and its peripherals. +config MACH_MX31_3DS_MXC_NAND_USE_BBT + bool "Make the MXC NAND driver use the in flash Bad Block Table" + depends on MACH_MX31_3DS + depends on MTD_NAND_MXC + help + Enable this if you want that the MXC NAND driver uses the in flash + Bad Block Table to know what blocks are bad instead of scanning the + entire flash looking for bad block markers. + config MACH_MX31MOBOARD bool "Support mx31moboard platforms (EPFL Mobots group)" select ARCH_MX31 @@ -95,6 +104,7 @@ config MACH_PCM043 config MACH_ARMADILLO5X0 bool "Support Atmark Armadillo-500 Development Base Board" select ARCH_MX31 + select MXC_ULPI if USB_ULPI help Include support for Atmark Armadillo-500 platform. This includes specific configurations for the board and its peripherals. diff --git a/arch/arm/mach-mx3/clock-imx31.c b/arch/arm/mach-mx3/clock-imx31.c index 80dba99..9a9eb6d 100644 --- a/arch/arm/mach-mx3/clock-imx31.c +++ b/arch/arm/mach-mx3/clock-imx31.c @@ -468,6 +468,7 @@ static struct clk ahb_clk = { } DEFINE_CLOCK(perclk_clk, 0, NULL, 0, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(ckil_clk, 0, NULL, 0, clk_ckil_get_rate, NULL, NULL); DEFINE_CLOCK(sdhc1_clk, 0, MXC_CCM_CGR0, 0, NULL, NULL, &perclk_clk); DEFINE_CLOCK(sdhc2_clk, 1, MXC_CCM_CGR0, 2, NULL, NULL, &perclk_clk); @@ -490,7 +491,7 @@ DEFINE_CLOCK(mpeg4_clk, 0, MXC_CCM_CGR1, 0, NULL, NULL, &ahb_clk); DEFINE_CLOCK(mstick1_clk, 0, MXC_CCM_CGR1, 2, mstick1_get_rate, NULL, &usb_pll_clk); DEFINE_CLOCK(mstick2_clk, 1, MXC_CCM_CGR1, 4, mstick2_get_rate, NULL, &usb_pll_clk); DEFINE_CLOCK1(csi_clk, 0, MXC_CCM_CGR1, 6, csi, NULL, &serial_pll_clk); -DEFINE_CLOCK(rtc_clk, 0, MXC_CCM_CGR1, 8, NULL, NULL, &ipg_clk); +DEFINE_CLOCK(rtc_clk, 0, MXC_CCM_CGR1, 8, NULL, NULL, &ckil_clk); DEFINE_CLOCK(wdog_clk, 0, MXC_CCM_CGR1, 10, NULL, NULL, &ipg_clk); DEFINE_CLOCK(pwm_clk, 0, MXC_CCM_CGR1, 12, NULL, NULL, &perclk_clk); DEFINE_CLOCK(usb_clk2, 0, MXC_CCM_CGR1, 18, usb_get_rate, NULL, &ahb_clk); @@ -514,7 +515,6 @@ DEFINE_CLOCK(usb_clk1, 0, NULL, 0, usb_get_rate, NULL, &usb_pll_clk) DEFINE_CLOCK(nfc_clk, 0, NULL, 0, nfc_get_rate, NULL, &ahb_clk); DEFINE_CLOCK(scc_clk, 0, NULL, 0, NULL, NULL, &ipg_clk); DEFINE_CLOCK(ipg_clk, 0, NULL, 0, ipg_get_rate, NULL, &ahb_clk); -DEFINE_CLOCK(ckil_clk, 0, NULL, 0, clk_ckil_get_rate, NULL, NULL); #define _REGISTER_CLOCK(d, n, c) \ { \ @@ -572,7 +572,6 @@ static struct clk_lookup lookups[] = { _REGISTER_CLOCK(NULL, "iim", iim_clk) _REGISTER_CLOCK(NULL, "mpeg4", mpeg4_clk) _REGISTER_CLOCK(NULL, "mbx", mbx_clk) - _REGISTER_CLOCK("mxc_rtc", NULL, ckil_clk) }; int __init mx31_clocks_init(unsigned long fref) diff --git a/arch/arm/mach-mx3/devices.c b/arch/arm/mach-mx3/devices.c index 6adb586..f891115 100644 --- a/arch/arm/mach-mx3/devices.c +++ b/arch/arm/mach-mx3/devices.c @@ -575,11 +575,26 @@ struct platform_device imx_ssi_device1 = { .resource = imx_ssi_resources1, }; -static int mx3_devices_init(void) +static struct resource imx_wdt_resources[] = { + { + .flags = IORESOURCE_MEM, + }, +}; + +struct platform_device imx_wdt_device0 = { + .name = "imx-wdt", + .id = 0, + .num_resources = ARRAY_SIZE(imx_wdt_resources), + .resource = imx_wdt_resources, +}; + +static int __init mx3_devices_init(void) { if (cpu_is_mx31()) { mxc_nand_resources[0].start = MX31_NFC_BASE_ADDR; mxc_nand_resources[0].end = MX31_NFC_BASE_ADDR + 0xfff; + imx_wdt_resources[0].start = MX31_WDOG_BASE_ADDR; + imx_wdt_resources[0].end = MX31_WDOG_BASE_ADDR + 0x3fff; mxc_register_device(&mxc_rnga_device, NULL); } if (cpu_is_mx35()) { @@ -597,6 +612,8 @@ static int mx3_devices_init(void) imx_ssi_resources0[1].end = MX35_INT_SSI1; imx_ssi_resources1[1].start = MX35_INT_SSI2; imx_ssi_resources1[1].end = MX35_INT_SSI2; + imx_wdt_resources[0].start = MX35_WDOG_BASE_ADDR; + imx_wdt_resources[0].end = MX35_WDOG_BASE_ADDR + 0x3fff; } return 0; diff --git a/arch/arm/mach-mx3/devices.h b/arch/arm/mach-mx3/devices.h index 42cf175..4f77eb5 100644 --- a/arch/arm/mach-mx3/devices.h +++ b/arch/arm/mach-mx3/devices.h @@ -25,4 +25,5 @@ extern struct platform_device mxc_spi_device1; extern struct platform_device mxc_spi_device2; extern struct platform_device imx_ssi_device0; extern struct platform_device imx_ssi_device1; - +extern struct platform_device imx_ssi_device1; +extern struct platform_device imx_wdt_device0; diff --git a/arch/arm/mach-mx3/mach-armadillo5x0.c b/arch/arm/mach-mx3/mach-armadillo5x0.c index 3d72b0b..5f72ec9 100644 --- a/arch/arm/mach-mx3/mach-armadillo5x0.c +++ b/arch/arm/mach-mx3/mach-armadillo5x0.c @@ -36,6 +36,9 @@ #include <linux/input.h> #include <linux/gpio_keys.h> #include <linux/i2c.h> +#include <linux/usb/otg.h> +#include <linux/usb/ulpi.h> +#include <linux/delay.h> #include <mach/hardware.h> #include <asm/mach-types.h> @@ -52,6 +55,8 @@ #include <mach/ipu.h> #include <mach/mx3fb.h> #include <mach/mxc_nand.h> +#include <mach/mxc_ehci.h> +#include <mach/ulpi.h> #include "devices.h" #include "crm_regs.h" @@ -103,8 +108,158 @@ static int armadillo5x0_pins[] = { /* I2C2 */ MX31_PIN_CSPI2_MOSI__SCL, MX31_PIN_CSPI2_MISO__SDA, + /* OTG */ + MX31_PIN_USBOTG_DATA0__USBOTG_DATA0, + MX31_PIN_USBOTG_DATA1__USBOTG_DATA1, + MX31_PIN_USBOTG_DATA2__USBOTG_DATA2, + MX31_PIN_USBOTG_DATA3__USBOTG_DATA3, + MX31_PIN_USBOTG_DATA4__USBOTG_DATA4, + MX31_PIN_USBOTG_DATA5__USBOTG_DATA5, + MX31_PIN_USBOTG_DATA6__USBOTG_DATA6, + MX31_PIN_USBOTG_DATA7__USBOTG_DATA7, + MX31_PIN_USBOTG_CLK__USBOTG_CLK, + MX31_PIN_USBOTG_DIR__USBOTG_DIR, + MX31_PIN_USBOTG_NXT__USBOTG_NXT, + MX31_PIN_USBOTG_STP__USBOTG_STP, + /* USB host 2 */ + IOMUX_MODE(MX31_PIN_USBH2_CLK, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_USBH2_DIR, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_USBH2_NXT, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_USBH2_STP, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_USBH2_DATA0, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_USBH2_DATA1, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_STXD3, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_SRXD3, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_SCK3, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_SFS3, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_STXD6, IOMUX_CONFIG_FUNC), + IOMUX_MODE(MX31_PIN_SRXD6, IOMUX_CONFIG_FUNC), }; +/* USB */ +#if defined(CONFIG_USB_ULPI) + +#define OTG_RESET IOMUX_TO_GPIO(MX31_PIN_STXD4) +#define USBH2_RESET IOMUX_TO_GPIO(MX31_PIN_SCK6) +#define USBH2_CS IOMUX_TO_GPIO(MX31_PIN_GPIO1_3) + +#define USB_PAD_CFG (PAD_CTL_DRV_MAX | PAD_CTL_SRE_FAST | PAD_CTL_HYS_CMOS | \ + PAD_CTL_ODE_CMOS | PAD_CTL_100K_PU) + +static int usbotg_init(struct platform_device *pdev) +{ + int err; + + mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA0, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA1, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA2, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA3, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA4, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA5, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA6, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_DATA7, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_CLK, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_DIR, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_NXT, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBOTG_STP, USB_PAD_CFG); + + /* Chip already enabled by hardware */ + /* OTG phy reset*/ + err = gpio_request(OTG_RESET, "USB-OTG-RESET"); + if (err) { + pr_err("Failed to request the usb otg reset gpio\n"); + return err; + } + + err = gpio_direction_output(OTG_RESET, 1/*HIGH*/); + if (err) { + pr_err("Failed to reset the usb otg phy\n"); + goto otg_free_reset; + } + + gpio_set_value(OTG_RESET, 0/*LOW*/); + mdelay(5); + gpio_set_value(OTG_RESET, 1/*HIGH*/); + + return 0; + +otg_free_reset: + gpio_free(OTG_RESET); + return err; +} + +static int usbh2_init(struct platform_device *pdev) +{ + int err; + + mxc_iomux_set_pad(MX31_PIN_USBH2_CLK, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBH2_DIR, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBH2_NXT, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBH2_STP, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBH2_DATA0, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_USBH2_DATA1, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_SRXD6, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_STXD6, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_SFS3, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_SCK3, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_SRXD3, USB_PAD_CFG); + mxc_iomux_set_pad(MX31_PIN_STXD3, USB_PAD_CFG); + + mxc_iomux_set_gpr(MUX_PGP_UH2, true); + + + /* Enable the chip */ + err = gpio_request(USBH2_CS, "USB-H2-CS"); + if (err) { + pr_err("Failed to request the usb host 2 CS gpio\n"); + return err; + } + + err = gpio_direction_output(USBH2_CS, 0/*Enabled*/); + if (err) { + pr_err("Failed to drive the usb host 2 CS gpio\n"); + goto h2_free_cs; + } + + /* H2 phy reset*/ + err = gpio_request(USBH2_RESET, "USB-H2-RESET"); + if (err) { + pr_err("Failed to request the usb host 2 reset gpio\n"); + goto h2_free_cs; + } + + err = gpio_direction_output(USBH2_RESET, 1/*HIGH*/); + if (err) { + pr_err("Failed to reset the usb host 2 phy\n"); + goto h2_free_reset; + } + + gpio_set_value(USBH2_RESET, 0/*LOW*/); + mdelay(5); + gpio_set_value(USBH2_RESET, 1/*HIGH*/); + + return 0; + +h2_free_reset: + gpio_free(USBH2_RESET); +h2_free_cs: + gpio_free(USBH2_CS); + return err; +} + +static struct mxc_usbh_platform_data usbotg_pdata = { + .init = usbotg_init, + .portsc = MXC_EHCI_MODE_ULPI | MXC_EHCI_UTMI_8BIT, + .flags = MXC_EHCI_POWER_PINS_ENABLED | MXC_EHCI_INTERFACE_DIFF_UNI, +}; + +static struct mxc_usbh_platform_data usbh2_pdata = { + .init = usbh2_init, + .portsc = MXC_EHCI_MODE_ULPI | MXC_EHCI_UTMI_8BIT, + .flags = MXC_EHCI_POWER_PINS_ENABLED | MXC_EHCI_INTERFACE_DIFF_UNI, +}; +#endif /* CONFIG_USB_ULPI */ + /* RTC over I2C*/ #define ARMADILLO5X0_RTC_GPIO IOMUX_TO_GPIO(MX31_PIN_SRXD4) @@ -393,6 +548,17 @@ static void __init armadillo5x0_init(void) if (armadillo5x0_i2c_rtc.irq == 0) pr_warning("armadillo5x0_init: failed to get RTC IRQ\n"); i2c_register_board_info(1, &armadillo5x0_i2c_rtc, 1); + + /* USB */ +#if defined(CONFIG_USB_ULPI) + usbotg_pdata.otg = otg_ulpi_create(&mxc_ulpi_access_ops, + USB_OTG_DRV_VBUS | USB_OTG_DRV_VBUS_EXT); + usbh2_pdata.otg = otg_ulpi_create(&mxc_ulpi_access_ops, + USB_OTG_DRV_VBUS | USB_OTG_DRV_VBUS_EXT); + + mxc_register_device(&mxc_otg_host, &usbotg_pdata); + mxc_register_device(&mxc_usbh2, &usbh2_pdata); +#endif } static void __init armadillo5x0_timer_init(void) diff --git a/arch/arm/mach-mx3/mach-mx31_3ds.c b/arch/arm/mach-mx3/mach-mx31_3ds.c index b88c18a..f54af1e 100644 --- a/arch/arm/mach-mx3/mach-mx31_3ds.c +++ b/arch/arm/mach-mx3/mach-mx31_3ds.c @@ -23,6 +23,9 @@ #include <linux/gpio.h> #include <linux/smsc911x.h> #include <linux/platform_device.h> +#include <linux/mfd/mc13783.h> +#include <linux/spi/spi.h> +#include <linux/regulator/machine.h> #include <mach/hardware.h> #include <asm/mach-types.h> @@ -31,26 +34,96 @@ #include <asm/memory.h> #include <asm/mach/map.h> #include <mach/common.h> -#include <mach/board-mx31pdk.h> +#include <mach/board-mx31_3ds.h> #include <mach/imx-uart.h> #include <mach/iomux-mx3.h> +#include <mach/mxc_nand.h> +#include <mach/spi.h> #include "devices.h" /*! - * @file mx31pdk.c + * @file mx31_3ds.c * * @brief This file contains the board-specific initialization routines. * * @ingroup System */ -static int mx31pdk_pins[] = { +static int mx31_3ds_pins[] = { /* UART1 */ MX31_PIN_CTS1__CTS1, MX31_PIN_RTS1__RTS1, MX31_PIN_TXD1__TXD1, MX31_PIN_RXD1__RXD1, IOMUX_MODE(MX31_PIN_GPIO1_1, IOMUX_CONFIG_GPIO), + /* SPI 1 */ + MX31_PIN_CSPI2_SCLK__SCLK, + MX31_PIN_CSPI2_MOSI__MOSI, + MX31_PIN_CSPI2_MISO__MISO, + MX31_PIN_CSPI2_SPI_RDY__SPI_RDY, + MX31_PIN_CSPI2_SS0__SS0, + MX31_PIN_CSPI2_SS2__SS2, /*CS for MC13783 */ + /* MC13783 IRQ */ + IOMUX_MODE(MX31_PIN_GPIO1_3, IOMUX_CONFIG_GPIO), +}; + +/* Regulators */ +static struct regulator_init_data pwgtx_init = { + .constraints = { + .boot_on = 1, + .always_on = 1, + }, +}; + +static struct mc13783_regulator_init_data mx31_3ds_regulators[] = { + { + .id = MC13783_REGU_PWGT1SPI, /* Power Gate for ARM core. */ + .init_data = &pwgtx_init, + }, { + .id = MC13783_REGU_PWGT2SPI, /* Power Gate for L2 Cache. */ + .init_data = &pwgtx_init, + }, +}; + +/* MC13783 */ +static struct mc13783_platform_data mc13783_pdata __initdata = { + .regulators = mx31_3ds_regulators, + .num_regulators = ARRAY_SIZE(mx31_3ds_regulators), + .flags = MC13783_USE_REGULATOR, +}; + +/* SPI */ +static int spi1_internal_chipselect[] = { + MXC_SPI_CS(0), + MXC_SPI_CS(2), +}; + +static struct spi_imx_master spi1_pdata = { + .chipselect = spi1_internal_chipselect, + .num_chipselect = ARRAY_SIZE(spi1_internal_chipselect), +}; + +static struct spi_board_info mx31_3ds_spi_devs[] __initdata = { + { + .modalias = "mc13783", + .max_speed_hz = 1000000, + .bus_num = 1, + .chip_select = 1, /* SS2 */ + .platform_data = &mc13783_pdata, + .irq = IOMUX_TO_IRQ(MX31_PIN_GPIO1_3), + .mode = SPI_CS_HIGH, + }, +}; + +/* + * NAND Flash + */ +static struct mxc_nand_platform_data imx31_3ds_nand_flash_pdata = { + .width = 1, + .hw_ecc = 1, +#ifdef MACH_MX31_3DS_MXC_NAND_USE_BBT + .flash_bbt = 1, +#endif }; static struct imxuart_platform_data uart_pdata = { @@ -95,7 +168,7 @@ static struct platform_device smsc911x_device = { * LEDs, switches, interrupts for Ethernet. */ -static void mx31pdk_expio_irq_handler(uint32_t irq, struct irq_desc *desc) +static void mx31_3ds_expio_irq_handler(uint32_t irq, struct irq_desc *desc) { uint32_t imr_val; uint32_t int_valid; @@ -163,7 +236,7 @@ static struct irq_chip expio_irq_chip = { .unmask = expio_unmask_irq, }; -static int __init mx31pdk_init_expio(void) +static int __init mx31_3ds_init_expio(void) { int i; int ret; @@ -176,7 +249,7 @@ static int __init mx31pdk_init_expio(void) return -ENODEV; } - pr_info("i.MX31PDK Debug board detected, rev = 0x%04X\n", + pr_info("i.MX31 3DS Debug board detected, rev = 0x%04X\n", __raw_readw(CPLD_CODE_VER_REG)); /* @@ -201,7 +274,7 @@ static int __init mx31pdk_init_expio(void) set_irq_flags(i, IRQF_VALID); } set_irq_type(EXPIO_PARENT_INT, IRQ_TYPE_LEVEL_LOW); - set_irq_chained_handler(EXPIO_PARENT_INT, mx31pdk_expio_irq_handler); + set_irq_chained_handler(EXPIO_PARENT_INT, mx31_3ds_expio_irq_handler); return 0; } @@ -209,7 +282,7 @@ static int __init mx31pdk_init_expio(void) /* * This structure defines the MX31 memory map. */ -static struct map_desc mx31pdk_io_desc[] __initdata = { +static struct map_desc mx31_3ds_io_desc[] __initdata = { { .virtual = MX31_CS5_BASE_ADDR_VIRT, .pfn = __phys_to_pfn(MX31_CS5_BASE_ADDR), @@ -221,10 +294,10 @@ static struct map_desc mx31pdk_io_desc[] __initdata = { /* * Set up static virtual mappings. */ -static void __init mx31pdk_map_io(void) +static void __init mx31_3ds_map_io(void) { mx31_map_io(); - iotable_init(mx31pdk_io_desc, ARRAY_SIZE(mx31pdk_io_desc)); + iotable_init(mx31_3ds_io_desc, ARRAY_SIZE(mx31_3ds_io_desc)); } /*! @@ -232,35 +305,40 @@ static void __init mx31pdk_map_io(void) */ static void __init mxc_board_init(void) { - mxc_iomux_setup_multiple_pins(mx31pdk_pins, ARRAY_SIZE(mx31pdk_pins), - "mx31pdk"); + mxc_iomux_setup_multiple_pins(mx31_3ds_pins, ARRAY_SIZE(mx31_3ds_pins), + "mx31_3ds"); mxc_register_device(&mxc_uart_device0, &uart_pdata); + mxc_register_device(&mxc_nand_device, &imx31_3ds_nand_flash_pdata); + + mxc_register_device(&mxc_spi_device1, &spi1_pdata); + spi_register_board_info(mx31_3ds_spi_devs, + ARRAY_SIZE(mx31_3ds_spi_devs)); - if (!mx31pdk_init_expio()) + if (!mx31_3ds_init_expio()) platform_device_register(&smsc911x_device); } -static void __init mx31pdk_timer_init(void) +static void __init mx31_3ds_timer_init(void) { mx31_clocks_init(26000000); } -static struct sys_timer mx31pdk_timer = { - .init = mx31pdk_timer_init, +static struct sys_timer mx31_3ds_timer = { + .init = mx31_3ds_timer_init, }; /* * The following uses standard kernel macros defined in arch.h in order to - * initialize __mach_desc_MX31PDK data structure. + * initialize __mach_desc_MX31_3DS data structure. */ MACHINE_START(MX31_3DS, "Freescale MX31PDK (3DS)") /* Maintainer: Freescale Semiconductor, Inc. */ .phys_io = MX31_AIPS1_BASE_ADDR, .io_pg_offst = (MX31_AIPS1_BASE_ADDR_VIRT >> 18) & 0xfffc, .boot_params = MX3x_PHYS_OFFSET + 0x100, - .map_io = mx31pdk_map_io, + .map_io = mx31_3ds_map_io, .init_irq = mx31_init_irq, .init_machine = mxc_board_init, - .timer = &mx31pdk_timer, + .timer = &mx31_3ds_timer, MACHINE_END diff --git a/arch/arm/mach-mx3/mach-pcm037.c b/arch/arm/mach-mx3/mach-pcm037.c index 034ec81..2df1ec5 100644 --- a/arch/arm/mach-mx3/mach-pcm037.c +++ b/arch/arm/mach-mx3/mach-pcm037.c @@ -35,7 +35,6 @@ #include <linux/can/platform/sja1000.h> #include <linux/usb/otg.h> #include <linux/usb/ulpi.h> -#include <linux/fsl_devices.h> #include <linux/gfp.h> #include <media/soc_camera.h> diff --git a/arch/arm/mach-mx3/mx31lite-db.c b/arch/arm/mach-mx3/mx31lite-db.c index ccd8742..093c595 100644 --- a/arch/arm/mach-mx3/mx31lite-db.c +++ b/arch/arm/mach-mx3/mx31lite-db.c @@ -28,7 +28,6 @@ #include <linux/types.h> #include <linux/init.h> #include <linux/gpio.h> -#include <linux/platform_device.h> #include <linux/leds.h> #include <linux/platform_device.h> @@ -206,5 +205,6 @@ void __init mx31lite_db_init(void) mxc_register_device(&mxcsdhc_device0, &mmc_pdata); mxc_register_device(&mxc_spi_device0, &spi0_pdata); platform_device_register(&litekit_led_device); + mxc_register_device(&imx_wdt_device0, NULL); } diff --git a/arch/arm/mach-mx5/clock-mx51.c b/arch/arm/mach-mx5/clock-mx51.c index be90c03..8f85f73 100644 --- a/arch/arm/mach-mx5/clock-mx51.c +++ b/arch/arm/mach-mx5/clock-mx51.c @@ -757,7 +757,7 @@ DEFINE_CLOCK(uart3_ipg_clk, 2, MXC_CCM_CCGR1, MXC_CCM_CCGRx_CG7_OFFSET, /* GPT */ DEFINE_CLOCK(gpt_clk, 0, MXC_CCM_CCGR2, MXC_CCM_CCGRx_CG9_OFFSET, - NULL, NULL, &ipg_perclk, NULL); + NULL, NULL, &ipg_clk, NULL); DEFINE_CLOCK(gpt_ipg_clk, 0, MXC_CCM_CCGR2, MXC_CCM_CCGRx_CG10_OFFSET, NULL, NULL, &ipg_clk, NULL); diff --git a/arch/arm/mach-mx5/cpu.c b/arch/arm/mach-mx5/cpu.c index 41c769f..2d37785 100644 --- a/arch/arm/mach-mx5/cpu.c +++ b/arch/arm/mach-mx5/cpu.c @@ -14,9 +14,62 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/init.h> +#include <linux/module.h> #include <mach/hardware.h> #include <asm/io.h> +static int cpu_silicon_rev = -1; + +#define SI_REV 0x48 + +static void query_silicon_parameter(void) +{ + void __iomem *rom = ioremap(MX51_IROM_BASE_ADDR, MX51_IROM_SIZE); + u32 rev; + + if (!rom) { + cpu_silicon_rev = -EINVAL; + return; + } + + rev = readl(rom + SI_REV); + switch (rev) { + case 0x1: + cpu_silicon_rev = MX51_CHIP_REV_1_0; + break; + case 0x2: + cpu_silicon_rev = MX51_CHIP_REV_1_1; + break; + case 0x10: + cpu_silicon_rev = MX51_CHIP_REV_2_0; + break; + case 0x20: + cpu_silicon_rev = MX51_CHIP_REV_3_0; + break; + default: + cpu_silicon_rev = 0; + } + + iounmap(rom); +} + +/* + * Returns: + * the silicon revision of the cpu + * -EINVAL - not a mx51 + */ +int mx51_revision(void) +{ + if (!cpu_is_mx51()) + return -EINVAL; + + if (cpu_silicon_rev == -1) + query_silicon_parameter(); + + return cpu_silicon_rev; +} +EXPORT_SYMBOL(mx51_revision); + static int __init post_cpu_init(void) { unsigned int reg; diff --git a/arch/arm/mach-mx5/mm.c b/arch/arm/mach-mx5/mm.c index c21e18b..b7677ef 100644 --- a/arch/arm/mach-mx5/mm.c +++ b/arch/arm/mach-mx5/mm.c @@ -35,11 +35,6 @@ static struct map_desc mxc_io_desc[] __initdata = { .length = MX51_DEBUG_SIZE, .type = MT_DEVICE }, { - .virtual = MX51_TZIC_BASE_ADDR_VIRT, - .pfn = __phys_to_pfn(MX51_TZIC_BASE_ADDR), - .length = MX51_TZIC_SIZE, - .type = MT_DEVICE - }, { .virtual = MX51_AIPS1_BASE_ADDR_VIRT, .pfn = __phys_to_pfn(MX51_AIPS1_BASE_ADDR), .length = MX51_AIPS1_SIZE, @@ -54,11 +49,6 @@ static struct map_desc mxc_io_desc[] __initdata = { .pfn = __phys_to_pfn(MX51_AIPS2_BASE_ADDR), .length = MX51_AIPS2_SIZE, .type = MT_DEVICE - }, { - .virtual = MX51_NFC_AXI_BASE_ADDR_VIRT, - .pfn = __phys_to_pfn(MX51_NFC_AXI_BASE_ADDR), - .length = MX51_NFC_AXI_SIZE, - .type = MT_DEVICE }, }; @@ -69,14 +59,6 @@ static struct map_desc mxc_io_desc[] __initdata = { */ void __init mx51_map_io(void) { - u32 tzic_addr; - - if (mx51_revision() < MX51_CHIP_REV_2_0) - tzic_addr = 0x8FFFC000; - else - tzic_addr = 0xE0003000; - mxc_io_desc[2].pfn = __phys_to_pfn(tzic_addr); - mxc_set_cpu_type(MXC_CPU_MX51); mxc_iomux_v3_init(MX51_IO_ADDRESS(MX51_IOMUXC_BASE_ADDR)); mxc_arch_reset_init(MX51_IO_ADDRESS(MX51_WDOG_BASE_ADDR)); @@ -85,5 +67,17 @@ void __init mx51_map_io(void) void __init mx51_init_irq(void) { - tzic_init_irq(MX51_IO_ADDRESS(MX51_TZIC_BASE_ADDR)); + unsigned long tzic_addr; + void __iomem *tzic_virt; + + if (mx51_revision() < MX51_CHIP_REV_2_0) + tzic_addr = MX51_TZIC_BASE_ADDR_TO1; + else + tzic_addr = MX51_TZIC_BASE_ADDR; + + tzic_virt = ioremap(tzic_addr, SZ_16K); + if (!tzic_virt) + panic("unable to map TZIC interrupt controller\n"); + + tzic_init_irq(tzic_virt); } diff --git a/arch/arm/mm/copypage-v6.c b/arch/arm/mm/copypage-v6.c index 8bca4de..f55fa10 100644 --- a/arch/arm/mm/copypage-v6.c +++ b/arch/arm/mm/copypage-v6.c @@ -41,14 +41,7 @@ static void v6_copy_user_highpage_nonaliasing(struct page *to, kfrom = kmap_atomic(from, KM_USER0); kto = kmap_atomic(to, KM_USER1); copy_page(kto, kfrom); -#ifdef CONFIG_HIGHMEM - /* - * kmap_atomic() doesn't set the page virtual address, and - * kunmap_atomic() takes care of cache flushing already. - */ - if (page_address(to) != NULL) -#endif - __cpuc_flush_dcache_area(kto, PAGE_SIZE); + __cpuc_flush_dcache_area(kto, PAGE_SIZE); kunmap_atomic(kto, KM_USER1); kunmap_atomic(kfrom, KM_USER0); } diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1351edc..13fa536 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -464,6 +464,11 @@ static void dma_cache_maint_page(struct page *page, unsigned long offset, vaddr += offset; op(vaddr, len, dir); kunmap_high(page); + } else if (cache_is_vipt()) { + pte_t saved_pte; + vaddr = kmap_high_l1_vipt(page, &saved_pte); + op(vaddr + offset, len, dir); + kunmap_high_l1_vipt(page, saved_pte); } } else { vaddr = page_address(page) + offset; diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index e34f095..c6844cb 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -13,6 +13,7 @@ #include <asm/cacheflush.h> #include <asm/cachetype.h> +#include <asm/highmem.h> #include <asm/smp_plat.h> #include <asm/system.h> #include <asm/tlbflush.h> @@ -152,21 +153,25 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, void __flush_dcache_page(struct address_space *mapping, struct page *page) { - void *addr = page_address(page); - /* * Writeback any data associated with the kernel mapping of this * page. This ensures that data in the physical page is mutually * coherent with the kernels mapping. */ -#ifdef CONFIG_HIGHMEM - /* - * kmap_atomic() doesn't set the page virtual address, and - * kunmap_atomic() takes care of cache flushing already. - */ - if (addr) -#endif - __cpuc_flush_dcache_area(addr, PAGE_SIZE); + if (!PageHighMem(page)) { + __cpuc_flush_dcache_area(page_address(page), PAGE_SIZE); + } else { + void *addr = kmap_high_get(page); + if (addr) { + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high(page); + } else if (cache_is_vipt()) { + pte_t saved_pte; + addr = kmap_high_l1_vipt(page, &saved_pte); + __cpuc_flush_dcache_area(addr, PAGE_SIZE); + kunmap_high_l1_vipt(page, saved_pte); + } + } /* * If this is a page cache page, and we have an aliasing VIPT cache, diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index 2be1ec7..77b030f 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -79,7 +79,8 @@ void kunmap_atomic(void *kvaddr, enum km_type type) unsigned int idx = type + KM_TYPE_NR * smp_processor_id(); if (kvaddr >= (void *)FIXADDR_START) { - __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); + if (cache_is_vivt()) + __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx)); set_pte_ext(TOP_PTE(vaddr), __pte(0), 0); @@ -124,3 +125,87 @@ struct page *kmap_atomic_to_page(const void *ptr) pte = TOP_PTE(vaddr); return pte_page(*pte); } + +#ifdef CONFIG_CPU_CACHE_VIPT + +#include <linux/percpu.h> + +/* + * The VIVT cache of a highmem page is always flushed before the page + * is unmapped. Hence unmapped highmem pages need no cache maintenance + * in that case. + * + * However unmapped pages may still be cached with a VIPT cache, and + * it is not possible to perform cache maintenance on them using physical + * addresses unfortunately. So we have no choice but to set up a temporary + * virtual mapping for that purpose. + * + * Yet this VIPT cache maintenance may be triggered from DMA support + * functions which are possibly called from interrupt context. As we don't + * want to keep interrupt disabled all the time when such maintenance is + * taking place, we therefore allow for some reentrancy by preserving and + * restoring the previous fixmap entry before the interrupted context is + * resumed. If the reentrancy depth is 0 then there is no need to restore + * the previous fixmap, and leaving the current one in place allow it to + * be reused the next time without a TLB flush (common with DMA). + */ + +static DEFINE_PER_CPU(int, kmap_high_l1_vipt_depth); + +void *kmap_high_l1_vipt(struct page *page, pte_t *saved_pte) +{ + unsigned int idx, cpu = smp_processor_id(); + int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + unsigned long vaddr, flags; + pte_t pte, *ptep; + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + if (!in_interrupt()) + preempt_disable(); + + raw_local_irq_save(flags); + (*depth)++; + if (pte_val(*ptep) == pte_val(pte)) { + *saved_pte = pte; + } else { + *saved_pte = *ptep; + set_pte_ext(ptep, pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + return (void *)vaddr; +} + +void kunmap_high_l1_vipt(struct page *page, pte_t saved_pte) +{ + unsigned int idx, cpu = smp_processor_id(); + int *depth = &per_cpu(kmap_high_l1_vipt_depth, cpu); + unsigned long vaddr, flags; + pte_t pte, *ptep; + + idx = KM_L1_CACHE + KM_TYPE_NR * cpu; + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + ptep = TOP_PTE(vaddr); + pte = mk_pte(page, kmap_prot); + + BUG_ON(pte_val(*ptep) != pte_val(pte)); + BUG_ON(*depth <= 0); + + raw_local_irq_save(flags); + (*depth)--; + if (*depth != 0 && pte_val(pte) != pte_val(saved_pte)) { + set_pte_ext(ptep, saved_pte, 0); + local_flush_tlb_kernel_page(vaddr); + } + raw_local_irq_restore(flags); + + if (!in_interrupt()) + preempt_enable(); +} + +#endif /* CONFIG_CPU_CACHE_VIPT */ diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 9d4da6a..241c24a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -420,6 +420,10 @@ static void __init build_mem_type_table(void) user_pgprot |= L_PTE_SHARED; kern_pgprot |= L_PTE_SHARED; vecs_pgprot |= L_PTE_SHARED; + mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_WC].prot_pte |= L_PTE_SHARED; + mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; #endif @@ -1050,10 +1054,12 @@ void setup_mm_for_reboot(char mode) pgd_t *pgd; int i; - if (current->mm && current->mm->pgd) - pgd = current->mm->pgd; - else - pgd = init_mm.pgd; + /* + * We need to access to user-mode page tables here. For kernel threads + * we don't have any user-mode mappings so we use the context that we + * "borrowed". + */ + pgd = current->active_mm->pgd; base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT; if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) diff --git a/arch/arm/plat-mxc/include/mach/board-mx31pdk.h b/arch/arm/plat-mxc/include/mach/board-mx31_3ds.h index 2bbd6ed..da92933a 100644 --- a/arch/arm/plat-mxc/include/mach/board-mx31pdk.h +++ b/arch/arm/plat-mxc/include/mach/board-mx31_3ds.h @@ -8,8 +8,8 @@ * published by the Free Software Foundation. */ -#ifndef __ASM_ARCH_MXC_BOARD_MX31PDK_H__ -#define __ASM_ARCH_MXC_BOARD_MX31PDK_H__ +#ifndef __ASM_ARCH_MXC_BOARD_MX31_3DS_H__ +#define __ASM_ARCH_MXC_BOARD_MX31_3DS_H__ /* Definitions for components on the Debug board */ @@ -56,4 +56,4 @@ #define MXC_MAX_EXP_IO_LINES 16 -#endif /* __ASM_ARCH_MXC_BOARD_MX31PDK_H__ */ +#endif /* __ASM_ARCH_MXC_BOARD_MX31_3DS_H__ */ diff --git a/arch/arm/plat-mxc/include/mach/mx51.h b/arch/arm/plat-mxc/include/mach/mx51.h index 771532b..5aad344 100644 --- a/arch/arm/plat-mxc/include/mach/mx51.h +++ b/arch/arm/plat-mxc/include/mach/mx51.h @@ -14,7 +14,7 @@ * FB100000 70000000 1M SPBA 0 * FB000000 73F00000 1M AIPS 1 * FB200000 83F00000 1M AIPS 2 - * FA100000 8FFFC000 16K TZIC (interrupt controller) + * 8FFFC000 16K TZIC (interrupt controller) * 90000000 256M CSD0 SDRAM/DDR * A0000000 256M CSD1 SDRAM/DDR * B0000000 128M CS0 Flash @@ -23,11 +23,17 @@ * C8000000 64M CS3 Flash * CC000000 32M CS4 SRAM * CE000000 32M CS5 SRAM - * F9000000 CFFF0000 64K NFC (NAND Flash AXI) + * CFFF0000 64K NFC (NAND Flash AXI) * */ /* + * IROM + */ +#define MX51_IROM_BASE_ADDR 0x0 +#define MX51_IROM_SIZE SZ_64K + +/* * IRAM */ #define MX51_IRAM_BASE_ADDR 0x1FFE0000 /* internal ram */ @@ -40,7 +46,6 @@ * NFC */ #define MX51_NFC_AXI_BASE_ADDR 0xCFFF0000 /* NAND flash AXI */ -#define MX51_NFC_AXI_BASE_ADDR_VIRT 0xF9000000 #define MX51_NFC_AXI_SIZE SZ_64K /* @@ -49,9 +54,8 @@ #define MX51_GPU_BASE_ADDR 0x20000000 #define MX51_GPU2D_BASE_ADDR 0xD0000000 -#define MX51_TZIC_BASE_ADDR 0x8FFFC000 -#define MX51_TZIC_BASE_ADDR_VIRT 0xFA100000 -#define MX51_TZIC_SIZE SZ_16K +#define MX51_TZIC_BASE_ADDR_TO1 0x8FFFC000 +#define MX51_TZIC_BASE_ADDR 0xE0000000 #define MX51_DEBUG_BASE_ADDR 0x60000000 #define MX51_DEBUG_BASE_ADDR_VIRT 0xFA200000 @@ -232,12 +236,10 @@ #define MX51_IO_ADDRESS(x) \ (void __iomem *) \ (MX51_IS_MODULE(x, IRAM) ? MX51_IRAM_IO_ADDRESS(x) : \ - MX51_IS_MODULE(x, TZIC) ? MX51_TZIC_IO_ADDRESS(x) : \ MX51_IS_MODULE(x, DEBUG) ? MX51_DEBUG_IO_ADDRESS(x) : \ MX51_IS_MODULE(x, SPBA0) ? MX51_SPBA0_IO_ADDRESS(x) : \ MX51_IS_MODULE(x, AIPS1) ? MX51_AIPS1_IO_ADDRESS(x) : \ - MX51_IS_MODULE(x, AIPS2) ? MX51_AIPS2_IO_ADDRESS(x) : \ - MX51_IS_MODULE(x, NFC_AXI) ? MX51_NFC_AXI_IO_ADDRESS(x) : \ + MX51_IS_MODULE(x, AIPS2) ? MX51_AIPS2_IO_ADDRESS(x) : \ 0xDEADBEEF) /* @@ -246,9 +248,6 @@ #define MX51_IRAM_IO_ADDRESS(x) \ (((x) - MX51_IRAM_BASE_ADDR) + MX51_IRAM_BASE_ADDR_VIRT) -#define MX51_TZIC_IO_ADDRESS(x) \ - (((x) - MX51_TZIC_BASE_ADDR) + MX51_TZIC_BASE_ADDR_VIRT) - #define MX51_DEBUG_IO_ADDRESS(x) \ (((x) - MX51_DEBUG_BASE_ADDR) + MX51_DEBUG_BASE_ADDR_VIRT) @@ -261,9 +260,6 @@ #define MX51_AIPS2_IO_ADDRESS(x) \ (((x) - MX51_AIPS2_BASE_ADDR) + MX51_AIPS2_BASE_ADDR_VIRT) -#define MX51_NFC_AXI_IO_ADDRESS(x) \ - (((x) - MX51_NFC_AXI_BASE_ADDR) + MX51_NFC_AXI_BASE_ADDR_VIRT) - #define MX51_IS_MEM_DEVICE_NONSHARED(x) 0 /* @@ -443,12 +439,7 @@ #if !defined(__ASSEMBLY__) && !defined(__MXC_BOOT_UNCOMPRESS) -extern unsigned int system_rev; - -static inline unsigned int mx51_revision(void) -{ - return system_rev; -} +extern int mx51_revision(void); #endif #endif /* __ASM_ARCH_MXC_MX51_H__ */ diff --git a/arch/arm/plat-mxc/include/mach/uncompress.h b/arch/arm/plat-mxc/include/mach/uncompress.h index 52e476a..b6d3d0f 100644 --- a/arch/arm/plat-mxc/include/mach/uncompress.h +++ b/arch/arm/plat-mxc/include/mach/uncompress.h @@ -66,6 +66,7 @@ static inline void flush(void) #define MX2X_UART1_BASE_ADDR 0x1000a000 #define MX3X_UART1_BASE_ADDR 0x43F90000 #define MX3X_UART2_BASE_ADDR 0x43F94000 +#define MX51_UART1_BASE_ADDR 0x73fbc000 static __inline__ void __arch_decomp_setup(unsigned long arch_id) { @@ -101,6 +102,9 @@ static __inline__ void __arch_decomp_setup(unsigned long arch_id) case MACH_TYPE_MAGX_ZN5: uart_base = MX3X_UART2_BASE_ADDR; break; + case MACH_TYPE_MX51_BABBAGE: + uart_base = MX51_UART1_BASE_ADDR; + break; default: break; } diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index a420cb9..315a540 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -428,26 +428,6 @@ static void vfp_pm_init(void) static inline void vfp_pm_init(void) { } #endif /* CONFIG_PM */ -/* - * Synchronise the hardware VFP state of a thread other than current with the - * saved one. This function is used by the ptrace mechanism. - */ -#ifdef CONFIG_SMP -void vfp_sync_hwstate(struct thread_info *thread) -{ -} - -void vfp_flush_hwstate(struct thread_info *thread) -{ - /* - * On SMP systems, the VFP state is automatically saved at every - * context switch. We mark the thread VFP state as belonging to a - * non-existent CPU so that the saved one will be reloaded when - * needed. - */ - thread->vfpstate.hard.cpu = NR_CPUS; -} -#else void vfp_sync_hwstate(struct thread_info *thread) { unsigned int cpu = get_cpu(); @@ -490,9 +470,18 @@ void vfp_flush_hwstate(struct thread_info *thread) last_VFP_context[cpu] = NULL; } +#ifdef CONFIG_SMP + /* + * For SMP we still have to take care of the case where the thread + * migrates to another CPU and then back to the original CPU on which + * the last VFP user is still the same thread. Mark the thread VFP + * state as belonging to a non-existent CPU so that the saved one will + * be reloaded in the above case. + */ + thread->vfpstate.hard.cpu = NR_CPUS; +#endif put_cpu(); } -#endif #include <linux/smp.h> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 73c5c2b..d7bac1f 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -979,11 +979,13 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; + r = -ENXIO; if (irqchip_in_kernel(kvm)) { __s32 status; status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); if (ioctl == KVM_IRQ_LINE_STATUS) { + r = -EFAULT; irq_event.status = status; if (copy_to_user(argp, &irq_event, sizeof irq_event)) @@ -1535,8 +1537,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp, goto out; if (copy_to_user(user_stack, stack, - sizeof(struct kvm_ia64_vcpu_stack))) + sizeof(struct kvm_ia64_vcpu_stack))) { + r = -EFAULT; goto out; + } break; } @@ -1802,7 +1806,8 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, { struct kvm_memory_slot *memslot; int r, i; - long n, base; + long base; + unsigned long n; unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base + offsetof(struct kvm_vm_data, kvm_mem_dirty_log)); @@ -1815,7 +1820,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); base = memslot->base_gfn / BITS_PER_LONG; for (i = 0; i < n/sizeof(long); ++i) { @@ -1831,7 +1836,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { int r; - int n; + unsigned long n; struct kvm_memory_slot *memslot; int is_dirty = 0; @@ -1850,7 +1855,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (is_dirty) { kvm_flush_remote_tlbs(kvm); memslot = &kvm->memslots->memslots[log->slot]; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } r = 0; diff --git a/arch/m68k/include/asm/atomic_mm.h b/arch/m68k/include/asm/atomic_mm.h index 88b7af2..d9d2ed6 100644 --- a/arch/m68k/include/asm/atomic_mm.h +++ b/arch/m68k/include/asm/atomic_mm.h @@ -148,14 +148,18 @@ static inline int atomic_xchg(atomic_t *v, int new) static inline int atomic_sub_and_test(int i, atomic_t *v) { char c; - __asm__ __volatile__("subl %2,%1; seq %0" : "=d" (c), "+m" (*v): "g" (i)); + __asm__ __volatile__("subl %2,%1; seq %0" + : "=d" (c), "+m" (*v) + : "id" (i)); return c != 0; } static inline int atomic_add_negative(int i, atomic_t *v) { char c; - __asm__ __volatile__("addl %2,%1; smi %0" : "=d" (c), "+m" (*v): "g" (i)); + __asm__ __volatile__("addl %2,%1; smi %0" + : "=d" (c), "+m" (*v) + : "id" (i)); return c != 0; } diff --git a/arch/m68k/include/asm/mcfuart.h b/arch/m68k/include/asm/mcfuart.h index ef22938..01a8716 100644 --- a/arch/m68k/include/asm/mcfuart.h +++ b/arch/m68k/include/asm/mcfuart.h @@ -212,5 +212,10 @@ struct mcf_platform_uart { #define MCFUART_URF_RXS 0xc0 /* Receiver status */ #endif +#if defined(CONFIG_M5272) +#define MCFUART_TXFIFOSIZE 25 +#else +#define MCFUART_TXFIFOSIZE 1 +#endif /****************************************************************************/ #endif /* mcfuart_h */ diff --git a/arch/m68k/include/asm/sigcontext.h b/arch/m68k/include/asm/sigcontext.h index 1320eaa..a29dd74 100644 --- a/arch/m68k/include/asm/sigcontext.h +++ b/arch/m68k/include/asm/sigcontext.h @@ -17,13 +17,11 @@ struct sigcontext { #ifndef __uClinux__ # ifdef __mcoldfire__ unsigned long sc_fpregs[2][2]; /* room for two fp registers */ - unsigned long sc_fpcntl[3]; - unsigned char sc_fpstate[16+6*8]; # else unsigned long sc_fpregs[2*3]; /* room for two fp registers */ +# endif unsigned long sc_fpcntl[3]; unsigned char sc_fpstate[216]; -# endif #endif }; diff --git a/arch/m68knommu/Makefile b/arch/m68knommu/Makefile index ce404bc..1404257 100644 --- a/arch/m68knommu/Makefile +++ b/arch/m68knommu/Makefile @@ -94,7 +94,7 @@ cflags-$(CONFIG_M520x) := $(call cc-option,-mcpu=5208,-m5200) cflags-$(CONFIG_M523x) := $(call cc-option,-mcpu=523x,-m5307) cflags-$(CONFIG_M5249) := $(call cc-option,-mcpu=5249,-m5200) cflags-$(CONFIG_M5271) := $(call cc-option,-mcpu=5271,-m5307) -cflags-$(CONFIG_M5272) := $(call cc-option,-mcpu=5271,-m5200) +cflags-$(CONFIG_M5272) := $(call cc-option,-mcpu=5272,-m5307) cflags-$(CONFIG_M5275) := $(call cc-option,-mcpu=5275,-m5307) cflags-$(CONFIG_M528x) := $(call cc-option,-m528x,-m5307) cflags-$(CONFIG_M5307) := $(call cc-option,-m5307,-m5200) diff --git a/arch/m68knommu/kernel/entry.S b/arch/m68knommu/kernel/entry.S index 56043ad..aff6f57 100644 --- a/arch/m68knommu/kernel/entry.S +++ b/arch/m68knommu/kernel/entry.S @@ -145,6 +145,6 @@ ENTRY(ret_from_user_signal) trap #0 ENTRY(ret_from_user_rt_signal) - move #__NR_rt_sigreturn,%d0 + movel #__NR_rt_sigreturn,%d0 trap #0 diff --git a/arch/m68knommu/platform/68360/ints.c b/arch/m68knommu/platform/68360/ints.c index 1143f77..6f22970 100644 --- a/arch/m68knommu/platform/68360/ints.c +++ b/arch/m68knommu/platform/68360/ints.c @@ -107,7 +107,6 @@ void init_IRQ(void) _ramvec[vba+CPMVEC_PIO_PC7] = inthandler; /* pio - pc7 */ _ramvec[vba+CPMVEC_PIO_PC6] = inthandler; /* pio - pc6 */ _ramvec[vba+CPMVEC_TIMER3] = inthandler; /* timer 3 */ - _ramvec[vba+CPMVEC_RISCTIMER] = inthandler; /* reserved */ _ramvec[vba+CPMVEC_PIO_PC5] = inthandler; /* pio - pc5 */ _ramvec[vba+CPMVEC_PIO_PC4] = inthandler; /* pio - pc4 */ _ramvec[vba+CPMVEC_RESERVED2] = inthandler; /* reserved */ diff --git a/arch/mips/alchemy/devboards/db1200/setup.c b/arch/mips/alchemy/devboards/db1200/setup.c index 379536e..be7e92e 100644 --- a/arch/mips/alchemy/devboards/db1200/setup.c +++ b/arch/mips/alchemy/devboards/db1200/setup.c @@ -60,43 +60,6 @@ void __init board_setup(void) wmb(); } -/* use the hexleds to count the number of times the cpu has entered - * wait, the dots to indicate whether the CPU is currently idle or - * active (dots off = sleeping, dots on = working) for cases where - * the number doesn't change for a long(er) period of time. - */ -static void db1200_wait(void) -{ - __asm__(" .set push \n" - " .set mips3 \n" - " .set noreorder \n" - " cache 0x14, 0(%0) \n" - " cache 0x14, 32(%0) \n" - " cache 0x14, 64(%0) \n" - /* dots off: we're about to call wait */ - " lui $26, 0xb980 \n" - " ori $27, $0, 3 \n" - " sb $27, 0x18($26) \n" - " sync \n" - " nop \n" - " wait \n" - " nop \n" - " nop \n" - " nop \n" - " nop \n" - " nop \n" - /* dots on: there's work to do, increment cntr */ - " lui $26, 0xb980 \n" - " sb $0, 0x18($26) \n" - " lui $26, 0xb9c0 \n" - " lb $27, 0($26) \n" - " addiu $27, $27, 1 \n" - " sb $27, 0($26) \n" - " sync \n" - " .set pop \n" - : : "r" (db1200_wait)); -} - static int __init db1200_arch_init(void) { /* GPIO7 is low-level triggered CPLD cascade */ @@ -110,9 +73,6 @@ static int __init db1200_arch_init(void) irq_to_desc(DB1200_SD0_INSERT_INT)->status |= IRQ_NOAUTOEN; irq_to_desc(DB1200_SD0_EJECT_INT)->status |= IRQ_NOAUTOEN; - if (cpu_wait) - cpu_wait = db1200_wait; - return 0; } arch_initcall(db1200_arch_init); diff --git a/arch/mips/ar7/platform.c b/arch/mips/ar7/platform.c index 246df7a..2fafc78 100644 --- a/arch/mips/ar7/platform.c +++ b/arch/mips/ar7/platform.c @@ -168,7 +168,7 @@ static struct plat_vlynq_data vlynq_high_data = { .on = vlynq_on, .off = vlynq_off, }, - .reset_bit = 26, + .reset_bit = 16, .gpio_bit = 19, }; @@ -600,6 +600,7 @@ static int __init ar7_register_devices(void) } if (ar7_has_high_cpmac()) { + res = fixed_phy_add(PHY_POLL, cpmac_high.id, &fixed_phy_status); if (!res) { cpmac_get_mac(1, cpmac_high_data.dev_addr); diff --git a/arch/mips/bcm63xx/boards/board_bcm963xx.c b/arch/mips/bcm63xx/boards/board_bcm963xx.c index ea17941..8dba8cf 100644 --- a/arch/mips/bcm63xx/boards/board_bcm963xx.c +++ b/arch/mips/bcm63xx/boards/board_bcm963xx.c @@ -18,6 +18,7 @@ #include <asm/addrspace.h> #include <bcm63xx_board.h> #include <bcm63xx_cpu.h> +#include <bcm63xx_dev_uart.h> #include <bcm63xx_regs.h> #include <bcm63xx_io.h> #include <bcm63xx_dev_pci.h> @@ -40,6 +41,7 @@ static struct board_info __initdata board_96338gw = { .name = "96338GW", .expected_cpu_id = 0x6338, + .has_uart0 = 1, .has_enet0 = 1, .enet0 = { .force_speed_100 = 1, @@ -82,6 +84,7 @@ static struct board_info __initdata board_96338w = { .name = "96338W", .expected_cpu_id = 0x6338, + .has_uart0 = 1, .has_enet0 = 1, .enet0 = { .force_speed_100 = 1, @@ -126,6 +129,8 @@ static struct board_info __initdata board_96338w = { static struct board_info __initdata board_96345gw2 = { .name = "96345GW2", .expected_cpu_id = 0x6345, + + .has_uart0 = 1, }; #endif @@ -137,6 +142,7 @@ static struct board_info __initdata board_96348r = { .name = "96348R", .expected_cpu_id = 0x6348, + .has_uart0 = 1, .has_enet0 = 1, .has_pci = 1, @@ -180,6 +186,7 @@ static struct board_info __initdata board_96348gw_10 = { .name = "96348GW-10", .expected_cpu_id = 0x6348, + .has_uart0 = 1, .has_enet0 = 1, .has_enet1 = 1, .has_pci = 1, @@ -239,6 +246,7 @@ static struct board_info __initdata board_96348gw_11 = { .name = "96348GW-11", .expected_cpu_id = 0x6348, + .has_uart0 = 1, .has_enet0 = 1, .has_enet1 = 1, .has_pci = 1, @@ -292,6 +300,7 @@ static struct board_info __initdata board_96348gw = { .name = "96348GW", .expected_cpu_id = 0x6348, + .has_uart0 = 1, .has_enet0 = 1, .has_enet1 = 1, .has_pci = 1, @@ -349,9 +358,10 @@ static struct board_info __initdata board_FAST2404 = { .name = "F@ST2404", .expected_cpu_id = 0x6348, - .has_enet0 = 1, - .has_enet1 = 1, - .has_pci = 1, + .has_uart0 = 1, + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, .enet0 = { .has_phy = 1, @@ -368,10 +378,30 @@ static struct board_info __initdata board_FAST2404 = { .has_ehci0 = 1, }; +static struct board_info __initdata board_rta1025w_16 = { + .name = "RTA1025W_16", + .expected_cpu_id = 0x6348, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, +}; + + static struct board_info __initdata board_DV201AMR = { .name = "DV201AMR", .expected_cpu_id = 0x6348, + .has_uart0 = 1, .has_pci = 1, .has_ohci0 = 1, @@ -391,6 +421,7 @@ static struct board_info __initdata board_96348gw_a = { .name = "96348GW-A", .expected_cpu_id = 0x6348, + .has_uart0 = 1, .has_enet0 = 1, .has_enet1 = 1, .has_pci = 1, @@ -416,6 +447,7 @@ static struct board_info __initdata board_96358vw = { .name = "96358VW", .expected_cpu_id = 0x6358, + .has_uart0 = 1, .has_enet0 = 1, .has_enet1 = 1, .has_pci = 1, @@ -467,6 +499,7 @@ static struct board_info __initdata board_96358vw2 = { .name = "96358VW2", .expected_cpu_id = 0x6358, + .has_uart0 = 1, .has_enet0 = 1, .has_enet1 = 1, .has_pci = 1, @@ -514,6 +547,7 @@ static struct board_info __initdata board_AGPFS0 = { .name = "AGPF-S0", .expected_cpu_id = 0x6358, + .has_uart0 = 1, .has_enet0 = 1, .has_enet1 = 1, .has_pci = 1, @@ -531,6 +565,27 @@ static struct board_info __initdata board_AGPFS0 = { .has_ohci0 = 1, .has_ehci0 = 1, }; + +static struct board_info __initdata board_DWVS0 = { + .name = "DWV-S0", + .expected_cpu_id = 0x6358, + + .has_enet0 = 1, + .has_enet1 = 1, + .has_pci = 1, + + .enet0 = { + .has_phy = 1, + .use_internal_phy = 1, + }, + + .enet1 = { + .force_speed_100 = 1, + .force_duplex_full = 1, + }, + + .has_ohci0 = 1, +}; #endif /* @@ -552,16 +607,88 @@ static const struct board_info __initdata *bcm963xx_boards[] = { &board_FAST2404, &board_DV201AMR, &board_96348gw_a, + &board_rta1025w_16, #endif #ifdef CONFIG_BCM63XX_CPU_6358 &board_96358vw, &board_96358vw2, &board_AGPFS0, + &board_DWVS0, #endif }; /* + * Register a sane SPROMv2 to make the on-board + * bcm4318 WLAN work + */ +#ifdef CONFIG_SSB_PCIHOST +static struct ssb_sprom bcm63xx_sprom = { + .revision = 0x02, + .board_rev = 0x17, + .country_code = 0x0, + .ant_available_bg = 0x3, + .pa0b0 = 0x15ae, + .pa0b1 = 0xfa85, + .pa0b2 = 0xfe8d, + .pa1b0 = 0xffff, + .pa1b1 = 0xffff, + .pa1b2 = 0xffff, + .gpio0 = 0xff, + .gpio1 = 0xff, + .gpio2 = 0xff, + .gpio3 = 0xff, + .maxpwr_bg = 0x004c, + .itssi_bg = 0x00, + .boardflags_lo = 0x2848, + .boardflags_hi = 0x0000, +}; +#endif + +/* + * return board name for /proc/cpuinfo + */ +const char *board_get_name(void) +{ + return board.name; +} + +/* + * register & return a new board mac address + */ +static int board_get_mac_address(u8 *mac) +{ + u8 *p; + int count; + + if (mac_addr_used >= nvram.mac_addr_count) { + printk(KERN_ERR PFX "not enough mac address\n"); + return -ENODEV; + } + + memcpy(mac, nvram.mac_addr_base, ETH_ALEN); + p = mac + ETH_ALEN - 1; + count = mac_addr_used; + + while (count--) { + do { + (*p)++; + if (*p != 0) + break; + p--; + } while (p != mac); + } + + if (p == mac) { + printk(KERN_ERR PFX "unable to fetch mac address\n"); + return -ENODEV; + } + + mac_addr_used++; + return 0; +} + +/* * early init callback, read nvram data from flash and checksum it */ void __init board_prom_init(void) @@ -659,6 +786,17 @@ void __init board_prom_init(void) } bcm_gpio_writel(val, GPIO_MODE_REG); + + /* Generate MAC address for WLAN and + * register our SPROM */ +#ifdef CONFIG_SSB_PCIHOST + if (!board_get_mac_address(bcm63xx_sprom.il0mac)) { + memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN); + memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN); + if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0) + printk(KERN_ERR "failed to register fallback SPROM\n"); + } +#endif } /* @@ -676,49 +814,6 @@ void __init board_setup(void) panic("unexpected CPU for bcm963xx board"); } -/* - * return board name for /proc/cpuinfo - */ -const char *board_get_name(void) -{ - return board.name; -} - -/* - * register & return a new board mac address - */ -static int board_get_mac_address(u8 *mac) -{ - u8 *p; - int count; - - if (mac_addr_used >= nvram.mac_addr_count) { - printk(KERN_ERR PFX "not enough mac address\n"); - return -ENODEV; - } - - memcpy(mac, nvram.mac_addr_base, ETH_ALEN); - p = mac + ETH_ALEN - 1; - count = mac_addr_used; - - while (count--) { - do { - (*p)++; - if (*p != 0) - break; - p--; - } while (p != mac); - } - - if (p == mac) { - printk(KERN_ERR PFX "unable to fetch mac address\n"); - return -ENODEV; - } - - mac_addr_used++; - return 0; -} - static struct mtd_partition mtd_partitions[] = { { .name = "cfe", @@ -750,33 +845,6 @@ static struct platform_device mtd_dev = { }, }; -/* - * Register a sane SPROMv2 to make the on-board - * bcm4318 WLAN work - */ -#ifdef CONFIG_SSB_PCIHOST -static struct ssb_sprom bcm63xx_sprom = { - .revision = 0x02, - .board_rev = 0x17, - .country_code = 0x0, - .ant_available_bg = 0x3, - .pa0b0 = 0x15ae, - .pa0b1 = 0xfa85, - .pa0b2 = 0xfe8d, - .pa1b0 = 0xffff, - .pa1b1 = 0xffff, - .pa1b2 = 0xffff, - .gpio0 = 0xff, - .gpio1 = 0xff, - .gpio2 = 0xff, - .gpio3 = 0xff, - .maxpwr_bg = 0x004c, - .itssi_bg = 0x00, - .boardflags_lo = 0x2848, - .boardflags_hi = 0x0000, -}; -#endif - static struct gpio_led_platform_data bcm63xx_led_data; static struct platform_device bcm63xx_gpio_leds = { @@ -792,6 +860,12 @@ int __init board_register_devices(void) { u32 val; + if (board.has_uart0) + bcm63xx_uart_register(0); + + if (board.has_uart1) + bcm63xx_uart_register(1); + if (board.has_pccard) bcm63xx_pcmcia_register(); @@ -806,17 +880,6 @@ int __init board_register_devices(void) if (board.has_dsp) bcm63xx_dsp_register(&board.dsp); - /* Generate MAC address for WLAN and - * register our SPROM */ -#ifdef CONFIG_SSB_PCIHOST - if (!board_get_mac_address(bcm63xx_sprom.il0mac)) { - memcpy(bcm63xx_sprom.et0mac, bcm63xx_sprom.il0mac, ETH_ALEN); - memcpy(bcm63xx_sprom.et1mac, bcm63xx_sprom.il0mac, ETH_ALEN); - if (ssb_arch_set_fallback_sprom(&bcm63xx_sprom) < 0) - printk(KERN_ERR "failed to register fallback SPROM\n"); - } -#endif - /* read base address of boot chip select (0) */ if (BCMCPU_IS_6345()) val = 0x1fc00000; diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c index 70378bb..cbb7caf 100644 --- a/arch/mips/bcm63xx/cpu.c +++ b/arch/mips/bcm63xx/cpu.c @@ -36,6 +36,7 @@ static const unsigned long bcm96338_regs_base[] = { [RSET_TIMER] = BCM_6338_TIMER_BASE, [RSET_WDT] = BCM_6338_WDT_BASE, [RSET_UART0] = BCM_6338_UART0_BASE, + [RSET_UART1] = BCM_6338_UART1_BASE, [RSET_GPIO] = BCM_6338_GPIO_BASE, [RSET_SPI] = BCM_6338_SPI_BASE, [RSET_OHCI0] = BCM_6338_OHCI0_BASE, @@ -72,6 +73,7 @@ static const unsigned long bcm96345_regs_base[] = { [RSET_TIMER] = BCM_6345_TIMER_BASE, [RSET_WDT] = BCM_6345_WDT_BASE, [RSET_UART0] = BCM_6345_UART0_BASE, + [RSET_UART1] = BCM_6345_UART1_BASE, [RSET_GPIO] = BCM_6345_GPIO_BASE, [RSET_SPI] = BCM_6345_SPI_BASE, [RSET_UDC0] = BCM_6345_UDC0_BASE, @@ -109,6 +111,7 @@ static const unsigned long bcm96348_regs_base[] = { [RSET_TIMER] = BCM_6348_TIMER_BASE, [RSET_WDT] = BCM_6348_WDT_BASE, [RSET_UART0] = BCM_6348_UART0_BASE, + [RSET_UART1] = BCM_6348_UART1_BASE, [RSET_GPIO] = BCM_6348_GPIO_BASE, [RSET_SPI] = BCM_6348_SPI_BASE, [RSET_OHCI0] = BCM_6348_OHCI0_BASE, @@ -150,6 +153,7 @@ static const unsigned long bcm96358_regs_base[] = { [RSET_TIMER] = BCM_6358_TIMER_BASE, [RSET_WDT] = BCM_6358_WDT_BASE, [RSET_UART0] = BCM_6358_UART0_BASE, + [RSET_UART1] = BCM_6358_UART1_BASE, [RSET_GPIO] = BCM_6358_GPIO_BASE, [RSET_SPI] = BCM_6358_SPI_BASE, [RSET_OHCI0] = BCM_6358_OHCI0_BASE, @@ -170,6 +174,7 @@ static const unsigned long bcm96358_regs_base[] = { static const int bcm96358_irqs[] = { [IRQ_TIMER] = BCM_6358_TIMER_IRQ, [IRQ_UART0] = BCM_6358_UART0_IRQ, + [IRQ_UART1] = BCM_6358_UART1_IRQ, [IRQ_DSL] = BCM_6358_DSL_IRQ, [IRQ_ENET0] = BCM_6358_ENET0_IRQ, [IRQ_ENET1] = BCM_6358_ENET1_IRQ, diff --git a/arch/mips/bcm63xx/dev-uart.c b/arch/mips/bcm63xx/dev-uart.c index b051946..c2963da 100644 --- a/arch/mips/bcm63xx/dev-uart.c +++ b/arch/mips/bcm63xx/dev-uart.c @@ -11,31 +11,65 @@ #include <linux/platform_device.h> #include <bcm63xx_cpu.h> -static struct resource uart_resources[] = { +static struct resource uart0_resources[] = { { - .start = -1, /* filled at runtime */ - .end = -1, /* filled at runtime */ + /* start & end filled at runtime */ .flags = IORESOURCE_MEM, }, { - .start = -1, /* filled at runtime */ + /* start filled at runtime */ .flags = IORESOURCE_IRQ, }, }; -static struct platform_device bcm63xx_uart_device = { - .name = "bcm63xx_uart", - .id = 0, - .num_resources = ARRAY_SIZE(uart_resources), - .resource = uart_resources, +static struct resource uart1_resources[] = { + { + /* start & end filled at runtime */ + .flags = IORESOURCE_MEM, + }, + { + /* start filled at runtime */ + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device bcm63xx_uart_devices[] = { + { + .name = "bcm63xx_uart", + .id = 0, + .num_resources = ARRAY_SIZE(uart0_resources), + .resource = uart0_resources, + }, + + { + .name = "bcm63xx_uart", + .id = 1, + .num_resources = ARRAY_SIZE(uart1_resources), + .resource = uart1_resources, + } }; -int __init bcm63xx_uart_register(void) +int __init bcm63xx_uart_register(unsigned int id) { - uart_resources[0].start = bcm63xx_regset_address(RSET_UART0); - uart_resources[0].end = uart_resources[0].start; - uart_resources[0].end += RSET_UART_SIZE - 1; - uart_resources[1].start = bcm63xx_get_irq_number(IRQ_UART0); - return platform_device_register(&bcm63xx_uart_device); + if (id >= ARRAY_SIZE(bcm63xx_uart_devices)) + return -ENODEV; + + if (id == 1 && !BCMCPU_IS_6358()) + return -ENODEV; + + if (id == 0) { + uart0_resources[0].start = bcm63xx_regset_address(RSET_UART0); + uart0_resources[0].end = uart0_resources[0].start + + RSET_UART_SIZE - 1; + uart0_resources[1].start = bcm63xx_get_irq_number(IRQ_UART0); + } + + if (id == 1) { + uart1_resources[0].start = bcm63xx_regset_address(RSET_UART1); + uart1_resources[0].end = uart1_resources[0].start + + RSET_UART_SIZE - 1; + uart1_resources[1].start = bcm63xx_get_irq_number(IRQ_UART1); + } + + return platform_device_register(&bcm63xx_uart_devices[id]); } -arch_initcall(bcm63xx_uart_register); diff --git a/arch/mips/bcm63xx/gpio.c b/arch/mips/bcm63xx/gpio.c index 87ca390..315bc7f 100644 --- a/arch/mips/bcm63xx/gpio.c +++ b/arch/mips/bcm63xx/gpio.c @@ -125,10 +125,10 @@ static struct gpio_chip bcm63xx_gpio_chip = { int __init bcm63xx_gpio_init(void) { + gpio_out_low = bcm_gpio_readl(GPIO_DATA_LO_REG); + gpio_out_high = bcm_gpio_readl(GPIO_DATA_HI_REG); bcm63xx_gpio_chip.ngpio = bcm63xx_gpio_count(); pr_info("registering %d GPIOs\n", bcm63xx_gpio_chip.ngpio); return gpiochip_add(&bcm63xx_gpio_chip); } - -arch_initcall(bcm63xx_gpio_init); diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index b321d3b..9a06fa9 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -45,9 +45,6 @@ extern struct plat_smp_ops octeon_smp_ops; extern void pci_console_init(const char *arg); #endif -#ifdef CONFIG_CAVIUM_RESERVE32 -extern uint64_t octeon_reserve32_memory; -#endif static unsigned long long MAX_MEMORY = 512ull << 20; struct octeon_boot_descriptor *octeon_boot_desc_ptr; @@ -186,54 +183,6 @@ void octeon_check_cpu_bist(void) write_octeon_c0_dcacheerr(0); } -#ifdef CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB -/** - * Called on every core to setup the wired tlb entry needed - * if CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB is set. - * - */ -static void octeon_hal_setup_per_cpu_reserved32(void *unused) -{ - /* - * The config has selected to wire the reserve32 memory for all - * userspace applications. We need to put a wired TLB entry in for each - * 512MB of reserve32 memory. We only handle double 256MB pages here, - * so reserve32 must be multiple of 512MB. - */ - uint32_t size = CONFIG_CAVIUM_RESERVE32; - uint32_t entrylo0 = - 0x7 | ((octeon_reserve32_memory & ((1ul << 40) - 1)) >> 6); - uint32_t entrylo1 = entrylo0 + (256 << 14); - uint32_t entryhi = (0x80000000UL - (CONFIG_CAVIUM_RESERVE32 << 20)); - while (size >= 512) { -#if 0 - pr_info("CPU%d: Adding double wired TLB entry for 0x%lx\n", - smp_processor_id(), entryhi); -#endif - add_wired_entry(entrylo0, entrylo1, entryhi, PM_256M); - entrylo0 += 512 << 14; - entrylo1 += 512 << 14; - entryhi += 512 << 20; - size -= 512; - } -} -#endif /* CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB */ - -/** - * Called to release the named block which was used to made sure - * that nobody used the memory for something else during - * init. Now we'll free it so userspace apps can use this - * memory region with bootmem_alloc. - * - * This function is called only once from prom_free_prom_memory(). - */ -void octeon_hal_setup_reserved32(void) -{ -#ifdef CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB - on_each_cpu(octeon_hal_setup_per_cpu_reserved32, NULL, 0, 1); -#endif -} - /** * Reboot Octeon * @@ -294,18 +243,6 @@ static void octeon_halt(void) octeon_kill_core(NULL); } -#if 0 -/** - * Platform time init specifics. - * Returns - */ -void __init plat_time_init(void) -{ - /* Nothing special here, but we are required to have one */ -} - -#endif - /** * Handle all the error condition interrupts that might occur. * @@ -502,25 +439,13 @@ void __init prom_init(void) * memory when it is getting memory from the * bootloader. Later, after the memory allocations are * complete, the reserve32 will be freed. - */ -#ifdef CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB - if (CONFIG_CAVIUM_RESERVE32 & 0x1ff) - pr_err("CAVIUM_RESERVE32 isn't a multiple of 512MB. " - "This is required if CAVIUM_RESERVE32_USE_WIRED_TLB " - "is set\n"); - else - addr = cvmx_bootmem_phy_named_block_alloc(CONFIG_CAVIUM_RESERVE32 << 20, - 0, 0, 512 << 20, - "CAVIUM_RESERVE32", 0); -#else - /* + * * Allocate memory for RESERVED32 aligned on 2MB boundary. This * is in case we later use hugetlb entries with it. */ addr = cvmx_bootmem_phy_named_block_alloc(CONFIG_CAVIUM_RESERVE32 << 20, 0, 0, 2 << 20, "CAVIUM_RESERVE32", 0); -#endif if (addr < 0) pr_err("Failed to allocate CAVIUM_RESERVE32 memory area\n"); else @@ -817,9 +742,4 @@ void prom_free_prom_memory(void) panic("Unable to request_irq(OCTEON_IRQ_RML)\n"); } #endif - - /* This call is here so that it is performed after any TLB - initializations. It needs to be after these in case the - CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB option is set */ - octeon_hal_setup_reserved32(); } diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 51e9802..6d99b9d 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -279,14 +279,6 @@ static void octeon_cpu_die(unsigned int cpu) uint32_t avail_coremask; struct cvmx_bootmem_named_block_desc *block_desc; -#ifdef CONFIG_CAVIUM_OCTEON_WATCHDOG - /* Disable the watchdog */ - cvmx_ciu_wdogx_t ciu_wdog; - ciu_wdog.u64 = cvmx_read_csr(CVMX_CIU_WDOGX(cpu)); - ciu_wdog.s.mode = 0; - cvmx_write_csr(CVMX_CIU_WDOGX(cpu), ciu_wdog.u64); -#endif - while (per_cpu(cpu_state, cpu) != CPU_DEAD) cpu_relax(); diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig index c2f06e3..0583bb2 100644 --- a/arch/mips/configs/bigsur_defconfig +++ b/arch/mips/configs/bigsur_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.26-rc8 -# Wed Jul 2 17:02:55 2008 +# Linux kernel version: 2.6.34-rc3 +# Sat Apr 3 16:32:11 2010 # CONFIG_MIPS=y @@ -9,20 +9,25 @@ CONFIG_MIPS=y # Machine selection # # CONFIG_MACH_ALCHEMY is not set +# CONFIG_AR7 is not set # CONFIG_BCM47XX is not set +# CONFIG_BCM63XX is not set # CONFIG_MIPS_COBALT is not set # CONFIG_MACH_DECSTATION is not set # CONFIG_MACH_JAZZ is not set # CONFIG_LASAT is not set -# CONFIG_LEMOTE_FULONG is not set +# CONFIG_MACH_LOONGSON is not set # CONFIG_MIPS_MALTA is not set # CONFIG_MIPS_SIM is not set -# CONFIG_MARKEINS is not set +# CONFIG_NEC_MARKEINS is not set # CONFIG_MACH_VR41XX is not set +# CONFIG_NXP_STB220 is not set +# CONFIG_NXP_STB225 is not set # CONFIG_PNX8550_JBS is not set # CONFIG_PNX8550_STB810 is not set # CONFIG_PMC_MSP is not set # CONFIG_PMC_YOSEMITE is not set +# CONFIG_POWERTV is not set # CONFIG_SGI_IP22 is not set # CONFIG_SGI_IP27 is not set # CONFIG_SGI_IP28 is not set @@ -36,10 +41,13 @@ CONFIG_MIPS=y # CONFIG_SIBYTE_SENTOSA is not set CONFIG_SIBYTE_BIGSUR=y # CONFIG_SNI_RM is not set -# CONFIG_TOSHIBA_JMR3927 is not set -# CONFIG_TOSHIBA_RBTX4927 is not set -# CONFIG_TOSHIBA_RBTX4938 is not set +# CONFIG_MACH_TX39XX is not set +# CONFIG_MACH_TX49XX is not set +# CONFIG_MIKROTIK_RB532 is not set # CONFIG_WR_PPMC is not set +# CONFIG_CAVIUM_OCTEON_SIMULATOR is not set +# CONFIG_CAVIUM_OCTEON_REFERENCE_BOARD is not set +# CONFIG_ALCHEMY_GPIO_INDIRECT is not set CONFIG_SIBYTE_BCM1x80=y CONFIG_SIBYTE_SB1xxx_SOC=y # CONFIG_CPU_SB1_PASS_1 is not set @@ -48,14 +56,13 @@ CONFIG_SIBYTE_SB1xxx_SOC=y # CONFIG_CPU_SB1_PASS_4 is not set # CONFIG_CPU_SB1_PASS_2_112x is not set # CONFIG_CPU_SB1_PASS_3 is not set -# CONFIG_SIMULATION is not set # CONFIG_SB1_CEX_ALWAYS_FATAL is not set # CONFIG_SB1_CERR_STALL is not set -CONFIG_SIBYTE_CFE=y # CONFIG_SIBYTE_CFE_CONSOLE is not set # CONFIG_SIBYTE_BUS_WATCHER is not set # CONFIG_SIBYTE_TBPROF is not set CONFIG_SIBYTE_HAS_ZBUS_PROFILING=y +CONFIG_LOONGSON_UART_BASE=y CONFIG_RWSEM_GENERIC_SPINLOCK=y # CONFIG_ARCH_HAS_ILOG2_U32 is not set # CONFIG_ARCH_HAS_ILOG2_U64 is not set @@ -66,15 +73,13 @@ CONFIG_GENERIC_CALIBRATE_DELAY=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_TIME=y CONFIG_GENERIC_CMOS_UPDATE=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y -# CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ is not set +CONFIG_SCHED_OMIT_FRAME_POINTER=y +CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y CONFIG_CEVT_BCM1480=y CONFIG_CSRC_BCM1480=y CONFIG_CFE=y CONFIG_DMA_COHERENT=y -CONFIG_EARLY_PRINTK=y CONFIG_SYS_HAS_EARLY_PRINTK=y -# CONFIG_HOTPLUG_CPU is not set # CONFIG_NO_IOPORT is not set CONFIG_CPU_BIG_ENDIAN=y # CONFIG_CPU_LITTLE_ENDIAN is not set @@ -88,7 +93,8 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5 # # CPU selection # -# CONFIG_CPU_LOONGSON2 is not set +# CONFIG_CPU_LOONGSON2E is not set +# CONFIG_CPU_LOONGSON2F is not set # CONFIG_CPU_MIPS32_R1 is not set # CONFIG_CPU_MIPS32_R2 is not set # CONFIG_CPU_MIPS64_R1 is not set @@ -101,6 +107,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5 # CONFIG_CPU_TX49XX is not set # CONFIG_CPU_R5000 is not set # CONFIG_CPU_R5432 is not set +# CONFIG_CPU_R5500 is not set # CONFIG_CPU_R6000 is not set # CONFIG_CPU_NEVADA is not set # CONFIG_CPU_R8000 is not set @@ -108,6 +115,7 @@ CONFIG_MIPS_L1_CACHE_SHIFT=5 # CONFIG_CPU_RM7000 is not set # CONFIG_CPU_RM9000 is not set CONFIG_CPU_SB1=y +# CONFIG_CPU_CAVIUM_OCTEON is not set CONFIG_SYS_HAS_CPU_SB1=y CONFIG_WEAK_ORDERING=y CONFIG_SYS_SUPPORTS_32BIT_KERNEL=y @@ -123,11 +131,13 @@ CONFIG_64BIT=y CONFIG_PAGE_SIZE_4KB=y # CONFIG_PAGE_SIZE_8KB is not set # CONFIG_PAGE_SIZE_16KB is not set +# CONFIG_PAGE_SIZE_32KB is not set # CONFIG_PAGE_SIZE_64KB is not set # CONFIG_SIBYTE_DMA_PAGEOPS is not set CONFIG_MIPS_MT_DISABLED=y # CONFIG_MIPS_MT_SMP is not set # CONFIG_MIPS_MT_SMTC is not set +# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set CONFIG_CPU_HAS_SYNC=y CONFIG_GENERIC_HARDIRQS=y CONFIG_GENERIC_IRQ_PROBE=y @@ -142,18 +152,17 @@ CONFIG_FLATMEM_MANUAL=y # CONFIG_SPARSEMEM_MANUAL is not set CONFIG_FLATMEM=y CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set -# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set CONFIG_PAGEFLAGS_EXTENDED=y CONFIG_SPLIT_PTLOCK_CPUS=4 -CONFIG_RESOURCES_64BIT=y +CONFIG_PHYS_ADDR_T_64BIT=y CONFIG_ZONE_DMA_FLAG=0 CONFIG_VIRT_TO_BUS=y +# CONFIG_KSM is not set +CONFIG_DEFAULT_MMAP_MIN_ADDR=4096 CONFIG_SMP=y CONFIG_SYS_SUPPORTS_SMP=y CONFIG_NR_CPUS_DEFAULT_4=y CONFIG_NR_CPUS=4 -# CONFIG_MIPS_CMP is not set CONFIG_TICK_ONESHOT=y CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y @@ -175,6 +184,7 @@ CONFIG_SECCOMP=y CONFIG_LOCKDEP_SUPPORT=y CONFIG_STACKTRACE_SUPPORT=y CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" +CONFIG_CONSTRUCTORS=y # # General setup @@ -188,6 +198,7 @@ CONFIG_SWAP=y CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y CONFIG_POSIX_MQUEUE=y +CONFIG_POSIX_MQUEUE_SYSCTL=y CONFIG_BSD_PROCESS_ACCT=y CONFIG_BSD_PROCESS_ACCT_V3=y CONFIG_TASKSTATS=y @@ -195,23 +206,39 @@ CONFIG_TASK_DELAY_ACCT=y CONFIG_TASK_XACCT=y CONFIG_TASK_IO_ACCOUNTING=y CONFIG_AUDIT=y + +# +# RCU Subsystem +# +CONFIG_TREE_RCU=y +# CONFIG_TREE_PREEMPT_RCU is not set +# CONFIG_TINY_RCU is not set +# CONFIG_RCU_TRACE is not set +CONFIG_RCU_FANOUT=64 +# CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_RCU_FAST_NO_HZ is not set +# CONFIG_TREE_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 # CONFIG_CGROUPS is not set -CONFIG_GROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_USER_SCHED=y -# CONFIG_CGROUP_SCHED is not set -CONFIG_SYSFS_DEPRECATED=y -CONFIG_SYSFS_DEPRECATED_V2=y +# CONFIG_SYSFS_DEPRECATED_V2 is not set CONFIG_RELAY=y -# CONFIG_NAMESPACES is not set +CONFIG_NAMESPACES=y +CONFIG_UTS_NS=y +CONFIG_IPC_NS=y +CONFIG_USER_NS=y +CONFIG_PID_NS=y +CONFIG_NET_NS=y CONFIG_BLK_DEV_INITRD=y CONFIG_INITRAMFS_SOURCE="" +CONFIG_RD_GZIP=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_LZO is not set # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y +CONFIG_ANON_INODES=y CONFIG_EMBEDDED=y # CONFIG_SYSCTL_SYSCALL is not set CONFIG_KALLSYMS=y @@ -222,29 +249,36 @@ CONFIG_PRINTK=y CONFIG_BUG=y CONFIG_ELF_CORE=y # CONFIG_PCSPKR_PLATFORM is not set -CONFIG_COMPAT_BRK=y CONFIG_BASE_FULL=y CONFIG_FUTEX=y -CONFIG_ANON_INODES=y CONFIG_EPOLL=y CONFIG_SIGNALFD=y CONFIG_TIMERFD=y CONFIG_EVENTFD=y CONFIG_SHMEM=y +CONFIG_AIO=y + +# +# Kernel Performance Events And Counters +# CONFIG_VM_EVENT_COUNTERS=y +CONFIG_PCI_QUIRKS=y +CONFIG_COMPAT_BRK=y CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set -# CONFIG_MARKERS is not set CONFIG_HAVE_OPROFILE=y -# CONFIG_HAVE_KPROBES is not set -# CONFIG_HAVE_KRETPROBES is not set -# CONFIG_HAVE_DMA_ATTRS is not set -CONFIG_PROC_PAGE_MONITOR=y +CONFIG_HAVE_SYSCALL_WRAPPERS=y +CONFIG_USE_GENERIC_SMP_HELPERS=y + +# +# GCOV-based kernel profiling +# +# CONFIG_SLOW_WORK is not set +CONFIG_HAVE_GENERIC_DMA_COHERENT=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set CONFIG_BASE_SMALL=0 CONFIG_MODULES=y # CONFIG_MODULE_FORCE_LOAD is not set @@ -252,26 +286,52 @@ CONFIG_MODULE_UNLOAD=y # CONFIG_MODULE_FORCE_UNLOAD is not set CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y -CONFIG_KMOD=y CONFIG_STOP_MACHINE=y CONFIG_BLOCK=y -# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_BLK_DEV_BSG is not set +# CONFIG_BLK_DEV_INTEGRITY is not set CONFIG_BLOCK_COMPAT=y # # IO Schedulers # CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y CONFIG_IOSCHED_DEADLINE=y CONFIG_IOSCHED_CFQ=y -CONFIG_DEFAULT_AS=y # CONFIG_DEFAULT_DEADLINE is not set -# CONFIG_DEFAULT_CFQ is not set +CONFIG_DEFAULT_CFQ=y # CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="anticipatory" -CONFIG_CLASSIC_RCU=y +CONFIG_DEFAULT_IOSCHED="cfq" +# CONFIG_INLINE_SPIN_TRYLOCK is not set +# CONFIG_INLINE_SPIN_TRYLOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK is not set +# CONFIG_INLINE_SPIN_LOCK_BH is not set +# CONFIG_INLINE_SPIN_LOCK_IRQ is not set +# CONFIG_INLINE_SPIN_LOCK_IRQSAVE is not set +CONFIG_INLINE_SPIN_UNLOCK=y +# CONFIG_INLINE_SPIN_UNLOCK_BH is not set +CONFIG_INLINE_SPIN_UNLOCK_IRQ=y +# CONFIG_INLINE_SPIN_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_READ_TRYLOCK is not set +# CONFIG_INLINE_READ_LOCK is not set +# CONFIG_INLINE_READ_LOCK_BH is not set +# CONFIG_INLINE_READ_LOCK_IRQ is not set +# CONFIG_INLINE_READ_LOCK_IRQSAVE is not set +CONFIG_INLINE_READ_UNLOCK=y +# CONFIG_INLINE_READ_UNLOCK_BH is not set +CONFIG_INLINE_READ_UNLOCK_IRQ=y +# CONFIG_INLINE_READ_UNLOCK_IRQRESTORE is not set +# CONFIG_INLINE_WRITE_TRYLOCK is not set +# CONFIG_INLINE_WRITE_LOCK is not set +# CONFIG_INLINE_WRITE_LOCK_BH is not set +# CONFIG_INLINE_WRITE_LOCK_IRQ is not set +# CONFIG_INLINE_WRITE_LOCK_IRQSAVE is not set +CONFIG_INLINE_WRITE_UNLOCK=y +# CONFIG_INLINE_WRITE_UNLOCK_BH is not set +CONFIG_INLINE_WRITE_UNLOCK_IRQ=y +# CONFIG_INLINE_WRITE_UNLOCK_IRQRESTORE is not set +CONFIG_MUTEX_SPIN_ON_OWNER=y +# CONFIG_FREEZER is not set # # Bus options (PCI, PCMCIA, EISA, ISA, TC) @@ -280,8 +340,9 @@ CONFIG_HW_HAS_PCI=y CONFIG_PCI=y CONFIG_PCI_DOMAINS=y # CONFIG_ARCH_SUPPORTS_MSI is not set -CONFIG_PCI_LEGACY=y CONFIG_PCI_DEBUG=y +# CONFIG_PCI_STUB is not set +# CONFIG_PCI_IOV is not set CONFIG_MMU=y CONFIG_ZONE_DMA32=y # CONFIG_PCCARD is not set @@ -291,6 +352,8 @@ CONFIG_ZONE_DMA32=y # Executable file formats # CONFIG_BINFMT_ELF=y +# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set +# CONFIG_HAVE_AOUT is not set # CONFIG_BINFMT_MISC is not set CONFIG_MIPS32_COMPAT=y CONFIG_COMPAT=y @@ -304,23 +367,20 @@ CONFIG_BINFMT_ELF32=y # CONFIG_PM=y # CONFIG_PM_DEBUG is not set - -# -# Networking -# +# CONFIG_PM_RUNTIME is not set CONFIG_NET=y # # Networking options # CONFIG_PACKET=y -CONFIG_PACKET_MMAP=y CONFIG_UNIX=y CONFIG_XFRM=y CONFIG_XFRM_USER=m # CONFIG_XFRM_SUB_POLICY is not set CONFIG_XFRM_MIGRATE=y # CONFIG_XFRM_STATISTICS is not set +CONFIG_XFRM_IPCOMP=m CONFIG_NET_KEY=y CONFIG_NET_KEY_MIGRATE=y CONFIG_INET=y @@ -353,36 +413,6 @@ CONFIG_INET_TCP_DIAG=y CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" CONFIG_TCP_MD5SIG=y -CONFIG_IP_VS=m -# CONFIG_IP_VS_DEBUG is not set -CONFIG_IP_VS_TAB_BITS=12 - -# -# IPVS transport protocol load balancing support -# -CONFIG_IP_VS_PROTO_TCP=y -CONFIG_IP_VS_PROTO_UDP=y -CONFIG_IP_VS_PROTO_ESP=y -CONFIG_IP_VS_PROTO_AH=y - -# -# IPVS scheduler -# -CONFIG_IP_VS_RR=m -CONFIG_IP_VS_WRR=m -CONFIG_IP_VS_LC=m -CONFIG_IP_VS_WLC=m -CONFIG_IP_VS_LBLC=m -CONFIG_IP_VS_LBLCR=m -CONFIG_IP_VS_DH=m -CONFIG_IP_VS_SH=m -CONFIG_IP_VS_SED=m -CONFIG_IP_VS_NQ=m - -# -# IPVS application helper -# -CONFIG_IP_VS_FTP=m CONFIG_IPV6=m CONFIG_IPV6_PRIVACY=y CONFIG_IPV6_ROUTER_PREF=y @@ -399,11 +429,13 @@ CONFIG_INET6_XFRM_MODE_TUNNEL=m CONFIG_INET6_XFRM_MODE_BEET=m CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m CONFIG_IPV6_SIT=m +CONFIG_IPV6_SIT_6RD=y CONFIG_IPV6_NDISC_NODETYPE=y CONFIG_IPV6_TUNNEL=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_SUBTREES=y # CONFIG_IPV6_MROUTE is not set +CONFIG_NETLABEL=y CONFIG_NETWORK_SECMARK=y CONFIG_NETFILTER=y # CONFIG_NETFILTER_DEBUG is not set @@ -421,19 +453,53 @@ CONFIG_NF_CONNTRACK_IRC=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CT_NETLINK=m CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NETFILTER_XT_TARGET_NFLOG=m CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m CONFIG_NETFILTER_XT_TARGET_TCPMSS=m CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_MARK=m CONFIG_NETFILTER_XT_MATCH_POLICY=m CONFIG_NETFILTER_XT_MATCH_STATE=m +CONFIG_IP_VS=m +CONFIG_IP_VS_IPV6=y +# CONFIG_IP_VS_DEBUG is not set +CONFIG_IP_VS_TAB_BITS=12 + +# +# IPVS transport protocol load balancing support +# +CONFIG_IP_VS_PROTO_TCP=y +CONFIG_IP_VS_PROTO_UDP=y +CONFIG_IP_VS_PROTO_AH_ESP=y +CONFIG_IP_VS_PROTO_ESP=y +CONFIG_IP_VS_PROTO_AH=y +CONFIG_IP_VS_PROTO_SCTP=y + +# +# IPVS scheduler +# +CONFIG_IP_VS_RR=m +CONFIG_IP_VS_WRR=m +CONFIG_IP_VS_LC=m +CONFIG_IP_VS_WLC=m +CONFIG_IP_VS_LBLC=m +CONFIG_IP_VS_LBLCR=m +CONFIG_IP_VS_DH=m +CONFIG_IP_VS_SH=m +CONFIG_IP_VS_SED=m +CONFIG_IP_VS_NQ=m + +# +# IPVS application helper +# +CONFIG_IP_VS_FTP=m # # IP: Netfilter Configuration # +CONFIG_NF_DEFRAG_IPV4=m CONFIG_NF_CONNTRACK_IPV4=m CONFIG_NF_CONNTRACK_PROC_COMPAT=y CONFIG_IP_NF_IPTABLES=m @@ -459,22 +525,44 @@ CONFIG_IP_NF_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_LOG=m +CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP6_NF_MANGLE=m -# CONFIG_IP_DCCP is not set +CONFIG_IP_DCCP=m +CONFIG_INET_DCCP_DIAG=m + +# +# DCCP CCIDs Configuration (EXPERIMENTAL) +# +# CONFIG_IP_DCCP_CCID2_DEBUG is not set +CONFIG_IP_DCCP_CCID3=y +# CONFIG_IP_DCCP_CCID3_DEBUG is not set +CONFIG_IP_DCCP_CCID3_RTO=100 +CONFIG_IP_DCCP_TFRC_LIB=y + +# +# DCCP Kernel Hacking +# +# CONFIG_IP_DCCP_DEBUG is not set CONFIG_IP_SCTP=m # CONFIG_SCTP_DBG_MSG is not set # CONFIG_SCTP_DBG_OBJCNT is not set # CONFIG_SCTP_HMAC_NONE is not set -# CONFIG_SCTP_HMAC_SHA1 is not set -CONFIG_SCTP_HMAC_MD5=y +CONFIG_SCTP_HMAC_SHA1=y +# CONFIG_SCTP_HMAC_MD5 is not set +# CONFIG_RDS is not set # CONFIG_TIPC is not set # CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set +CONFIG_STP=m +CONFIG_GARP=m +CONFIG_BRIDGE=m +CONFIG_BRIDGE_IGMP_SNOOPING=y +# CONFIG_NET_DSA is not set +CONFIG_VLAN_8021Q=m +CONFIG_VLAN_8021Q_GVRP=y # CONFIG_DECNET is not set +CONFIG_LLC=m # CONFIG_LLC2 is not set # CONFIG_IPX is not set # CONFIG_ATALK is not set @@ -482,26 +570,47 @@ CONFIG_SCTP_HMAC_MD5=y # CONFIG_LAPB is not set # CONFIG_ECONET is not set # CONFIG_WAN_ROUTER is not set +# CONFIG_PHONET is not set +# CONFIG_IEEE802154 is not set # CONFIG_NET_SCHED is not set +# CONFIG_DCB is not set # # Network testing # # CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set +CONFIG_HAMRADIO=y + +# +# Packet Radio protocols +# +CONFIG_AX25=m +CONFIG_AX25_DAMA_SLAVE=y +CONFIG_NETROM=m +CONFIG_ROSE=m + +# +# AX.25 network device drivers +# +CONFIG_MKISS=m +CONFIG_6PACK=m +CONFIG_BPQETHER=m +CONFIG_BAYCOM_SER_FDX=m +CONFIG_BAYCOM_SER_HDX=m +CONFIG_YAM=m # CONFIG_CAN is not set # CONFIG_IRDA is not set # CONFIG_BT is not set # CONFIG_AF_RXRPC is not set CONFIG_FIB_RULES=y +CONFIG_WIRELESS=y +# CONFIG_CFG80211 is not set +# CONFIG_LIB80211 is not set # -# Wireless +# CFG80211 needs to be enabled for MAC80211 # -# CONFIG_CFG80211 is not set -# CONFIG_WIRELESS_EXT is not set -# CONFIG_MAC80211 is not set -# CONFIG_IEEE80211 is not set +# CONFIG_WIMAX is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -513,9 +622,12 @@ CONFIG_FIB_RULES=y # Generic Driver Options # CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +# CONFIG_DEVTMPFS is not set CONFIG_STANDALONE=y CONFIG_PREVENT_FIRMWARE_BUILD=y CONFIG_FW_LOADER=m +CONFIG_FIRMWARE_IN_KERNEL=y +CONFIG_EXTRA_FIRMWARE="" # CONFIG_DEBUG_DRIVER is not set # CONFIG_DEBUG_DEVRES is not set # CONFIG_SYS_HYPERVISOR is not set @@ -530,33 +642,53 @@ CONFIG_BLK_DEV=y # CONFIG_BLK_DEV_COW_COMMON is not set CONFIG_BLK_DEV_LOOP=m CONFIG_BLK_DEV_CRYPTOLOOP=m + +# +# DRBD disabled because PROC_FS, INET or CONNECTOR not selected +# CONFIG_BLK_DEV_NBD=m # CONFIG_BLK_DEV_SX8 is not set # CONFIG_BLK_DEV_RAM is not set # CONFIG_CDROM_PKTCDVD is not set # CONFIG_ATA_OVER_ETH is not set +# CONFIG_BLK_DEV_HD is not set CONFIG_MISC_DEVICES=y +# CONFIG_AD525X_DPOT is not set # CONFIG_PHANTOM is not set -# CONFIG_EEPROM_93CX6 is not set CONFIG_SGI_IOC4=m # CONFIG_TIFM_CORE is not set +# CONFIG_ICS932S401 is not set # CONFIG_ENCLOSURE_SERVICES is not set +# CONFIG_HP_ILO is not set +# CONFIG_ISL29003 is not set +# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_DS1682 is not set +# CONFIG_C2PORT is not set + +# +# EEPROM support +# +# CONFIG_EEPROM_AT24 is not set +CONFIG_EEPROM_LEGACY=y +CONFIG_EEPROM_MAX6875=y +# CONFIG_EEPROM_93CX6 is not set +# CONFIG_CB710_CORE is not set CONFIG_HAVE_IDE=y CONFIG_IDE=y -CONFIG_IDE_MAX_HWIFS=4 -CONFIG_BLK_DEV_IDE=y # # Please see Documentation/ide/ide.txt for help/info on IDE drives # +CONFIG_IDE_XFER_MODE=y +CONFIG_IDE_TIMINGS=y +CONFIG_IDE_ATAPI=y # CONFIG_BLK_DEV_IDE_SATA is not set -CONFIG_BLK_DEV_IDEDISK=y -# CONFIG_IDEDISK_MULTI_MODE is not set +CONFIG_IDE_GD=y +CONFIG_IDE_GD_ATA=y +# CONFIG_IDE_GD_ATAPI is not set CONFIG_BLK_DEV_IDECD=y CONFIG_BLK_DEV_IDECD_VERBOSE_ERRORS=y CONFIG_BLK_DEV_IDETAPE=y -CONFIG_BLK_DEV_IDEFLOPPY=y -# CONFIG_BLK_DEV_IDESCSI is not set # CONFIG_IDE_TASK_IOCTL is not set CONFIG_IDE_PROC_FS=y @@ -581,14 +713,13 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y # CONFIG_BLK_DEV_AMD74XX is not set CONFIG_BLK_DEV_CMD64X=y # CONFIG_BLK_DEV_TRIFLEX is not set -# CONFIG_BLK_DEV_CY82C693 is not set # CONFIG_BLK_DEV_CS5520 is not set # CONFIG_BLK_DEV_CS5530 is not set -# CONFIG_BLK_DEV_HPT34X is not set # CONFIG_BLK_DEV_HPT366 is not set # CONFIG_BLK_DEV_JMICRON is not set # CONFIG_BLK_DEV_SC1200 is not set # CONFIG_BLK_DEV_PIIX is not set +# CONFIG_BLK_DEV_IT8172 is not set CONFIG_BLK_DEV_IT8213=m # CONFIG_BLK_DEV_IT821X is not set # CONFIG_BLK_DEV_NS87415 is not set @@ -600,14 +731,12 @@ CONFIG_BLK_DEV_IT8213=m # CONFIG_BLK_DEV_TRM290 is not set # CONFIG_BLK_DEV_VIA82CXXX is not set CONFIG_BLK_DEV_TC86C001=m -# CONFIG_BLK_DEV_IDE_SWARM is not set CONFIG_BLK_DEV_IDEDMA=y -# CONFIG_BLK_DEV_HD_ONLY is not set -# CONFIG_BLK_DEV_HD is not set # # SCSI device support # +CONFIG_SCSI_MOD=y # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y CONFIG_SCSI_DMA=y @@ -625,10 +754,6 @@ CONFIG_BLK_DEV_SR=m CONFIG_BLK_DEV_SR_VENDOR=y CONFIG_CHR_DEV_SG=m CONFIG_CHR_DEV_SCH=m - -# -# Some SCSI devices (e.g. CD jukebox) support multiple LUNs -# # CONFIG_SCSI_MULTI_LUN is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set @@ -645,27 +770,36 @@ CONFIG_SCSI_WAIT_SCAN=m # CONFIG_SCSI_SRP_ATTRS is not set CONFIG_SCSI_LOWLEVEL=y # CONFIG_ISCSI_TCP is not set +# CONFIG_SCSI_CXGB3_ISCSI is not set +# CONFIG_SCSI_BNX2_ISCSI is not set +# CONFIG_BE2ISCSI is not set # CONFIG_BLK_DEV_3W_XXXX_RAID is not set +# CONFIG_SCSI_HPSA is not set # CONFIG_SCSI_3W_9XXX is not set +# CONFIG_SCSI_3W_SAS is not set # CONFIG_SCSI_ACARD is not set # CONFIG_SCSI_AACRAID is not set # CONFIG_SCSI_AIC7XXX is not set # CONFIG_SCSI_AIC7XXX_OLD is not set # CONFIG_SCSI_AIC79XX is not set # CONFIG_SCSI_AIC94XX is not set +# CONFIG_SCSI_MVSAS is not set # CONFIG_SCSI_DPT_I2O is not set # CONFIG_SCSI_ADVANSYS is not set # CONFIG_SCSI_ARCMSR is not set # CONFIG_MEGARAID_NEWGEN is not set # CONFIG_MEGARAID_LEGACY is not set # CONFIG_MEGARAID_SAS is not set +# CONFIG_SCSI_MPT2SAS is not set # CONFIG_SCSI_HPTIOP is not set +# CONFIG_LIBFC is not set +# CONFIG_LIBFCOE is not set +# CONFIG_FCOE is not set # CONFIG_SCSI_DMX3191D is not set # CONFIG_SCSI_FUTURE_DOMAIN is not set # CONFIG_SCSI_IPS is not set # CONFIG_SCSI_INITIO is not set # CONFIG_SCSI_INIA100 is not set -# CONFIG_SCSI_MVSAS is not set # CONFIG_SCSI_STEX is not set # CONFIG_SCSI_SYM53C8XX_2 is not set # CONFIG_SCSI_IPR is not set @@ -676,9 +810,15 @@ CONFIG_SCSI_LOWLEVEL=y # CONFIG_SCSI_DC395x is not set # CONFIG_SCSI_DC390T is not set # CONFIG_SCSI_DEBUG is not set +# CONFIG_SCSI_PMCRAID is not set +# CONFIG_SCSI_PM8001 is not set # CONFIG_SCSI_SRP is not set +# CONFIG_SCSI_BFA_FC is not set +# CONFIG_SCSI_DH is not set +# CONFIG_SCSI_OSD_INITIATOR is not set CONFIG_ATA=y # CONFIG_ATA_NONSTANDARD is not set +CONFIG_ATA_VERBOSE_ERROR=y CONFIG_SATA_PMP=y # CONFIG_SATA_AHCI is not set CONFIG_SATA_SIL24=y @@ -700,6 +840,7 @@ CONFIG_ATA_SFF=y # CONFIG_PATA_ALI is not set # CONFIG_PATA_AMD is not set # CONFIG_PATA_ARTOP is not set +# CONFIG_PATA_ATP867X is not set # CONFIG_PATA_ATIIXP is not set # CONFIG_PATA_CMD640_PCI is not set # CONFIG_PATA_CMD64X is not set @@ -715,6 +856,7 @@ CONFIG_ATA_SFF=y # CONFIG_PATA_IT821X is not set # CONFIG_PATA_IT8213 is not set # CONFIG_PATA_JMICRON is not set +# CONFIG_PATA_LEGACY is not set # CONFIG_PATA_TRIFLEX is not set # CONFIG_PATA_MARVELL is not set # CONFIG_PATA_MPIIX is not set @@ -725,14 +867,16 @@ CONFIG_ATA_SFF=y # CONFIG_PATA_NS87415 is not set # CONFIG_PATA_OPTI is not set # CONFIG_PATA_OPTIDMA is not set +# CONFIG_PATA_PDC2027X is not set # CONFIG_PATA_PDC_OLD is not set # CONFIG_PATA_RADISYS is not set +# CONFIG_PATA_RDC is not set # CONFIG_PATA_RZ1000 is not set # CONFIG_PATA_SC1200 is not set # CONFIG_PATA_SERVERWORKS is not set -# CONFIG_PATA_PDC2027X is not set CONFIG_PATA_SIL680=y # CONFIG_PATA_SIS is not set +# CONFIG_PATA_TOSHIBA is not set # CONFIG_PATA_VIA is not set # CONFIG_PATA_WINBOND is not set # CONFIG_PATA_PLATFORM is not set @@ -745,13 +889,16 @@ CONFIG_PATA_SIL680=y # # -# Enable only one of the two stacks, unless you know what you are doing +# You can enable one or both FireWire driver stacks. +# + +# +# The newer stack is recommended. # # CONFIG_FIREWIRE is not set # CONFIG_IEEE1394 is not set # CONFIG_I2O is not set CONFIG_NETDEVICES=y -# CONFIG_NETDEVICES_MULTIQUEUE is not set # CONFIG_DUMMY is not set # CONFIG_BONDING is not set # CONFIG_MACVLAN is not set @@ -774,6 +921,9 @@ CONFIG_PHYLIB=y # CONFIG_BROADCOM_PHY is not set # CONFIG_ICPLUS_PHY is not set # CONFIG_REALTEK_PHY is not set +# CONFIG_NATIONAL_PHY is not set +# CONFIG_STE10XP is not set +# CONFIG_LSI_ET1011C_PHY is not set # CONFIG_FIXED_PHY is not set # CONFIG_MDIO_BITBANG is not set CONFIG_NET_ETHERNET=y @@ -783,23 +933,33 @@ CONFIG_MII=y # CONFIG_SUNGEM is not set # CONFIG_CASSINI is not set # CONFIG_NET_VENDOR_3COM is not set +# CONFIG_SMC91X is not set # CONFIG_DM9000 is not set +# CONFIG_ETHOC is not set +# CONFIG_SMSC911X is not set +# CONFIG_DNET is not set # CONFIG_NET_TULIP is not set # CONFIG_HP100 is not set # CONFIG_IBM_NEW_EMAC_ZMII is not set # CONFIG_IBM_NEW_EMAC_RGMII is not set # CONFIG_IBM_NEW_EMAC_TAH is not set # CONFIG_IBM_NEW_EMAC_EMAC4 is not set +# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set +# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set +# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set # CONFIG_NET_PCI is not set # CONFIG_B44 is not set +# CONFIG_KS8842 is not set +# CONFIG_KS8851_MLL is not set +# CONFIG_ATL2 is not set CONFIG_NETDEV_1000=y # CONFIG_ACENIC is not set # CONFIG_DL2K is not set # CONFIG_E1000 is not set # CONFIG_E1000E is not set -# CONFIG_E1000E_ENABLED is not set # CONFIG_IP1000 is not set # CONFIG_IGB is not set +# CONFIG_IGBVF is not set # CONFIG_NS83820 is not set # CONFIG_HAMACHI is not set # CONFIG_YELLOWFIN is not set @@ -811,29 +971,42 @@ CONFIG_SB1250_MAC=y # CONFIG_VIA_VELOCITY is not set # CONFIG_TIGON3 is not set # CONFIG_BNX2 is not set +# CONFIG_CNIC is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set +# CONFIG_ATL1E is not set +# CONFIG_ATL1C is not set +# CONFIG_JME is not set CONFIG_NETDEV_10000=y +CONFIG_MDIO=m # CONFIG_CHELSIO_T1 is not set +CONFIG_CHELSIO_T3_DEPENDS=y CONFIG_CHELSIO_T3=m +# CONFIG_ENIC is not set # CONFIG_IXGBE is not set # CONFIG_IXGB is not set # CONFIG_S2IO is not set +# CONFIG_VXGE is not set # CONFIG_MYRI10GE is not set CONFIG_NETXEN_NIC=m # CONFIG_NIU is not set +# CONFIG_MLX4_EN is not set # CONFIG_MLX4_CORE is not set # CONFIG_TEHUTI is not set # CONFIG_BNX2X is not set +# CONFIG_QLCNIC is not set +# CONFIG_QLGE is not set # CONFIG_SFC is not set +# CONFIG_BE2NET is not set # CONFIG_TR is not set +CONFIG_WLAN=y +# CONFIG_ATMEL is not set +# CONFIG_PRISM54 is not set +# CONFIG_HOSTAP is not set # -# Wireless LAN +# Enable WiMAX (Networking options) to see the WiMAX drivers # -# CONFIG_WLAN_PRE80211 is not set -# CONFIG_WLAN_80211 is not set -# CONFIG_IWLWIFI_LEDS is not set # CONFIG_WAN is not set # CONFIG_FDDI is not set # CONFIG_HIPPI is not set @@ -856,6 +1029,7 @@ CONFIG_SLIP_MODE_SLIP6=y # CONFIG_NETCONSOLE is not set # CONFIG_NETPOLL is not set # CONFIG_NET_POLL_CONTROLLER is not set +# CONFIG_VMXNET3 is not set # CONFIG_ISDN is not set # CONFIG_PHONE is not set @@ -873,6 +1047,7 @@ CONFIG_SERIO_SERPORT=y # CONFIG_SERIO_PCIPS2 is not set # CONFIG_SERIO_LIBPS2 is not set CONFIG_SERIO_RAW=m +# CONFIG_SERIO_ALTERA_PS2 is not set # CONFIG_GAMEPORT is not set # @@ -893,8 +1068,6 @@ CONFIG_SERIAL_NONSTANDARD=y # CONFIG_N_HDLC is not set # CONFIG_RISCOM8 is not set # CONFIG_SPECIALIX is not set -# CONFIG_SX is not set -# CONFIG_RIO is not set # CONFIG_STALDRV is not set # CONFIG_NOZOMI is not set @@ -911,7 +1084,9 @@ CONFIG_SERIAL_SB1250_DUART_CONSOLE=y CONFIG_SERIAL_CORE=y CONFIG_SERIAL_CORE_CONSOLE=y # CONFIG_SERIAL_JSM is not set +# CONFIG_SERIAL_TIMBERDALE is not set CONFIG_UNIX98_PTYS=y +# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set CONFIG_LEGACY_PTYS=y CONFIG_LEGACY_PTY_COUNT=256 # CONFIG_IPMI_HANDLER is not set @@ -923,89 +1098,99 @@ CONFIG_LEGACY_PTY_COUNT=256 CONFIG_DEVPORT=y CONFIG_I2C=y CONFIG_I2C_BOARDINFO=y +CONFIG_I2C_COMPAT=y CONFIG_I2C_CHARDEV=y +CONFIG_I2C_HELPER_AUTO=y # # I2C Hardware Bus support # + +# +# PC SMBus host controller drivers +# # CONFIG_I2C_ALI1535 is not set # CONFIG_I2C_ALI1563 is not set # CONFIG_I2C_ALI15X3 is not set # CONFIG_I2C_AMD756 is not set # CONFIG_I2C_AMD8111 is not set # CONFIG_I2C_I801 is not set -# CONFIG_I2C_I810 is not set +# CONFIG_I2C_ISCH is not set # CONFIG_I2C_PIIX4 is not set # CONFIG_I2C_NFORCE2 is not set -# CONFIG_I2C_OCORES is not set -# CONFIG_I2C_PARPORT_LIGHT is not set -# CONFIG_I2C_PROSAVAGE is not set -# CONFIG_I2C_SAVAGE4 is not set -CONFIG_I2C_SIBYTE=y -# CONFIG_I2C_SIMTEC is not set # CONFIG_I2C_SIS5595 is not set # CONFIG_I2C_SIS630 is not set # CONFIG_I2C_SIS96X is not set -# CONFIG_I2C_TAOS_EVM is not set -# CONFIG_I2C_STUB is not set # CONFIG_I2C_VIA is not set # CONFIG_I2C_VIAPRO is not set -# CONFIG_I2C_VOODOO3 is not set -# CONFIG_I2C_PCA_PLATFORM is not set # -# Miscellaneous I2C Chip support +# I2C system bus drivers (mostly embedded / system-on-chip) # -# CONFIG_DS1682 is not set -CONFIG_EEPROM_LEGACY=y -CONFIG_SENSORS_PCF8574=y -# CONFIG_PCF8575 is not set -CONFIG_SENSORS_PCF8591=y -CONFIG_EEPROM_MAX6875=y -# CONFIG_SENSORS_TSL2550 is not set +# CONFIG_I2C_OCORES is not set +# CONFIG_I2C_SIMTEC is not set +# CONFIG_I2C_XILINX is not set + +# +# External I2C/SMBus adapter drivers +# +# CONFIG_I2C_PARPORT_LIGHT is not set +# CONFIG_I2C_TAOS_EVM is not set + +# +# Other I2C/SMBus bus drivers +# +# CONFIG_I2C_PCA_PLATFORM is not set +CONFIG_I2C_SIBYTE=y +# CONFIG_I2C_STUB is not set CONFIG_I2C_DEBUG_CORE=y CONFIG_I2C_DEBUG_ALGO=y CONFIG_I2C_DEBUG_BUS=y -CONFIG_I2C_DEBUG_CHIP=y # CONFIG_SPI is not set + +# +# PPS support +# +# CONFIG_PPS is not set # CONFIG_W1 is not set # CONFIG_POWER_SUPPLY is not set # CONFIG_HWMON is not set # CONFIG_THERMAL is not set -# CONFIG_THERMAL_HWMON is not set # CONFIG_WATCHDOG is not set +CONFIG_SSB_POSSIBLE=y # # Sonics Silicon Backplane # -CONFIG_SSB_POSSIBLE=y # CONFIG_SSB is not set # # Multifunction device drivers # +# CONFIG_MFD_CORE is not set +# CONFIG_MFD_88PM860X is not set # CONFIG_MFD_SM501 is not set # CONFIG_HTC_PASIC3 is not set - -# -# Multimedia devices -# - -# -# Multimedia core support -# -# CONFIG_VIDEO_DEV is not set -# CONFIG_DVB_CORE is not set -# CONFIG_VIDEO_MEDIA is not set - -# -# Multimedia drivers -# -# CONFIG_DAB is not set +# CONFIG_TWL4030_CORE is not set +# CONFIG_MFD_TMIO is not set +# CONFIG_PMIC_DA903X is not set +# CONFIG_PMIC_ADP5520 is not set +# CONFIG_MFD_MAX8925 is not set +# CONFIG_MFD_WM8400 is not set +# CONFIG_MFD_WM831X is not set +# CONFIG_MFD_WM8350_I2C is not set +# CONFIG_MFD_WM8994 is not set +# CONFIG_MFD_PCF50633 is not set +# CONFIG_AB3100_CORE is not set +# CONFIG_LPC_SCH is not set +# CONFIG_REGULATOR is not set +# CONFIG_MEDIA_SUPPORT is not set # # Graphics support # +CONFIG_VGA_ARB=y +CONFIG_VGA_ARB_MAX_GPUS=16 # CONFIG_DRM is not set # CONFIG_VGASTATE is not set # CONFIG_VIDEO_OUTPUT_CONTROL is not set @@ -1016,10 +1201,6 @@ CONFIG_SSB_POSSIBLE=y # Display device support # # CONFIG_DISPLAY_SUPPORT is not set - -# -# Sound -# # CONFIG_SOUND is not set CONFIG_USB_SUPPORT=y CONFIG_USB_ARCH_HAS_HCD=y @@ -1030,9 +1211,18 @@ CONFIG_USB_ARCH_HAS_EHCI=y # CONFIG_USB_OTG_BLACKLIST_HUB is not set # -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' +# Enable Host or Gadget support to see Inventra options +# + +# +# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may # # CONFIG_USB_GADGET is not set + +# +# OTG and related infrastructure +# +# CONFIG_UWB is not set # CONFIG_MMC is not set # CONFIG_MEMSTICK is not set # CONFIG_NEW_LEDS is not set @@ -1040,41 +1230,66 @@ CONFIG_USB_ARCH_HAS_EHCI=y # CONFIG_INFINIBAND is not set CONFIG_RTC_LIB=y # CONFIG_RTC_CLASS is not set +# CONFIG_DMADEVICES is not set +# CONFIG_AUXDISPLAY is not set # CONFIG_UIO is not set # +# TI VLYNQ +# +# CONFIG_STAGING is not set + +# # File systems # CONFIG_EXT2_FS=m CONFIG_EXT2_FS_XATTR=y -# CONFIG_EXT2_FS_POSIX_ACL is not set -# CONFIG_EXT2_FS_SECURITY is not set -# CONFIG_EXT2_FS_XIP is not set -CONFIG_EXT3_FS=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT2_FS_XIP=y +CONFIG_EXT3_FS=m +CONFIG_EXT3_DEFAULTS_TO_ORDERED=y CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -# CONFIG_EXT4DEV_FS is not set -CONFIG_JBD=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_XATTR=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +# CONFIG_EXT4_DEBUG is not set +CONFIG_FS_XIP=y +CONFIG_JBD=m +CONFIG_JBD2=y CONFIG_FS_MBCACHE=y # CONFIG_REISERFS_FS is not set # CONFIG_JFS_FS is not set -# CONFIG_FS_POSIX_ACL is not set +CONFIG_FS_POSIX_ACL=y # CONFIG_XFS_FS is not set # CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set +# CONFIG_BTRFS_FS is not set +# CONFIG_NILFS2_FS is not set +CONFIG_FILE_LOCKING=y +CONFIG_FSNOTIFY=y CONFIG_DNOTIFY=y CONFIG_INOTIFY=y CONFIG_INOTIFY_USER=y CONFIG_QUOTA=y CONFIG_QUOTA_NETLINK_INTERFACE=y # CONFIG_PRINT_QUOTA_WARNING is not set +CONFIG_QUOTA_TREE=m # CONFIG_QFMT_V1 is not set CONFIG_QFMT_V2=m CONFIG_QUOTACTL=y CONFIG_AUTOFS_FS=m CONFIG_AUTOFS4_FS=m CONFIG_FUSE_FS=m +# CONFIG_CUSE is not set + +# +# Caches +# +# CONFIG_FSCACHE is not set # # CD-ROM/DVD Filesystems @@ -1103,15 +1318,13 @@ CONFIG_NTFS_RW=y CONFIG_PROC_FS=y CONFIG_PROC_KCORE=y CONFIG_PROC_SYSCTL=y +CONFIG_PROC_PAGE_MONITOR=y CONFIG_SYSFS=y CONFIG_TMPFS=y # CONFIG_TMPFS_POSIX_ACL is not set # CONFIG_HUGETLB_PAGE is not set CONFIG_CONFIGFS_FS=m - -# -# Miscellaneous filesystems -# +CONFIG_MISC_FILESYSTEMS=y # CONFIG_ADFS_FS is not set # CONFIG_AFFS_FS is not set # CONFIG_ECRYPT_FS is not set @@ -1120,9 +1333,12 @@ CONFIG_CONFIGFS_FS=m # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set +# CONFIG_LOGFS is not set # CONFIG_CRAMFS is not set +# CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set # CONFIG_MINIX_FS is not set +# CONFIG_OMFS_FS is not set # CONFIG_HPFS_FS is not set # CONFIG_QNX4FS_FS is not set # CONFIG_ROMFS_FS is not set @@ -1133,16 +1349,17 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y # CONFIG_NFS_V3_ACL is not set # CONFIG_NFS_V4 is not set -# CONFIG_NFSD is not set CONFIG_ROOT_NFS=y +# CONFIG_NFSD is not set CONFIG_LOCKD=y CONFIG_LOCKD_V4=y CONFIG_NFS_COMMON=y CONFIG_SUNRPC=y -# CONFIG_SUNRPC_BIND34 is not set -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set +CONFIG_SUNRPC_GSS=m +CONFIG_RPCSEC_GSS_KRB5=m +CONFIG_RPCSEC_GSS_SPKM3=m # CONFIG_SMB_FS is not set +# CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set @@ -1205,12 +1422,18 @@ CONFIG_ENABLE_WARN_DEPRECATED=y CONFIG_ENABLE_MUST_CHECK=y CONFIG_FRAME_WARN=2048 CONFIG_MAGIC_SYSRQ=y +# CONFIG_STRIP_ASM_SYMS is not set # CONFIG_UNUSED_SYMBOLS is not set # CONFIG_DEBUG_FS is not set # CONFIG_HEADERS_CHECK is not set CONFIG_DEBUG_KERNEL=y # CONFIG_DEBUG_SHIRQ is not set CONFIG_DETECT_SOFTLOCKUP=y +# CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set +CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0 +CONFIG_DETECT_HUNG_TASK=y +# CONFIG_BOOTPARAM_HUNG_TASK_PANIC is not set +CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE=0 CONFIG_SCHED_DEBUG=y # CONFIG_SCHEDSTATS is not set # CONFIG_TIMER_STATS is not set @@ -1219,23 +1442,53 @@ CONFIG_SCHED_DEBUG=y # CONFIG_DEBUG_RT_MUTEXES is not set # CONFIG_RT_MUTEX_TESTER is not set # CONFIG_DEBUG_SPINLOCK is not set -CONFIG_DEBUG_MUTEXES=y +# CONFIG_DEBUG_MUTEXES is not set # CONFIG_DEBUG_LOCK_ALLOC is not set # CONFIG_PROVE_LOCKING is not set # CONFIG_LOCK_STAT is not set -# CONFIG_DEBUG_SPINLOCK_SLEEP is not set +CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set # CONFIG_DEBUG_KOBJECT is not set # CONFIG_DEBUG_INFO is not set # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_WRITECOUNT is not set -# CONFIG_DEBUG_LIST is not set +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEBUG_LIST=y # CONFIG_DEBUG_SG is not set +# CONFIG_DEBUG_NOTIFIERS is not set +# CONFIG_DEBUG_CREDENTIALS is not set # CONFIG_BOOT_PRINTK_DELAY is not set # CONFIG_RCU_TORTURE_TEST is not set +CONFIG_RCU_CPU_STALL_DETECTOR=y # CONFIG_BACKTRACE_SELF_TEST is not set +# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set +# CONFIG_DEBUG_FORCE_WEAK_PER_CPU is not set # CONFIG_FAULT_INJECTION is not set +# CONFIG_SYSCTL_SYSCALL_CHECK is not set +# CONFIG_PAGE_POISONING is not set +CONFIG_HAVE_FUNCTION_TRACER=y +CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y +CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y +CONFIG_TRACING_SUPPORT=y +CONFIG_FTRACE=y +# CONFIG_FUNCTION_TRACER is not set +# CONFIG_IRQSOFF_TRACER is not set +# CONFIG_SCHED_TRACER is not set +# CONFIG_ENABLE_DEFAULT_TRACERS is not set +# CONFIG_BOOT_TRACER is not set +CONFIG_BRANCH_PROFILE_NONE=y +# CONFIG_PROFILE_ANNOTATED_BRANCHES is not set +# CONFIG_PROFILE_ALL_BRANCHES is not set +# CONFIG_STACK_TRACER is not set +# CONFIG_KMEMTRACE is not set +# CONFIG_WORKQUEUE_TRACER is not set +# CONFIG_BLK_DEV_IO_TRACE is not set # CONFIG_SAMPLES is not set +CONFIG_HAVE_ARCH_KGDB=y +# CONFIG_KGDB is not set +CONFIG_EARLY_PRINTK=y # CONFIG_CMDLINE_BOOL is not set # CONFIG_DEBUG_STACK_USAGE is not set # CONFIG_SB1XXX_CORELIS is not set @@ -1246,20 +1499,50 @@ CONFIG_DEBUG_MUTEXES=y # CONFIG_KEYS=y CONFIG_KEYS_DEBUG_PROC_KEYS=y -# CONFIG_SECURITY is not set -# CONFIG_SECURITY_FILE_CAPABILITIES is not set +CONFIG_SECURITY=y +# CONFIG_SECURITYFS is not set +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_NETWORK_XFRM=y +# CONFIG_SECURITY_PATH is not set +CONFIG_LSM_MMAP_MIN_ADDR=65536 +CONFIG_SECURITY_SELINUX=y +CONFIG_SECURITY_SELINUX_BOOTPARAM=y +CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=1 +CONFIG_SECURITY_SELINUX_DISABLE=y +CONFIG_SECURITY_SELINUX_DEVELOP=y +CONFIG_SECURITY_SELINUX_AVC_STATS=y +CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +# CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set +# CONFIG_SECURITY_SMACK is not set +# CONFIG_SECURITY_TOMOYO is not set +# CONFIG_DEFAULT_SECURITY_SELINUX is not set +# CONFIG_DEFAULT_SECURITY_SMACK is not set +# CONFIG_DEFAULT_SECURITY_TOMOYO is not set +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEFAULT_SECURITY="" CONFIG_CRYPTO=y # # Crypto core or helper # +# CONFIG_CRYPTO_FIPS is not set CONFIG_CRYPTO_ALGAPI=y +CONFIG_CRYPTO_ALGAPI2=y CONFIG_CRYPTO_AEAD=m +CONFIG_CRYPTO_AEAD2=y CONFIG_CRYPTO_BLKCIPHER=y +CONFIG_CRYPTO_BLKCIPHER2=y CONFIG_CRYPTO_HASH=y +CONFIG_CRYPTO_HASH2=y +CONFIG_CRYPTO_RNG=m +CONFIG_CRYPTO_RNG2=y +CONFIG_CRYPTO_PCOMP=y CONFIG_CRYPTO_MANAGER=y +CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_GF128MUL=m CONFIG_CRYPTO_NULL=y +# CONFIG_CRYPTO_PCRYPT is not set +CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_AUTHENC=m # CONFIG_CRYPTO_TEST is not set @@ -1276,7 +1559,7 @@ CONFIG_CRYPTO_SEQIV=m # CONFIG_CRYPTO_CBC=m CONFIG_CRYPTO_CTR=m -# CONFIG_CRYPTO_CTS is not set +CONFIG_CRYPTO_CTS=m CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m @@ -1287,14 +1570,20 @@ CONFIG_CRYPTO_XTS=m # CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_XCBC=m +CONFIG_CRYPTO_VMAC=m # # Digest # -# CONFIG_CRYPTO_CRC32C is not set +CONFIG_CRYPTO_CRC32C=m +CONFIG_CRYPTO_GHASH=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m +CONFIG_CRYPTO_RMD128=m +CONFIG_CRYPTO_RMD160=m +CONFIG_CRYPTO_RMD256=m +CONFIG_CRYPTO_RMD320=m CONFIG_CRYPTO_SHA1=m CONFIG_CRYPTO_SHA256=m CONFIG_CRYPTO_SHA512=m @@ -1325,25 +1614,36 @@ CONFIG_CRYPTO_TWOFISH_COMMON=m # Compression # CONFIG_CRYPTO_DEFLATE=m -# CONFIG_CRYPTO_LZO is not set +CONFIG_CRYPTO_ZLIB=m +CONFIG_CRYPTO_LZO=m + +# +# Random Number Generation +# +CONFIG_CRYPTO_ANSI_CPRNG=m CONFIG_CRYPTO_HW=y # CONFIG_CRYPTO_DEV_HIFN_795X is not set +# CONFIG_BINARY_PRINTF is not set # # Library routines # CONFIG_BITREVERSE=y -# CONFIG_GENERIC_FIND_FIRST_BIT is not set +CONFIG_GENERIC_FIND_LAST_BIT=y CONFIG_CRC_CCITT=m -# CONFIG_CRC16 is not set +CONFIG_CRC16=y +CONFIG_CRC_T10DIF=m CONFIG_CRC_ITU_T=m CONFIG_CRC32=y -# CONFIG_CRC7 is not set +CONFIG_CRC7=m CONFIG_LIBCRC32C=m CONFIG_AUDIT_GENERIC=y -CONFIG_ZLIB_INFLATE=m +CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m -CONFIG_PLIST=y +CONFIG_LZO_COMPRESS=m +CONFIG_LZO_DECOMPRESS=m +CONFIG_DECOMPRESS_GZIP=y CONFIG_HAS_IOMEM=y CONFIG_HAS_IOPORT=y CONFIG_HAS_DMA=y +CONFIG_NLATTR=y diff --git a/arch/mips/include/asm/abi.h b/arch/mips/include/asm/abi.h index 1dd74fb..9252d9b 100644 --- a/arch/mips/include/asm/abi.h +++ b/arch/mips/include/asm/abi.h @@ -13,12 +13,14 @@ #include <asm/siginfo.h> struct mips_abi { - int (* const setup_frame)(struct k_sigaction * ka, + int (* const setup_frame)(void *sig_return, struct k_sigaction *ka, struct pt_regs *regs, int signr, sigset_t *set); - int (* const setup_rt_frame)(struct k_sigaction * ka, + const unsigned long signal_return_offset; + int (* const setup_rt_frame)(void *sig_return, struct k_sigaction *ka, struct pt_regs *regs, int signr, sigset_t *set, siginfo_t *info); + const unsigned long rt_signal_return_offset; const unsigned long restart; }; diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index e53d7be..ea77a42 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -310,6 +310,7 @@ do { \ #endif /* CONFIG_64BIT */ +struct pt_regs; struct task_struct; extern void elf_dump_regs(elf_greg_t *, struct pt_regs *regs); @@ -367,4 +368,8 @@ extern const char *__elf_platform; #define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2) #endif +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); #endif /* _ASM_ELF_H */ diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index aecada6..3b40927 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -41,7 +41,11 @@ struct mips_fpu_emulator_stats { DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); #define MIPS_FPU_EMU_INC_STATS(M) \ - cpu_local_wrap(__local_inc(&__get_cpu_var(fpuemustats).M)) +do { \ + preempt_disable(); \ + __local_inc(&__get_cpu_var(fpuemustats).M); \ + preempt_enable(); \ +} while (0) #else #define MIPS_FPU_EMU_INC_STATS(M) do { } while (0) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h index b12c4ac..96a2391 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_cpu.h @@ -85,6 +85,7 @@ enum bcm63xx_regs_set { RSET_TIMER, RSET_WDT, RSET_UART0, + RSET_UART1, RSET_GPIO, RSET_SPI, RSET_UDC0, @@ -123,6 +124,7 @@ enum bcm63xx_regs_set { #define BCM_6338_TIMER_BASE (0xfffe0200) #define BCM_6338_WDT_BASE (0xfffe021c) #define BCM_6338_UART0_BASE (0xfffe0300) +#define BCM_6338_UART1_BASE (0xdeadbeef) #define BCM_6338_GPIO_BASE (0xfffe0400) #define BCM_6338_SPI_BASE (0xfffe0c00) #define BCM_6338_UDC0_BASE (0xdeadbeef) @@ -153,6 +155,7 @@ enum bcm63xx_regs_set { #define BCM_6345_TIMER_BASE (0xfffe0200) #define BCM_6345_WDT_BASE (0xfffe021c) #define BCM_6345_UART0_BASE (0xfffe0300) +#define BCM_6345_UART1_BASE (0xdeadbeef) #define BCM_6345_GPIO_BASE (0xfffe0400) #define BCM_6345_SPI_BASE (0xdeadbeef) #define BCM_6345_UDC0_BASE (0xdeadbeef) @@ -182,6 +185,7 @@ enum bcm63xx_regs_set { #define BCM_6348_TIMER_BASE (0xfffe0200) #define BCM_6348_WDT_BASE (0xfffe021c) #define BCM_6348_UART0_BASE (0xfffe0300) +#define BCM_6348_UART1_BASE (0xdeadbeef) #define BCM_6348_GPIO_BASE (0xfffe0400) #define BCM_6348_SPI_BASE (0xfffe0c00) #define BCM_6348_UDC0_BASE (0xfffe1000) @@ -208,6 +212,7 @@ enum bcm63xx_regs_set { #define BCM_6358_TIMER_BASE (0xfffe0040) #define BCM_6358_WDT_BASE (0xfffe005c) #define BCM_6358_UART0_BASE (0xfffe0100) +#define BCM_6358_UART1_BASE (0xfffe0120) #define BCM_6358_GPIO_BASE (0xfffe0080) #define BCM_6358_SPI_BASE (0xdeadbeef) #define BCM_6358_UDC0_BASE (0xfffe0800) @@ -246,6 +251,8 @@ static inline unsigned long bcm63xx_regset_address(enum bcm63xx_regs_set set) return BCM_6338_WDT_BASE; case RSET_UART0: return BCM_6338_UART0_BASE; + case RSET_UART1: + return BCM_6338_UART1_BASE; case RSET_GPIO: return BCM_6338_GPIO_BASE; case RSET_SPI: @@ -292,6 +299,8 @@ static inline unsigned long bcm63xx_regset_address(enum bcm63xx_regs_set set) return BCM_6345_WDT_BASE; case RSET_UART0: return BCM_6345_UART0_BASE; + case RSET_UART1: + return BCM_6345_UART1_BASE; case RSET_GPIO: return BCM_6345_GPIO_BASE; case RSET_SPI: @@ -338,6 +347,8 @@ static inline unsigned long bcm63xx_regset_address(enum bcm63xx_regs_set set) return BCM_6348_WDT_BASE; case RSET_UART0: return BCM_6348_UART0_BASE; + case RSET_UART1: + return BCM_6348_UART1_BASE; case RSET_GPIO: return BCM_6348_GPIO_BASE; case RSET_SPI: @@ -384,6 +395,8 @@ static inline unsigned long bcm63xx_regset_address(enum bcm63xx_regs_set set) return BCM_6358_WDT_BASE; case RSET_UART0: return BCM_6358_UART0_BASE; + case RSET_UART1: + return BCM_6358_UART1_BASE; case RSET_GPIO: return BCM_6358_GPIO_BASE; case RSET_SPI: @@ -429,6 +442,7 @@ static inline unsigned long bcm63xx_regset_address(enum bcm63xx_regs_set set) enum bcm63xx_irq { IRQ_TIMER = 0, IRQ_UART0, + IRQ_UART1, IRQ_DSL, IRQ_ENET0, IRQ_ENET1, @@ -510,6 +524,7 @@ enum bcm63xx_irq { */ #define BCM_6358_TIMER_IRQ (IRQ_INTERNAL_BASE + 0) #define BCM_6358_UART0_IRQ (IRQ_INTERNAL_BASE + 2) +#define BCM_6358_UART1_IRQ (IRQ_INTERNAL_BASE + 3) #define BCM_6358_OHCI0_IRQ (IRQ_INTERNAL_BASE + 5) #define BCM_6358_ENET1_IRQ (IRQ_INTERNAL_BASE + 6) #define BCM_6358_ENET0_IRQ (IRQ_INTERNAL_BASE + 8) diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_uart.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_uart.h new file mode 100644 index 0000000..23c705b --- /dev/null +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_dev_uart.h @@ -0,0 +1,6 @@ +#ifndef BCM63XX_DEV_UART_H_ +#define BCM63XX_DEV_UART_H_ + +int bcm63xx_uart_register(unsigned int id); + +#endif /* BCM63XX_DEV_UART_H_ */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h index 76a0b72..43d4da0 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h @@ -10,6 +10,10 @@ static inline unsigned long bcm63xx_gpio_count(void) switch (bcm63xx_get_cpu_id()) { case BCM6358_CPU_ID: return 40; + case BCM6338_CPU_ID: + return 8; + case BCM6345_CPU_ID: + return 16; case BCM6348_CPU_ID: default: return 37; diff --git a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h index 6479090..474daaa 100644 --- a/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h +++ b/arch/mips/include/asm/mach-bcm63xx/board_bcm963xx.h @@ -45,6 +45,8 @@ struct board_info { unsigned int has_ohci0:1; unsigned int has_ehci0:1; unsigned int has_dsp:1; + unsigned int has_uart0:1; + unsigned int has_uart1:1; /* ethernet config */ struct bcm63xx_enet_platform_data enet0; diff --git a/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h index 71742bac..f453c01 100644 --- a/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-bcm63xx/cpu-feature-overrides.h @@ -24,7 +24,7 @@ #define cpu_has_smartmips 0 #define cpu_has_vtag_icache 0 -#if !defined(BCMCPU_RUNTIME_DETECT) && (defined(CONFIG_BCMCPU_IS_6348) || defined(CONFIG_CPU_IS_6338) || defined(CONFIG_CPU_IS_BCM6345)) +#if !defined(BCMCPU_RUNTIME_DETECT) && (defined(CONFIG_BCM63XX_CPU_6348) || defined(CONFIG_BCM63XX_CPU_6345) || defined(CONFIG_BCM63XX_CPU_6338)) #define cpu_has_dc_aliases 0 #endif diff --git a/arch/mips/include/asm/mach-sibyte/war.h b/arch/mips/include/asm/mach-sibyte/war.h index 7950ef4..743385d 100644 --- a/arch/mips/include/asm/mach-sibyte/war.h +++ b/arch/mips/include/asm/mach-sibyte/war.h @@ -16,7 +16,11 @@ #if defined(CONFIG_SB1_PASS_1_WORKAROUNDS) || \ defined(CONFIG_SB1_PASS_2_WORKAROUNDS) -#define BCM1250_M3_WAR 1 +#ifndef __ASSEMBLY__ +extern int sb1250_m3_workaround_needed(void); +#endif + +#define BCM1250_M3_WAR sb1250_m3_workaround_needed() #define SIBYTE_1956_WAR 1 #else diff --git a/arch/mips/include/asm/mmu.h b/arch/mips/include/asm/mmu.h index 4063edd..c436138 100644 --- a/arch/mips/include/asm/mmu.h +++ b/arch/mips/include/asm/mmu.h @@ -1,6 +1,9 @@ #ifndef __ASM_MMU_H #define __ASM_MMU_H -typedef unsigned long mm_context_t[NR_CPUS]; +typedef struct { + unsigned long asid[NR_CPUS]; + void *vdso; +} mm_context_t; #endif /* __ASM_MMU_H */ diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index 145bb81..d959273 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -104,7 +104,7 @@ extern unsigned long smtc_asid_mask; #endif -#define cpu_context(cpu, mm) ((mm)->context[cpu]) +#define cpu_context(cpu, mm) ((mm)->context.asid[cpu]) #define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & ASID_MASK) #define asid_cache(cpu) (cpu_data[cpu].asid_cache) diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index ac32572..a16beaf 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -188,8 +188,10 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define UNCAC_ADDR(addr) ((addr) - PAGE_OFFSET + UNCAC_BASE) -#define CAC_ADDR(addr) ((addr) - UNCAC_BASE + PAGE_OFFSET) +#define UNCAC_ADDR(addr) ((addr) - PAGE_OFFSET + UNCAC_BASE + \ + PHYS_OFFSET) +#define CAC_ADDR(addr) ((addr) - UNCAC_BASE + PAGE_OFFSET - \ + PHYS_OFFSET) #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index 087a888..ab38791 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -33,13 +33,19 @@ extern void (*cpu_wait)(void); extern unsigned int vced_count, vcei_count; +/* + * A special page (the vdso) is mapped into all processes at the very + * top of the virtual memory space. + */ +#define SPECIAL_PAGES_SIZE PAGE_SIZE + #ifdef CONFIG_32BIT /* * User space process size: 2GB. This is hardcoded into a few places, * so don't change it unless you know what you are doing. */ #define TASK_SIZE 0x7fff8000UL -#define STACK_TOP TASK_SIZE +#define STACK_TOP ((TASK_SIZE & PAGE_MASK) - SPECIAL_PAGES_SIZE) /* * This decides where the kernel will search for a free chunk of vm @@ -59,7 +65,8 @@ extern unsigned int vced_count, vcei_count; #define TASK_SIZE32 0x7fff8000UL #define TASK_SIZE 0x10000000000UL #define STACK_TOP \ - (test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE) + (((test_thread_flag(TIF_32BIT_ADDR) ? \ + TASK_SIZE32 : TASK_SIZE) & PAGE_MASK) - SPECIAL_PAGES_SIZE) /* * This decides where the kernel will search for a free chunk of vm diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 3b6da33..c841912 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -121,6 +121,25 @@ .endm #else .macro get_saved_sp /* Uniprocessor variation */ +#ifdef CONFIG_CPU_LOONGSON2F + /* + * Clear BTB (branch target buffer), forbid RAS (return address + * stack) to workaround the Out-of-order Issue in Loongson2F + * via its diagnostic register. + */ + move k0, ra + jal 1f + nop +1: jal 1f + nop +1: jal 1f + nop +1: jal 1f + nop +1: move ra, k0 + li k0, 3 + mtc0 k0, $22 +#endif /* CONFIG_CPU_LOONGSON2F */ #if defined(CONFIG_32BIT) || defined(KBUILD_64BIT_SYM32) lui k1, %hi(kernelsp) #else diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h index b99bd07..11a8b52 100644 --- a/arch/mips/include/asm/uasm.h +++ b/arch/mips/include/asm/uasm.h @@ -84,6 +84,7 @@ Ip_u2s3u1(_lw); Ip_u1u2u3(_mfc0); Ip_u1u2u3(_mtc0); Ip_u2u1u3(_ori); +Ip_u3u1u2(_or); Ip_u2s3u1(_pref); Ip_0(_rfe); Ip_u2s3u1(_sc); @@ -102,6 +103,7 @@ Ip_0(_tlbwr); Ip_u3u1u2(_xor); Ip_u2u1u3(_xori); Ip_u2u1msbu3(_dins); +Ip_u1(_syscall); /* Handle labels. */ struct uasm_label { diff --git a/arch/mips/include/asm/vdso.h b/arch/mips/include/asm/vdso.h new file mode 100644 index 0000000..cca56aa --- /dev/null +++ b/arch/mips/include/asm/vdso.h @@ -0,0 +1,29 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2009 Cavium Networks + */ + +#ifndef __ASM_VDSO_H +#define __ASM_VDSO_H + +#include <linux/types.h> + + +#ifdef CONFIG_32BIT +struct mips_vdso { + u32 signal_trampoline[2]; + u32 rt_signal_trampoline[2]; +}; +#else /* !CONFIG_32BIT */ +struct mips_vdso { + u32 o32_signal_trampoline[2]; + u32 o32_rt_signal_trampoline[2]; + u32 rt_signal_trampoline[2]; + u32 n32_rt_signal_trampoline[2]; +}; +#endif /* CONFIG_32BIT */ + +#endif /* __ASM_VDSO_H */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index ef20957..7a6ac50 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ ptrace.o reset.o setup.o signal.o syscall.o \ - time.o topology.o traps.o unaligned.o watch.o + time.o topology.o traps.o unaligned.o watch.o vdso.o ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg diff --git a/arch/mips/kernel/cpufreq/loongson2_clock.c b/arch/mips/kernel/cpufreq/loongson2_clock.c index d7ca256..cefc6e2 100644 --- a/arch/mips/kernel/cpufreq/loongson2_clock.c +++ b/arch/mips/kernel/cpufreq/loongson2_clock.c @@ -164,3 +164,7 @@ void loongson2_cpu_wait(void) spin_unlock_irqrestore(&loongson2_wait_lock, flags); } EXPORT_SYMBOL_GPL(loongson2_cpu_wait); + +MODULE_AUTHOR("Yanhua <yanh@lemote.com>"); +MODULE_DESCRIPTION("cpufreq driver for Loongson 2F"); +MODULE_LICENSE("GPL"); diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 463b71b..9996094 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -63,8 +63,13 @@ void __noreturn cpu_idle(void) smtc_idle_loop_hook(); #endif - if (cpu_wait) + + if (cpu_wait) { + /* Don't trace irqs off for idle */ + stop_critical_timings(); (*cpu_wait)(); + start_critical_timings(); + } } #ifdef CONFIG_HOTPLUG_CPU if (!cpu_online(cpu) && !cpu_isset(cpu, cpu_callin_map) && diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h index 6c8e8c4..10263b4 100644 --- a/arch/mips/kernel/signal-common.h +++ b/arch/mips/kernel/signal-common.h @@ -26,11 +26,6 @@ */ extern void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size); -/* - * install trampoline code to get back from the sig handler - */ -extern int install_sigtramp(unsigned int __user *tramp, unsigned int syscall); - /* Check and clear pending FPU exceptions in saved CSR */ extern int fpcsr_pending(unsigned int __user *fpcsr); diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index d0c68b5..2099d5a 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -32,6 +32,7 @@ #include <asm/ucontext.h> #include <asm/cpu-features.h> #include <asm/war.h> +#include <asm/vdso.h> #include "signal-common.h" @@ -44,47 +45,20 @@ extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc); extern asmlinkage int fpu_emulator_save_context(struct sigcontext __user *sc); extern asmlinkage int fpu_emulator_restore_context(struct sigcontext __user *sc); -/* - * Horribly complicated - with the bloody RM9000 workarounds enabled - * the signal trampolines is moving to the end of the structure so we can - * increase the alignment without breaking software compatibility. - */ -#if ICACHE_REFILLS_WORKAROUND_WAR == 0 - struct sigframe { u32 sf_ass[4]; /* argument save space for o32 */ - u32 sf_code[2]; /* signal trampoline */ + u32 sf_pad[2]; /* Was: signal trampoline */ struct sigcontext sf_sc; sigset_t sf_mask; }; struct rt_sigframe { u32 rs_ass[4]; /* argument save space for o32 */ - u32 rs_code[2]; /* signal trampoline */ + u32 rs_pad[2]; /* Was: signal trampoline */ struct siginfo rs_info; struct ucontext rs_uc; }; -#else - -struct sigframe { - u32 sf_ass[4]; /* argument save space for o32 */ - u32 sf_pad[2]; - struct sigcontext sf_sc; /* hw context */ - sigset_t sf_mask; - u32 sf_code[8] ____cacheline_aligned; /* signal trampoline */ -}; - -struct rt_sigframe { - u32 rs_ass[4]; /* argument save space for o32 */ - u32 rs_pad[2]; - struct siginfo rs_info; - struct ucontext rs_uc; - u32 rs_code[8] ____cacheline_aligned; /* signal trampoline */ -}; - -#endif - /* * Helper routines */ @@ -266,32 +240,6 @@ void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, return (void __user *)((sp - frame_size) & (ICACHE_REFILLS_WORKAROUND_WAR ? ~(cpu_icache_line_size()-1) : ALMASK)); } -int install_sigtramp(unsigned int __user *tramp, unsigned int syscall) -{ - int err; - - /* - * Set up the return code ... - * - * li v0, __NR__foo_sigreturn - * syscall - */ - - err = __put_user(0x24020000 + syscall, tramp + 0); - err |= __put_user(0x0000000c , tramp + 1); - if (ICACHE_REFILLS_WORKAROUND_WAR) { - err |= __put_user(0, tramp + 2); - err |= __put_user(0, tramp + 3); - err |= __put_user(0, tramp + 4); - err |= __put_user(0, tramp + 5); - err |= __put_user(0, tramp + 6); - err |= __put_user(0, tramp + 7); - } - flush_cache_sigtramp((unsigned long) tramp); - - return err; -} - /* * Atomically swap in the new signal mask, and wait for a signal. */ @@ -484,8 +432,8 @@ badframe: } #ifdef CONFIG_TRAD_SIGNALS -static int setup_frame(struct k_sigaction * ka, struct pt_regs *regs, - int signr, sigset_t *set) +static int setup_frame(void *sig_return, struct k_sigaction *ka, + struct pt_regs *regs, int signr, sigset_t *set) { struct sigframe __user *frame; int err = 0; @@ -494,8 +442,6 @@ static int setup_frame(struct k_sigaction * ka, struct pt_regs *regs, if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto give_sigsegv; - err |= install_sigtramp(frame->sf_code, __NR_sigreturn); - err |= setup_sigcontext(regs, &frame->sf_sc); err |= __copy_to_user(&frame->sf_mask, set, sizeof(*set)); if (err) @@ -515,7 +461,7 @@ static int setup_frame(struct k_sigaction * ka, struct pt_regs *regs, regs->regs[ 5] = 0; regs->regs[ 6] = (unsigned long) &frame->sf_sc; regs->regs[29] = (unsigned long) frame; - regs->regs[31] = (unsigned long) frame->sf_code; + regs->regs[31] = (unsigned long) sig_return; regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler; DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", @@ -529,8 +475,9 @@ give_sigsegv: } #endif -static int setup_rt_frame(struct k_sigaction * ka, struct pt_regs *regs, - int signr, sigset_t *set, siginfo_t *info) +static int setup_rt_frame(void *sig_return, struct k_sigaction *ka, + struct pt_regs *regs, int signr, sigset_t *set, + siginfo_t *info) { struct rt_sigframe __user *frame; int err = 0; @@ -539,8 +486,6 @@ static int setup_rt_frame(struct k_sigaction * ka, struct pt_regs *regs, if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto give_sigsegv; - err |= install_sigtramp(frame->rs_code, __NR_rt_sigreturn); - /* Create siginfo. */ err |= copy_siginfo_to_user(&frame->rs_info, info); @@ -573,7 +518,7 @@ static int setup_rt_frame(struct k_sigaction * ka, struct pt_regs *regs, regs->regs[ 5] = (unsigned long) &frame->rs_info; regs->regs[ 6] = (unsigned long) &frame->rs_uc; regs->regs[29] = (unsigned long) frame; - regs->regs[31] = (unsigned long) frame->rs_code; + regs->regs[31] = (unsigned long) sig_return; regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler; DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", @@ -590,8 +535,11 @@ give_sigsegv: struct mips_abi mips_abi = { #ifdef CONFIG_TRAD_SIGNALS .setup_frame = setup_frame, + .signal_return_offset = offsetof(struct mips_vdso, signal_trampoline), #endif .setup_rt_frame = setup_rt_frame, + .rt_signal_return_offset = + offsetof(struct mips_vdso, rt_signal_trampoline), .restart = __NR_restart_syscall }; @@ -599,6 +547,8 @@ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs) { int ret; + struct mips_abi *abi = current->thread.abi; + void *vdso = current->mm->context.vdso; switch(regs->regs[0]) { case ERESTART_RESTARTBLOCK: @@ -619,9 +569,11 @@ static int handle_signal(unsigned long sig, siginfo_t *info, regs->regs[0] = 0; /* Don't deal with this again. */ if (sig_uses_siginfo(ka)) - ret = current->thread.abi->setup_rt_frame(ka, regs, sig, oldset, info); + ret = abi->setup_rt_frame(vdso + abi->rt_signal_return_offset, + ka, regs, sig, oldset, info); else - ret = current->thread.abi->setup_frame(ka, regs, sig, oldset); + ret = abi->setup_frame(vdso + abi->signal_return_offset, + ka, regs, sig, oldset); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 03abaf0..a0ed0e0 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -32,6 +32,7 @@ #include <asm/system.h> #include <asm/fpu.h> #include <asm/war.h> +#include <asm/vdso.h> #include "signal-common.h" @@ -47,8 +48,6 @@ extern asmlinkage int fpu_emulator_restore_context32(struct sigcontext32 __user /* * Including <asm/unistd.h> would give use the 64-bit syscall numbers ... */ -#define __NR_O32_sigreturn 4119 -#define __NR_O32_rt_sigreturn 4193 #define __NR_O32_restart_syscall 4253 /* 32-bit compatibility types */ @@ -77,47 +76,20 @@ struct ucontext32 { compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; -/* - * Horribly complicated - with the bloody RM9000 workarounds enabled - * the signal trampolines is moving to the end of the structure so we can - * increase the alignment without breaking software compatibility. - */ -#if ICACHE_REFILLS_WORKAROUND_WAR == 0 - struct sigframe32 { u32 sf_ass[4]; /* argument save space for o32 */ - u32 sf_code[2]; /* signal trampoline */ + u32 sf_pad[2]; /* Was: signal trampoline */ struct sigcontext32 sf_sc; compat_sigset_t sf_mask; }; struct rt_sigframe32 { u32 rs_ass[4]; /* argument save space for o32 */ - u32 rs_code[2]; /* signal trampoline */ + u32 rs_pad[2]; /* Was: signal trampoline */ compat_siginfo_t rs_info; struct ucontext32 rs_uc; }; -#else /* ICACHE_REFILLS_WORKAROUND_WAR */ - -struct sigframe32 { - u32 sf_ass[4]; /* argument save space for o32 */ - u32 sf_pad[2]; - struct sigcontext32 sf_sc; /* hw context */ - compat_sigset_t sf_mask; - u32 sf_code[8] ____cacheline_aligned; /* signal trampoline */ -}; - -struct rt_sigframe32 { - u32 rs_ass[4]; /* argument save space for o32 */ - u32 rs_pad[2]; - compat_siginfo_t rs_info; - struct ucontext32 rs_uc; - u32 rs_code[8] __attribute__((aligned(32))); /* signal trampoline */ -}; - -#endif /* !ICACHE_REFILLS_WORKAROUND_WAR */ - /* * sigcontext handlers */ @@ -598,8 +570,8 @@ badframe: force_sig(SIGSEGV, current); } -static int setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs, - int signr, sigset_t *set) +static int setup_frame_32(void *sig_return, struct k_sigaction *ka, + struct pt_regs *regs, int signr, sigset_t *set) { struct sigframe32 __user *frame; int err = 0; @@ -608,8 +580,6 @@ static int setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs, if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto give_sigsegv; - err |= install_sigtramp(frame->sf_code, __NR_O32_sigreturn); - err |= setup_sigcontext32(regs, &frame->sf_sc); err |= __copy_conv_sigset_to_user(&frame->sf_mask, set); @@ -630,7 +600,7 @@ static int setup_frame_32(struct k_sigaction * ka, struct pt_regs *regs, regs->regs[ 5] = 0; regs->regs[ 6] = (unsigned long) &frame->sf_sc; regs->regs[29] = (unsigned long) frame; - regs->regs[31] = (unsigned long) frame->sf_code; + regs->regs[31] = (unsigned long) sig_return; regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler; DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", @@ -644,8 +614,9 @@ give_sigsegv: return -EFAULT; } -static int setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs, - int signr, sigset_t *set, siginfo_t *info) +static int setup_rt_frame_32(void *sig_return, struct k_sigaction *ka, + struct pt_regs *regs, int signr, sigset_t *set, + siginfo_t *info) { struct rt_sigframe32 __user *frame; int err = 0; @@ -655,8 +626,6 @@ static int setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs, if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto give_sigsegv; - err |= install_sigtramp(frame->rs_code, __NR_O32_rt_sigreturn); - /* Convert (siginfo_t -> compat_siginfo_t) and copy to user. */ err |= copy_siginfo_to_user32(&frame->rs_info, info); @@ -690,7 +659,7 @@ static int setup_rt_frame_32(struct k_sigaction * ka, struct pt_regs *regs, regs->regs[ 5] = (unsigned long) &frame->rs_info; regs->regs[ 6] = (unsigned long) &frame->rs_uc; regs->regs[29] = (unsigned long) frame; - regs->regs[31] = (unsigned long) frame->rs_code; + regs->regs[31] = (unsigned long) sig_return; regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler; DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", @@ -709,7 +678,11 @@ give_sigsegv: */ struct mips_abi mips_abi_32 = { .setup_frame = setup_frame_32, + .signal_return_offset = + offsetof(struct mips_vdso, o32_signal_trampoline), .setup_rt_frame = setup_rt_frame_32, + .rt_signal_return_offset = + offsetof(struct mips_vdso, o32_rt_signal_trampoline), .restart = __NR_O32_restart_syscall }; diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c index bb277e8..2c5df81 100644 --- a/arch/mips/kernel/signal_n32.c +++ b/arch/mips/kernel/signal_n32.c @@ -39,13 +39,13 @@ #include <asm/fpu.h> #include <asm/cpu-features.h> #include <asm/war.h> +#include <asm/vdso.h> #include "signal-common.h" /* * Including <asm/unistd.h> would give use the 64-bit syscall numbers ... */ -#define __NR_N32_rt_sigreturn 6211 #define __NR_N32_restart_syscall 6214 extern int setup_sigcontext(struct pt_regs *, struct sigcontext __user *); @@ -67,27 +67,13 @@ struct ucontextn32 { compat_sigset_t uc_sigmask; /* mask last for extensibility */ }; -#if ICACHE_REFILLS_WORKAROUND_WAR == 0 - -struct rt_sigframe_n32 { - u32 rs_ass[4]; /* argument save space for o32 */ - u32 rs_code[2]; /* signal trampoline */ - struct compat_siginfo rs_info; - struct ucontextn32 rs_uc; -}; - -#else /* ICACHE_REFILLS_WORKAROUND_WAR */ - struct rt_sigframe_n32 { u32 rs_ass[4]; /* argument save space for o32 */ - u32 rs_pad[2]; + u32 rs_pad[2]; /* Was: signal trampoline */ struct compat_siginfo rs_info; struct ucontextn32 rs_uc; - u32 rs_code[8] ____cacheline_aligned; /* signal trampoline */ }; -#endif /* !ICACHE_REFILLS_WORKAROUND_WAR */ - extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); asmlinkage int sysn32_rt_sigsuspend(nabi_no_regargs struct pt_regs regs) @@ -173,7 +159,7 @@ badframe: force_sig(SIGSEGV, current); } -static int setup_rt_frame_n32(struct k_sigaction * ka, +static int setup_rt_frame_n32(void *sig_return, struct k_sigaction *ka, struct pt_regs *regs, int signr, sigset_t *set, siginfo_t *info) { struct rt_sigframe_n32 __user *frame; @@ -184,8 +170,6 @@ static int setup_rt_frame_n32(struct k_sigaction * ka, if (!access_ok(VERIFY_WRITE, frame, sizeof (*frame))) goto give_sigsegv; - install_sigtramp(frame->rs_code, __NR_N32_rt_sigreturn); - /* Create siginfo. */ err |= copy_siginfo_to_user32(&frame->rs_info, info); @@ -219,7 +203,7 @@ static int setup_rt_frame_n32(struct k_sigaction * ka, regs->regs[ 5] = (unsigned long) &frame->rs_info; regs->regs[ 6] = (unsigned long) &frame->rs_uc; regs->regs[29] = (unsigned long) frame; - regs->regs[31] = (unsigned long) frame->rs_code; + regs->regs[31] = (unsigned long) sig_return; regs->cp0_epc = regs->regs[25] = (unsigned long) ka->sa.sa_handler; DEBUGP("SIG deliver (%s:%d): sp=0x%p pc=0x%lx ra=0x%lx\n", @@ -235,5 +219,7 @@ give_sigsegv: struct mips_abi mips_abi_n32 = { .setup_rt_frame = setup_rt_frame_n32, + .rt_signal_return_offset = + offsetof(struct mips_vdso, n32_rt_signal_trampoline), .restart = __NR_N32_restart_syscall }; diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index 25e825a..a95dea5 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -182,7 +182,7 @@ static int vpemask[2][8] = { {0, 0, 0, 0, 0, 0, 0, 1} }; int tcnoprog[NR_CPUS]; -static atomic_t idle_hook_initialized = {0}; +static atomic_t idle_hook_initialized = ATOMIC_INIT(0); static int clock_hang_reported[NR_CPUS]; #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */ diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 9587abc..dd81b0f 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -79,7 +79,11 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, int do_color_align; unsigned long task_size; - task_size = STACK_TOP; +#ifdef CONFIG_32BIT + task_size = TASK_SIZE; +#else /* Must be CONFIG_64BIT*/ + task_size = test_thread_flag(TIF_32BIT_ADDR) ? TASK_SIZE32 : TASK_SIZE; +#endif if (len > task_size) return -ENOMEM; diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 4e00f9b..1a4dd65 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -1599,7 +1599,7 @@ void __init trap_init(void) ebase = (unsigned long) __alloc_bootmem(size, 1 << fls(size), 0); } else { - ebase = CAC_BASE; + ebase = CKSEG0; if (cpu_has_mips_r2) ebase += (read_c0_ebase() & 0x3ffff000); } diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c new file mode 100644 index 0000000..b773c11 --- /dev/null +++ b/arch/mips/kernel/vdso.c @@ -0,0 +1,112 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2009, 2010 Cavium Networks, Inc. + */ + + +#include <linux/kernel.h> +#include <linux/err.h> +#include <linux/sched.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/binfmts.h> +#include <linux/elf.h> +#include <linux/vmalloc.h> +#include <linux/unistd.h> + +#include <asm/vdso.h> +#include <asm/uasm.h> + +/* + * Including <asm/unistd.h> would give use the 64-bit syscall numbers ... + */ +#define __NR_O32_sigreturn 4119 +#define __NR_O32_rt_sigreturn 4193 +#define __NR_N32_rt_sigreturn 6211 + +static struct page *vdso_page; + +static void __init install_trampoline(u32 *tramp, unsigned int sigreturn) +{ + uasm_i_addiu(&tramp, 2, 0, sigreturn); /* li v0, sigreturn */ + uasm_i_syscall(&tramp, 0); +} + +static int __init init_vdso(void) +{ + struct mips_vdso *vdso; + + vdso_page = alloc_page(GFP_KERNEL); + if (!vdso_page) + panic("Cannot allocate vdso"); + + vdso = vmap(&vdso_page, 1, 0, PAGE_KERNEL); + if (!vdso) + panic("Cannot map vdso"); + clear_page(vdso); + + install_trampoline(vdso->rt_signal_trampoline, __NR_rt_sigreturn); +#ifdef CONFIG_32BIT + install_trampoline(vdso->signal_trampoline, __NR_sigreturn); +#else + install_trampoline(vdso->n32_rt_signal_trampoline, + __NR_N32_rt_sigreturn); + install_trampoline(vdso->o32_signal_trampoline, __NR_O32_sigreturn); + install_trampoline(vdso->o32_rt_signal_trampoline, + __NR_O32_rt_sigreturn); +#endif + + vunmap(vdso); + + pr_notice("init_vdso successfull\n"); + + return 0; +} +device_initcall(init_vdso); + +static unsigned long vdso_addr(unsigned long start) +{ + return STACK_TOP; +} + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + int ret; + unsigned long addr; + struct mm_struct *mm = current->mm; + + down_write(&mm->mmap_sem); + + addr = vdso_addr(mm->start_stack); + + addr = get_unmapped_area(NULL, addr, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| + VM_ALWAYSDUMP, + &vdso_page); + + if (ret) + goto up_fail; + + mm->context.vdso = (void *)addr; + +up_fail: + up_write(&mm->mmap_sem); + return ret; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + return "[vdso]"; + return NULL; +} diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c index 6b3b1de9..5995969 100644 --- a/arch/mips/lib/delay.c +++ b/arch/mips/lib/delay.c @@ -41,7 +41,7 @@ EXPORT_SYMBOL(__delay); void __udelay(unsigned long us) { - unsigned int lpj = current_cpu_data.udelay_val; + unsigned int lpj = raw_current_cpu_data.udelay_val; __delay((us * 0x000010c7ull * HZ * lpj) >> 32); } @@ -49,7 +49,7 @@ EXPORT_SYMBOL(__udelay); void __ndelay(unsigned long ns) { - unsigned int lpj = current_cpu_data.udelay_val; + unsigned int lpj = raw_current_cpu_data.udelay_val; __delay((ns * 0x00000005ull * HZ * lpj) >> 32); } diff --git a/arch/mips/lib/libgcc.h b/arch/mips/lib/libgcc.h index 3f19d1c..05909d58 100644 --- a/arch/mips/lib/libgcc.h +++ b/arch/mips/lib/libgcc.h @@ -17,8 +17,7 @@ struct DWstruct { #error I feel sick. #endif -typedef union -{ +typedef union { struct DWstruct s; long long ll; } DWunion; diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index be8627b..12af739 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -133,7 +133,7 @@ void __update_cache(struct vm_area_struct *vma, unsigned long address, } unsigned long _page_cachable_default; -EXPORT_SYMBOL_GPL(_page_cachable_default); +EXPORT_SYMBOL(_page_cachable_default); static inline void setup_protection_map(void) { diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 0de0e41..d1f68aa 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -788,10 +788,15 @@ static void __cpuinit build_r4000_tlb_refill_handler(void) * create the plain linear handler */ if (bcm1250_m3_war()) { - UASM_i_MFC0(&p, K0, C0_BADVADDR); - UASM_i_MFC0(&p, K1, C0_ENTRYHI); + unsigned int segbits = 44; + + uasm_i_dmfc0(&p, K0, C0_BADVADDR); + uasm_i_dmfc0(&p, K1, C0_ENTRYHI); uasm_i_xor(&p, K0, K0, K1); - UASM_i_SRL(&p, K0, K0, PAGE_SHIFT + 1); + uasm_i_dsrl32(&p, K1, K0, 62 - 32); + uasm_i_dsrl(&p, K0, K0, 12 + 1); + uasm_i_dsll32(&p, K0, K0, 64 + 12 + 1 - segbits - 32); + uasm_i_or(&p, K0, K0, K1); uasm_il_bnez(&p, &r, K0, label_leave); /* No need for uasm_i_nop */ } @@ -1312,10 +1317,15 @@ static void __cpuinit build_r4000_tlb_load_handler(void) memset(relocs, 0, sizeof(relocs)); if (bcm1250_m3_war()) { - UASM_i_MFC0(&p, K0, C0_BADVADDR); - UASM_i_MFC0(&p, K1, C0_ENTRYHI); + unsigned int segbits = 44; + + uasm_i_dmfc0(&p, K0, C0_BADVADDR); + uasm_i_dmfc0(&p, K1, C0_ENTRYHI); uasm_i_xor(&p, K0, K0, K1); - UASM_i_SRL(&p, K0, K0, PAGE_SHIFT + 1); + uasm_i_dsrl32(&p, K1, K0, 62 - 32); + uasm_i_dsrl(&p, K0, K0, 12 + 1); + uasm_i_dsll32(&p, K0, K0, 64 + 12 + 1 - segbits - 32); + uasm_i_or(&p, K0, K0, K1); uasm_il_bnez(&p, &r, K0, label_leave); /* No need for uasm_i_nop */ } diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index 1581e98..611d564 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -31,7 +31,8 @@ enum fields { BIMM = 0x040, JIMM = 0x080, FUNC = 0x100, - SET = 0x200 + SET = 0x200, + SCIMM = 0x400 }; #define OP_MASK 0x3f @@ -52,6 +53,8 @@ enum fields { #define FUNC_SH 0 #define SET_MASK 0x7 #define SET_SH 0 +#define SCIMM_MASK 0xfffff +#define SCIMM_SH 6 enum opcode { insn_invalid, @@ -61,10 +64,10 @@ enum opcode { insn_dmtc0, insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_drotr, insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld, insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, - insn_mtc0, insn_ori, insn_pref, insn_rfe, insn_sc, insn_scd, + insn_mtc0, insn_or, insn_ori, insn_pref, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll, insn_sra, insn_srl, insn_rotr, insn_subu, insn_sw, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_xor, insn_xori, - insn_dins + insn_dins, insn_syscall }; struct insn { @@ -117,6 +120,7 @@ static struct insn insn_table[] __cpuinitdata = { { insn_lw, M(lw_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_mfc0, M(cop0_op, mfc_op, 0, 0, 0, 0), RT | RD | SET}, { insn_mtc0, M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET}, + { insn_or, M(spec_op, 0, 0, 0, 0, or_op), RS | RT | RD }, { insn_ori, M(ori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, { insn_pref, M(pref_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_rfe, M(cop0_op, cop_op, 0, 0, 0, rfe_op), 0 }, @@ -136,6 +140,7 @@ static struct insn insn_table[] __cpuinitdata = { { insn_xor, M(spec_op, 0, 0, 0, 0, xor_op), RS | RT | RD }, { insn_xori, M(xori_op, 0, 0, 0, 0, 0), RS | RT | UIMM }, { insn_dins, M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE }, + { insn_syscall, M(spec_op, 0, 0, 0, 0, syscall_op), SCIMM}, { insn_invalid, 0, 0 } }; @@ -208,6 +213,14 @@ static inline __cpuinit u32 build_jimm(u32 arg) return (arg >> 2) & JIMM_MASK; } +static inline __cpuinit u32 build_scimm(u32 arg) +{ + if (arg & ~SCIMM_MASK) + printk(KERN_WARNING "Micro-assembler field overflow\n"); + + return (arg & SCIMM_MASK) << SCIMM_SH; +} + static inline __cpuinit u32 build_func(u32 arg) { if (arg & ~FUNC_MASK) @@ -266,6 +279,8 @@ static void __cpuinit build_insn(u32 **buf, enum opcode opc, ...) op |= build_func(va_arg(ap, u32)); if (ip->fields & SET) op |= build_set(va_arg(ap, u32)); + if (ip->fields & SCIMM) + op |= build_scimm(va_arg(ap, u32)); va_end(ap); **buf = op; @@ -373,6 +388,7 @@ I_u2s3u1(_lw) I_u1u2u3(_mfc0) I_u1u2u3(_mtc0) I_u2u1u3(_ori) +I_u3u1u2(_or) I_u2s3u1(_pref) I_0(_rfe) I_u2s3u1(_sc) @@ -391,6 +407,7 @@ I_0(_tlbwr) I_u3u1u2(_xor) I_u2u1u3(_xori) I_u2u1msbu3(_dins); +I_u1(_syscall); /* Handle labels. */ void __cpuinit uasm_build_label(struct uasm_label **lab, u32 *addr, int lid) diff --git a/arch/mips/pci/ops-loongson2.c b/arch/mips/pci/ops-loongson2.c index 2bb4057..d657ee0 100644 --- a/arch/mips/pci/ops-loongson2.c +++ b/arch/mips/pci/ops-loongson2.c @@ -180,15 +180,21 @@ struct pci_ops loongson_pci_ops = { }; #ifdef CONFIG_CS5536 +DEFINE_RAW_SPINLOCK(msr_lock); + void _rdmsr(u32 msr, u32 *hi, u32 *lo) { struct pci_bus bus = { .number = PCI_BUS_CS5536 }; u32 devfn = PCI_DEVFN(PCI_IDSEL_CS5536, 0); + unsigned long flags; + + raw_spin_lock_irqsave(&msr_lock, flags); loongson_pcibios_write(&bus, devfn, PCI_MSR_ADDR, 4, msr); loongson_pcibios_read(&bus, devfn, PCI_MSR_DATA_LO, 4, lo); loongson_pcibios_read(&bus, devfn, PCI_MSR_DATA_HI, 4, hi); + raw_spin_unlock_irqrestore(&msr_lock, flags); } EXPORT_SYMBOL(_rdmsr); @@ -198,9 +204,13 @@ void _wrmsr(u32 msr, u32 hi, u32 lo) .number = PCI_BUS_CS5536 }; u32 devfn = PCI_DEVFN(PCI_IDSEL_CS5536, 0); + unsigned long flags; + + raw_spin_lock_irqsave(&msr_lock, flags); loongson_pcibios_write(&bus, devfn, PCI_MSR_ADDR, 4, msr); loongson_pcibios_write(&bus, devfn, PCI_MSR_DATA_LO, 4, lo); loongson_pcibios_write(&bus, devfn, PCI_MSR_DATA_HI, 4, hi); + raw_spin_unlock_irqrestore(&msr_lock, flags); } EXPORT_SYMBOL(_wrmsr); #endif diff --git a/arch/mips/sibyte/sb1250/setup.c b/arch/mips/sibyte/sb1250/setup.c index 0444da1..92da315 100644 --- a/arch/mips/sibyte/sb1250/setup.c +++ b/arch/mips/sibyte/sb1250/setup.c @@ -87,6 +87,21 @@ static int __init setup_bcm1250(void) return ret; } +int sb1250_m3_workaround_needed(void) +{ + switch (soc_type) { + case K_SYS_SOC_TYPE_BCM1250: + case K_SYS_SOC_TYPE_BCM1250_ALT: + case K_SYS_SOC_TYPE_BCM1250_ALT2: + case K_SYS_SOC_TYPE_BCM1125: + case K_SYS_SOC_TYPE_BCM1125H: + return soc_pass < K_SYS_REVISION_BCM1250_C0; + + default: + return 0; + } +} + static int __init setup_bcm112x(void) { int ret = 0; diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 81f3b0b..6c5547d 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -77,4 +77,14 @@ struct kvm_debug_exit_arch { struct kvm_guest_debug_arch { }; +#define KVM_REG_MASK 0x001f +#define KVM_REG_EXT_MASK 0xffe0 +#define KVM_REG_GPR 0x0000 +#define KVM_REG_FPR 0x0020 +#define KVM_REG_QPR 0x0040 +#define KVM_REG_FQPR 0x0060 + +#define KVM_INTERRUPT_SET -1U +#define KVM_INTERRUPT_UNSET -2U + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index aadf2dd..7238c04 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -88,6 +88,7 @@ #define BOOK3S_HFLAG_DCBZ32 0x1 #define BOOK3S_HFLAG_SLB 0x2 +#define BOOK3S_HFLAG_PAIRED_SINGLE 0x4 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index db7db0a..ee79921 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -29,39 +29,40 @@ struct kvmppc_slb { u64 vsid; u64 orige; u64 origv; - bool valid; - bool Ks; - bool Kp; - bool nx; - bool large; /* PTEs are 16MB */ - bool tb; /* 1TB segment */ - bool class; + bool valid : 1; + bool Ks : 1; + bool Kp : 1; + bool nx : 1; + bool large : 1; /* PTEs are 16MB */ + bool tb : 1; /* 1TB segment */ + bool class : 1; }; struct kvmppc_sr { u32 raw; u32 vsid; - bool Ks; - bool Kp; - bool nx; + bool Ks : 1; + bool Kp : 1; + bool nx : 1; + bool valid : 1; }; struct kvmppc_bat { u64 raw; u32 bepi; u32 bepi_mask; - bool vs; - bool vp; u32 brpn; u8 wimg; u8 pp; + bool vs : 1; + bool vp : 1; }; struct kvmppc_sid_map { u64 guest_vsid; u64 guest_esid; u64 host_vsid; - bool valid; + bool valid : 1; }; #define SID_MAP_BITS 9 @@ -82,9 +83,10 @@ struct kvmppc_vcpu_book3s { struct kvmppc_bat ibat[8]; struct kvmppc_bat dbat[8]; u64 hid[6]; + u64 gqr[8]; int slb_nr; + u32 dsisr; u64 sdr1; - u64 dsisr; u64 hior; u64 msr_mask; u64 vsid_first; @@ -98,11 +100,12 @@ struct kvmppc_vcpu_book3s { #define CONTEXT_GUEST 1 #define CONTEXT_GUEST_END 2 -#define VSID_REAL 0xfffffffffff00000 -#define VSID_REAL_DR 0xffffffffffe00000 -#define VSID_REAL_IR 0xffffffffffd00000 -#define VSID_BAT 0xffffffffffc00000 -#define VSID_PR 0x8000000000000000 +#define VSID_REAL_DR 0x7ffffffffff00000ULL +#define VSID_REAL_IR 0x7fffffffffe00000ULL +#define VSID_SPLIT_MASK 0x7fffffffffe00000ULL +#define VSID_REAL 0x7fffffffffc00000ULL +#define VSID_BAT 0x7fffffffffb00000ULL +#define VSID_PR 0x8000000000000000ULL extern void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 ea, u64 ea_mask); extern void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 vp, u64 vp_mask); @@ -114,11 +117,13 @@ extern int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte); extern int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr); extern void kvmppc_mmu_flush_segments(struct kvm_vcpu *vcpu); extern struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data); -extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, bool data); -extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr); +extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); +extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); +extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); +extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); extern u32 kvmppc_trampoline_lowmem; extern u32 kvmppc_trampoline_enter; @@ -126,6 +131,8 @@ extern void kvmppc_rmcall(ulong srr0, ulong srr1); extern void kvmppc_load_up_fpu(void); extern void kvmppc_load_up_altivec(void); extern void kvmppc_load_up_vsx(void); +extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); +extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu) { @@ -141,6 +148,11 @@ static inline ulong dsisr(void) extern void kvm_return_point(void); +/* Magic register values loaded into r3 and r4 before the 'sc' assembly + * instruction for the OSI hypercalls */ +#define OSI_SC_MAGIC_R3 0x113724FA +#define OSI_SC_MAGIC_R4 0x77810F9B + #define INS_DCBZ 0x7c0007ec #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/include/asm/kvm_fpu.h b/arch/powerpc/include/asm/kvm_fpu.h new file mode 100644 index 0000000..94f05de --- /dev/null +++ b/arch/powerpc/include/asm/kvm_fpu.h @@ -0,0 +1,85 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright Novell Inc. 2010 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#ifndef __ASM_KVM_FPU_H__ +#define __ASM_KVM_FPU_H__ + +#include <linux/types.h> + +extern void fps_fres(struct thread_struct *t, u32 *dst, u32 *src1); +extern void fps_frsqrte(struct thread_struct *t, u32 *dst, u32 *src1); +extern void fps_fsqrts(struct thread_struct *t, u32 *dst, u32 *src1); + +extern void fps_fadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); +extern void fps_fdivs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); +extern void fps_fmuls(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); +extern void fps_fsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2); + +extern void fps_fmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, + u32 *src3); +extern void fps_fmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, + u32 *src3); +extern void fps_fnmadds(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, + u32 *src3); +extern void fps_fnmsubs(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, + u32 *src3); +extern void fps_fsel(struct thread_struct *t, u32 *dst, u32 *src1, u32 *src2, + u32 *src3); + +#define FPD_ONE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ + u64 *dst, u64 *src1); +#define FPD_TWO_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ + u64 *dst, u64 *src1, u64 *src2); +#define FPD_THREE_IN(name) extern void fpd_ ## name(u64 *fpscr, u32 *cr, \ + u64 *dst, u64 *src1, u64 *src2, u64 *src3); + +extern void fpd_fcmpu(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); +extern void fpd_fcmpo(u64 *fpscr, u32 *cr, u64 *src1, u64 *src2); + +FPD_ONE_IN(fsqrts) +FPD_ONE_IN(frsqrtes) +FPD_ONE_IN(fres) +FPD_ONE_IN(frsp) +FPD_ONE_IN(fctiw) +FPD_ONE_IN(fctiwz) +FPD_ONE_IN(fsqrt) +FPD_ONE_IN(fre) +FPD_ONE_IN(frsqrte) +FPD_ONE_IN(fneg) +FPD_ONE_IN(fabs) +FPD_TWO_IN(fadds) +FPD_TWO_IN(fsubs) +FPD_TWO_IN(fdivs) +FPD_TWO_IN(fmuls) +FPD_TWO_IN(fcpsgn) +FPD_TWO_IN(fdiv) +FPD_TWO_IN(fadd) +FPD_TWO_IN(fmul) +FPD_TWO_IN(fsub) +FPD_THREE_IN(fmsubs) +FPD_THREE_IN(fmadds) +FPD_THREE_IN(fnmsubs) +FPD_THREE_IN(fnmadds) +FPD_THREE_IN(fsel) +FPD_THREE_IN(fmsub) +FPD_THREE_IN(fmadd) +FPD_THREE_IN(fnmsub) +FPD_THREE_IN(fnmadd) + +#endif diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5e5bae7..5869a48 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -127,9 +127,9 @@ struct kvmppc_pte { u64 eaddr; u64 vpage; u64 raddr; - bool may_read; - bool may_write; - bool may_execute; + bool may_read : 1; + bool may_write : 1; + bool may_execute : 1; }; struct kvmppc_mmu { @@ -175,7 +175,7 @@ struct kvm_vcpu_arch { ulong gpr[32]; u64 fpr[32]; - u32 fpscr; + u64 fpscr; #ifdef CONFIG_ALTIVEC vector128 vr[32]; @@ -186,6 +186,11 @@ struct kvm_vcpu_arch { u64 vsr[32]; #endif +#ifdef CONFIG_PPC_BOOK3S + /* For Gekko paired singles */ + u32 qpr[32]; +#endif + ulong pc; ulong ctr; ulong lr; @@ -255,7 +260,7 @@ struct kvm_vcpu_arch { u32 last_inst; #ifdef CONFIG_PPC64 - ulong fault_dsisr; + u32 fault_dsisr; #endif ulong fault_dear; ulong fault_esr; @@ -265,8 +270,11 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; + u8 mmio_sign_extend; u8 dcr_needed; u8 dcr_is_write; + u8 osi_needed; + u8 osi_enabled; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e264282..6a2464e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -37,6 +37,7 @@ enum emulation_result { EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ + EMULATE_AGAIN, /* something went wrong. go again */ }; extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); @@ -48,8 +49,11 @@ extern void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu); extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int rt, unsigned int bytes, int is_bigendian); +extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, + int is_bigendian); extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, - u32 val, unsigned int bytes, int is_bigendian); + u64 val, unsigned int bytes, int is_bigendian); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); @@ -88,6 +92,8 @@ extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); +extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq); extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int op, int *advance); @@ -99,6 +105,39 @@ extern void kvmppc_booke_exit(void); extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); +/* + * Cuts out inst bits with ordering according to spec. + * That means the leftmost bit is zero. All given bits are included. + */ +static inline u32 kvmppc_get_field(u64 inst, int msb, int lsb) +{ + u32 r; + u32 mask; + + BUG_ON(msb > lsb); + + mask = (1 << (lsb - msb + 1)) - 1; + r = (inst >> (63 - lsb)) & mask; + + return r; +} + +/* + * Replaces inst bits with ordering according to spec. + */ +static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value) +{ + u32 r; + u32 mask; + + BUG_ON(msb > lsb); + + mask = ((1 << (lsb - msb + 1)) - 1) << (63 - lsb); + r = (inst & ~mask) | ((value << (63 - lsb)) & mask); + + return r; +} + #ifdef CONFIG_PPC_BOOK3S /* We assume we're always acting on the current vcpu */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 5572e86..8a69a39 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -293,10 +293,12 @@ #define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ #define HID1_PS (1<<16) /* 750FX PLL selection */ #define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ +#define SPRN_HID2_GEKKO 0x398 /* Gekko HID2 Register */ #define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ #define SPRN_IABR2 0x3FA /* 83xx */ #define SPRN_IBCR 0x135 /* 83xx Insn Breakpoint Control Reg */ #define SPRN_HID4 0x3F4 /* 970 HID4 */ +#define SPRN_HID4_GEKKO 0x3F3 /* Gekko HID4 */ #define SPRN_HID5 0x3F6 /* 970 HID5 */ #define SPRN_HID6 0x3F9 /* BE HID 6 */ #define HID6_LB (0x0F<<12) /* Concurrent Large Page Modes */ @@ -465,6 +467,14 @@ #define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ #define SPRN_XER 0x001 /* Fixed Point Exception Register */ +#define SPRN_MMCR0_GEKKO 0x3B8 /* Gekko Monitor Mode Control Register 0 */ +#define SPRN_MMCR1_GEKKO 0x3BC /* Gekko Monitor Mode Control Register 1 */ +#define SPRN_PMC1_GEKKO 0x3B9 /* Gekko Performance Monitor Control 1 */ +#define SPRN_PMC2_GEKKO 0x3BA /* Gekko Performance Monitor Control 2 */ +#define SPRN_PMC3_GEKKO 0x3BD /* Gekko Performance Monitor Control 3 */ +#define SPRN_PMC4_GEKKO 0x3BE /* Gekko Performance Monitor Control 4 */ +#define SPRN_WPAR_GEKKO 0x399 /* Gekko Write Pipe Address Register */ + #define SPRN_SCOMC 0x114 /* SCOM Access Control */ #define SPRN_SCOMD 0x115 /* SCOM Access DATA */ diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c index ab3e392..bc9f39d 100644 --- a/arch/powerpc/kernel/ppc_ksyms.c +++ b/arch/powerpc/kernel/ppc_ksyms.c @@ -101,6 +101,10 @@ EXPORT_SYMBOL(pci_dram_offset); EXPORT_SYMBOL(start_thread); EXPORT_SYMBOL(kernel_thread); +#ifndef CONFIG_BOOKE +EXPORT_SYMBOL_GPL(cvt_df); +EXPORT_SYMBOL_GPL(cvt_fd); +#endif EXPORT_SYMBOL(giveup_fpu); #ifdef CONFIG_ALTIVEC EXPORT_SYMBOL(giveup_altivec); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 56484d6..eba721e 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -40,6 +40,8 @@ kvm-objs-$(CONFIG_KVM_E500) := $(kvm-e500-objs) kvm-book3s_64-objs := \ $(common-objs-y) \ + fpu.o \ + book3s_paired_singles.o \ book3s.o \ book3s_64_emulate.o \ book3s_64_interrupts.o \ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 25da07f..41c23b6 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -29,6 +29,7 @@ #include <linux/gfp.h> #include <linux/sched.h> #include <linux/vmalloc.h> +#include <linux/highmem.h> #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -36,7 +37,8 @@ /* #define EXIT_DEBUG_SIMPLE */ /* #define DEBUG_EXT */ -static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); +static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, + ulong msr); struct kvm_stats_debugfs_item debugfs_entries[] = { { "exits", VCPU_STAT(sum_exits) }, @@ -133,9 +135,21 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr) if (((vcpu->arch.msr & (MSR_IR|MSR_DR)) != (old_msr & (MSR_IR|MSR_DR))) || (vcpu->arch.msr & MSR_PR) != (old_msr & MSR_PR)) { + bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; + bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; + + /* Flush split mode PTEs */ + if (dr != ir) + kvmppc_mmu_pte_vflush(vcpu, VSID_SPLIT_MASK, + VSID_SPLIT_MASK); + kvmppc_mmu_flush_segments(vcpu); kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc); } + + /* Preload FPU if it's enabled */ + if (vcpu->arch.msr & MSR_FP) + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); } void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) @@ -218,6 +232,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); } +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL); +} + int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; @@ -337,6 +357,10 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) !strcmp(cur_cpu_spec->platform, "ppc970")) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; + /* Cell performs badly if MSR_FEx are set. So let's hope nobody + really needs them in a VM on Cell and force disable them. */ + if (!strcmp(cur_cpu_spec->platform, "ppc-cell-be")) + to_book3s(vcpu)->msr_mask &= ~(MSR_FE0 | MSR_FE1); } /* Book3s_32 CPUs always have 32 bytes cache line size, which Linux assumes. To @@ -350,34 +374,29 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) */ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { - bool touched = false; - hva_t hpage; + struct page *hpage; + u64 hpage_offset; u32 *page; int i; - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) + hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (is_error_page(hpage)) return; - hpage |= pte->raddr & ~PAGE_MASK; - hpage &= ~0xFFFULL; - - page = vmalloc(HW_PAGE_SIZE); - - if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE)) - goto out; + hpage_offset = pte->raddr & ~PAGE_MASK; + hpage_offset &= ~0xFFFULL; + hpage_offset /= 4; - for (i=0; i < HW_PAGE_SIZE / 4; i++) - if ((page[i] & 0xff0007ff) == INS_DCBZ) { - page[i] &= 0xfffffff7; // reserved instruction, so we trap - touched = true; - } + get_page(hpage); + page = kmap_atomic(hpage, KM_USER0); - if (touched) - copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE); + /* patch dcbz into reserved instruction, so we trap */ + for (i=hpage_offset; i < hpage_offset + (HW_PAGE_SIZE / 4); i++) + if ((page[i] & 0xff0007ff) == INS_DCBZ) + page[i] &= 0xfffffff7; -out: - vfree(page); + kunmap_atomic(page, KM_USER0); + put_page(hpage); } static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, @@ -391,15 +410,7 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, } else { pte->eaddr = eaddr; pte->raddr = eaddr & 0xffffffff; - pte->vpage = eaddr >> 12; - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { - case 0: - pte->vpage |= VSID_REAL; - case MSR_DR: - pte->vpage |= VSID_REAL_DR; - case MSR_IR: - pte->vpage |= VSID_REAL_IR; - } + pte->vpage = VSID_REAL | eaddr >> 12; pte->may_read = true; pte->may_write = true; pte->may_execute = true; @@ -434,55 +445,55 @@ err: return kvmppc_bad_hva(); } -int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr) +int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) { struct kvmppc_pte pte; - hva_t hva = eaddr; vcpu->stat.st++; - if (kvmppc_xlate(vcpu, eaddr, false, &pte)) - goto err; + if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) + return -ENOENT; - hva = kvmppc_pte_to_hva(vcpu, &pte, false); - if (kvm_is_error_hva(hva)) - goto err; + *eaddr = pte.raddr; - if (copy_to_user((void __user *)hva, ptr, size)) { - printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", hva); - goto err; - } + if (!pte.may_write) + return -EPERM; - return 0; + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; -err: - return -ENOENT; + return EMULATE_DONE; } -int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr, +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; - hva_t hva = eaddr; + hva_t hva = *eaddr; vcpu->stat.ld++; - if (kvmppc_xlate(vcpu, eaddr, data, &pte)) - goto err; + if (kvmppc_xlate(vcpu, *eaddr, data, &pte)) + goto nopte; + + *eaddr = pte.raddr; hva = kvmppc_pte_to_hva(vcpu, &pte, true); if (kvm_is_error_hva(hva)) - goto err; + goto mmio; if (copy_from_user(ptr, (void __user *)hva, size)) { printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto err; + goto mmio; } - return 0; + return EMULATE_DONE; -err: +nopte: return -ENOENT; +mmio: + return EMULATE_DO_MMIO; } static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -499,12 +510,10 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, int page_found = 0; struct kvmppc_pte pte; bool is_mmio = false; + bool dr = (vcpu->arch.msr & MSR_DR) ? true : false; + bool ir = (vcpu->arch.msr & MSR_IR) ? true : false; - if ( vec == BOOK3S_INTERRUPT_DATA_STORAGE ) { - relocated = (vcpu->arch.msr & MSR_DR); - } else { - relocated = (vcpu->arch.msr & MSR_IR); - } + relocated = data ? dr : ir; /* Resolve real address if translation turned on */ if (relocated) { @@ -516,14 +525,18 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.raddr = eaddr & 0xffffffff; pte.eaddr = eaddr; pte.vpage = eaddr >> 12; - switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { - case 0: - pte.vpage |= VSID_REAL; - case MSR_DR: - pte.vpage |= VSID_REAL_DR; - case MSR_IR: - pte.vpage |= VSID_REAL_IR; - } + } + + switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) { + case 0: + pte.vpage |= VSID_REAL; + break; + case MSR_DR: + pte.vpage |= VSID_REAL_DR; + break; + case MSR_IR: + pte.vpage |= VSID_REAL_IR; + break; } if (vcpu->arch.mmu.is_dcbz32(vcpu) && @@ -583,11 +596,13 @@ static inline int get_fpr_index(int i) } /* Give up external provider (FPU, Altivec, VSX) */ -static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) +void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) { struct thread_struct *t = ¤t->thread; u64 *vcpu_fpr = vcpu->arch.fpr; +#ifdef CONFIG_VSX u64 *vcpu_vsx = vcpu->arch.vsr; +#endif u64 *thread_fpr = (u64*)t->fpr; int i; @@ -629,21 +644,64 @@ static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr) kvmppc_recalc_shadow_msr(vcpu); } +static int kvmppc_read_inst(struct kvm_vcpu *vcpu) +{ + ulong srr0 = vcpu->arch.pc; + int ret; + + ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &vcpu->arch.last_inst, false); + if (ret == -ENOENT) { + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 33, 1); + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 34, 36, 0); + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); + return EMULATE_AGAIN; + } + + return EMULATE_DONE; +} + +static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) +{ + + /* Need to do paired single emulation? */ + if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) + return EMULATE_DONE; + + /* Read out the instruction */ + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) + /* Need to emulate */ + return EMULATE_FAIL; + + return EMULATE_AGAIN; +} + /* Handle external providers (FPU, Altivec, VSX) */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) { struct thread_struct *t = ¤t->thread; u64 *vcpu_fpr = vcpu->arch.fpr; +#ifdef CONFIG_VSX u64 *vcpu_vsx = vcpu->arch.vsr; +#endif u64 *thread_fpr = (u64*)t->fpr; int i; + /* When we have paired singles, we emulate in software */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) + return RESUME_GUEST; + if (!(vcpu->arch.msr & msr)) { kvmppc_book3s_queue_irqprio(vcpu, exit_nr); return RESUME_GUEST; } + /* We already own the ext */ + if (vcpu->arch.guest_owned_ext & msr) { + return RESUME_GUEST; + } + #ifdef DEBUG_EXT printk(KERN_INFO "Loading up ext 0x%lx\n", msr); #endif @@ -720,6 +778,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, * that no guest that needs the dcbz hack does NX. */ kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL); + r = RESUME_GUEST; } else { vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000; kvmppc_book3s_queue_irqprio(vcpu, exit_nr); @@ -769,6 +828,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, enum emulation_result er; ulong flags; +program_interrupt: flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; if (vcpu->arch.msr & MSR_PR) { @@ -789,33 +849,80 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, case EMULATE_DONE: r = RESUME_GUEST_NV; break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + r = RESUME_HOST_NV; + break; default: BUG(); } break; } case BOOK3S_INTERRUPT_SYSCALL: -#ifdef EXIT_DEBUG - printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0)); -#endif - vcpu->stat.syscall_exits++; - kvmppc_book3s_queue_irqprio(vcpu, exit_nr); - r = RESUME_GUEST; + // XXX make user settable + if (vcpu->arch.osi_enabled && + (((u32)kvmppc_get_gpr(vcpu, 3)) == OSI_SC_MAGIC_R3) && + (((u32)kvmppc_get_gpr(vcpu, 4)) == OSI_SC_MAGIC_R4)) { + u64 *gprs = run->osi.gprs; + int i; + + run->exit_reason = KVM_EXIT_OSI; + for (i = 0; i < 32; i++) + gprs[i] = kvmppc_get_gpr(vcpu, i); + vcpu->arch.osi_needed = 1; + r = RESUME_HOST_NV; + + } else { + vcpu->stat.syscall_exits++; + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + r = RESUME_GUEST; + } break; case BOOK3S_INTERRUPT_FP_UNAVAIL: - r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP); - break; case BOOK3S_INTERRUPT_ALTIVEC: - r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC); - break; case BOOK3S_INTERRUPT_VSX: - r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX); + { + int ext_msr = 0; + + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; + case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; + case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + } + + switch (kvmppc_check_ext(vcpu, exit_nr)) { + case EMULATE_DONE: + /* everything ok - let's enable the ext */ + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + break; + case EMULATE_FAIL: + /* we need to emulate this instruction */ + goto program_interrupt; + break; + default: + /* nothing to worry about - go again */ + break; + } + break; + } + case BOOK3S_INTERRUPT_ALIGNMENT: + if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { + to_book3s(vcpu)->dsisr = kvmppc_alignment_dsisr(vcpu, + vcpu->arch.last_inst); + vcpu->arch.dear = kvmppc_alignment_dar(vcpu, + vcpu->arch.last_inst); + kvmppc_book3s_queue_irqprio(vcpu, exit_nr); + } + r = RESUME_GUEST; break; case BOOK3S_INTERRUPT_MACHINE_CHECK: case BOOK3S_INTERRUPT_TRACE: @@ -867,6 +974,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + regs->pc = vcpu->arch.pc; regs->cr = kvmppc_get_cr(vcpu); regs->ctr = vcpu->arch.ctr; @@ -887,6 +996,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); + vcpu_put(vcpu); + return 0; } @@ -894,6 +1005,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; + vcpu_load(vcpu); + vcpu->arch.pc = regs->pc; kvmppc_set_cr(vcpu, regs->cr); vcpu->arch.ctr = regs->ctr; @@ -913,6 +1026,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); + vcpu_put(vcpu); + return 0; } @@ -1004,7 +1119,8 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_vcpu *vcpu; ulong ga, ga_end; int is_dirty = 0; - int r, n; + int r; + unsigned long n; mutex_lock(&kvm->slots_lock); @@ -1022,7 +1138,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, kvm_for_each_vcpu(n, vcpu, kvm) kvmppc_mmu_pte_pflush(vcpu, ga, ga_end); - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); memset(memslot->dirty_bitmap, 0, n); } @@ -1043,12 +1159,12 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) struct kvm_vcpu *vcpu; int err; - vcpu_book3s = (struct kvmppc_vcpu_book3s *)__get_free_pages( GFP_KERNEL | __GFP_ZERO, - get_order(sizeof(struct kvmppc_vcpu_book3s))); + vcpu_book3s = vmalloc(sizeof(struct kvmppc_vcpu_book3s)); if (!vcpu_book3s) { err = -ENOMEM; goto out; } + memset(vcpu_book3s, 0, sizeof(struct kvmppc_vcpu_book3s)); vcpu = &vcpu_book3s->vcpu; err = kvm_vcpu_init(vcpu, kvm, id); @@ -1082,7 +1198,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id) return vcpu; free_vcpu: - free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); + vfree(vcpu_book3s); out: return ERR_PTR(err); } @@ -1093,7 +1209,7 @@ void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu) __destroy_context(vcpu_book3s->context_id); kvm_vcpu_uninit(vcpu); - free_pages((long)vcpu_book3s, get_order(sizeof(struct kvmppc_vcpu_book3s))); + vfree(vcpu_book3s); } extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); @@ -1101,8 +1217,12 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) { int ret; struct thread_struct ext_bkp; +#ifdef CONFIG_ALTIVEC bool save_vec = current->thread.used_vr; +#endif +#ifdef CONFIG_VSX bool save_vsx = current->thread.used_vsr; +#endif ulong ext_msr; /* No need to go into the guest when all we do is going out */ @@ -1143,6 +1263,10 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) /* XXX we get called with irq disabled - change that! */ local_irq_enable(); + /* Preload FPU if it's enabled */ + if (vcpu->arch.msr & MSR_FP) + kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP); + ret = __kvmppc_vcpu_entry(kvm_run, vcpu); local_irq_disable(); diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index faf99f2..7071e22 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -37,7 +37,7 @@ #define dprintk(X...) do { } while(0) #endif -#ifdef DEBUG_PTE +#ifdef DEBUG_MMU_PTE #define dprintk_pte(X...) printk(KERN_INFO X) #else #define dprintk_pte(X...) do { } while(0) @@ -57,6 +57,8 @@ static inline bool check_debug_ip(struct kvm_vcpu *vcpu) static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *pte, bool data); +static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, + u64 *vsid); static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t eaddr) { @@ -66,13 +68,14 @@ static struct kvmppc_sr *find_sr(struct kvmppc_vcpu_book3s *vcpu_book3s, gva_t e static u64 kvmppc_mmu_book3s_32_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr, bool data) { - struct kvmppc_sr *sre = find_sr(to_book3s(vcpu), eaddr); + u64 vsid; struct kvmppc_pte pte; if (!kvmppc_mmu_book3s_32_xlate_bat(vcpu, eaddr, &pte, data)) return pte.vpage; - return (((u64)eaddr >> 12) & 0xffff) | (((u64)sre->vsid) << 16); + kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); + return (((u64)eaddr >> 12) & 0xffff) | (vsid << 16); } static void kvmppc_mmu_book3s_32_reset_msr(struct kvm_vcpu *vcpu) @@ -142,8 +145,13 @@ static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr, bat->bepi_mask); } if ((eaddr & bat->bepi_mask) == bat->bepi) { + u64 vsid; + kvmppc_mmu_book3s_32_esid_to_vsid(vcpu, + eaddr >> SID_SHIFT, &vsid); + vsid <<= 16; + pte->vpage = (((u64)eaddr >> 12) & 0xffff) | vsid; + pte->raddr = bat->brpn | (eaddr & ~bat->bepi_mask); - pte->vpage = (eaddr >> 12) | VSID_BAT; pte->may_read = bat->pp; pte->may_write = bat->pp > 1; pte->may_execute = true; @@ -302,6 +310,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, /* And then put in the new SR */ sre->raw = value; sre->vsid = (value & 0x0fffffff); + sre->valid = (value & 0x80000000) ? false : true; sre->Ks = (value & 0x40000000) ? true : false; sre->Kp = (value & 0x20000000) ? true : false; sre->nx = (value & 0x10000000) ? true : false; @@ -312,7 +321,7 @@ static void kvmppc_mmu_book3s_32_mtsrin(struct kvm_vcpu *vcpu, u32 srnum, static void kvmppc_mmu_book3s_32_tlbie(struct kvm_vcpu *vcpu, ulong ea, bool large) { - kvmppc_mmu_pte_flush(vcpu, ea, ~0xFFFULL); + kvmppc_mmu_pte_flush(vcpu, ea, 0x0FFFF000); } static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, @@ -333,15 +342,22 @@ static int kvmppc_mmu_book3s_32_esid_to_vsid(struct kvm_vcpu *vcpu, u64 esid, break; case MSR_DR|MSR_IR: { - ulong ea; - ea = esid << SID_SHIFT; - *vsid = find_sr(to_book3s(vcpu), ea)->vsid; + ulong ea = esid << SID_SHIFT; + struct kvmppc_sr *sr = find_sr(to_book3s(vcpu), ea); + + if (!sr->valid) + return -1; + + *vsid = sr->vsid; break; } default: BUG(); } + if (vcpu->arch.msr & MSR_PR) + *vsid |= VSID_PR; + return 0; } diff --git a/arch/powerpc/kvm/book3s_64_emulate.c b/arch/powerpc/kvm/book3s_64_emulate.c index 2b0ee7e..8f50776 100644 --- a/arch/powerpc/kvm/book3s_64_emulate.c +++ b/arch/powerpc/kvm/book3s_64_emulate.c @@ -28,13 +28,16 @@ #define OP_31_XOP_MFMSR 83 #define OP_31_XOP_MTMSR 146 #define OP_31_XOP_MTMSRD 178 +#define OP_31_XOP_MTSR 210 #define OP_31_XOP_MTSRIN 242 #define OP_31_XOP_TLBIEL 274 #define OP_31_XOP_TLBIE 306 #define OP_31_XOP_SLBMTE 402 #define OP_31_XOP_SLBIE 434 #define OP_31_XOP_SLBIA 498 +#define OP_31_XOP_MFSR 595 #define OP_31_XOP_MFSRIN 659 +#define OP_31_XOP_DCBA 758 #define OP_31_XOP_SLBMFEV 851 #define OP_31_XOP_EIOIO 854 #define OP_31_XOP_SLBMFEE 915 @@ -42,6 +45,20 @@ /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */ #define OP_31_XOP_DCBZ 1010 +#define OP_LFS 48 +#define OP_LFD 50 +#define OP_STFS 52 +#define OP_STFD 54 + +#define SPRN_GQR0 912 +#define SPRN_GQR1 913 +#define SPRN_GQR2 914 +#define SPRN_GQR3 915 +#define SPRN_GQR4 916 +#define SPRN_GQR5 917 +#define SPRN_GQR6 918 +#define SPRN_GQR7 919 + int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -80,6 +97,18 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, case OP_31_XOP_MTMSR: kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst))); break; + case OP_31_XOP_MFSR: + { + int srnum; + + srnum = kvmppc_get_field(inst, 12 + 32, 15 + 32); + if (vcpu->arch.mmu.mfsrin) { + u32 sr; + sr = vcpu->arch.mmu.mfsrin(vcpu, srnum); + kvmppc_set_gpr(vcpu, get_rt(inst), sr); + } + break; + } case OP_31_XOP_MFSRIN: { int srnum; @@ -92,6 +121,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, } break; } + case OP_31_XOP_MTSR: + vcpu->arch.mmu.mtsrin(vcpu, + (inst >> 16) & 0xf, + kvmppc_get_gpr(vcpu, get_rs(inst))); + break; case OP_31_XOP_MTSRIN: vcpu->arch.mmu.mtsrin(vcpu, (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf, @@ -150,12 +184,17 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_set_gpr(vcpu, get_rt(inst), t); } break; + case OP_31_XOP_DCBA: + /* Gets treated as NOP */ + break; case OP_31_XOP_DCBZ: { ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst)); ulong ra = 0; - ulong addr; + ulong addr, vaddr; u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + u32 dsisr; + int r; if (get_ra(inst)) ra = kvmppc_get_gpr(vcpu, get_ra(inst)); @@ -163,15 +202,25 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, addr = (ra + rb) & ~31ULL; if (!(vcpu->arch.msr & MSR_SF)) addr &= 0xffffffff; + vaddr = addr; + + r = kvmppc_st(vcpu, &addr, 32, zeros, true); + if ((r == -ENOENT) || (r == -EPERM)) { + *advance = 0; + vcpu->arch.dear = vaddr; + vcpu->arch.fault_dear = vaddr; + + dsisr = DSISR_ISSTORE; + if (r == -ENOENT) + dsisr |= DSISR_NOHPTE; + else if (r == -EPERM) + dsisr |= DSISR_PROTFAULT; + + to_book3s(vcpu)->dsisr = dsisr; + vcpu->arch.fault_dsisr = dsisr; - if (kvmppc_st(vcpu, addr, 32, zeros)) { - vcpu->arch.dear = addr; - vcpu->arch.fault_dear = addr; - to_book3s(vcpu)->dsisr = DSISR_PROTFAULT | - DSISR_ISSTORE; kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); - kvmppc_mmu_pte_flush(vcpu, addr, ~0xFFFULL); } break; @@ -184,6 +233,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = EMULATE_FAIL; } + if (emulated == EMULATE_FAIL) + emulated = kvmppc_emulate_paired_single(run, vcpu); + return emulated; } @@ -207,6 +259,34 @@ void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, } } +static u32 kvmppc_read_bat(struct kvm_vcpu *vcpu, int sprn) +{ + struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); + struct kvmppc_bat *bat; + + switch (sprn) { + case SPRN_IBAT0U ... SPRN_IBAT3L: + bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; + break; + case SPRN_IBAT4U ... SPRN_IBAT7L: + bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; + break; + case SPRN_DBAT0U ... SPRN_DBAT3L: + bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; + break; + case SPRN_DBAT4U ... SPRN_DBAT7L: + bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; + break; + default: + BUG(); + } + + if (sprn % 2) + return bat->raw >> 32; + else + return bat->raw; +} + static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) { struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu); @@ -217,13 +297,13 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val) bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT0U) / 2]; break; case SPRN_IBAT4U ... SPRN_IBAT7L: - bat = &vcpu_book3s->ibat[(sprn - SPRN_IBAT4U) / 2]; + bat = &vcpu_book3s->ibat[4 + ((sprn - SPRN_IBAT4U) / 2)]; break; case SPRN_DBAT0U ... SPRN_DBAT3L: bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT0U) / 2]; break; case SPRN_DBAT4U ... SPRN_DBAT7L: - bat = &vcpu_book3s->dbat[(sprn - SPRN_DBAT4U) / 2]; + bat = &vcpu_book3s->dbat[4 + ((sprn - SPRN_DBAT4U) / 2)]; break; default: BUG(); @@ -258,6 +338,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) /* BAT writes happen so rarely that we're ok to flush * everything here */ kvmppc_mmu_pte_flush(vcpu, 0, 0); + kvmppc_mmu_flush_segments(vcpu); break; case SPRN_HID0: to_book3s(vcpu)->hid[0] = spr_val; @@ -268,7 +349,29 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_HID2: to_book3s(vcpu)->hid[2] = spr_val; break; + case SPRN_HID2_GEKKO: + to_book3s(vcpu)->hid[2] = spr_val; + /* HID2.PSE controls paired single on gekko */ + switch (vcpu->arch.pvr) { + case 0x00080200: /* lonestar 2.0 */ + case 0x00088202: /* lonestar 2.2 */ + case 0x70000100: /* gekko 1.0 */ + case 0x00080100: /* gekko 2.0 */ + case 0x00083203: /* gekko 2.3a */ + case 0x00083213: /* gekko 2.3b */ + case 0x00083204: /* gekko 2.4 */ + case 0x00083214: /* gekko 2.4e (8SE) - retail HW2 */ + if (spr_val & (1 << 29)) { /* HID2.PSE */ + vcpu->arch.hflags |= BOOK3S_HFLAG_PAIRED_SINGLE; + kvmppc_giveup_ext(vcpu, MSR_FP); + } else { + vcpu->arch.hflags &= ~BOOK3S_HFLAG_PAIRED_SINGLE; + } + break; + } + break; case SPRN_HID4: + case SPRN_HID4_GEKKO: to_book3s(vcpu)->hid[4] = spr_val; break; case SPRN_HID5: @@ -278,12 +381,30 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) (mfmsr() & MSR_HV)) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; break; + case SPRN_GQR0: + case SPRN_GQR1: + case SPRN_GQR2: + case SPRN_GQR3: + case SPRN_GQR4: + case SPRN_GQR5: + case SPRN_GQR6: + case SPRN_GQR7: + to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; + break; case SPRN_ICTC: case SPRN_THRM1: case SPRN_THRM2: case SPRN_THRM3: case SPRN_CTRLF: case SPRN_CTRLT: + case SPRN_L2CR: + case SPRN_MMCR0_GEKKO: + case SPRN_MMCR1_GEKKO: + case SPRN_PMC1_GEKKO: + case SPRN_PMC2_GEKKO: + case SPRN_PMC3_GEKKO: + case SPRN_PMC4_GEKKO: + case SPRN_WPAR_GEKKO: break; default: printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn); @@ -301,6 +422,12 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) int emulated = EMULATE_DONE; switch (sprn) { + case SPRN_IBAT0U ... SPRN_IBAT3L: + case SPRN_IBAT4U ... SPRN_IBAT7L: + case SPRN_DBAT0U ... SPRN_DBAT3L: + case SPRN_DBAT4U ... SPRN_DBAT7L: + kvmppc_set_gpr(vcpu, rt, kvmppc_read_bat(vcpu, sprn)); + break; case SPRN_SDR1: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1); break; @@ -320,19 +447,40 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]); break; case SPRN_HID2: + case SPRN_HID2_GEKKO: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]); break; case SPRN_HID4: + case SPRN_HID4_GEKKO: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]); break; case SPRN_HID5: kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]); break; + case SPRN_GQR0: + case SPRN_GQR1: + case SPRN_GQR2: + case SPRN_GQR3: + case SPRN_GQR4: + case SPRN_GQR5: + case SPRN_GQR6: + case SPRN_GQR7: + kvmppc_set_gpr(vcpu, rt, + to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]); + break; case SPRN_THRM1: case SPRN_THRM2: case SPRN_THRM3: case SPRN_CTRLF: case SPRN_CTRLT: + case SPRN_L2CR: + case SPRN_MMCR0_GEKKO: + case SPRN_MMCR1_GEKKO: + case SPRN_PMC1_GEKKO: + case SPRN_PMC2_GEKKO: + case SPRN_PMC3_GEKKO: + case SPRN_PMC4_GEKKO: + case SPRN_WPAR_GEKKO: kvmppc_set_gpr(vcpu, rt, 0); break; default: @@ -346,3 +494,73 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) return emulated; } +u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst) +{ + u32 dsisr = 0; + + /* + * This is what the spec says about DSISR bits (not mentioned = 0): + * + * 12:13 [DS] Set to bits 30:31 + * 15:16 [X] Set to bits 29:30 + * 17 [X] Set to bit 25 + * [D/DS] Set to bit 5 + * 18:21 [X] Set to bits 21:24 + * [D/DS] Set to bits 1:4 + * 22:26 Set to bits 6:10 (RT/RS/FRT/FRS) + * 27:31 Set to bits 11:15 (RA) + */ + + switch (get_op(inst)) { + /* D-form */ + case OP_LFS: + case OP_LFD: + case OP_STFD: + case OP_STFS: + dsisr |= (inst >> 12) & 0x4000; /* bit 17 */ + dsisr |= (inst >> 17) & 0x3c00; /* bits 18:21 */ + break; + /* X-form */ + case 31: + dsisr |= (inst << 14) & 0x18000; /* bits 15:16 */ + dsisr |= (inst << 8) & 0x04000; /* bit 17 */ + dsisr |= (inst << 3) & 0x03c00; /* bits 18:21 */ + break; + default: + printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); + break; + } + + dsisr |= (inst >> 16) & 0x03ff; /* bits 22:31 */ + + return dsisr; +} + +ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst) +{ + ulong dar = 0; + ulong ra; + + switch (get_op(inst)) { + case OP_LFS: + case OP_LFD: + case OP_STFD: + case OP_STFS: + ra = get_ra(inst); + if (ra) + dar = kvmppc_get_gpr(vcpu, ra); + dar += (s32)((s16)inst); + break; + case 31: + ra = get_ra(inst); + if (ra) + dar = kvmppc_get_gpr(vcpu, ra); + dar += kvmppc_get_gpr(vcpu, get_rb(inst)); + break; + default: + printk(KERN_INFO "KVM: Unaligned instruction 0x%x\n", inst); + break; + } + + return dar; +} diff --git a/arch/powerpc/kvm/book3s_64_interrupts.S b/arch/powerpc/kvm/book3s_64_interrupts.S index c1584d0..faca876 100644 --- a/arch/powerpc/kvm/book3s_64_interrupts.S +++ b/arch/powerpc/kvm/book3s_64_interrupts.S @@ -171,7 +171,7 @@ kvmppc_handler_highmem: std r3, VCPU_PC(r7) std r4, VCPU_SHADOW_SRR1(r7) std r5, VCPU_FAULT_DEAR(r7) - std r6, VCPU_FAULT_DSISR(r7) + stw r6, VCPU_FAULT_DSISR(r7) ld r5, VCPU_HFLAGS(r7) rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index f2899b2..a01e9c5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -257,16 +257,9 @@ map_again: if (ret < 0) { /* If we couldn't map a primary PTE, try a secondary */ -#ifdef USE_SECONDARY hash = ~hash; + vflags ^= HPTE_V_SECONDARY; attempt++; - if (attempt % 2) - vflags = HPTE_V_SECONDARY; - else - vflags = 0; -#else - attempt = 2; -#endif goto map_again; } else { int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu); @@ -277,6 +270,13 @@ map_again: (rflags & HPTE_R_N) ? '-' : 'x', orig_pte->eaddr, hpteg, va, orig_pte->vpage, hpaddr); + /* The ppc_md code may give us a secondary entry even though we + asked for a primary. Fix up. */ + if ((ret & _PTEIDX_SECONDARY) && !(vflags & HPTE_V_SECONDARY)) { + hash = ~hash; + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + } + pte->slot = hpteg + (ret & 7); pte->host_va = va; pte->pte = *orig_pte; diff --git a/arch/powerpc/kvm/book3s_64_rmhandlers.S b/arch/powerpc/kvm/book3s_64_rmhandlers.S index c83c60a..bd08535 100644 --- a/arch/powerpc/kvm/book3s_64_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_64_rmhandlers.S @@ -164,24 +164,15 @@ _GLOBAL(kvmppc_rmcall) #define define_load_up(what) \ \ _GLOBAL(kvmppc_load_up_ ## what); \ - subi r1, r1, INT_FRAME_SIZE; \ + stdu r1, -INT_FRAME_SIZE(r1); \ mflr r3; \ std r3, _LINK(r1); \ - mfmsr r4; \ - std r31, GPR3(r1); \ - mr r31, r4; \ - li r5, MSR_DR; \ - oris r5, r5, MSR_EE@h; \ - andc r4, r4, r5; \ - mtmsr r4; \ \ bl .load_up_ ## what; \ \ - mtmsr r31; \ ld r3, _LINK(r1); \ - ld r31, GPR3(r1); \ - addi r1, r1, INT_FRAME_SIZE; \ mtlr r3; \ + addi r1, r1, INT_FRAME_SIZE; \ blr define_load_up(fpu) diff --git a/arch/powerpc/kvm/book3s_64_slb.S b/arch/powerpc/kvm/book3s_64_slb.S index 35b76272..0919679 100644 --- a/arch/powerpc/kvm/book3s_64_slb.S +++ b/arch/powerpc/kvm/book3s_64_slb.S @@ -145,7 +145,7 @@ slb_do_enter: lwz r11, (PACA_KVM_CR)(r13) mtcr r11 - ld r11, (PACA_KVM_XER)(r13) + lwz r11, (PACA_KVM_XER)(r13) mtxer r11 ld r11, (PACA_KVM_R11)(r13) diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c new file mode 100644 index 0000000..7a27bac --- /dev/null +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -0,0 +1,1289 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright Novell Inc 2010 + * + * Authors: Alexander Graf <agraf@suse.de> + */ + +#include <asm/kvm.h> +#include <asm/kvm_ppc.h> +#include <asm/disassemble.h> +#include <asm/kvm_book3s.h> +#include <asm/kvm_fpu.h> +#include <asm/reg.h> +#include <asm/cacheflush.h> +#include <linux/vmalloc.h> + +/* #define DEBUG */ + +#ifdef DEBUG +#define dprintk printk +#else +#define dprintk(...) do { } while(0); +#endif + +#define OP_LFS 48 +#define OP_LFSU 49 +#define OP_LFD 50 +#define OP_LFDU 51 +#define OP_STFS 52 +#define OP_STFSU 53 +#define OP_STFD 54 +#define OP_STFDU 55 +#define OP_PSQ_L 56 +#define OP_PSQ_LU 57 +#define OP_PSQ_ST 60 +#define OP_PSQ_STU 61 + +#define OP_31_LFSX 535 +#define OP_31_LFSUX 567 +#define OP_31_LFDX 599 +#define OP_31_LFDUX 631 +#define OP_31_STFSX 663 +#define OP_31_STFSUX 695 +#define OP_31_STFX 727 +#define OP_31_STFUX 759 +#define OP_31_LWIZX 887 +#define OP_31_STFIWX 983 + +#define OP_59_FADDS 21 +#define OP_59_FSUBS 20 +#define OP_59_FSQRTS 22 +#define OP_59_FDIVS 18 +#define OP_59_FRES 24 +#define OP_59_FMULS 25 +#define OP_59_FRSQRTES 26 +#define OP_59_FMSUBS 28 +#define OP_59_FMADDS 29 +#define OP_59_FNMSUBS 30 +#define OP_59_FNMADDS 31 + +#define OP_63_FCMPU 0 +#define OP_63_FCPSGN 8 +#define OP_63_FRSP 12 +#define OP_63_FCTIW 14 +#define OP_63_FCTIWZ 15 +#define OP_63_FDIV 18 +#define OP_63_FADD 21 +#define OP_63_FSQRT 22 +#define OP_63_FSEL 23 +#define OP_63_FRE 24 +#define OP_63_FMUL 25 +#define OP_63_FRSQRTE 26 +#define OP_63_FMSUB 28 +#define OP_63_FMADD 29 +#define OP_63_FNMSUB 30 +#define OP_63_FNMADD 31 +#define OP_63_FCMPO 32 +#define OP_63_MTFSB1 38 // XXX +#define OP_63_FSUB 20 +#define OP_63_FNEG 40 +#define OP_63_MCRFS 64 +#define OP_63_MTFSB0 70 +#define OP_63_FMR 72 +#define OP_63_MTFSFI 134 +#define OP_63_FABS 264 +#define OP_63_MFFS 583 +#define OP_63_MTFSF 711 + +#define OP_4X_PS_CMPU0 0 +#define OP_4X_PSQ_LX 6 +#define OP_4XW_PSQ_STX 7 +#define OP_4A_PS_SUM0 10 +#define OP_4A_PS_SUM1 11 +#define OP_4A_PS_MULS0 12 +#define OP_4A_PS_MULS1 13 +#define OP_4A_PS_MADDS0 14 +#define OP_4A_PS_MADDS1 15 +#define OP_4A_PS_DIV 18 +#define OP_4A_PS_SUB 20 +#define OP_4A_PS_ADD 21 +#define OP_4A_PS_SEL 23 +#define OP_4A_PS_RES 24 +#define OP_4A_PS_MUL 25 +#define OP_4A_PS_RSQRTE 26 +#define OP_4A_PS_MSUB 28 +#define OP_4A_PS_MADD 29 +#define OP_4A_PS_NMSUB 30 +#define OP_4A_PS_NMADD 31 +#define OP_4X_PS_CMPO0 32 +#define OP_4X_PSQ_LUX 38 +#define OP_4XW_PSQ_STUX 39 +#define OP_4X_PS_NEG 40 +#define OP_4X_PS_CMPU1 64 +#define OP_4X_PS_MR 72 +#define OP_4X_PS_CMPO1 96 +#define OP_4X_PS_NABS 136 +#define OP_4X_PS_ABS 264 +#define OP_4X_PS_MERGE00 528 +#define OP_4X_PS_MERGE01 560 +#define OP_4X_PS_MERGE10 592 +#define OP_4X_PS_MERGE11 624 + +#define SCALAR_NONE 0 +#define SCALAR_HIGH (1 << 0) +#define SCALAR_LOW (1 << 1) +#define SCALAR_NO_PS0 (1 << 2) +#define SCALAR_NO_PS1 (1 << 3) + +#define GQR_ST_TYPE_MASK 0x00000007 +#define GQR_ST_TYPE_SHIFT 0 +#define GQR_ST_SCALE_MASK 0x00003f00 +#define GQR_ST_SCALE_SHIFT 8 +#define GQR_LD_TYPE_MASK 0x00070000 +#define GQR_LD_TYPE_SHIFT 16 +#define GQR_LD_SCALE_MASK 0x3f000000 +#define GQR_LD_SCALE_SHIFT 24 + +#define GQR_QUANTIZE_FLOAT 0 +#define GQR_QUANTIZE_U8 4 +#define GQR_QUANTIZE_U16 5 +#define GQR_QUANTIZE_S8 6 +#define GQR_QUANTIZE_S16 7 + +#define FPU_LS_SINGLE 0 +#define FPU_LS_DOUBLE 1 +#define FPU_LS_SINGLE_LOW 2 + +static inline void kvmppc_sync_qpr(struct kvm_vcpu *vcpu, int rt) +{ + struct thread_struct t; + + t.fpscr.val = vcpu->arch.fpscr; + cvt_df((double*)&vcpu->arch.fpr[rt], (float*)&vcpu->arch.qpr[rt], &t); +} + +static void kvmppc_inject_pf(struct kvm_vcpu *vcpu, ulong eaddr, bool is_store) +{ + u64 dsisr; + + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 33, 36, 0); + vcpu->arch.msr = kvmppc_set_field(vcpu->arch.msr, 42, 47, 0); + vcpu->arch.dear = eaddr; + /* Page Fault */ + dsisr = kvmppc_set_field(0, 33, 33, 1); + if (is_store) + to_book3s(vcpu)->dsisr = kvmppc_set_field(dsisr, 38, 38, 1); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); +} + +static int kvmppc_emulate_fpr_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, ulong addr, int ls_type) +{ + int emulated = EMULATE_FAIL; + struct thread_struct t; + int r; + char tmp[8]; + int len = sizeof(u32); + + if (ls_type == FPU_LS_DOUBLE) + len = sizeof(u64); + + t.fpscr.val = vcpu->arch.fpscr; + + /* read from memory */ + r = kvmppc_ld(vcpu, &addr, len, tmp, true); + vcpu->arch.paddr_accessed = addr; + + if (r < 0) { + kvmppc_inject_pf(vcpu, addr, false); + goto done_load; + } else if (r == EMULATE_DO_MMIO) { + emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, len, 1); + goto done_load; + } + + emulated = EMULATE_DONE; + + /* put in registers */ + switch (ls_type) { + case FPU_LS_SINGLE: + cvt_fd((float*)tmp, (double*)&vcpu->arch.fpr[rs], &t); + vcpu->arch.qpr[rs] = *((u32*)tmp); + break; + case FPU_LS_DOUBLE: + vcpu->arch.fpr[rs] = *((u64*)tmp); + break; + } + + dprintk(KERN_INFO "KVM: FPR_LD [0x%llx] at 0x%lx (%d)\n", *(u64*)tmp, + addr, len); + +done_load: + return emulated; +} + +static int kvmppc_emulate_fpr_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, ulong addr, int ls_type) +{ + int emulated = EMULATE_FAIL; + struct thread_struct t; + int r; + char tmp[8]; + u64 val; + int len; + + t.fpscr.val = vcpu->arch.fpscr; + + switch (ls_type) { + case FPU_LS_SINGLE: + cvt_df((double*)&vcpu->arch.fpr[rs], (float*)tmp, &t); + val = *((u32*)tmp); + len = sizeof(u32); + break; + case FPU_LS_SINGLE_LOW: + *((u32*)tmp) = vcpu->arch.fpr[rs]; + val = vcpu->arch.fpr[rs] & 0xffffffff; + len = sizeof(u32); + break; + case FPU_LS_DOUBLE: + *((u64*)tmp) = vcpu->arch.fpr[rs]; + val = vcpu->arch.fpr[rs]; + len = sizeof(u64); + break; + default: + val = 0; + len = 0; + } + + r = kvmppc_st(vcpu, &addr, len, tmp, true); + vcpu->arch.paddr_accessed = addr; + if (r < 0) { + kvmppc_inject_pf(vcpu, addr, true); + } else if (r == EMULATE_DO_MMIO) { + emulated = kvmppc_handle_store(run, vcpu, val, len, 1); + } else { + emulated = EMULATE_DONE; + } + + dprintk(KERN_INFO "KVM: FPR_ST [0x%llx] at 0x%lx (%d)\n", + val, addr, len); + + return emulated; +} + +static int kvmppc_emulate_psq_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, ulong addr, bool w, int i) +{ + int emulated = EMULATE_FAIL; + struct thread_struct t; + int r; + float one = 1.0; + u32 tmp[2]; + + t.fpscr.val = vcpu->arch.fpscr; + + /* read from memory */ + if (w) { + r = kvmppc_ld(vcpu, &addr, sizeof(u32), tmp, true); + memcpy(&tmp[1], &one, sizeof(u32)); + } else { + r = kvmppc_ld(vcpu, &addr, sizeof(u32) * 2, tmp, true); + } + vcpu->arch.paddr_accessed = addr; + if (r < 0) { + kvmppc_inject_pf(vcpu, addr, false); + goto done_load; + } else if ((r == EMULATE_DO_MMIO) && w) { + emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FPR | rs, 4, 1); + vcpu->arch.qpr[rs] = tmp[1]; + goto done_load; + } else if (r == EMULATE_DO_MMIO) { + emulated = kvmppc_handle_load(run, vcpu, KVM_REG_FQPR | rs, 8, 1); + goto done_load; + } + + emulated = EMULATE_DONE; + + /* put in registers */ + cvt_fd((float*)&tmp[0], (double*)&vcpu->arch.fpr[rs], &t); + vcpu->arch.qpr[rs] = tmp[1]; + + dprintk(KERN_INFO "KVM: PSQ_LD [0x%x, 0x%x] at 0x%lx (%d)\n", tmp[0], + tmp[1], addr, w ? 4 : 8); + +done_load: + return emulated; +} + +static int kvmppc_emulate_psq_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + int rs, ulong addr, bool w, int i) +{ + int emulated = EMULATE_FAIL; + struct thread_struct t; + int r; + u32 tmp[2]; + int len = w ? sizeof(u32) : sizeof(u64); + + t.fpscr.val = vcpu->arch.fpscr; + + cvt_df((double*)&vcpu->arch.fpr[rs], (float*)&tmp[0], &t); + tmp[1] = vcpu->arch.qpr[rs]; + + r = kvmppc_st(vcpu, &addr, len, tmp, true); + vcpu->arch.paddr_accessed = addr; + if (r < 0) { + kvmppc_inject_pf(vcpu, addr, true); + } else if ((r == EMULATE_DO_MMIO) && w) { + emulated = kvmppc_handle_store(run, vcpu, tmp[0], 4, 1); + } else if (r == EMULATE_DO_MMIO) { + u64 val = ((u64)tmp[0] << 32) | tmp[1]; + emulated = kvmppc_handle_store(run, vcpu, val, 8, 1); + } else { + emulated = EMULATE_DONE; + } + + dprintk(KERN_INFO "KVM: PSQ_ST [0x%x, 0x%x] at 0x%lx (%d)\n", + tmp[0], tmp[1], addr, len); + + return emulated; +} + +/* + * Cuts out inst bits with ordering according to spec. + * That means the leftmost bit is zero. All given bits are included. + */ +static inline u32 inst_get_field(u32 inst, int msb, int lsb) +{ + return kvmppc_get_field(inst, msb + 32, lsb + 32); +} + +/* + * Replaces inst bits with ordering according to spec. + */ +static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value) +{ + return kvmppc_set_field(inst, msb + 32, lsb + 32, value); +} + +bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst) +{ + if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) + return false; + + switch (get_op(inst)) { + case OP_PSQ_L: + case OP_PSQ_LU: + case OP_PSQ_ST: + case OP_PSQ_STU: + case OP_LFS: + case OP_LFSU: + case OP_LFD: + case OP_LFDU: + case OP_STFS: + case OP_STFSU: + case OP_STFD: + case OP_STFDU: + return true; + case 4: + /* X form */ + switch (inst_get_field(inst, 21, 30)) { + case OP_4X_PS_CMPU0: + case OP_4X_PSQ_LX: + case OP_4X_PS_CMPO0: + case OP_4X_PSQ_LUX: + case OP_4X_PS_NEG: + case OP_4X_PS_CMPU1: + case OP_4X_PS_MR: + case OP_4X_PS_CMPO1: + case OP_4X_PS_NABS: + case OP_4X_PS_ABS: + case OP_4X_PS_MERGE00: + case OP_4X_PS_MERGE01: + case OP_4X_PS_MERGE10: + case OP_4X_PS_MERGE11: + return true; + } + /* XW form */ + switch (inst_get_field(inst, 25, 30)) { + case OP_4XW_PSQ_STX: + case OP_4XW_PSQ_STUX: + return true; + } + /* A form */ + switch (inst_get_field(inst, 26, 30)) { + case OP_4A_PS_SUM1: + case OP_4A_PS_SUM0: + case OP_4A_PS_MULS0: + case OP_4A_PS_MULS1: + case OP_4A_PS_MADDS0: + case OP_4A_PS_MADDS1: + case OP_4A_PS_DIV: + case OP_4A_PS_SUB: + case OP_4A_PS_ADD: + case OP_4A_PS_SEL: + case OP_4A_PS_RES: + case OP_4A_PS_MUL: + case OP_4A_PS_RSQRTE: + case OP_4A_PS_MSUB: + case OP_4A_PS_MADD: + case OP_4A_PS_NMSUB: + case OP_4A_PS_NMADD: + return true; + } + break; + case 59: + switch (inst_get_field(inst, 21, 30)) { + case OP_59_FADDS: + case OP_59_FSUBS: + case OP_59_FDIVS: + case OP_59_FRES: + case OP_59_FRSQRTES: + return true; + } + switch (inst_get_field(inst, 26, 30)) { + case OP_59_FMULS: + case OP_59_FMSUBS: + case OP_59_FMADDS: + case OP_59_FNMSUBS: + case OP_59_FNMADDS: + return true; + } + break; + case 63: + switch (inst_get_field(inst, 21, 30)) { + case OP_63_MTFSB0: + case OP_63_MTFSB1: + case OP_63_MTFSF: + case OP_63_MTFSFI: + case OP_63_MCRFS: + case OP_63_MFFS: + case OP_63_FCMPU: + case OP_63_FCMPO: + case OP_63_FNEG: + case OP_63_FMR: + case OP_63_FABS: + case OP_63_FRSP: + case OP_63_FDIV: + case OP_63_FADD: + case OP_63_FSUB: + case OP_63_FCTIW: + case OP_63_FCTIWZ: + case OP_63_FRSQRTE: + case OP_63_FCPSGN: + return true; + } + switch (inst_get_field(inst, 26, 30)) { + case OP_63_FMUL: + case OP_63_FSEL: + case OP_63_FMSUB: + case OP_63_FMADD: + case OP_63_FNMSUB: + case OP_63_FNMADD: + return true; + } + break; + case 31: + switch (inst_get_field(inst, 21, 30)) { + case OP_31_LFSX: + case OP_31_LFSUX: + case OP_31_LFDX: + case OP_31_LFDUX: + case OP_31_STFSX: + case OP_31_STFSUX: + case OP_31_STFX: + case OP_31_STFUX: + case OP_31_STFIWX: + return true; + } + break; + } + + return false; +} + +static int get_d_signext(u32 inst) +{ + int d = inst & 0x8ff; + + if (d & 0x800) + return -(d & 0x7ff); + + return (d & 0x7ff); +} + +static int kvmppc_ps_three_in(struct kvm_vcpu *vcpu, bool rc, + int reg_out, int reg_in1, int reg_in2, + int reg_in3, int scalar, + void (*func)(struct thread_struct *t, + u32 *dst, u32 *src1, + u32 *src2, u32 *src3)) +{ + u32 *qpr = vcpu->arch.qpr; + u64 *fpr = vcpu->arch.fpr; + u32 ps0_out; + u32 ps0_in1, ps0_in2, ps0_in3; + u32 ps1_in1, ps1_in2, ps1_in3; + struct thread_struct t; + t.fpscr.val = vcpu->arch.fpscr; + + /* RC */ + WARN_ON(rc); + + /* PS0 */ + cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); + cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); + cvt_df((double*)&fpr[reg_in3], (float*)&ps0_in3, &t); + + if (scalar & SCALAR_LOW) + ps0_in2 = qpr[reg_in2]; + + func(&t, &ps0_out, &ps0_in1, &ps0_in2, &ps0_in3); + + dprintk(KERN_INFO "PS3 ps0 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", + ps0_in1, ps0_in2, ps0_in3, ps0_out); + + if (!(scalar & SCALAR_NO_PS0)) + cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); + + /* PS1 */ + ps1_in1 = qpr[reg_in1]; + ps1_in2 = qpr[reg_in2]; + ps1_in3 = qpr[reg_in3]; + + if (scalar & SCALAR_HIGH) + ps1_in2 = ps0_in2; + + if (!(scalar & SCALAR_NO_PS1)) + func(&t, &qpr[reg_out], &ps1_in1, &ps1_in2, &ps1_in3); + + dprintk(KERN_INFO "PS3 ps1 -> f(0x%x, 0x%x, 0x%x) = 0x%x\n", + ps1_in1, ps1_in2, ps1_in3, qpr[reg_out]); + + return EMULATE_DONE; +} + +static int kvmppc_ps_two_in(struct kvm_vcpu *vcpu, bool rc, + int reg_out, int reg_in1, int reg_in2, + int scalar, + void (*func)(struct thread_struct *t, + u32 *dst, u32 *src1, + u32 *src2)) +{ + u32 *qpr = vcpu->arch.qpr; + u64 *fpr = vcpu->arch.fpr; + u32 ps0_out; + u32 ps0_in1, ps0_in2; + u32 ps1_out; + u32 ps1_in1, ps1_in2; + struct thread_struct t; + t.fpscr.val = vcpu->arch.fpscr; + + /* RC */ + WARN_ON(rc); + + /* PS0 */ + cvt_df((double*)&fpr[reg_in1], (float*)&ps0_in1, &t); + + if (scalar & SCALAR_LOW) + ps0_in2 = qpr[reg_in2]; + else + cvt_df((double*)&fpr[reg_in2], (float*)&ps0_in2, &t); + + func(&t, &ps0_out, &ps0_in1, &ps0_in2); + + if (!(scalar & SCALAR_NO_PS0)) { + dprintk(KERN_INFO "PS2 ps0 -> f(0x%x, 0x%x) = 0x%x\n", + ps0_in1, ps0_in2, ps0_out); + + cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); + } + + /* PS1 */ + ps1_in1 = qpr[reg_in1]; + ps1_in2 = qpr[reg_in2]; + + if (scalar & SCALAR_HIGH) + ps1_in2 = ps0_in2; + + func(&t, &ps1_out, &ps1_in1, &ps1_in2); + + if (!(scalar & SCALAR_NO_PS1)) { + qpr[reg_out] = ps1_out; + + dprintk(KERN_INFO "PS2 ps1 -> f(0x%x, 0x%x) = 0x%x\n", + ps1_in1, ps1_in2, qpr[reg_out]); + } + + return EMULATE_DONE; +} + +static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, + int reg_out, int reg_in, + void (*func)(struct thread_struct *t, + u32 *dst, u32 *src1)) +{ + u32 *qpr = vcpu->arch.qpr; + u64 *fpr = vcpu->arch.fpr; + u32 ps0_out, ps0_in; + u32 ps1_in; + struct thread_struct t; + t.fpscr.val = vcpu->arch.fpscr; + + /* RC */ + WARN_ON(rc); + + /* PS0 */ + cvt_df((double*)&fpr[reg_in], (float*)&ps0_in, &t); + func(&t, &ps0_out, &ps0_in); + + dprintk(KERN_INFO "PS1 ps0 -> f(0x%x) = 0x%x\n", + ps0_in, ps0_out); + + cvt_fd((float*)&ps0_out, (double*)&fpr[reg_out], &t); + + /* PS1 */ + ps1_in = qpr[reg_in]; + func(&t, &qpr[reg_out], &ps1_in); + + dprintk(KERN_INFO "PS1 ps1 -> f(0x%x) = 0x%x\n", + ps1_in, qpr[reg_out]); + + return EMULATE_DONE; +} + +int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + u32 inst = vcpu->arch.last_inst; + enum emulation_result emulated = EMULATE_DONE; + + int ax_rd = inst_get_field(inst, 6, 10); + int ax_ra = inst_get_field(inst, 11, 15); + int ax_rb = inst_get_field(inst, 16, 20); + int ax_rc = inst_get_field(inst, 21, 25); + short full_d = inst_get_field(inst, 16, 31); + + u64 *fpr_d = &vcpu->arch.fpr[ax_rd]; + u64 *fpr_a = &vcpu->arch.fpr[ax_ra]; + u64 *fpr_b = &vcpu->arch.fpr[ax_rb]; + u64 *fpr_c = &vcpu->arch.fpr[ax_rc]; + + bool rcomp = (inst & 1) ? true : false; + u32 cr = kvmppc_get_cr(vcpu); + struct thread_struct t; +#ifdef DEBUG + int i; +#endif + + t.fpscr.val = vcpu->arch.fpscr; + + if (!kvmppc_inst_is_paired_single(vcpu, inst)) + return EMULATE_FAIL; + + if (!(vcpu->arch.msr & MSR_FP)) { + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL); + return EMULATE_AGAIN; + } + + kvmppc_giveup_ext(vcpu, MSR_FP); + preempt_disable(); + enable_kernel_fp(); + /* Do we need to clear FE0 / FE1 here? Don't think so. */ + +#ifdef DEBUG + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { + u32 f; + cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); + dprintk(KERN_INFO "FPR[%d] = 0x%x / 0x%llx QPR[%d] = 0x%x\n", + i, f, vcpu->arch.fpr[i], i, vcpu->arch.qpr[i]); + } +#endif + + switch (get_op(inst)) { + case OP_PSQ_L: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + bool w = inst_get_field(inst, 16, 16) ? true : false; + int i = inst_get_field(inst, 17, 19); + + addr += get_d_signext(inst); + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + break; + } + case OP_PSQ_LU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); + bool w = inst_get_field(inst, 16, 16) ? true : false; + int i = inst_get_field(inst, 17, 19); + + addr += get_d_signext(inst); + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_PSQ_ST: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + bool w = inst_get_field(inst, 16, 16) ? true : false; + int i = inst_get_field(inst, 17, 19); + + addr += get_d_signext(inst); + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + break; + } + case OP_PSQ_STU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); + bool w = inst_get_field(inst, 16, 16) ? true : false; + int i = inst_get_field(inst, 17, 19); + + addr += get_d_signext(inst); + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case 4: + /* X form */ + switch (inst_get_field(inst, 21, 30)) { + case OP_4X_PS_CMPU0: + /* XXX */ + emulated = EMULATE_FAIL; + break; + case OP_4X_PSQ_LX: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + bool w = inst_get_field(inst, 21, 21) ? true : false; + int i = inst_get_field(inst, 22, 24); + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + break; + } + case OP_4X_PS_CMPO0: + /* XXX */ + emulated = EMULATE_FAIL; + break; + case OP_4X_PSQ_LUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); + bool w = inst_get_field(inst, 21, 21) ? true : false; + int i = inst_get_field(inst, 22, 24); + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_psq_load(run, vcpu, ax_rd, addr, w, i); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_4X_PS_NEG: + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + vcpu->arch.fpr[ax_rd] ^= 0x8000000000000000ULL; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + vcpu->arch.qpr[ax_rd] ^= 0x80000000; + break; + case OP_4X_PS_CMPU1: + /* XXX */ + emulated = EMULATE_FAIL; + break; + case OP_4X_PS_MR: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + break; + case OP_4X_PS_CMPO1: + /* XXX */ + emulated = EMULATE_FAIL; + break; + case OP_4X_PS_NABS: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + vcpu->arch.fpr[ax_rd] |= 0x8000000000000000ULL; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + vcpu->arch.qpr[ax_rd] |= 0x80000000; + break; + case OP_4X_PS_ABS: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rb]; + vcpu->arch.fpr[ax_rd] &= ~0x8000000000000000ULL; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + vcpu->arch.qpr[ax_rd] &= ~0x80000000; + break; + case OP_4X_PS_MERGE00: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; + /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ + cvt_df((double*)&vcpu->arch.fpr[ax_rb], + (float*)&vcpu->arch.qpr[ax_rd], &t); + break; + case OP_4X_PS_MERGE01: + WARN_ON(rcomp); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_ra]; + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + break; + case OP_4X_PS_MERGE10: + WARN_ON(rcomp); + /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ + cvt_fd((float*)&vcpu->arch.qpr[ax_ra], + (double*)&vcpu->arch.fpr[ax_rd], &t); + /* vcpu->arch.qpr[ax_rd] = vcpu->arch.fpr[ax_rb]; */ + cvt_df((double*)&vcpu->arch.fpr[ax_rb], + (float*)&vcpu->arch.qpr[ax_rd], &t); + break; + case OP_4X_PS_MERGE11: + WARN_ON(rcomp); + /* vcpu->arch.fpr[ax_rd] = vcpu->arch.qpr[ax_ra]; */ + cvt_fd((float*)&vcpu->arch.qpr[ax_ra], + (double*)&vcpu->arch.fpr[ax_rd], &t); + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rb]; + break; + } + /* XW form */ + switch (inst_get_field(inst, 25, 30)) { + case OP_4XW_PSQ_STX: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + bool w = inst_get_field(inst, 21, 21) ? true : false; + int i = inst_get_field(inst, 22, 24); + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + break; + } + case OP_4XW_PSQ_STUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra); + bool w = inst_get_field(inst, 21, 21) ? true : false; + int i = inst_get_field(inst, 22, 24); + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_psq_store(run, vcpu, ax_rd, addr, w, i); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + } + /* A form */ + switch (inst_get_field(inst, 26, 30)) { + case OP_4A_PS_SUM1: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_rb, ax_ra, SCALAR_NO_PS0 | SCALAR_HIGH, fps_fadds); + vcpu->arch.fpr[ax_rd] = vcpu->arch.fpr[ax_rc]; + break; + case OP_4A_PS_SUM0: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rb, SCALAR_NO_PS1 | SCALAR_LOW, fps_fadds); + vcpu->arch.qpr[ax_rd] = vcpu->arch.qpr[ax_rc]; + break; + case OP_4A_PS_MULS0: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, SCALAR_HIGH, fps_fmuls); + break; + case OP_4A_PS_MULS1: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, SCALAR_LOW, fps_fmuls); + break; + case OP_4A_PS_MADDS0: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_HIGH, fps_fmadds); + break; + case OP_4A_PS_MADDS1: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_LOW, fps_fmadds); + break; + case OP_4A_PS_DIV: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rb, SCALAR_NONE, fps_fdivs); + break; + case OP_4A_PS_SUB: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rb, SCALAR_NONE, fps_fsubs); + break; + case OP_4A_PS_ADD: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rb, SCALAR_NONE, fps_fadds); + break; + case OP_4A_PS_SEL: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fsel); + break; + case OP_4A_PS_RES: + emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, + ax_rb, fps_fres); + break; + case OP_4A_PS_MUL: + emulated = kvmppc_ps_two_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, SCALAR_NONE, fps_fmuls); + break; + case OP_4A_PS_RSQRTE: + emulated = kvmppc_ps_one_in(vcpu, rcomp, ax_rd, + ax_rb, fps_frsqrte); + break; + case OP_4A_PS_MSUB: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmsubs); + break; + case OP_4A_PS_MADD: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fmadds); + break; + case OP_4A_PS_NMSUB: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmsubs); + break; + case OP_4A_PS_NMADD: + emulated = kvmppc_ps_three_in(vcpu, rcomp, ax_rd, + ax_ra, ax_rc, ax_rb, SCALAR_NONE, fps_fnmadds); + break; + } + break; + + /* Real FPU operations */ + + case OP_LFS: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + FPU_LS_SINGLE); + break; + } + case OP_LFSU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + FPU_LS_SINGLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_LFD: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + FPU_LS_DOUBLE); + break; + } + case OP_LFDU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, addr, + FPU_LS_DOUBLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_STFS: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + FPU_LS_SINGLE); + break; + } + case OP_STFSU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + FPU_LS_SINGLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_STFD: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + full_d; + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + FPU_LS_DOUBLE); + break; + } + case OP_STFDU: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + full_d; + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, addr, + FPU_LS_DOUBLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case 31: + switch (inst_get_field(inst, 21, 30)) { + case OP_31_LFSX: + { + ulong addr = ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0; + + addr += kvmppc_get_gpr(vcpu, ax_rb); + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + addr, FPU_LS_SINGLE); + break; + } + case OP_31_LFSUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + addr, FPU_LS_SINGLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_31_LFDX: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + addr, FPU_LS_DOUBLE); + break; + } + case OP_31_LFDUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_load(run, vcpu, ax_rd, + addr, FPU_LS_DOUBLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_31_STFSX: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, FPU_LS_SINGLE); + break; + } + case OP_31_STFSUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, FPU_LS_SINGLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_31_STFX: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, FPU_LS_DOUBLE); + break; + } + case OP_31_STFUX: + { + ulong addr = kvmppc_get_gpr(vcpu, ax_ra) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, FPU_LS_DOUBLE); + + if (emulated == EMULATE_DONE) + kvmppc_set_gpr(vcpu, ax_ra, addr); + break; + } + case OP_31_STFIWX: + { + ulong addr = (ax_ra ? kvmppc_get_gpr(vcpu, ax_ra) : 0) + + kvmppc_get_gpr(vcpu, ax_rb); + + emulated = kvmppc_emulate_fpr_store(run, vcpu, ax_rd, + addr, + FPU_LS_SINGLE_LOW); + break; + } + break; + } + break; + case 59: + switch (inst_get_field(inst, 21, 30)) { + case OP_59_FADDS: + fpd_fadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FSUBS: + fpd_fsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FDIVS: + fpd_fdivs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FRES: + fpd_fres(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FRSQRTES: + fpd_frsqrtes(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + } + switch (inst_get_field(inst, 26, 30)) { + case OP_59_FMULS: + fpd_fmuls(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FMSUBS: + fpd_fmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FMADDS: + fpd_fmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FNMSUBS: + fpd_fnmsubs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_59_FNMADDS: + fpd_fnmadds(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + } + break; + case 63: + switch (inst_get_field(inst, 21, 30)) { + case OP_63_MTFSB0: + case OP_63_MTFSB1: + case OP_63_MCRFS: + case OP_63_MTFSFI: + /* XXX need to implement */ + break; + case OP_63_MFFS: + /* XXX missing CR */ + *fpr_d = vcpu->arch.fpscr; + break; + case OP_63_MTFSF: + /* XXX missing fm bits */ + /* XXX missing CR */ + vcpu->arch.fpscr = *fpr_b; + break; + case OP_63_FCMPU: + { + u32 tmp_cr; + u32 cr0_mask = 0xf0000000; + u32 cr_shift = inst_get_field(inst, 6, 8) * 4; + + fpd_fcmpu(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); + cr &= ~(cr0_mask >> cr_shift); + cr |= (cr & cr0_mask) >> cr_shift; + break; + } + case OP_63_FCMPO: + { + u32 tmp_cr; + u32 cr0_mask = 0xf0000000; + u32 cr_shift = inst_get_field(inst, 6, 8) * 4; + + fpd_fcmpo(&vcpu->arch.fpscr, &tmp_cr, fpr_a, fpr_b); + cr &= ~(cr0_mask >> cr_shift); + cr |= (cr & cr0_mask) >> cr_shift; + break; + } + case OP_63_FNEG: + fpd_fneg(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + break; + case OP_63_FMR: + *fpr_d = *fpr_b; + break; + case OP_63_FABS: + fpd_fabs(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + break; + case OP_63_FCPSGN: + fpd_fcpsgn(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + break; + case OP_63_FDIV: + fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + break; + case OP_63_FADD: + fpd_fadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + break; + case OP_63_FSUB: + fpd_fsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_b); + break; + case OP_63_FCTIW: + fpd_fctiw(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + break; + case OP_63_FCTIWZ: + fpd_fctiwz(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + break; + case OP_63_FRSP: + fpd_frsp(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + kvmppc_sync_qpr(vcpu, ax_rd); + break; + case OP_63_FRSQRTE: + { + double one = 1.0f; + + /* fD = sqrt(fB) */ + fpd_fsqrt(&vcpu->arch.fpscr, &cr, fpr_d, fpr_b); + /* fD = 1.0f / fD */ + fpd_fdiv(&vcpu->arch.fpscr, &cr, fpr_d, (u64*)&one, fpr_d); + break; + } + } + switch (inst_get_field(inst, 26, 30)) { + case OP_63_FMUL: + fpd_fmul(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c); + break; + case OP_63_FSEL: + fpd_fsel(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + case OP_63_FMSUB: + fpd_fmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + case OP_63_FMADD: + fpd_fmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + case OP_63_FNMSUB: + fpd_fnmsub(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + case OP_63_FNMADD: + fpd_fnmadd(&vcpu->arch.fpscr, &cr, fpr_d, fpr_a, fpr_c, fpr_b); + break; + } + break; + } + +#ifdef DEBUG + for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++) { + u32 f; + cvt_df((double*)&vcpu->arch.fpr[i], (float*)&f, &t); + dprintk(KERN_INFO "FPR[%d] = 0x%x\n", i, f); + } +#endif + + if (rcomp) + kvmppc_set_cr(vcpu, cr); + + preempt_enable(); + + return emulated; +} diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2a3a195..c922240 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -133,6 +133,12 @@ void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_EXTERNAL); } +void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, + struct kvm_interrupt *irq) +{ + clear_bit(BOOKE_IRQPRIO_EXTERNAL, &vcpu->arch.pending_exceptions); +} + /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index cb72a65..dbb5d684 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -38,10 +38,12 @@ #define OP_31_XOP_LBZX 87 #define OP_31_XOP_STWX 151 #define OP_31_XOP_STBX 215 +#define OP_31_XOP_LBZUX 119 #define OP_31_XOP_STBUX 247 #define OP_31_XOP_LHZX 279 #define OP_31_XOP_LHZUX 311 #define OP_31_XOP_MFSPR 339 +#define OP_31_XOP_LHAX 343 #define OP_31_XOP_STHX 407 #define OP_31_XOP_STHUX 439 #define OP_31_XOP_MTSPR 467 @@ -62,6 +64,8 @@ #define OP_STBU 39 #define OP_LHZ 40 #define OP_LHZU 41 +#define OP_LHA 42 +#define OP_LHAU 43 #define OP_STH 44 #define OP_STHU 45 @@ -171,6 +175,19 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); break; + case OP_31_XOP_LBZUX: + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = kvmppc_get_gpr(vcpu, rb); + if (ra) + ea += kvmppc_get_gpr(vcpu, ra); + + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, ea); + break; + case OP_31_XOP_STWX: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -200,6 +217,11 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, rs, ea); break; + case OP_31_XOP_LHAX: + rt = get_rt(inst); + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + case OP_31_XOP_LHZX: rt = get_rt(inst); emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); @@ -450,6 +472,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); break; + case OP_LHA: + rt = get_rt(inst); + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_LHAU: + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed); + break; + case OP_STH: rs = get_rs(inst); emulated = kvmppc_handle_store(run, vcpu, @@ -472,7 +506,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) if (emulated == EMULATE_FAIL) { emulated = kvmppc_core_emulate_op(run, vcpu, inst, &advance); - if (emulated == EMULATE_FAIL) { + if (emulated == EMULATE_AGAIN) { + advance = 0; + } else if (emulated == EMULATE_FAIL) { advance = 0; printk(KERN_ERR "Couldn't emulate instruction 0x%08x " "(op %d xop %d)\n", inst, get_op(inst), get_xop(inst)); diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S new file mode 100644 index 0000000..2b340a3 --- /dev/null +++ b/arch/powerpc/kvm/fpu.S @@ -0,0 +1,273 @@ +/* + * FPU helper code to use FPU operations from inside the kernel + * + * Copyright (C) 2010 Alexander Graf (agraf@suse.de) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* Instructions operating on single parameters */ + +/* + * Single operation with one input operand + * + * R3 = (double*)&fpscr + * R4 = (short*)&result + * R5 = (short*)¶m1 + */ +#define FPS_ONE_IN(name) \ +_GLOBAL(fps_ ## name); \ + lfd 0,0(r3); /* load up fpscr value */ \ + MTFSF_L(0); \ + lfs 0,0(r5); \ + \ + name 0,0; \ + \ + stfs 0,0(r4); \ + mffs 0; \ + stfd 0,0(r3); /* save new fpscr value */ \ + blr + +/* + * Single operation with two input operands + * + * R3 = (double*)&fpscr + * R4 = (short*)&result + * R5 = (short*)¶m1 + * R6 = (short*)¶m2 + */ +#define FPS_TWO_IN(name) \ +_GLOBAL(fps_ ## name); \ + lfd 0,0(r3); /* load up fpscr value */ \ + MTFSF_L(0); \ + lfs 0,0(r5); \ + lfs 1,0(r6); \ + \ + name 0,0,1; \ + \ + stfs 0,0(r4); \ + mffs 0; \ + stfd 0,0(r3); /* save new fpscr value */ \ + blr + +/* + * Single operation with three input operands + * + * R3 = (double*)&fpscr + * R4 = (short*)&result + * R5 = (short*)¶m1 + * R6 = (short*)¶m2 + * R7 = (short*)¶m3 + */ +#define FPS_THREE_IN(name) \ +_GLOBAL(fps_ ## name); \ + lfd 0,0(r3); /* load up fpscr value */ \ + MTFSF_L(0); \ + lfs 0,0(r5); \ + lfs 1,0(r6); \ + lfs 2,0(r7); \ + \ + name 0,0,1,2; \ + \ + stfs 0,0(r4); \ + mffs 0; \ + stfd 0,0(r3); /* save new fpscr value */ \ + blr + +FPS_ONE_IN(fres) +FPS_ONE_IN(frsqrte) +FPS_ONE_IN(fsqrts) +FPS_TWO_IN(fadds) +FPS_TWO_IN(fdivs) +FPS_TWO_IN(fmuls) +FPS_TWO_IN(fsubs) +FPS_THREE_IN(fmadds) +FPS_THREE_IN(fmsubs) +FPS_THREE_IN(fnmadds) +FPS_THREE_IN(fnmsubs) +FPS_THREE_IN(fsel) + + +/* Instructions operating on double parameters */ + +/* + * Beginning of double instruction processing + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * R6 = (double*)¶m1 + * R7 = (double*)¶m2 [load_two] + * R8 = (double*)¶m3 [load_three] + * LR = instruction call function + */ +fpd_load_three: + lfd 2,0(r8) /* load param3 */ +fpd_load_two: + lfd 1,0(r7) /* load param2 */ +fpd_load_one: + lfd 0,0(r6) /* load param1 */ +fpd_load_none: + lfd 3,0(r3) /* load up fpscr value */ + MTFSF_L(3) + lwz r6, 0(r4) /* load cr */ + mtcr r6 + blr + +/* + * End of double instruction processing + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * LR = caller of instruction call function + */ +fpd_return: + mfcr r6 + stfd 0,0(r5) /* save result */ + mffs 0 + stfd 0,0(r3) /* save new fpscr value */ + stw r6,0(r4) /* save new cr value */ + blr + +/* + * Double operation with no input operand + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + */ +#define FPD_NONE_IN(name) \ +_GLOBAL(fpd_ ## name); \ + mflr r12; \ + bl fpd_load_none; \ + mtlr r12; \ + \ + name. 0; /* call instruction */ \ + b fpd_return + +/* + * Double operation with one input operand + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * R6 = (double*)¶m1 + */ +#define FPD_ONE_IN(name) \ +_GLOBAL(fpd_ ## name); \ + mflr r12; \ + bl fpd_load_one; \ + mtlr r12; \ + \ + name. 0,0; /* call instruction */ \ + b fpd_return + +/* + * Double operation with two input operands + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * R6 = (double*)¶m1 + * R7 = (double*)¶m2 + * R8 = (double*)¶m3 + */ +#define FPD_TWO_IN(name) \ +_GLOBAL(fpd_ ## name); \ + mflr r12; \ + bl fpd_load_two; \ + mtlr r12; \ + \ + name. 0,0,1; /* call instruction */ \ + b fpd_return + +/* + * CR Double operation with two input operands + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)¶m1 + * R6 = (double*)¶m2 + * R7 = (double*)¶m3 + */ +#define FPD_TWO_IN_CR(name) \ +_GLOBAL(fpd_ ## name); \ + lfd 1,0(r6); /* load param2 */ \ + lfd 0,0(r5); /* load param1 */ \ + lfd 3,0(r3); /* load up fpscr value */ \ + MTFSF_L(3); \ + lwz r6, 0(r4); /* load cr */ \ + mtcr r6; \ + \ + name 0,0,1; /* call instruction */ \ + mfcr r6; \ + mffs 0; \ + stfd 0,0(r3); /* save new fpscr value */ \ + stw r6,0(r4); /* save new cr value */ \ + blr + +/* + * Double operation with three input operands + * + * R3 = (double*)&fpscr + * R4 = (u32*)&cr + * R5 = (double*)&result + * R6 = (double*)¶m1 + * R7 = (double*)¶m2 + * R8 = (double*)¶m3 + */ +#define FPD_THREE_IN(name) \ +_GLOBAL(fpd_ ## name); \ + mflr r12; \ + bl fpd_load_three; \ + mtlr r12; \ + \ + name. 0,0,1,2; /* call instruction */ \ + b fpd_return + +FPD_ONE_IN(fsqrts) +FPD_ONE_IN(frsqrtes) +FPD_ONE_IN(fres) +FPD_ONE_IN(frsp) +FPD_ONE_IN(fctiw) +FPD_ONE_IN(fctiwz) +FPD_ONE_IN(fsqrt) +FPD_ONE_IN(fre) +FPD_ONE_IN(frsqrte) +FPD_ONE_IN(fneg) +FPD_ONE_IN(fabs) +FPD_TWO_IN(fadds) +FPD_TWO_IN(fsubs) +FPD_TWO_IN(fdivs) +FPD_TWO_IN(fmuls) +FPD_TWO_IN_CR(fcmpu) +FPD_TWO_IN(fcpsgn) +FPD_TWO_IN(fdiv) +FPD_TWO_IN(fadd) +FPD_TWO_IN(fmul) +FPD_TWO_IN_CR(fcmpo) +FPD_TWO_IN(fsub) +FPD_THREE_IN(fmsubs) +FPD_THREE_IN(fmadds) +FPD_THREE_IN(fnmsubs) +FPD_THREE_IN(fnmadds) +FPD_THREE_IN(fsel) +FPD_THREE_IN(fmsub) +FPD_THREE_IN(fmadd) +FPD_THREE_IN(fnmsub) +FPD_THREE_IN(fnmadd) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 297fcd2..ffbe4ca 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -148,6 +148,10 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_PPC_SEGSTATE: + case KVM_CAP_PPC_PAIRED_SINGLES: + case KVM_CAP_PPC_UNSET_IRQ: + case KVM_CAP_ENABLE_CAP: + case KVM_CAP_PPC_OSI: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -193,12 +197,17 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { struct kvm_vcpu *vcpu; vcpu = kvmppc_core_vcpu_create(kvm, id); - kvmppc_create_vcpu_debugfs(vcpu, id); + if (!IS_ERR(vcpu)) + kvmppc_create_vcpu_debugfs(vcpu, id); return vcpu; } void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { + /* Make sure we're not using the vcpu anymore */ + hrtimer_cancel(&vcpu->arch.dec_timer); + tasklet_kill(&vcpu->arch.tasklet); + kvmppc_remove_vcpu_debugfs(vcpu); kvmppc_core_vcpu_free(vcpu); } @@ -278,7 +287,7 @@ static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { - ulong gpr; + u64 gpr; if (run->mmio.len > sizeof(gpr)) { printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); @@ -287,6 +296,7 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, if (vcpu->arch.mmio_is_bigendian) { switch (run->mmio.len) { + case 8: gpr = *(u64 *)run->mmio.data; break; case 4: gpr = *(u32 *)run->mmio.data; break; case 2: gpr = *(u16 *)run->mmio.data; break; case 1: gpr = *(u8 *)run->mmio.data; break; @@ -300,7 +310,43 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, } } + if (vcpu->arch.mmio_sign_extend) { + switch (run->mmio.len) { +#ifdef CONFIG_PPC64 + case 4: + gpr = (s64)(s32)gpr; + break; +#endif + case 2: + gpr = (s64)(s16)gpr; + break; + case 1: + gpr = (s64)(s8)gpr; + break; + } + } + kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); + + switch (vcpu->arch.io_gpr & KVM_REG_EXT_MASK) { + case KVM_REG_GPR: + kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr); + break; + case KVM_REG_FPR: + vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; + break; +#ifdef CONFIG_PPC_BOOK3S + case KVM_REG_QPR: + vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; + break; + case KVM_REG_FQPR: + vcpu->arch.fpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; + vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_REG_MASK] = gpr; + break; +#endif + default: + BUG(); + } } int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -319,12 +365,25 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.mmio_is_bigendian = is_bigendian; vcpu->mmio_needed = 1; vcpu->mmio_is_write = 0; + vcpu->arch.mmio_sign_extend = 0; return EMULATE_DO_MMIO; } +/* Same as above, but sign extends */ +int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_bigendian) +{ + int r; + + r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian); + vcpu->arch.mmio_sign_extend = 1; + + return r; +} + int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, - u32 val, unsigned int bytes, int is_bigendian) + u64 val, unsigned int bytes, int is_bigendian) { void *data = run->mmio.data; @@ -342,6 +401,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Store the value at the lowest bytes in 'data'. */ if (is_bigendian) { switch (bytes) { + case 8: *(u64 *)data = val; break; case 4: *(u32 *)data = val; break; case 2: *(u16 *)data = val; break; case 1: *(u8 *)data = val; break; @@ -376,6 +436,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!vcpu->arch.dcr_is_write) kvmppc_complete_dcr_load(vcpu, run); vcpu->arch.dcr_needed = 0; + } else if (vcpu->arch.osi_needed) { + u64 *gprs = run->osi.gprs; + int i; + + for (i = 0; i < 32; i++) + kvmppc_set_gpr(vcpu, i, gprs[i]); + vcpu->arch.osi_needed = 0; } kvmppc_core_deliver_interrupts(vcpu); @@ -396,7 +463,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) { - kvmppc_core_queue_external(vcpu, irq); + if (irq->irq == KVM_INTERRUPT_UNSET) + kvmppc_core_dequeue_external(vcpu, irq); + else + kvmppc_core_queue_external(vcpu, irq); if (waitqueue_active(&vcpu->wq)) { wake_up_interruptible(&vcpu->wq); @@ -406,6 +476,27 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) return 0; } +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_PPC_OSI: + r = 0; + vcpu->arch.osi_enabled = true; + break; + default: + r = -EINVAL; + break; + } + + return r; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -434,6 +525,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); break; } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: r = -EINVAL; } diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 7ae71cc..bcd6884 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.33-rc2 -# Mon Jan 4 09:03:07 2010 +# Linux kernel version: 2.6.34-rc3 +# Fri Apr 9 09:57:10 2010 # CONFIG_SCHED_MC=y CONFIG_MMU=y @@ -17,6 +17,7 @@ CONFIG_GENERIC_TIME=y CONFIG_GENERIC_TIME_VSYSCALL=y CONFIG_GENERIC_CLOCKEVENTS=y CONFIG_GENERIC_BUG=y +CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y CONFIG_NO_IOMEM=y CONFIG_NO_DMA=y CONFIG_GENERIC_LOCKBREAK=y @@ -62,15 +63,11 @@ CONFIG_TREE_RCU=y # CONFIG_RCU_TRACE is not set CONFIG_RCU_FANOUT=64 # CONFIG_RCU_FANOUT_EXACT is not set +# CONFIG_RCU_FAST_NO_HZ is not set # CONFIG_TREE_RCU_TRACE is not set CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=17 -CONFIG_GROUP_SCHED=y -CONFIG_FAIR_GROUP_SCHED=y -# CONFIG_RT_GROUP_SCHED is not set -CONFIG_USER_SCHED=y -# CONFIG_CGROUP_SCHED is not set CONFIG_CGROUPS=y # CONFIG_CGROUP_DEBUG is not set CONFIG_CGROUP_NS=y @@ -79,6 +76,7 @@ CONFIG_CGROUP_NS=y # CONFIG_CPUSETS is not set # CONFIG_CGROUP_CPUACCT is not set # CONFIG_RESOURCE_COUNTERS is not set +# CONFIG_CGROUP_SCHED is not set CONFIG_SYSFS_DEPRECATED=y CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set @@ -93,6 +91,7 @@ CONFIG_INITRAMFS_SOURCE="" CONFIG_RD_GZIP=y CONFIG_RD_BZIP2=y CONFIG_RD_LZMA=y +CONFIG_RD_LZO=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_SYSCTL=y CONFIG_ANON_INODES=y @@ -126,6 +125,7 @@ CONFIG_SLAB=y # CONFIG_SLUB is not set # CONFIG_SLOB is not set # CONFIG_PROFILING is not set +CONFIG_TRACEPOINTS=y CONFIG_HAVE_OPROFILE=y CONFIG_KPROBES=y CONFIG_HAVE_SYSCALL_WRAPPERS=y @@ -134,6 +134,7 @@ CONFIG_HAVE_KPROBES=y CONFIG_HAVE_KRETPROBES=y CONFIG_HAVE_ARCH_TRACEHOOK=y CONFIG_USE_GENERIC_SMP_HELPERS=y +CONFIG_HAVE_REGS_AND_STACK_ACCESS_API=y CONFIG_HAVE_DEFAULT_NO_SPIN_MUTEXES=y # @@ -246,6 +247,7 @@ CONFIG_64BIT=y CONFIG_SMP=y CONFIG_NR_CPUS=32 CONFIG_HOTPLUG_CPU=y +# CONFIG_SCHED_BOOK is not set CONFIG_COMPAT=y CONFIG_SYSVIPC_COMPAT=y CONFIG_AUDIT_ARCH=y @@ -345,13 +347,13 @@ CONFIG_PM_SLEEP=y CONFIG_HIBERNATION=y CONFIG_PM_STD_PARTITION="" # CONFIG_PM_RUNTIME is not set +CONFIG_PM_OPS=y CONFIG_NET=y # # Networking options # CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set CONFIG_UNIX=y CONFIG_XFRM=y # CONFIG_XFRM_USER is not set @@ -529,6 +531,7 @@ CONFIG_NET_SCH_FIFO=y # # CONFIG_NET_PKTGEN is not set # CONFIG_NET_TCPPROBE is not set +# CONFIG_NET_DROP_MONITOR is not set CONFIG_CAN=m CONFIG_CAN_RAW=m CONFIG_CAN_BCM=m @@ -605,6 +608,7 @@ CONFIG_MISC_DEVICES=y # # SCSI device support # +CONFIG_SCSI_MOD=y # CONFIG_RAID_ATTRS is not set CONFIG_SCSI=y # CONFIG_SCSI_DMA is not set @@ -863,6 +867,7 @@ CONFIG_MISC_FILESYSTEMS=y # CONFIG_BEFS_FS is not set # CONFIG_BFS_FS is not set # CONFIG_EFS_FS is not set +# CONFIG_LOGFS is not set # CONFIG_CRAMFS is not set # CONFIG_SQUASHFS is not set # CONFIG_VXFS_FS is not set @@ -891,6 +896,7 @@ CONFIG_SUNRPC=y # CONFIG_RPCSEC_GSS_KRB5 is not set # CONFIG_RPCSEC_GSS_SPKM3 is not set # CONFIG_SMB_FS is not set +# CONFIG_CEPH_FS is not set # CONFIG_CIFS is not set # CONFIG_NCP_FS is not set # CONFIG_CODA_FS is not set @@ -952,6 +958,7 @@ CONFIG_DEBUG_MUTEXES=y # CONFIG_LOCK_STAT is not set CONFIG_DEBUG_SPINLOCK_SLEEP=y # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set +CONFIG_STACKTRACE=y # CONFIG_DEBUG_KOBJECT is not set CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_INFO is not set @@ -973,12 +980,17 @@ CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y # CONFIG_LATENCYTOP is not set CONFIG_SYSCTL_SYSCALL_CHECK=y # CONFIG_DEBUG_PAGEALLOC is not set +CONFIG_NOP_TRACER=y CONFIG_HAVE_FUNCTION_TRACER=y CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y CONFIG_HAVE_DYNAMIC_FTRACE=y CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y CONFIG_HAVE_SYSCALL_TRACEPOINTS=y +CONFIG_RING_BUFFER=y +CONFIG_EVENT_TRACING=y +CONFIG_CONTEXT_SWITCH_TRACER=y +CONFIG_TRACING=y CONFIG_TRACING_SUPPORT=y CONFIG_FTRACE=y # CONFIG_FUNCTION_TRACER is not set @@ -995,10 +1007,15 @@ CONFIG_BRANCH_PROFILE_NONE=y # CONFIG_KMEMTRACE is not set # CONFIG_WORKQUEUE_TRACER is not set # CONFIG_BLK_DEV_IO_TRACE is not set +CONFIG_KPROBE_EVENT=y +# CONFIG_RING_BUFFER_BENCHMARK is not set # CONFIG_DYNAMIC_DEBUG is not set CONFIG_SAMPLES=y +# CONFIG_SAMPLE_TRACEPOINTS is not set +# CONFIG_SAMPLE_TRACE_EVENTS is not set # CONFIG_SAMPLE_KOBJECT is not set # CONFIG_SAMPLE_KPROBES is not set +# CONFIG_DEBUG_STRICT_USER_COPY_CHECKS is not set # # Security options @@ -1032,6 +1049,7 @@ CONFIG_CRYPTO_MANAGER=y CONFIG_CRYPTO_MANAGER2=y CONFIG_CRYPTO_GF128MUL=m # CONFIG_CRYPTO_NULL is not set +# CONFIG_CRYPTO_PCRYPT is not set CONFIG_CRYPTO_WORKQUEUE=y # CONFIG_CRYPTO_CRYPTD is not set CONFIG_CRYPTO_AUTHENC=m @@ -1119,7 +1137,7 @@ CONFIG_CRYPTO_SHA512_S390=m # CONFIG_CRYPTO_DES_S390 is not set # CONFIG_CRYPTO_AES_S390 is not set CONFIG_S390_PRNG=m -# CONFIG_BINARY_PRINTF is not set +CONFIG_BINARY_PRINTF=y # # Library routines @@ -1136,14 +1154,16 @@ CONFIG_LIBCRC32C=m CONFIG_ZLIB_INFLATE=y CONFIG_ZLIB_DEFLATE=m CONFIG_LZO_COMPRESS=m -CONFIG_LZO_DECOMPRESS=m +CONFIG_LZO_DECOMPRESS=y CONFIG_DECOMPRESS_GZIP=y CONFIG_DECOMPRESS_BZIP2=y CONFIG_DECOMPRESS_LZMA=y +CONFIG_DECOMPRESS_LZO=y CONFIG_NLATTR=y CONFIG_HAVE_KVM=y CONFIG_VIRTUALIZATION=y CONFIG_KVM=m +# CONFIG_VHOST_NET is not set CONFIG_VIRTIO=y CONFIG_VIRTIO_RING=y CONFIG_VIRTIO_BALLOON=m diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 9b5b918..89a504c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -105,7 +105,7 @@ extern char empty_zero_page[PAGE_SIZE]; #ifndef __ASSEMBLY__ /* * The vmalloc area will always be on the topmost area of the kernel - * mapping. We reserve 96MB (31bit) / 1GB (64bit) for vmalloc, + * mapping. We reserve 96MB (31bit) / 128GB (64bit) for vmalloc, * which should be enough for any sane case. * By putting vmalloc at the top, we maximise the gap between physical * memory and vmalloc to catch misplaced memory accesses. As a side @@ -120,8 +120,8 @@ extern unsigned long VMALLOC_START; #define VMALLOC_END 0x7e000000UL #define VMEM_MAP_END 0x80000000UL #else /* __s390x__ */ -#define VMALLOC_SIZE (1UL << 30) -#define VMALLOC_END 0x3e040000000UL +#define VMALLOC_SIZE (128UL << 30) +#define VMALLOC_END 0x3e000000000UL #define VMEM_MAP_END 0x40000000000UL #endif /* __s390x__ */ diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 31d618a..2d92c2c 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -82,7 +82,8 @@ asm( " lm 6,15,24(15)\n" #endif " br 14\n" - " .size savesys_ipl_nss, .-savesys_ipl_nss\n"); + " .size savesys_ipl_nss, .-savesys_ipl_nss\n" + " .previous\n"); static __initdata char upper_command_line[COMMAND_LINE_SIZE]; diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4348f9b..6af7045 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -964,7 +964,7 @@ cleanup_critical: clc 4(4,%r12),BASED(cleanup_table_io_work_loop) bl BASED(0f) clc 4(4,%r12),BASED(cleanup_table_io_work_loop+4) - bl BASED(cleanup_io_return) + bl BASED(cleanup_io_work_loop) 0: br %r14 @@ -1039,6 +1039,12 @@ cleanup_sysc_leave_insn: cleanup_io_return: mvc __LC_RETURN_PSW(4),0(%r12) + mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_return) + la %r12,__LC_RETURN_PSW + br %r14 + +cleanup_io_work_loop: + mvc __LC_RETURN_PSW(4),0(%r12) mvc __LC_RETURN_PSW+4(4),BASED(cleanup_table_io_work_loop) la %r12,__LC_RETURN_PSW br %r14 diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S index 29fd0f1..52106d5 100644 --- a/arch/s390/kernel/entry64.S +++ b/arch/s390/kernel/entry64.S @@ -946,7 +946,7 @@ cleanup_critical: clc 8(8,%r12),BASED(cleanup_table_io_work_loop) jl 0f clc 8(8,%r12),BASED(cleanup_table_io_work_loop+8) - jl cleanup_io_return + jl cleanup_io_work_loop 0: br %r14 @@ -1021,6 +1021,12 @@ cleanup_sysc_leave_insn: cleanup_io_return: mvc __LC_RETURN_PSW(8),0(%r12) + mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_return) + la %r12,__LC_RETURN_PSW + br %r14 + +cleanup_io_work_loop: + mvc __LC_RETURN_PSW(8),0(%r12) mvc __LC_RETURN_PSW+8(8),BASED(cleanup_table_io_work_loop) la %r12,__LC_RETURN_PSW br %r14 diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 14ef6f0..247b4c2 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -165,10 +165,11 @@ static void tl_to_cores(struct tl_info *info) default: clear_cores(); machine_has_topology = 0; - return; + goto out; } tle = next_tle(tle); } +out: spin_unlock_irq(&topology_lock); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4929286..ee7c713 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -341,11 +341,13 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) - goto out_free_cpu; + goto out_free_sie_block; VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, vcpu->arch.sie_block); return vcpu; +out_free_sie_block: + free_page((unsigned long)(vcpu->arch.sie_block)); out_free_cpu: kfree(vcpu); out_nomem: diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 8ea3144..90165e7 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -71,12 +71,8 @@ static pte_t __ref *vmem_pte_alloc(void) pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t)); if (!pte) return NULL; - if (MACHINE_HAS_HPAGE) - clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY | _PAGE_CO, - PTRS_PER_PTE * sizeof(pte_t)); - else - clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, - PTRS_PER_PTE * sizeof(pte_t)); + clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY, + PTRS_PER_PTE * sizeof(pte_t)); return pte; } @@ -117,8 +113,7 @@ static int vmem_add_mem(unsigned long start, unsigned long size, int ro) if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) && (address + HPAGE_SIZE <= start + size) && (address >= HPAGE_SIZE)) { - pte_val(pte) |= _SEGMENT_ENTRY_LARGE | - _SEGMENT_ENTRY_CO; + pte_val(pte) |= _SEGMENT_ENTRY_LARGE; pmd_val(*pm_dir) = pte_val(pte); address += HPAGE_SIZE - PAGE_SIZE; continue; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 6db5136..9908d47 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -37,6 +37,9 @@ config SPARC64 def_bool 64BIT select ARCH_SUPPORTS_MSI select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_GRAPH_FP_TEST + select HAVE_FUNCTION_TRACE_MCOUNT_TEST select HAVE_KRETPROBES select HAVE_KPROBES select HAVE_LMB diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 9d3c889..1b4a831 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -19,13 +19,10 @@ config DEBUG_DCFLUSH bool "D-cache flush debugging" depends on SPARC64 && DEBUG_KERNEL -config STACK_DEBUG - bool "Stack Overflow Detection Support" - config MCOUNT bool depends on SPARC64 - depends on STACK_DEBUG || FUNCTION_TRACER + depends on FUNCTION_TRACER default y config FRAME_POINTER diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index 926397d..050ef35 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -17,7 +17,7 @@ typedef struct { unsigned int __nmi_count; unsigned long clock_tick; /* %tick's per second */ unsigned long __pad; - unsigned int __pad1; + unsigned int irq0_irqs; unsigned int __pad2; /* Dcache line 2, rarely used */ diff --git a/arch/sparc/include/asm/irqflags_64.h b/arch/sparc/include/asm/irqflags_64.h index 8b49bf9..bfa1ea4 100644 --- a/arch/sparc/include/asm/irqflags_64.h +++ b/arch/sparc/include/asm/irqflags_64.h @@ -76,9 +76,26 @@ static inline int raw_irqs_disabled(void) */ static inline unsigned long __raw_local_irq_save(void) { - unsigned long flags = __raw_local_save_flags(); - - raw_local_irq_disable(); + unsigned long flags, tmp; + + /* Disable interrupts to PIL_NORMAL_MAX unless we already + * are using PIL_NMI, in which case PIL_NMI is retained. + * + * The only values we ever program into the %pil are 0, + * PIL_NORMAL_MAX and PIL_NMI. + * + * Since PIL_NMI is the largest %pil value and all bits are + * set in it (0xf), it doesn't matter what PIL_NORMAL_MAX + * actually is. + */ + __asm__ __volatile__( + "rdpr %%pil, %0\n\t" + "or %0, %2, %1\n\t" + "wrpr %1, 0x0, %%pil" + : "=r" (flags), "=r" (tmp) + : "i" (PIL_NORMAL_MAX) + : "memory" + ); return flags; } diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index 9e2d944..4827a3a 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h @@ -111,7 +111,7 @@ struct thread_info { #define THREAD_SHIFT PAGE_SHIFT #endif /* PAGE_SHIFT == 13 */ -#define PREEMPT_ACTIVE 0x4000000 +#define PREEMPT_ACTIVE 0x10000000 /* * macros/functions for gaining access to the thread information structure diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index c631614..0c2dc1f 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -13,6 +13,14 @@ extra-y += init_task.o CPPFLAGS_vmlinux.lds := -Usparc -m$(BITS) extra-y += vmlinux.lds +ifdef CONFIG_FUNCTION_TRACER +# Do not profile debug and lowlevel utilities +CFLAGS_REMOVE_ftrace.o := -pg +CFLAGS_REMOVE_time_$(BITS).o := -pg +CFLAGS_REMOVE_perf_event.o := -pg +CFLAGS_REMOVE_pcr.o := -pg +endif + obj-$(CONFIG_SPARC32) += entry.o wof.o wuf.o obj-$(CONFIG_SPARC32) += etrap_32.o obj-$(CONFIG_SPARC32) += rtrap_32.o @@ -85,7 +93,7 @@ obj-$(CONFIG_KGDB) += kgdb_$(BITS).o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -CFLAGS_REMOVE_ftrace.o := -pg +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_EARLYFB) += btext.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 9103a56..03ab022 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -13,7 +13,7 @@ static const u32 ftrace_nop = 0x01000000; static u32 ftrace_call_replace(unsigned long ip, unsigned long addr) { - static u32 call; + u32 call; s32 off; off = ((s32)addr - (s32)ip); @@ -91,3 +91,61 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } #endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_graph_call(void); + +int ftrace_enable_ftrace_graph_caller(void) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + u32 old, new; + + old = *(u32 *) &ftrace_graph_call; + new = ftrace_call_replace(ip, (unsigned long) &ftrace_graph_caller); + return ftrace_modify_code(ip, old, new); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + u32 old, new; + + old = *(u32 *) &ftrace_graph_call; + new = ftrace_call_replace(ip, (unsigned long) &ftrace_stub); + + return ftrace_modify_code(ip, old, new); +} + +#endif /* !CONFIG_DYNAMIC_FTRACE */ + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. + */ +unsigned long prepare_ftrace_return(unsigned long parent, + unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return parent + 8UL; + + if (ftrace_push_return_trace(parent, self_addr, &trace.depth, + frame_pointer) == -EBUSY) + return parent + 8UL; + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + return parent + 8UL; + } + + return return_hooker; +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/sparc/kernel/irq_64.c b/arch/sparc/kernel/irq_64.c index e1cbdb9..830d70a 100644 --- a/arch/sparc/kernel/irq_64.c +++ b/arch/sparc/kernel/irq_64.c @@ -20,7 +20,9 @@ #include <linux/delay.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/ftrace.h> #include <linux/irq.h> +#include <linux/kmemleak.h> #include <asm/ptrace.h> #include <asm/processor.h> @@ -45,6 +47,7 @@ #include "entry.h" #include "cpumap.h" +#include "kstack.h" #define NUM_IVECS (IMAP_INR + 1) @@ -647,6 +650,14 @@ unsigned int sun4v_build_virq(u32 devhandle, unsigned int devino) bucket = kzalloc(sizeof(struct ino_bucket), GFP_ATOMIC); if (unlikely(!bucket)) return 0; + + /* The only reference we store to the IRQ bucket is + * by physical address which kmemleak can't see, tell + * it that this object explicitly is not a leak and + * should be scanned. + */ + kmemleak_not_leak(bucket); + __flush_dcache_range((unsigned long) bucket, ((unsigned long) bucket + sizeof(struct ino_bucket))); @@ -703,25 +714,7 @@ void ack_bad_irq(unsigned int virt_irq) void *hardirq_stack[NR_CPUS]; void *softirq_stack[NR_CPUS]; -static __attribute__((always_inline)) void *set_hardirq_stack(void) -{ - void *orig_sp, *sp = hardirq_stack[smp_processor_id()]; - - __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp)); - if (orig_sp < sp || - orig_sp > (sp + THREAD_SIZE)) { - sp += THREAD_SIZE - 192 - STACK_BIAS; - __asm__ __volatile__("mov %0, %%sp" : : "r" (sp)); - } - - return orig_sp; -} -static __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp) -{ - __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); -} - -void handler_irq(int irq, struct pt_regs *regs) +void __irq_entry handler_irq(int irq, struct pt_regs *regs) { unsigned long pstate, bucket_pa; struct pt_regs *old_regs; diff --git a/arch/sparc/kernel/kgdb_64.c b/arch/sparc/kernel/kgdb_64.c index f5a0fd4..0a2bd0f 100644 --- a/arch/sparc/kernel/kgdb_64.c +++ b/arch/sparc/kernel/kgdb_64.c @@ -5,6 +5,7 @@ #include <linux/kgdb.h> #include <linux/kdebug.h> +#include <linux/ftrace.h> #include <asm/kdebug.h> #include <asm/ptrace.h> @@ -108,7 +109,7 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) } #ifdef CONFIG_SMP -void smp_kgdb_capture_client(int irq, struct pt_regs *regs) +void __irq_entry smp_kgdb_capture_client(int irq, struct pt_regs *regs) { unsigned long flags; diff --git a/arch/sparc/kernel/kstack.h b/arch/sparc/kernel/kstack.h index 5247283..53dfb92 100644 --- a/arch/sparc/kernel/kstack.h +++ b/arch/sparc/kernel/kstack.h @@ -61,4 +61,23 @@ check_magic: } +static inline __attribute__((always_inline)) void *set_hardirq_stack(void) +{ + void *orig_sp, *sp = hardirq_stack[smp_processor_id()]; + + __asm__ __volatile__("mov %%sp, %0" : "=r" (orig_sp)); + if (orig_sp < sp || + orig_sp > (sp + THREAD_SIZE)) { + sp += THREAD_SIZE - 192 - STACK_BIAS; + __asm__ __volatile__("mov %0, %%sp" : : "r" (sp)); + } + + return orig_sp; +} + +static inline __attribute__((always_inline)) void restore_hardirq_stack(void *orig_sp) +{ + __asm__ __volatile__("mov %0, %%sp" : : "r" (orig_sp)); +} + #endif /* _KSTACK_H */ diff --git a/arch/sparc/kernel/nmi.c b/arch/sparc/kernel/nmi.c index b287b62..a4bd7ba 100644 --- a/arch/sparc/kernel/nmi.c +++ b/arch/sparc/kernel/nmi.c @@ -23,6 +23,8 @@ #include <asm/ptrace.h> #include <asm/pcr.h> +#include "kstack.h" + /* We don't have a real NMI on sparc64, but we can fake one * up using profiling counter overflow interrupts and interrupt * levels. @@ -92,7 +94,7 @@ static void die_nmi(const char *str, struct pt_regs *regs, int do_panic) notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) { unsigned int sum, touched = 0; - int cpu = smp_processor_id(); + void *orig_sp; clear_softint(1 << irq); @@ -100,13 +102,15 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) nmi_enter(); + orig_sp = set_hardirq_stack(); + if (notify_die(DIE_NMI, "nmi", regs, 0, pt_regs_trap_type(regs), SIGINT) == NOTIFY_STOP) touched = 1; else pcr_ops->write(PCR_PIC_PRIV); - sum = kstat_irqs_cpu(0, cpu); + sum = local_cpu_data().irq0_irqs; if (__get_cpu_var(nmi_touch)) { __get_cpu_var(nmi_touch) = 0; touched = 1; @@ -125,6 +129,8 @@ notrace __kprobes void perfctr_irq(int irq, struct pt_regs *regs) pcr_ops->write(pcr_enable); } + restore_hardirq_stack(orig_sp); + nmi_exit(); } diff --git a/arch/sparc/kernel/pci_common.c b/arch/sparc/kernel/pci_common.c index b775658..8a00058 100644 --- a/arch/sparc/kernel/pci_common.c +++ b/arch/sparc/kernel/pci_common.c @@ -371,14 +371,19 @@ static void pci_register_iommu_region(struct pci_pbm_info *pbm) struct resource *rp = kzalloc(sizeof(*rp), GFP_KERNEL); if (!rp) { - prom_printf("Cannot allocate IOMMU resource.\n"); - prom_halt(); + pr_info("%s: Cannot allocate IOMMU resource.\n", + pbm->name); + return; } rp->name = "IOMMU"; rp->start = pbm->mem_space.start + (unsigned long) vdma[0]; rp->end = rp->start + (unsigned long) vdma[1] - 1UL; rp->flags = IORESOURCE_BUSY; - request_resource(&pbm->mem_space, rp); + if (request_resource(&pbm->mem_space, rp)) { + pr_info("%s: Unable to request IOMMU resource.\n", + pbm->name); + kfree(rp); + } } } diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 2d94e7a..c4a6a50 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -8,6 +8,7 @@ #include <linux/irq.h> #include <linux/perf_event.h> +#include <linux/ftrace.h> #include <asm/pil.h> #include <asm/pcr.h> @@ -34,7 +35,7 @@ unsigned int picl_shift; * Therefore in such situations we defer the work by signalling * a lower level cpu IRQ. */ -void deferred_pcr_work_irq(int irq, struct pt_regs *regs) +void __irq_entry deferred_pcr_work_irq(int irq, struct pt_regs *regs) { struct pt_regs *old_regs; diff --git a/arch/sparc/kernel/rtrap_64.S b/arch/sparc/kernel/rtrap_64.S index 83f1873..090b9e9 100644 --- a/arch/sparc/kernel/rtrap_64.S +++ b/arch/sparc/kernel/rtrap_64.S @@ -130,7 +130,17 @@ rtrap_xcall: nop call trace_hardirqs_on nop - wrpr %l4, %pil + /* Do not actually set the %pil here. We will do that + * below after we clear PSTATE_IE in the %pstate register. + * If we re-enable interrupts here, we can recurse down + * the hardirq stack potentially endlessly, causing a + * stack overflow. + * + * It is tempting to put this test and trace_hardirqs_on + * call at the 'rt_continue' label, but that will not work + * as that path hits unconditionally and we do not want to + * execute this in NMI return paths, for example. + */ #endif rtrap_no_irq_enable: andcc %l1, TSTATE_PRIV, %l3 diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 4c53345..b6a2b8f 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -22,6 +22,7 @@ #include <linux/profile.h> #include <linux/bootmem.h> #include <linux/vmalloc.h> +#include <linux/ftrace.h> #include <linux/cpu.h> #include <linux/slab.h> @@ -823,13 +824,13 @@ void arch_send_call_function_single_ipi(int cpu) &cpumask_of_cpu(cpu)); } -void smp_call_function_client(int irq, struct pt_regs *regs) +void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); generic_smp_call_function_interrupt(); } -void smp_call_function_single_client(int irq, struct pt_regs *regs) +void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); generic_smp_call_function_single_interrupt(); @@ -965,7 +966,7 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page) put_cpu(); } -void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) +void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) { struct mm_struct *mm; unsigned long flags; @@ -1149,7 +1150,7 @@ void smp_release(void) */ extern void prom_world(int); -void smp_penguin_jailcell(int irq, struct pt_regs *regs) +void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs) { clear_softint(1 << irq); @@ -1365,7 +1366,7 @@ void smp_send_reschedule(int cpu) &cpumask_of_cpu(cpu)); } -void smp_receive_signal_client(int irq, struct pt_regs *regs) +void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs) { clear_softint(1 << irq); } diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 67e1651..c7bbe6cf 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -35,6 +35,7 @@ #include <linux/clocksource.h> #include <linux/of_device.h> #include <linux/platform_device.h> +#include <linux/ftrace.h> #include <asm/oplib.h> #include <asm/timer.h> @@ -717,7 +718,7 @@ static struct clock_event_device sparc64_clockevent = { }; static DEFINE_PER_CPU(struct clock_event_device, sparc64_events); -void timer_interrupt(int irq, struct pt_regs *regs) +void __irq_entry timer_interrupt(int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); unsigned long tick_mask = tick_ops->softint_mask; @@ -728,6 +729,7 @@ void timer_interrupt(int irq, struct pt_regs *regs) irq_enter(); + local_cpu_data().irq0_irqs++; kstat_incr_irqs_this_cpu(0, irq_to_desc(0)); if (unlikely(!evt->event_handler)) { diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 837dfc2..9da57f0 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2203,27 +2203,6 @@ void dump_stack(void) EXPORT_SYMBOL(dump_stack); -static inline int is_kernel_stack(struct task_struct *task, - struct reg_window *rw) -{ - unsigned long rw_addr = (unsigned long) rw; - unsigned long thread_base, thread_end; - - if (rw_addr < PAGE_OFFSET) { - if (task != &init_task) - return 0; - } - - thread_base = (unsigned long) task_stack_page(task); - thread_end = thread_base + sizeof(union thread_union); - if (rw_addr >= thread_base && - rw_addr < thread_end && - !(rw_addr & 0x7UL)) - return 1; - - return 0; -} - static inline struct reg_window *kernel_stack_up(struct reg_window *rw) { unsigned long fp = rw->ins[6]; @@ -2252,6 +2231,7 @@ void die_if_kernel(char *str, struct pt_regs *regs) show_regs(regs); add_taint(TAINT_DIE); if (regs->tstate & TSTATE_PRIV) { + struct thread_info *tp = current_thread_info(); struct reg_window *rw = (struct reg_window *) (regs->u_regs[UREG_FP] + STACK_BIAS); @@ -2259,8 +2239,8 @@ void die_if_kernel(char *str, struct pt_regs *regs) * find some badly aligned kernel stack. */ while (rw && - count++ < 30&& - is_kernel_stack(current, rw)) { + count++ < 30 && + kstack_valid(tp, (unsigned long) rw)) { printk("Caller[%016lx]: %pS\n", rw->ins[7], (void *) rw->ins[7]); diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index ebce430..c752c4c 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -50,7 +50,7 @@ static inline enum direction decode_direction(unsigned int insn) } /* 16 = double-word, 8 = extra-word, 4 = word, 2 = half-word */ -static inline int decode_access_size(unsigned int insn) +static inline int decode_access_size(struct pt_regs *regs, unsigned int insn) { unsigned int tmp; @@ -66,7 +66,7 @@ static inline int decode_access_size(unsigned int insn) return 2; else { printk("Impossible unaligned trap. insn=%08x\n", insn); - die_if_kernel("Byte sized unaligned access?!?!", current_thread_info()->kregs); + die_if_kernel("Byte sized unaligned access?!?!", regs); /* GCC should never warn that control reaches the end * of this function without returning a value because @@ -286,7 +286,7 @@ static void log_unaligned(struct pt_regs *regs) asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn) { enum direction dir = decode_direction(insn); - int size = decode_access_size(insn); + int size = decode_access_size(regs, insn); int orig_asi, asi; current_thread_info()->kern_una_regs = regs; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 4e59925..0c1e678 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -46,11 +46,16 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT + IRQENTRY_TEXT *(.gnu.warning) } = 0 _etext = .; RO_DATA(PAGE_SIZE) + + /* Start of data section */ + _sdata = .; + .data1 : { *(.data1) } diff --git a/arch/sparc/lib/mcount.S b/arch/sparc/lib/mcount.S index 24b8b12..3ad6cbd 100644 --- a/arch/sparc/lib/mcount.S +++ b/arch/sparc/lib/mcount.S @@ -7,26 +7,11 @@ #include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm/thread_info.h> - /* * This is the main variant and is called by C code. GCC's -pg option * automatically instruments every C function with a call to this. */ -#ifdef CONFIG_STACK_DEBUG - -#define OVSTACKSIZE 4096 /* lets hope this is enough */ - - .data - .align 8 -panicstring: - .asciz "Stack overflow\n" - .align 8 -ovstack: - .skip OVSTACKSIZE -#endif .text .align 32 .globl _mcount @@ -35,84 +20,48 @@ ovstack: .type mcount,#function _mcount: mcount: -#ifdef CONFIG_STACK_DEBUG - /* - * Check whether %sp is dangerously low. - */ - ldub [%g6 + TI_FPDEPTH], %g1 - srl %g1, 1, %g3 - add %g3, 1, %g3 - sllx %g3, 8, %g3 ! each fpregs frame is 256b - add %g3, 192, %g3 - add %g6, %g3, %g3 ! where does task_struct+frame end? - sub %g3, STACK_BIAS, %g3 - cmp %sp, %g3 - bg,pt %xcc, 1f - nop - lduh [%g6 + TI_CPU], %g1 - sethi %hi(hardirq_stack), %g3 - or %g3, %lo(hardirq_stack), %g3 - sllx %g1, 3, %g1 - ldx [%g3 + %g1], %g7 - sub %g7, STACK_BIAS, %g7 - cmp %sp, %g7 - bleu,pt %xcc, 2f - sethi %hi(THREAD_SIZE), %g3 - add %g7, %g3, %g7 - cmp %sp, %g7 - blu,pn %xcc, 1f -2: sethi %hi(softirq_stack), %g3 - or %g3, %lo(softirq_stack), %g3 - ldx [%g3 + %g1], %g7 - sub %g7, STACK_BIAS, %g7 - cmp %sp, %g7 - bleu,pt %xcc, 3f - sethi %hi(THREAD_SIZE), %g3 - add %g7, %g3, %g7 - cmp %sp, %g7 - blu,pn %xcc, 1f - nop - /* If we are already on ovstack, don't hop onto it - * again, we are already trying to output the stack overflow - * message. - */ -3: sethi %hi(ovstack), %g7 ! cant move to panic stack fast enough - or %g7, %lo(ovstack), %g7 - add %g7, OVSTACKSIZE, %g3 - sub %g3, STACK_BIAS + 192, %g3 - sub %g7, STACK_BIAS, %g7 - cmp %sp, %g7 - blu,pn %xcc, 2f - cmp %sp, %g3 - bleu,pn %xcc, 1f - nop -2: mov %g3, %sp - sethi %hi(panicstring), %g3 - call prom_printf - or %g3, %lo(panicstring), %o0 - call prom_halt - nop -1: -#endif #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE - mov %o7, %o0 - .globl mcount_call -mcount_call: - call ftrace_stub - mov %o0, %o7 + /* Do nothing, the retl/nop below is all we need. */ #else - sethi %hi(ftrace_trace_function), %g1 + sethi %hi(function_trace_stop), %g1 + lduw [%g1 + %lo(function_trace_stop)], %g2 + brnz,pn %g2, 2f + sethi %hi(ftrace_trace_function), %g1 sethi %hi(ftrace_stub), %g2 ldx [%g1 + %lo(ftrace_trace_function)], %g1 or %g2, %lo(ftrace_stub), %g2 cmp %g1, %g2 be,pn %icc, 1f - mov %i7, %o1 - jmpl %g1, %g0 - mov %o7, %o0 + mov %i7, %g3 + save %sp, -176, %sp + mov %g3, %o1 + jmpl %g1, %o7 + mov %i7, %o0 + ret + restore /* not reached */ 1: +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + sethi %hi(ftrace_graph_return), %g1 + ldx [%g1 + %lo(ftrace_graph_return)], %g3 + cmp %g2, %g3 + bne,pn %xcc, 5f + sethi %hi(ftrace_graph_entry_stub), %g2 + sethi %hi(ftrace_graph_entry), %g1 + or %g2, %lo(ftrace_graph_entry_stub), %g2 + ldx [%g1 + %lo(ftrace_graph_entry)], %g1 + cmp %g1, %g2 + be,pt %xcc, 2f + nop +5: mov %i7, %g2 + mov %fp, %g3 + save %sp, -176, %sp + mov %g2, %l0 + ba,pt %xcc, ftrace_graph_caller + mov %g3, %l1 +#endif +2: #endif #endif retl @@ -131,14 +80,50 @@ ftrace_stub: .globl ftrace_caller .type ftrace_caller,#function ftrace_caller: - mov %i7, %o1 - mov %o7, %o0 + sethi %hi(function_trace_stop), %g1 + mov %i7, %g2 + lduw [%g1 + %lo(function_trace_stop)], %g1 + brnz,pn %g1, ftrace_stub + mov %fp, %g3 + save %sp, -176, %sp + mov %g2, %o1 + mov %g2, %l0 + mov %g3, %l1 .globl ftrace_call ftrace_call: call ftrace_stub - mov %o0, %o7 - retl + mov %i7, %o0 +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call +ftrace_graph_call: + call ftrace_stub nop +#endif + ret + restore +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .size ftrace_graph_call,.-ftrace_graph_call +#endif + .size ftrace_call,.-ftrace_call .size ftrace_caller,.-ftrace_caller #endif #endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + mov %l0, %o0 + mov %i7, %o1 + call prepare_ftrace_return + mov %l1, %o2 + ret + restore %o0, -8, %i7 +END(ftrace_graph_caller) + +ENTRY(return_to_handler) + save %sp, -176, %sp + call ftrace_return_to_handler + mov %fp, %o0 + jmpl %o0 + 8, %g0 + restore +END(return_to_handler) +#endif diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 59b4556..e790bc1 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -626,7 +626,7 @@ ia32_sys_call_table: .quad stub32_sigreturn .quad stub32_clone /* 120 */ .quad sys_setdomainname - .quad sys_uname + .quad sys_newuname .quad sys_modify_ldt .quad compat_sys_adjtimex .quad sys32_mprotect /* 125 */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ba19ad4..86a0ff0 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -21,6 +21,7 @@ #define _ASM_X86_AMD_IOMMU_TYPES_H #include <linux/types.h> +#include <linux/mutex.h> #include <linux/list.h> #include <linux/spinlock.h> @@ -140,6 +141,7 @@ /* constants to configure the command buffer */ #define CMD_BUFFER_SIZE 8192 +#define CMD_BUFFER_UNINITIALIZED 1 #define CMD_BUFFER_ENTRIES 512 #define MMIO_CMD_SIZE_SHIFT 56 #define MMIO_CMD_SIZE_512 (0x9ULL << MMIO_CMD_SIZE_SHIFT) @@ -237,6 +239,7 @@ struct protection_domain { struct list_head list; /* for list of all protection domains */ struct list_head dev_list; /* List of all devices in this domain */ spinlock_t lock; /* mostly used to lock the page table*/ + struct mutex api_lock; /* protect page tables in the iommu-api path */ u16 id; /* the domain id written to the device table */ int mode; /* paging mode (0-6 levels) */ u64 *pt_root; /* page table root pointer */ diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index f46b79f..ff90055 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h @@ -21,6 +21,7 @@ #define __KVM_HAVE_PIT_STATE2 #define __KVM_HAVE_XEN_HVM #define __KVM_HAVE_VCPU_EVENTS +#define __KVM_HAVE_DEBUGREGS /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 @@ -257,6 +258,11 @@ struct kvm_reinject_control { /* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */ #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 +#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 + +/* Interrupt shadow states */ +#define KVM_X86_SHADOW_INT_MOV_SS 0x01 +#define KVM_X86_SHADOW_INT_STI 0x02 /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { @@ -271,7 +277,7 @@ struct kvm_vcpu_events { __u8 injected; __u8 nr; __u8 soft; - __u8 pad; + __u8 shadow; } interrupt; struct { __u8 injected; @@ -284,4 +290,13 @@ struct kvm_vcpu_events { __u32 reserved[10]; }; +/* for KVM_GET/SET_DEBUGREGS */ +struct kvm_debugregs { + __u64 db[4]; + __u64 dr6; + __u64 dr7; + __u64 flags; + __u64 reserved[9]; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 7a6f54f..0b2729b 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -11,6 +11,8 @@ #ifndef _ASM_X86_KVM_X86_EMULATE_H #define _ASM_X86_KVM_X86_EMULATE_H +#include <asm/desc_defs.h> + struct x86_emulate_ctxt; /* @@ -63,6 +65,15 @@ struct x86_emulate_ops { unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); /* + * write_std: Write bytes of standard (non-emulated/special) memory. + * Used for descriptor writing. + * @addr: [IN ] Linear address to which to write. + * @val: [OUT] Value write to memory, zero-extended to 'u_long'. + * @bytes: [IN ] Number of bytes to write to memory. + */ + int (*write_std)(unsigned long addr, void *val, + unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error); + /* * fetch: Read bytes of standard (non-emulated/special) memory. * Used for instruction fetch. * @addr: [IN ] Linear address from which to read. @@ -109,6 +120,23 @@ struct x86_emulate_ops { unsigned int bytes, struct kvm_vcpu *vcpu); + int (*pio_in_emulated)(int size, unsigned short port, void *val, + unsigned int count, struct kvm_vcpu *vcpu); + + int (*pio_out_emulated)(int size, unsigned short port, const void *val, + unsigned int count, struct kvm_vcpu *vcpu); + + bool (*get_cached_descriptor)(struct desc_struct *desc, + int seg, struct kvm_vcpu *vcpu); + void (*set_cached_descriptor)(struct desc_struct *desc, + int seg, struct kvm_vcpu *vcpu); + u16 (*get_segment_selector)(int seg, struct kvm_vcpu *vcpu); + void (*set_segment_selector)(u16 sel, int seg, struct kvm_vcpu *vcpu); + void (*get_gdt)(struct desc_ptr *dt, struct kvm_vcpu *vcpu); + ulong (*get_cr)(int cr, struct kvm_vcpu *vcpu); + void (*set_cr)(int cr, ulong val, struct kvm_vcpu *vcpu); + int (*cpl)(struct kvm_vcpu *vcpu); + void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); }; /* Type, address-of, and value of an instruction's operand. */ @@ -124,6 +152,12 @@ struct fetch_cache { unsigned long end; }; +struct read_cache { + u8 data[1024]; + unsigned long pos; + unsigned long end; +}; + struct decode_cache { u8 twobyte; u8 b; @@ -139,7 +173,7 @@ struct decode_cache { u8 seg_override; unsigned int d; unsigned long regs[NR_VCPU_REGS]; - unsigned long eip, eip_orig; + unsigned long eip; /* modrm */ u8 modrm; u8 modrm_mod; @@ -151,16 +185,15 @@ struct decode_cache { void *modrm_ptr; unsigned long modrm_val; struct fetch_cache fetch; + struct read_cache io_read; }; -#define X86_SHADOW_INT_MOV_SS 1 -#define X86_SHADOW_INT_STI 2 - struct x86_emulate_ctxt { /* Register state before/after emulation. */ struct kvm_vcpu *vcpu; unsigned long eflags; + unsigned long eip; /* eip before instruction emulation */ /* Emulated execution mode, represented by an X86EMUL_MODE value. */ int mode; u32 cs_base; @@ -168,6 +201,7 @@ struct x86_emulate_ctxt { /* interruptibility state, as a result of execution of STI or MOV SS */ int interruptibility; + bool restart; /* restart string instruction after writeback */ /* decode cache */ struct decode_cache decode; }; @@ -194,5 +228,9 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); +int emulator_task_switch(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 tss_selector, int reason, + bool has_error_code, u32 error_code); #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06d9e79..3c31c5a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -171,8 +171,8 @@ struct kvm_pte_chain { union kvm_mmu_page_role { unsigned word; struct { - unsigned glevels:4; unsigned level:4; + unsigned cr4_pae:1; unsigned quadrant:2; unsigned pad_for_nice_hex_output:6; unsigned direct:1; @@ -187,8 +187,6 @@ struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; - struct list_head oos_link; - /* * The following two entries are used to key the shadow page in the * hash table. @@ -204,9 +202,9 @@ struct kvm_mmu_page { * in this shadow page. */ DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); - int multimapped; /* More than one parent_pte? */ - int root_count; /* Currently serving as active root */ + bool multimapped; /* More than one parent_pte? */ bool unsync; + int root_count; /* Currently serving as active root */ unsigned int unsync_children; union { u64 *parent_pte; /* !multimapped */ @@ -224,14 +222,9 @@ struct kvm_pv_mmu_op_buffer { struct kvm_pio_request { unsigned long count; - int cur_count; - gva_t guest_gva; int in; int port; int size; - int string; - int down; - int rep; }; /* @@ -362,8 +355,8 @@ struct kvm_vcpu_arch { u64 *mce_banks; /* used for guest single stepping over the given code position */ - u16 singlestep_cs; unsigned long singlestep_rip; + /* fields used by HYPER-V emulation */ u64 hv_vapic; }; @@ -389,6 +382,7 @@ struct kvm_arch { unsigned int n_free_mmu_pages; unsigned int n_requested_mmu_pages; unsigned int n_alloc_mmu_pages; + atomic_t invlpg_counter; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; /* * Hash table of struct kvm_mmu_page. @@ -461,11 +455,6 @@ struct kvm_vcpu_stat { u32 nmi_injections; }; -struct descriptor_table { - u16 limit; - unsigned long base; -} __attribute__((packed)); - struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ @@ -503,12 +492,11 @@ struct kvm_x86_ops { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); - void (*get_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); - void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); - void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); - void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt); - int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest); - int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value); + void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); @@ -587,23 +575,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address); -void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, - unsigned long *rflags); -unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr); -void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long value, - unsigned long *rflags); void kvm_enable_efer_bits(u64); int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); struct x86_emulate_ctxt; -int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, - int size, unsigned port); -int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, - int size, unsigned long count, int down, - gva_t address, int rep, unsigned port); +int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); @@ -616,12 +595,15 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); -int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason); +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, + bool has_error_code, u32 error_code); void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); +int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); +int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l); @@ -649,8 +631,6 @@ int emulator_write_emulated(unsigned long addr, unsigned int bytes, struct kvm_vcpu *vcpu); -unsigned long segment_base(u16 selector); - void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes, @@ -675,7 +655,6 @@ void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); void kvm_enable_tdp(void); void kvm_disable_tdp(void); -int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); bool kvm_check_iopl(struct kvm_vcpu *vcpu); @@ -724,23 +703,6 @@ static inline void kvm_load_ldt(u16 sel) asm("lldt %0" : : "rm"(sel)); } -static inline void kvm_get_idt(struct descriptor_table *table) -{ - asm("sidt %0" : "=m"(*table)); -} - -static inline void kvm_get_gdt(struct descriptor_table *table) -{ - asm("sgdt %0" : "=m"(*table)); -} - -static inline unsigned long kvm_read_tr_base(void) -{ - u16 tr; - asm("str %0" : "=g"(tr)); - return segment_base(tr); -} - #ifdef CONFIG_X86_64 static inline unsigned long read_msr(unsigned long msr) { @@ -826,4 +788,6 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_define_shared_msr(unsigned index, u32 msr); void kvm_set_shared_msr(unsigned index, u64 val, u64 mask); +bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index ba0eed8..b60f292 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -28,22 +28,39 @@ #ifndef __ASSEMBLY__ #include <asm/hw_irq.h> -#include <asm/kvm_para.h> /*G:030 * But first, how does our Guest contact the Host to ask for privileged * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * - * We use the KVM hypercall mechanism, though completely different hypercall - * numbers. Seventeen hypercalls are available: the hypercall number is put in - * the %eax register, and the arguments (when required) are placed in %ebx, - * %ecx, %edx and %esi. If a return value makes sense, it's returned in %eax. + * Our hypercall mechanism uses the highest unused trap code (traps 32 and + * above are used by real hardware interrupts). Seventeen hypercalls are + * available: the hypercall number is put in the %eax register, and the + * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. + * If a return value makes sense, it's returned in %eax. * * Grossly invalid calls result in Sudden Death at the hands of the vengeful * Host, rather than returning failure. This reflects Winston Churchill's * definition of a gentleman: "someone who is only rude intentionally". -:*/ + */ +static inline unsigned long +hcall(unsigned long call, + unsigned long arg1, unsigned long arg2, unsigned long arg3, + unsigned long arg4) +{ + /* "int" is the Intel instruction to trigger a trap. */ + asm volatile("int $" __stringify(LGUEST_TRAP_ENTRY) + /* The call in %eax (aka "a") might be overwritten */ + : "=a"(call) + /* The arguments are in %eax, %ebx, %ecx, %edx & %esi */ + : "a"(call), "b"(arg1), "c"(arg2), "d"(arg3), "S"(arg4) + /* "memory" means this might write somewhere in memory. + * This isn't true for all calls, but it's safe to tell + * gcc that it might happen so it doesn't get clever. */ + : "memory"); + return call; +} /* Can't use our min() macro here: needs to be a constant */ #define LGUEST_IRQS (NR_IRQS < 32 ? NR_IRQS: 32) diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 38638cd..0e83105 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -81,7 +81,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u32 event_inj_err; u64 nested_cr3; u64 lbr_ctl; - u8 reserved_5[832]; + u64 reserved_5; + u64 next_rip; + u8 reserved_6[816]; }; @@ -115,6 +117,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { #define SVM_IOIO_SIZE_MASK (7 << SVM_IOIO_SIZE_SHIFT) #define SVM_IOIO_ASIZE_MASK (7 << SVM_IOIO_ASIZE_SHIFT) +#define SVM_VM_CR_VALID_MASK 0x001fULL +#define SVM_VM_CR_SVM_LOCK_MASK 0x0008ULL +#define SVM_VM_CR_SVM_DIS_MASK 0x0010ULL + struct __attribute__ ((__packed__)) vmcb_seg { u16 selector; u16 attrib; @@ -238,6 +244,7 @@ struct __attribute__ ((__packed__)) vmcb { #define SVM_EXITINFOSHIFT_TS_REASON_IRET 36 #define SVM_EXITINFOSHIFT_TS_REASON_JMP 38 +#define SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE 44 #define SVM_EXIT_READ_CR0 0x000 #define SVM_EXIT_READ_CR3 0x003 diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f3dadb5..f854d89b 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -118,7 +118,7 @@ static bool check_device(struct device *dev) return false; /* No device or no PCI device */ - if (!dev || dev->bus != &pci_bus_type) + if (dev->bus != &pci_bus_type) return false; devid = get_device_id(dev); @@ -392,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) u32 tail, head; u8 *target; + WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); @@ -2186,7 +2187,7 @@ static void prealloc_protection_domains(void) struct dma_ops_domain *dma_dom; u16 devid; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { /* Do we handle this device? */ if (!check_device(&dev->dev)) @@ -2298,7 +2299,7 @@ static void cleanup_domain(struct protection_domain *domain) list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { struct device *dev = dev_data->dev; - do_detach(dev); + __detach_device(dev); atomic_set(&dev_data->bind, 0); } @@ -2327,6 +2328,7 @@ static struct protection_domain *protection_domain_alloc(void) return NULL; spin_lock_init(&domain->lock); + mutex_init(&domain->api_lock); domain->id = domain_id_alloc(); if (!domain->id) goto out_err; @@ -2379,9 +2381,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) free_pagetable(domain); - domain_id_free(domain->id); - - kfree(domain); + protection_domain_free(domain); dom->priv = NULL; } @@ -2456,6 +2456,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, iova &= PAGE_MASK; paddr &= PAGE_MASK; + mutex_lock(&domain->api_lock); + for (i = 0; i < npages; ++i) { ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); if (ret) @@ -2465,6 +2467,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, paddr += PAGE_SIZE; } + mutex_unlock(&domain->api_lock); + return 0; } @@ -2477,12 +2481,16 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, iova &= PAGE_MASK; + mutex_lock(&domain->api_lock); + for (i = 0; i < npages; ++i) { iommu_unmap_page(domain, iova, PM_MAP_4k); iova += PAGE_SIZE; } iommu_flush_tlb_pde(domain); + + mutex_unlock(&domain->api_lock); } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 42f5350..6360abf 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -138,9 +138,9 @@ int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; /* - * Set to true if ACPI table parsing and hardware intialization went properly + * The ACPI table parsing functions set this variable on an error */ -static bool amd_iommu_initialized; +static int __initdata amd_iommu_init_err; /* * List of protection domains - used during resume @@ -391,9 +391,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) */ for (i = 0; i < table->length; ++i) checksum += p[i]; - if (checksum != 0) + if (checksum != 0) { /* ACPI table corrupt */ - return -ENODEV; + amd_iommu_init_err = -ENODEV; + return 0; + } p += IVRS_HEADER_LENGTH; @@ -436,7 +438,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) if (cmd_buf == NULL) return NULL; - iommu->cmd_buf_size = CMD_BUFFER_SIZE; + iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; return cmd_buf; } @@ -472,12 +474,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) &entry, sizeof(entry)); amd_iommu_reset_cmd_buffer(iommu); + iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); } static void __init free_command_buffer(struct amd_iommu *iommu) { free_pages((unsigned long)iommu->cmd_buf, - get_order(iommu->cmd_buf_size)); + get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); } /* allocates the memory where the IOMMU will log its events to */ @@ -920,11 +923,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) h->mmio_phys); iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); - if (iommu == NULL) - return -ENOMEM; + if (iommu == NULL) { + amd_iommu_init_err = -ENOMEM; + return 0; + } + ret = init_iommu_one(iommu, h); - if (ret) - return ret; + if (ret) { + amd_iommu_init_err = ret; + return 0; + } break; default: break; @@ -934,8 +942,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) } WARN_ON(p != end); - amd_iommu_initialized = true; - return 0; } @@ -1211,6 +1217,10 @@ static int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) return -ENODEV; + ret = amd_iommu_init_err; + if (ret) + goto out; + dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); @@ -1270,12 +1280,19 @@ static int __init amd_iommu_init(void) if (acpi_table_parse("IVRS", init_iommu_all) != 0) goto free; - if (!amd_iommu_initialized) + if (amd_iommu_init_err) { + ret = amd_iommu_init_err; goto free; + } if (acpi_table_parse("IVRS", init_memory_definitions) != 0) goto free; + if (amd_iommu_init_err) { + ret = amd_iommu_init_err; + goto free; + } + ret = sysdev_class_register(&amd_iommu_sysdev_class); if (ret) goto free; @@ -1288,6 +1305,8 @@ static int __init amd_iommu_init(void) if (ret) goto free; + enable_iommus(); + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else @@ -1300,8 +1319,6 @@ static int __init amd_iommu_init(void) amd_iommu_init_notifier(); - enable_iommus(); - if (iommu_pass_through) goto out; @@ -1315,6 +1332,7 @@ out: return ret; free: + disable_iommus(); amd_iommu_uninit_devices(); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 3704997..b5d8b0b 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -393,6 +393,7 @@ void __init gart_iommu_hole_init(void) for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; int dev_base, dev_limit; + u32 ctl; bus = bus_dev_ranges[i].bus; dev_base = bus_dev_ranges[i].dev_base; @@ -406,7 +407,19 @@ void __init gart_iommu_hole_init(void) gart_iommu_aperture = 1; x86_init.iommu.iommu_init = gart_iommu_init; - aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; + ctl = read_pci_config(bus, slot, 3, + AMD64_GARTAPERTURECTL); + + /* + * Before we do anything else disable the GART. It may + * still be enabled if we boot into a crash-kernel here. + * Reconfiguring the GART while it is enabled could have + * unknown side-effects. + */ + ctl &= ~GARTEN; + write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); + + aper_order = (ctl >> 1) & 7; aper_size = (32 * 1024 * 1024) << aper_order; aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; aper_base <<= 25; diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index a4849c1..ebd4c51 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -27,7 +27,6 @@ #include <asm/cpu.h> #include <asm/reboot.h> #include <asm/virtext.h> -#include <asm/x86_init.h> #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) @@ -103,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs) #ifdef CONFIG_HPET_TIMER hpet_disable(); #endif - -#ifdef CONFIG_X86_64 - x86_platform.iommu_shutdown(); -#endif - crash_save_cpu(regs, safe_smp_processor_id()); } diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index e39e771..e1a93be 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h @@ -14,6 +14,8 @@ #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) #endif +#include <linux/uaccess.h> + extern void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl); @@ -42,8 +44,10 @@ static inline unsigned long rewind_frame_pointer(int n) get_bp(frame); #ifdef CONFIG_FRAME_POINTER - while (n--) - frame = frame->next_frame; + while (n--) { + if (probe_kernel_address(&frame->next_frame, frame)) + break; + } #endif return (unsigned long)frame; diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 68cd24f..0f7f130 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -565,6 +565,9 @@ static void enable_gart_translations(void) enable_gart_translation(dev, __pa(agp_gatt_table)); } + + /* Flush the GART-TLB to remove stale entries */ + k8_flush_garts(); } /* diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4dade6a..5ac0bb4 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -33,6 +33,7 @@ #include <asm/kvm_emulate.h> #include "x86.h" +#include "tss.h" /* * Opcode effective-address decode tables. @@ -50,6 +51,8 @@ #define DstReg (2<<1) /* Register operand. */ #define DstMem (3<<1) /* Memory operand. */ #define DstAcc (4<<1) /* Destination Accumulator */ +#define DstDI (5<<1) /* Destination is in ES:(E)DI */ +#define DstMem64 (6<<1) /* 64bit memory operand */ #define DstMask (7<<1) /* Source operand type. */ #define SrcNone (0<<4) /* No source operand. */ @@ -63,6 +66,7 @@ #define SrcOne (7<<4) /* Implied '1' */ #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ #define SrcImmU (9<<4) /* Immediate operand, unsigned */ +#define SrcSI (0xa<<4) /* Source is in the DS:RSI */ #define SrcMask (0xf<<4) /* Generic ModRM decode. */ #define ModRM (1<<8) @@ -85,6 +89,9 @@ #define Src2ImmByte (2<<29) #define Src2One (3<<29) #define Src2Imm16 (4<<29) +#define Src2Mem16 (5<<29) /* Used for Ep encoding. First argument has to be + in memory and second argument is located + immediately after the first one in memory. */ #define Src2Mask (7<<29) enum { @@ -147,8 +154,8 @@ static u32 opcode_table[256] = { 0, 0, 0, 0, /* 0x68 - 0x6F */ SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ + DstDI | ByteOp | Mov | String, DstDI | Mov | String, /* insb, insw/insd */ + SrcSI | ByteOp | ImplicitOps | String, SrcSI | ImplicitOps | String, /* outsb, outsw/outsd */ /* 0x70 - 0x77 */ SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, @@ -173,12 +180,12 @@ static u32 opcode_table[256] = { /* 0xA0 - 0xA7 */ ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, - ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, - ByteOp | ImplicitOps | String, ImplicitOps | String, + ByteOp | SrcSI | DstDI | Mov | String, SrcSI | DstDI | Mov | String, + ByteOp | SrcSI | DstDI | String, SrcSI | DstDI | String, /* 0xA8 - 0xAF */ - 0, 0, ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, - ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, - ByteOp | ImplicitOps | String, ImplicitOps | String, + 0, 0, ByteOp | DstDI | Mov | String, DstDI | Mov | String, + ByteOp | SrcSI | DstAcc | Mov | String, SrcSI | DstAcc | Mov | String, + ByteOp | DstDI | String, DstDI | String, /* 0xB0 - 0xB7 */ ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, ByteOp | DstReg | SrcImm | Mov, @@ -204,13 +211,13 @@ static u32 opcode_table[256] = { 0, 0, 0, 0, 0, 0, 0, 0, /* 0xE0 - 0xE7 */ 0, 0, 0, 0, - ByteOp | SrcImmUByte, SrcImmUByte, - ByteOp | SrcImmUByte, SrcImmUByte, + ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, + ByteOp | SrcImmUByte | DstAcc, SrcImmUByte | DstAcc, /* 0xE8 - 0xEF */ SrcImm | Stack, SrcImm | ImplicitOps, SrcImmU | Src2Imm16 | No64, SrcImmByte | ImplicitOps, - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, - SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, + SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, + SrcNone | ByteOp | DstAcc, SrcNone | DstAcc, /* 0xF0 - 0xF7 */ 0, 0, 0, 0, ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3, @@ -343,7 +350,8 @@ static u32 group_table[] = { [Group5*8] = DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, SrcMem | ModRM | Stack, 0, - SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0, + SrcMem | ModRM | Stack, SrcMem | ModRM | Src2Mem16 | ImplicitOps, + SrcMem | ModRM | Stack, 0, [Group7*8] = 0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv, SrcNone | ModRM | DstMem | Mov, 0, @@ -353,14 +361,14 @@ static u32 group_table[] = { DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock, [Group9*8] = - 0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0, + 0, DstMem64 | ModRM | Lock, 0, 0, 0, 0, 0, 0, }; static u32 group2_table[] = { [Group7*8] = - SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM, + SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM | Priv, SrcNone | ModRM | DstMem | Mov, 0, - SrcMem16 | ModRM | Mov, 0, + SrcMem16 | ModRM | Mov | Priv, 0, [Group9*8] = 0, 0, 0, 0, 0, 0, 0, 0, }; @@ -562,7 +570,7 @@ static u32 group2_table[] = { #define insn_fetch(_type, _size, _eip) \ ({ unsigned long _x; \ rc = do_insn_fetch(ctxt, ops, (_eip), &_x, (_size)); \ - if (rc != 0) \ + if (rc != X86EMUL_CONTINUE) \ goto done; \ (_eip) += (_size); \ (_type)_x; \ @@ -638,40 +646,40 @@ static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, - unsigned long linear, u8 *dest) + unsigned long eip, u8 *dest) { struct fetch_cache *fc = &ctxt->decode.fetch; int rc; - int size; + int size, cur_size; - if (linear < fc->start || linear >= fc->end) { - size = min(15UL, PAGE_SIZE - offset_in_page(linear)); - rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL); - if (rc) + if (eip == fc->end) { + cur_size = fc->end - fc->start; + size = min(15UL - cur_size, PAGE_SIZE - offset_in_page(eip)); + rc = ops->fetch(ctxt->cs_base + eip, fc->data + cur_size, + size, ctxt->vcpu, NULL); + if (rc != X86EMUL_CONTINUE) return rc; - fc->start = linear; - fc->end = linear + size; + fc->end += size; } - *dest = fc->data[linear - fc->start]; - return 0; + *dest = fc->data[eip - fc->start]; + return X86EMUL_CONTINUE; } static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, unsigned long eip, void *dest, unsigned size) { - int rc = 0; + int rc; /* x86 instructions are limited to 15 bytes. */ - if (eip + size - ctxt->decode.eip_orig > 15) + if (eip + size - ctxt->eip > 15) return X86EMUL_UNHANDLEABLE; - eip += ctxt->cs_base; while (size--) { rc = do_fetch_insn_byte(ctxt, ops, eip++, dest++); - if (rc) + if (rc != X86EMUL_CONTINUE) return rc; } - return 0; + return X86EMUL_CONTINUE; } /* @@ -702,7 +710,7 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, *address = 0; rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2, ctxt->vcpu, NULL); - if (rc) + if (rc != X86EMUL_CONTINUE) return rc; rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes, ctxt->vcpu, NULL); @@ -782,7 +790,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, struct decode_cache *c = &ctxt->decode; u8 sib; int index_reg = 0, base_reg = 0, scale; - int rc = 0; + int rc = X86EMUL_CONTINUE; if (c->rex_prefix) { c->modrm_reg = (c->rex_prefix & 4) << 1; /* REX.R */ @@ -895,7 +903,7 @@ static int decode_abs(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc = 0; + int rc = X86EMUL_CONTINUE; switch (c->ad_bytes) { case 2: @@ -916,14 +924,18 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc = 0; + int rc = X86EMUL_CONTINUE; int mode = ctxt->mode; int def_op_bytes, def_ad_bytes, group; - /* Shadow copy of register state. Committed on successful emulation. */ + /* we cannot decode insn before we complete previous rep insn */ + WARN_ON(ctxt->restart); + + /* Shadow copy of register state. Committed on successful emulation. */ memset(c, 0, sizeof(struct decode_cache)); - c->eip = c->eip_orig = kvm_rip_read(ctxt->vcpu); + c->eip = ctxt->eip; + c->fetch.start = c->fetch.end = c->eip; ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); @@ -1015,11 +1027,6 @@ done_prefixes: } } - if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { - kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction"); - return -1; - } - if (c->d & Group) { group = c->d & GroupMask; c->modrm = insn_fetch(u8, 1, c->eip); @@ -1046,7 +1053,7 @@ done_prefixes: rc = decode_modrm(ctxt, ops); else if (c->d & MemAbs) rc = decode_abs(ctxt, ops); - if (rc) + if (rc != X86EMUL_CONTINUE) goto done; if (!c->has_seg_override) @@ -1057,6 +1064,10 @@ done_prefixes: if (c->ad_bytes != 8) c->modrm_ea = (u32)c->modrm_ea; + + if (c->rip_relative) + c->modrm_ea += c->eip; + /* * Decode and fetch the source operand: register, memory * or immediate. @@ -1091,6 +1102,8 @@ done_prefixes: break; } c->src.type = OP_MEM; + c->src.ptr = (unsigned long *)c->modrm_ea; + c->src.val = 0; break; case SrcImm: case SrcImmU: @@ -1139,6 +1152,14 @@ done_prefixes: c->src.bytes = 1; c->src.val = 1; break; + case SrcSI: + c->src.type = OP_MEM; + c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->src.ptr = (unsigned long *) + register_address(c, seg_override_base(ctxt, c), + c->regs[VCPU_REGS_RSI]); + c->src.val = 0; + break; } /* @@ -1168,6 +1189,12 @@ done_prefixes: c->src2.bytes = 1; c->src2.val = 1; break; + case Src2Mem16: + c->src2.type = OP_MEM; + c->src2.bytes = 2; + c->src2.ptr = (unsigned long *)(c->modrm_ea + c->src.bytes); + c->src2.val = 0; + break; } /* Decode and fetch the destination operand: register or memory. */ @@ -1180,6 +1207,7 @@ done_prefixes: c->twobyte && (c->b == 0xb6 || c->b == 0xb7)); break; case DstMem: + case DstMem64: if ((c->d & ModRM) && c->modrm_mod == 3) { c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.type = OP_REG; @@ -1188,12 +1216,24 @@ done_prefixes: break; } c->dst.type = OP_MEM; + c->dst.ptr = (unsigned long *)c->modrm_ea; + if ((c->d & DstMask) == DstMem64) + c->dst.bytes = 8; + else + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->dst.val = 0; + if (c->d & BitOp) { + unsigned long mask = ~(c->dst.bytes * 8 - 1); + + c->dst.ptr = (void *)c->dst.ptr + + (c->src.val & mask) / 8; + } break; case DstAcc: c->dst.type = OP_REG; - c->dst.bytes = c->op_bytes; + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; c->dst.ptr = &c->regs[VCPU_REGS_RAX]; - switch (c->op_bytes) { + switch (c->dst.bytes) { case 1: c->dst.val = *(u8 *)c->dst.ptr; break; @@ -1203,18 +1243,248 @@ done_prefixes: case 4: c->dst.val = *(u32 *)c->dst.ptr; break; + case 8: + c->dst.val = *(u64 *)c->dst.ptr; + break; } c->dst.orig_val = c->dst.val; break; + case DstDI: + c->dst.type = OP_MEM; + c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; + c->dst.ptr = (unsigned long *) + register_address(c, es_base(ctxt), + c->regs[VCPU_REGS_RDI]); + c->dst.val = 0; + break; } - if (c->rip_relative) - c->modrm_ea += c->eip; - done: return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; } +static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + unsigned int size, unsigned short port, + void *dest) +{ + struct read_cache *rc = &ctxt->decode.io_read; + + if (rc->pos == rc->end) { /* refill pio read ahead */ + struct decode_cache *c = &ctxt->decode; + unsigned int in_page, n; + unsigned int count = c->rep_prefix ? + address_mask(c, c->regs[VCPU_REGS_RCX]) : 1; + in_page = (ctxt->eflags & EFLG_DF) ? + offset_in_page(c->regs[VCPU_REGS_RDI]) : + PAGE_SIZE - offset_in_page(c->regs[VCPU_REGS_RDI]); + n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, + count); + if (n == 0) + n = 1; + rc->pos = rc->end = 0; + if (!ops->pio_in_emulated(size, port, rc->data, n, ctxt->vcpu)) + return 0; + rc->end = n * size; + } + + memcpy(dest, rc->data + rc->pos, size); + rc->pos += size; + return 1; +} + +static u32 desc_limit_scaled(struct desc_struct *desc) +{ + u32 limit = get_desc_limit(desc); + + return desc->g ? (limit << 12) | 0xfff : limit; +} + +static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 selector, struct desc_ptr *dt) +{ + if (selector & 1 << 2) { + struct desc_struct desc; + memset (dt, 0, sizeof *dt); + if (!ops->get_cached_descriptor(&desc, VCPU_SREG_LDTR, ctxt->vcpu)) + return; + + dt->size = desc_limit_scaled(&desc); /* what if limit > 65535? */ + dt->address = get_desc_base(&desc); + } else + ops->get_gdt(dt, ctxt->vcpu); +} + +/* allowed just for 8 bytes segments */ +static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 selector, struct desc_struct *desc) +{ + struct desc_ptr dt; + u16 index = selector >> 3; + int ret; + u32 err; + ulong addr; + + get_descriptor_table_ptr(ctxt, ops, selector, &dt); + + if (dt.size < index * 8 + 7) { + kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); + return X86EMUL_PROPAGATE_FAULT; + } + addr = dt.address + index * 8; + ret = ops->read_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); + if (ret == X86EMUL_PROPAGATE_FAULT) + kvm_inject_page_fault(ctxt->vcpu, addr, err); + + return ret; +} + +/* allowed just for 8 bytes segments */ +static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 selector, struct desc_struct *desc) +{ + struct desc_ptr dt; + u16 index = selector >> 3; + u32 err; + ulong addr; + int ret; + + get_descriptor_table_ptr(ctxt, ops, selector, &dt); + + if (dt.size < index * 8 + 7) { + kvm_inject_gp(ctxt->vcpu, selector & 0xfffc); + return X86EMUL_PROPAGATE_FAULT; + } + + addr = dt.address + index * 8; + ret = ops->write_std(addr, desc, sizeof *desc, ctxt->vcpu, &err); + if (ret == X86EMUL_PROPAGATE_FAULT) + kvm_inject_page_fault(ctxt->vcpu, addr, err); + + return ret; +} + +static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 selector, int seg) +{ + struct desc_struct seg_desc; + u8 dpl, rpl, cpl; + unsigned err_vec = GP_VECTOR; + u32 err_code = 0; + bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ + int ret; + + memset(&seg_desc, 0, sizeof seg_desc); + + if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86) + || ctxt->mode == X86EMUL_MODE_REAL) { + /* set real mode segment descriptor */ + set_desc_base(&seg_desc, selector << 4); + set_desc_limit(&seg_desc, 0xffff); + seg_desc.type = 3; + seg_desc.p = 1; + seg_desc.s = 1; + goto load; + } + + /* NULL selector is not valid for TR, CS and SS */ + if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) + && null_selector) + goto exception; + + /* TR should be in GDT only */ + if (seg == VCPU_SREG_TR && (selector & (1 << 2))) + goto exception; + + if (null_selector) /* for NULL selector skip all following checks */ + goto load; + + ret = read_segment_descriptor(ctxt, ops, selector, &seg_desc); + if (ret != X86EMUL_CONTINUE) + return ret; + + err_code = selector & 0xfffc; + err_vec = GP_VECTOR; + + /* can't load system descriptor into segment selecor */ + if (seg <= VCPU_SREG_GS && !seg_desc.s) + goto exception; + + if (!seg_desc.p) { + err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; + goto exception; + } + + rpl = selector & 3; + dpl = seg_desc.dpl; + cpl = ops->cpl(ctxt->vcpu); + + switch (seg) { + case VCPU_SREG_SS: + /* + * segment is not a writable data segment or segment + * selector's RPL != CPL or segment selector's RPL != CPL + */ + if (rpl != cpl || (seg_desc.type & 0xa) != 0x2 || dpl != cpl) + goto exception; + break; + case VCPU_SREG_CS: + if (!(seg_desc.type & 8)) + goto exception; + + if (seg_desc.type & 4) { + /* conforming */ + if (dpl > cpl) + goto exception; + } else { + /* nonconforming */ + if (rpl > cpl || dpl != cpl) + goto exception; + } + /* CS(RPL) <- CPL */ + selector = (selector & 0xfffc) | cpl; + break; + case VCPU_SREG_TR: + if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) + goto exception; + break; + case VCPU_SREG_LDTR: + if (seg_desc.s || seg_desc.type != 2) + goto exception; + break; + default: /* DS, ES, FS, or GS */ + /* + * segment is not a data or readable code segment or + * ((segment is a data or nonconforming code segment) + * and (both RPL and CPL > DPL)) + */ + if ((seg_desc.type & 0xa) == 0x8 || + (((seg_desc.type & 0xc) != 0xc) && + (rpl > dpl && cpl > dpl))) + goto exception; + break; + } + + if (seg_desc.s) { + /* mark segment as accessed */ + seg_desc.type |= 1; + ret = write_segment_descriptor(ctxt, ops, selector, &seg_desc); + if (ret != X86EMUL_CONTINUE) + return ret; + } +load: + ops->set_segment_selector(selector, seg, ctxt->vcpu); + ops->set_cached_descriptor(&seg_desc, seg, ctxt->vcpu); + return X86EMUL_CONTINUE; +exception: + kvm_queue_exception_e(ctxt->vcpu, err_vec, err_code); + return X86EMUL_PROPAGATE_FAULT; +} + static inline void emulate_push(struct x86_emulate_ctxt *ctxt) { struct decode_cache *c = &ctxt->decode; @@ -1251,7 +1521,7 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, int rc; unsigned long val, change_mask; int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu); + int cpl = ops->cpl(ctxt->vcpu); rc = emulate_pop(ctxt, ops, &val, len); if (rc != X86EMUL_CONTINUE) @@ -1306,10 +1576,10 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt, int rc; rc = emulate_pop(ctxt, ops, &selector, c->op_bytes); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) return rc; - rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg); + rc = load_segment_descriptor(ctxt, ops, (u16)selector, seg); return rc; } @@ -1332,7 +1602,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc = 0; + int rc = X86EMUL_CONTINUE; int reg = VCPU_REGS_RDI; while (reg >= VCPU_REGS_RAX) { @@ -1343,7 +1613,7 @@ static int emulate_popa(struct x86_emulate_ctxt *ctxt, } rc = emulate_pop(ctxt, ops, &c->regs[reg], c->op_bytes); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) break; --reg; } @@ -1354,12 +1624,8 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc; - rc = emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); - if (rc != 0) - return rc; - return 0; + return emulate_pop(ctxt, ops, &c->dst.val, c->dst.bytes); } static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt) @@ -1395,7 +1661,6 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - int rc = 0; switch (c->modrm_reg) { case 0 ... 1: /* test */ @@ -1408,11 +1673,9 @@ static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt, emulate_1op("neg", c->dst, ctxt->eflags); break; default: - DPRINTF("Cannot emulate %02x\n", c->b); - rc = X86EMUL_UNHANDLEABLE; - break; + return 0; } - return rc; + return 1; } static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, @@ -1442,20 +1705,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt, emulate_push(ctxt); break; } - return 0; + return X86EMUL_CONTINUE; } static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, - struct x86_emulate_ops *ops, - unsigned long memop) + struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - u64 old, new; - int rc; - - rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - return rc; + u64 old = c->dst.orig_val; if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { @@ -1463,17 +1720,13 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); ctxt->eflags &= ~EFLG_ZF; - } else { - new = ((u64)c->regs[VCPU_REGS_RCX] << 32) | + c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | (u32) c->regs[VCPU_REGS_RBX]; - rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - return rc; ctxt->eflags |= EFLG_ZF; } - return 0; + return X86EMUL_CONTINUE; } static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, @@ -1484,14 +1737,14 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt, unsigned long cs; rc = emulate_pop(ctxt, ops, &c->eip, c->op_bytes); - if (rc) + if (rc != X86EMUL_CONTINUE) return rc; if (c->op_bytes == 4) c->eip = (u32)c->eip; rc = emulate_pop(ctxt, ops, &cs, c->op_bytes); - if (rc) + if (rc != X86EMUL_CONTINUE) return rc; - rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS); + rc = load_segment_descriptor(ctxt, ops, (u16)cs, VCPU_SREG_CS); return rc; } @@ -1544,7 +1797,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, default: break; } - return 0; + return X86EMUL_CONTINUE; } static void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) @@ -1598,8 +1851,11 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt) u64 msr_data; /* syscall is not available in real mode */ - if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) - return X86EMUL_UNHANDLEABLE; + if (ctxt->mode == X86EMUL_MODE_REAL || + ctxt->mode == X86EMUL_MODE_VM86) { + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); + return X86EMUL_PROPAGATE_FAULT; + } setup_syscalls_segments(ctxt, &cs, &ss); @@ -1649,14 +1905,16 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt) /* inject #GP if in real mode */ if (ctxt->mode == X86EMUL_MODE_REAL) { kvm_inject_gp(ctxt->vcpu, 0); - return X86EMUL_UNHANDLEABLE; + return X86EMUL_PROPAGATE_FAULT; } /* XXX sysenter/sysexit have not been tested in 64bit mode. * Therefore, we inject an #UD. */ - if (ctxt->mode == X86EMUL_MODE_PROT64) - return X86EMUL_UNHANDLEABLE; + if (ctxt->mode == X86EMUL_MODE_PROT64) { + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); + return X86EMUL_PROPAGATE_FAULT; + } setup_syscalls_segments(ctxt, &cs, &ss); @@ -1711,7 +1969,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86) { kvm_inject_gp(ctxt->vcpu, 0); - return X86EMUL_UNHANDLEABLE; + return X86EMUL_PROPAGATE_FAULT; } setup_syscalls_segments(ctxt, &cs, &ss); @@ -1756,7 +2014,8 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) +static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops) { int iopl; if (ctxt->mode == X86EMUL_MODE_REAL) @@ -1764,7 +2023,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) if (ctxt->mode == X86EMUL_MODE_VM86) return true; iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; - return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl; + return ops->cpl(ctxt->vcpu) > iopl; } static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt, @@ -1801,22 +2060,419 @@ static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops, u16 port, u16 len) { - if (emulator_bad_iopl(ctxt)) + if (emulator_bad_iopl(ctxt, ops)) if (!emulator_io_port_access_allowed(ctxt, ops, port, len)) return false; return true; } +static u32 get_cached_descriptor_base(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + int seg) +{ + struct desc_struct desc; + if (ops->get_cached_descriptor(&desc, seg, ctxt->vcpu)) + return get_desc_base(&desc); + else + return ~0; +} + +static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + struct tss_segment_16 *tss) +{ + struct decode_cache *c = &ctxt->decode; + + tss->ip = c->eip; + tss->flag = ctxt->eflags; + tss->ax = c->regs[VCPU_REGS_RAX]; + tss->cx = c->regs[VCPU_REGS_RCX]; + tss->dx = c->regs[VCPU_REGS_RDX]; + tss->bx = c->regs[VCPU_REGS_RBX]; + tss->sp = c->regs[VCPU_REGS_RSP]; + tss->bp = c->regs[VCPU_REGS_RBP]; + tss->si = c->regs[VCPU_REGS_RSI]; + tss->di = c->regs[VCPU_REGS_RDI]; + + tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); + tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); + tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); + tss->ldt = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); +} + +static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + struct tss_segment_16 *tss) +{ + struct decode_cache *c = &ctxt->decode; + int ret; + + c->eip = tss->ip; + ctxt->eflags = tss->flag | 2; + c->regs[VCPU_REGS_RAX] = tss->ax; + c->regs[VCPU_REGS_RCX] = tss->cx; + c->regs[VCPU_REGS_RDX] = tss->dx; + c->regs[VCPU_REGS_RBX] = tss->bx; + c->regs[VCPU_REGS_RSP] = tss->sp; + c->regs[VCPU_REGS_RBP] = tss->bp; + c->regs[VCPU_REGS_RSI] = tss->si; + c->regs[VCPU_REGS_RDI] = tss->di; + + /* + * SDM says that segment selectors are loaded before segment + * descriptors + */ + ops->set_segment_selector(tss->ldt, VCPU_SREG_LDTR, ctxt->vcpu); + ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); + ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); + ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); + + /* + * Now load segment descriptors. If fault happenes at this stage + * it is handled in a context of new task + */ + ret = load_segment_descriptor(ctxt, ops, tss->ldt, VCPU_SREG_LDTR); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); + if (ret != X86EMUL_CONTINUE) + return ret; + + return X86EMUL_CONTINUE; +} + +static int task_switch_16(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 tss_selector, u16 old_tss_sel, + ulong old_tss_base, struct desc_struct *new_desc) +{ + struct tss_segment_16 tss_seg; + int ret; + u32 err, new_tss_base = get_desc_base(new_desc); + + ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + &err); + if (ret == X86EMUL_PROPAGATE_FAULT) { + /* FIXME: need to provide precise fault address */ + kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); + return ret; + } + + save_state_to_tss16(ctxt, ops, &tss_seg); + + ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + &err); + if (ret == X86EMUL_PROPAGATE_FAULT) { + /* FIXME: need to provide precise fault address */ + kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); + return ret; + } + + ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + &err); + if (ret == X86EMUL_PROPAGATE_FAULT) { + /* FIXME: need to provide precise fault address */ + kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); + return ret; + } + + if (old_tss_sel != 0xffff) { + tss_seg.prev_task_link = old_tss_sel; + + ret = ops->write_std(new_tss_base, + &tss_seg.prev_task_link, + sizeof tss_seg.prev_task_link, + ctxt->vcpu, &err); + if (ret == X86EMUL_PROPAGATE_FAULT) { + /* FIXME: need to provide precise fault address */ + kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); + return ret; + } + } + + return load_state_from_tss16(ctxt, ops, &tss_seg); +} + +static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + struct tss_segment_32 *tss) +{ + struct decode_cache *c = &ctxt->decode; + + tss->cr3 = ops->get_cr(3, ctxt->vcpu); + tss->eip = c->eip; + tss->eflags = ctxt->eflags; + tss->eax = c->regs[VCPU_REGS_RAX]; + tss->ecx = c->regs[VCPU_REGS_RCX]; + tss->edx = c->regs[VCPU_REGS_RDX]; + tss->ebx = c->regs[VCPU_REGS_RBX]; + tss->esp = c->regs[VCPU_REGS_RSP]; + tss->ebp = c->regs[VCPU_REGS_RBP]; + tss->esi = c->regs[VCPU_REGS_RSI]; + tss->edi = c->regs[VCPU_REGS_RDI]; + + tss->es = ops->get_segment_selector(VCPU_SREG_ES, ctxt->vcpu); + tss->cs = ops->get_segment_selector(VCPU_SREG_CS, ctxt->vcpu); + tss->ss = ops->get_segment_selector(VCPU_SREG_SS, ctxt->vcpu); + tss->ds = ops->get_segment_selector(VCPU_SREG_DS, ctxt->vcpu); + tss->fs = ops->get_segment_selector(VCPU_SREG_FS, ctxt->vcpu); + tss->gs = ops->get_segment_selector(VCPU_SREG_GS, ctxt->vcpu); + tss->ldt_selector = ops->get_segment_selector(VCPU_SREG_LDTR, ctxt->vcpu); +} + +static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + struct tss_segment_32 *tss) +{ + struct decode_cache *c = &ctxt->decode; + int ret; + + ops->set_cr(3, tss->cr3, ctxt->vcpu); + c->eip = tss->eip; + ctxt->eflags = tss->eflags | 2; + c->regs[VCPU_REGS_RAX] = tss->eax; + c->regs[VCPU_REGS_RCX] = tss->ecx; + c->regs[VCPU_REGS_RDX] = tss->edx; + c->regs[VCPU_REGS_RBX] = tss->ebx; + c->regs[VCPU_REGS_RSP] = tss->esp; + c->regs[VCPU_REGS_RBP] = tss->ebp; + c->regs[VCPU_REGS_RSI] = tss->esi; + c->regs[VCPU_REGS_RDI] = tss->edi; + + /* + * SDM says that segment selectors are loaded before segment + * descriptors + */ + ops->set_segment_selector(tss->ldt_selector, VCPU_SREG_LDTR, ctxt->vcpu); + ops->set_segment_selector(tss->es, VCPU_SREG_ES, ctxt->vcpu); + ops->set_segment_selector(tss->cs, VCPU_SREG_CS, ctxt->vcpu); + ops->set_segment_selector(tss->ss, VCPU_SREG_SS, ctxt->vcpu); + ops->set_segment_selector(tss->ds, VCPU_SREG_DS, ctxt->vcpu); + ops->set_segment_selector(tss->fs, VCPU_SREG_FS, ctxt->vcpu); + ops->set_segment_selector(tss->gs, VCPU_SREG_GS, ctxt->vcpu); + + /* + * Now load segment descriptors. If fault happenes at this stage + * it is handled in a context of new task + */ + ret = load_segment_descriptor(ctxt, ops, tss->ldt_selector, VCPU_SREG_LDTR); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->es, VCPU_SREG_ES); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->cs, VCPU_SREG_CS); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->ss, VCPU_SREG_SS); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->ds, VCPU_SREG_DS); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->fs, VCPU_SREG_FS); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = load_segment_descriptor(ctxt, ops, tss->gs, VCPU_SREG_GS); + if (ret != X86EMUL_CONTINUE) + return ret; + + return X86EMUL_CONTINUE; +} + +static int task_switch_32(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 tss_selector, u16 old_tss_sel, + ulong old_tss_base, struct desc_struct *new_desc) +{ + struct tss_segment_32 tss_seg; + int ret; + u32 err, new_tss_base = get_desc_base(new_desc); + + ret = ops->read_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + &err); + if (ret == X86EMUL_PROPAGATE_FAULT) { + /* FIXME: need to provide precise fault address */ + kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); + return ret; + } + + save_state_to_tss32(ctxt, ops, &tss_seg); + + ret = ops->write_std(old_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + &err); + if (ret == X86EMUL_PROPAGATE_FAULT) { + /* FIXME: need to provide precise fault address */ + kvm_inject_page_fault(ctxt->vcpu, old_tss_base, err); + return ret; + } + + ret = ops->read_std(new_tss_base, &tss_seg, sizeof tss_seg, ctxt->vcpu, + &err); + if (ret == X86EMUL_PROPAGATE_FAULT) { + /* FIXME: need to provide precise fault address */ + kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); + return ret; + } + + if (old_tss_sel != 0xffff) { + tss_seg.prev_task_link = old_tss_sel; + + ret = ops->write_std(new_tss_base, + &tss_seg.prev_task_link, + sizeof tss_seg.prev_task_link, + ctxt->vcpu, &err); + if (ret == X86EMUL_PROPAGATE_FAULT) { + /* FIXME: need to provide precise fault address */ + kvm_inject_page_fault(ctxt->vcpu, new_tss_base, err); + return ret; + } + } + + return load_state_from_tss32(ctxt, ops, &tss_seg); +} + +static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 tss_selector, int reason, + bool has_error_code, u32 error_code) +{ + struct desc_struct curr_tss_desc, next_tss_desc; + int ret; + u16 old_tss_sel = ops->get_segment_selector(VCPU_SREG_TR, ctxt->vcpu); + ulong old_tss_base = + get_cached_descriptor_base(ctxt, ops, VCPU_SREG_TR); + u32 desc_limit; + + /* FIXME: old_tss_base == ~0 ? */ + + ret = read_segment_descriptor(ctxt, ops, tss_selector, &next_tss_desc); + if (ret != X86EMUL_CONTINUE) + return ret; + ret = read_segment_descriptor(ctxt, ops, old_tss_sel, &curr_tss_desc); + if (ret != X86EMUL_CONTINUE) + return ret; + + /* FIXME: check that next_tss_desc is tss */ + + if (reason != TASK_SWITCH_IRET) { + if ((tss_selector & 3) > next_tss_desc.dpl || + ops->cpl(ctxt->vcpu) > next_tss_desc.dpl) { + kvm_inject_gp(ctxt->vcpu, 0); + return X86EMUL_PROPAGATE_FAULT; + } + } + + desc_limit = desc_limit_scaled(&next_tss_desc); + if (!next_tss_desc.p || + ((desc_limit < 0x67 && (next_tss_desc.type & 8)) || + desc_limit < 0x2b)) { + kvm_queue_exception_e(ctxt->vcpu, TS_VECTOR, + tss_selector & 0xfffc); + return X86EMUL_PROPAGATE_FAULT; + } + + if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { + curr_tss_desc.type &= ~(1 << 1); /* clear busy flag */ + write_segment_descriptor(ctxt, ops, old_tss_sel, + &curr_tss_desc); + } + + if (reason == TASK_SWITCH_IRET) + ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT; + + /* set back link to prev task only if NT bit is set in eflags + note that old_tss_sel is not used afetr this point */ + if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) + old_tss_sel = 0xffff; + + if (next_tss_desc.type & 8) + ret = task_switch_32(ctxt, ops, tss_selector, old_tss_sel, + old_tss_base, &next_tss_desc); + else + ret = task_switch_16(ctxt, ops, tss_selector, old_tss_sel, + old_tss_base, &next_tss_desc); + if (ret != X86EMUL_CONTINUE) + return ret; + + if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) + ctxt->eflags = ctxt->eflags | X86_EFLAGS_NT; + + if (reason != TASK_SWITCH_IRET) { + next_tss_desc.type |= (1 << 1); /* set busy flag */ + write_segment_descriptor(ctxt, ops, tss_selector, + &next_tss_desc); + } + + ops->set_cr(0, ops->get_cr(0, ctxt->vcpu) | X86_CR0_TS, ctxt->vcpu); + ops->set_cached_descriptor(&next_tss_desc, VCPU_SREG_TR, ctxt->vcpu); + ops->set_segment_selector(tss_selector, VCPU_SREG_TR, ctxt->vcpu); + + if (has_error_code) { + struct decode_cache *c = &ctxt->decode; + + c->op_bytes = c->ad_bytes = (next_tss_desc.type & 8) ? 4 : 2; + c->lock_prefix = 0; + c->src.val = (unsigned long) error_code; + emulate_push(ctxt); + } + + return ret; +} + +int emulator_task_switch(struct x86_emulate_ctxt *ctxt, + struct x86_emulate_ops *ops, + u16 tss_selector, int reason, + bool has_error_code, u32 error_code) +{ + struct decode_cache *c = &ctxt->decode; + int rc; + + memset(c, 0, sizeof(struct decode_cache)); + c->eip = ctxt->eip; + memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); + c->dst.type = OP_NONE; + + rc = emulator_do_task_switch(ctxt, ops, tss_selector, reason, + has_error_code, error_code); + + if (rc == X86EMUL_CONTINUE) { + memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); + kvm_rip_write(ctxt->vcpu, c->eip); + rc = writeback(ctxt, ops); + } + + return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; +} + +static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned long base, + int reg, struct operand *op) +{ + struct decode_cache *c = &ctxt->decode; + int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; + + register_address_increment(c, &c->regs[reg], df * op->bytes); + op->ptr = (unsigned long *)register_address(c, base, c->regs[reg]); +} + int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { - unsigned long memop = 0; u64 msr_data; - unsigned long saved_eip = 0; struct decode_cache *c = &ctxt->decode; - unsigned int port; - int io_dir_in; - int rc = 0; + int rc = X86EMUL_CONTINUE; + int saved_dst_type = c->dst.type; ctxt->interruptibility = 0; @@ -1826,26 +2482,30 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) */ memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); - saved_eip = c->eip; + + if (ctxt->mode == X86EMUL_MODE_PROT64 && (c->d & No64)) { + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); + goto done; + } /* LOCK prefix is allowed only with some instructions */ - if (c->lock_prefix && !(c->d & Lock)) { + if (c->lock_prefix && (!(c->d & Lock) || c->dst.type != OP_MEM)) { kvm_queue_exception(ctxt->vcpu, UD_VECTOR); goto done; } /* Privileged instruction can be executed only in CPL=0 */ - if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) { + if ((c->d & Priv) && ops->cpl(ctxt->vcpu)) { kvm_inject_gp(ctxt->vcpu, 0); goto done; } - if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs)) - memop = c->modrm_ea; - if (c->rep_prefix && (c->d & String)) { + ctxt->restart = true; /* All REP prefixes have the same first termination condition */ - if (c->regs[VCPU_REGS_RCX] == 0) { + if (address_mask(c, c->regs[VCPU_REGS_RCX]) == 0) { + string_done: + ctxt->restart = false; kvm_rip_write(ctxt->vcpu, c->eip); goto done; } @@ -1857,25 +2517,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) * - if REPNE/REPNZ and ZF = 1 then done */ if ((c->b == 0xa6) || (c->b == 0xa7) || - (c->b == 0xae) || (c->b == 0xaf)) { + (c->b == 0xae) || (c->b == 0xaf)) { if ((c->rep_prefix == REPE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == 0)) { - kvm_rip_write(ctxt->vcpu, c->eip); - goto done; - } + ((ctxt->eflags & EFLG_ZF) == 0)) + goto string_done; if ((c->rep_prefix == REPNE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) { - kvm_rip_write(ctxt->vcpu, c->eip); - goto done; - } + ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)) + goto string_done; } - c->regs[VCPU_REGS_RCX]--; - c->eip = kvm_rip_read(ctxt->vcpu); + c->eip = ctxt->eip; } if (c->src.type == OP_MEM) { - c->src.ptr = (unsigned long *)memop; - c->src.val = 0; rc = ops->read_emulated((unsigned long)c->src.ptr, &c->src.val, c->src.bytes, @@ -1885,29 +2538,25 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->src.orig_val = c->src.val; } + if (c->src2.type == OP_MEM) { + rc = ops->read_emulated((unsigned long)c->src2.ptr, + &c->src2.val, + c->src2.bytes, + ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + goto done; + } + if ((c->d & DstMask) == ImplicitOps) goto special_insn; - if (c->dst.type == OP_MEM) { - c->dst.ptr = (unsigned long *)memop; - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.val = 0; - if (c->d & BitOp) { - unsigned long mask = ~(c->dst.bytes * 8 - 1); - - c->dst.ptr = (void *)c->dst.ptr + - (c->src.val & mask) / 8; - } - if (!(c->d & Mov)) { - /* optimisation - avoid slow emulated read */ - rc = ops->read_emulated((unsigned long)c->dst.ptr, - &c->dst.val, - c->dst.bytes, - ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - } + if ((c->dst.type == OP_MEM) && !(c->d & Mov)) { + /* optimisation - avoid slow emulated read if Mov */ + rc = ops->read_emulated((unsigned long)c->dst.ptr, &c->dst.val, + c->dst.bytes, ctxt->vcpu); + if (rc != X86EMUL_CONTINUE) + goto done; } c->dst.orig_val = c->dst.val; @@ -1926,7 +2575,7 @@ special_insn: break; case 0x07: /* pop es */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_ES); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0x08 ... 0x0d: @@ -1945,7 +2594,7 @@ special_insn: break; case 0x17: /* pop ss */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_SS); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0x18 ... 0x1d: @@ -1957,7 +2606,7 @@ special_insn: break; case 0x1f: /* pop ds */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_DS); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0x20 ... 0x25: @@ -1988,7 +2637,7 @@ special_insn: case 0x58 ... 0x5f: /* pop reg */ pop_instruction: rc = emulate_pop(ctxt, ops, &c->dst.val, c->op_bytes); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0x60: /* pusha */ @@ -1996,7 +2645,7 @@ special_insn: break; case 0x61: /* popa */ rc = emulate_popa(ctxt, ops); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0x63: /* movsxd */ @@ -2010,47 +2659,29 @@ special_insn: break; case 0x6c: /* insb */ case 0x6d: /* insw/insd */ + c->dst.bytes = min(c->dst.bytes, 4u); if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], - (c->d & ByteOp) ? 1 : c->op_bytes)) { + c->dst.bytes)) { kvm_inject_gp(ctxt->vcpu, 0); goto done; } - if (kvm_emulate_pio_string(ctxt->vcpu, - 1, - (c->d & ByteOp) ? 1 : c->op_bytes, - c->rep_prefix ? - address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, - (ctxt->eflags & EFLG_DF), - register_address(c, es_base(ctxt), - c->regs[VCPU_REGS_RDI]), - c->rep_prefix, - c->regs[VCPU_REGS_RDX]) == 0) { - c->eip = saved_eip; - return -1; - } - return 0; + if (!pio_in_emulated(ctxt, ops, c->dst.bytes, + c->regs[VCPU_REGS_RDX], &c->dst.val)) + goto done; /* IO is needed, skip writeback */ + break; case 0x6e: /* outsb */ case 0x6f: /* outsw/outsd */ + c->src.bytes = min(c->src.bytes, 4u); if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX], - (c->d & ByteOp) ? 1 : c->op_bytes)) { + c->src.bytes)) { kvm_inject_gp(ctxt->vcpu, 0); goto done; } - if (kvm_emulate_pio_string(ctxt->vcpu, - 0, - (c->d & ByteOp) ? 1 : c->op_bytes, - c->rep_prefix ? - address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, - (ctxt->eflags & EFLG_DF), - register_address(c, - seg_override_base(ctxt, c), - c->regs[VCPU_REGS_RSI]), - c->rep_prefix, - c->regs[VCPU_REGS_RDX]) == 0) { - c->eip = saved_eip; - return -1; - } - return 0; + ops->pio_out_emulated(c->src.bytes, c->regs[VCPU_REGS_RDX], + &c->src.val, 1, ctxt->vcpu); + + c->dst.type = OP_NONE; /* nothing to writeback */ + break; case 0x70 ... 0x7f: /* jcc (short) */ if (test_cc(c->b, ctxt->eflags)) jmp_rel(c, c->src.val); @@ -2107,12 +2738,11 @@ special_insn: case 0x8c: { /* mov r/m, sreg */ struct kvm_segment segreg; - if (c->modrm_reg <= 5) + if (c->modrm_reg <= VCPU_SREG_GS) kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); else { - printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", - c->modrm); - goto cannot_emulate; + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); + goto done; } c->dst.val = segreg.selector; break; @@ -2132,16 +2762,16 @@ special_insn: } if (c->modrm_reg == VCPU_SREG_SS) - toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); + toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_MOV_SS); - rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg); + rc = load_segment_descriptor(ctxt, ops, sel, c->modrm_reg); c->dst.type = OP_NONE; /* Disable writeback. */ break; } case 0x8f: /* pop (sole member of Grp1a) */ rc = emulate_grp1a(ctxt, ops); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0x90: /* nop / xchg r8,rax */ @@ -2175,89 +2805,16 @@ special_insn: c->dst.val = (unsigned long)c->regs[VCPU_REGS_RAX]; break; case 0xa4 ... 0xa5: /* movs */ - c->dst.type = OP_MEM; - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.ptr = (unsigned long *)register_address(c, - es_base(ctxt), - c->regs[VCPU_REGS_RDI]); - rc = ops->read_emulated(register_address(c, - seg_override_base(ctxt, c), - c->regs[VCPU_REGS_RSI]), - &c->dst.val, - c->dst.bytes, ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - register_address_increment(c, &c->regs[VCPU_REGS_RSI], - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes - : c->dst.bytes); - register_address_increment(c, &c->regs[VCPU_REGS_RDI], - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes - : c->dst.bytes); - break; + goto mov; case 0xa6 ... 0xa7: /* cmps */ - c->src.type = OP_NONE; /* Disable writeback. */ - c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->src.ptr = (unsigned long *)register_address(c, - seg_override_base(ctxt, c), - c->regs[VCPU_REGS_RSI]); - rc = ops->read_emulated((unsigned long)c->src.ptr, - &c->src.val, - c->src.bytes, - ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - c->dst.type = OP_NONE; /* Disable writeback. */ - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.ptr = (unsigned long *)register_address(c, - es_base(ctxt), - c->regs[VCPU_REGS_RDI]); - rc = ops->read_emulated((unsigned long)c->dst.ptr, - &c->dst.val, - c->dst.bytes, - ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr); - - emulate_2op_SrcV("cmp", c->src, c->dst, ctxt->eflags); - - register_address_increment(c, &c->regs[VCPU_REGS_RSI], - (ctxt->eflags & EFLG_DF) ? -c->src.bytes - : c->src.bytes); - register_address_increment(c, &c->regs[VCPU_REGS_RDI], - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes - : c->dst.bytes); - - break; + goto cmp; case 0xaa ... 0xab: /* stos */ - c->dst.type = OP_MEM; - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.ptr = (unsigned long *)register_address(c, - es_base(ctxt), - c->regs[VCPU_REGS_RDI]); c->dst.val = c->regs[VCPU_REGS_RAX]; - register_address_increment(c, &c->regs[VCPU_REGS_RDI], - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes - : c->dst.bytes); break; case 0xac ... 0xad: /* lods */ - c->dst.type = OP_REG; - c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; - c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; - rc = ops->read_emulated(register_address(c, - seg_override_base(ctxt, c), - c->regs[VCPU_REGS_RSI]), - &c->dst.val, - c->dst.bytes, - ctxt->vcpu); - if (rc != X86EMUL_CONTINUE) - goto done; - register_address_increment(c, &c->regs[VCPU_REGS_RSI], - (ctxt->eflags & EFLG_DF) ? -c->dst.bytes - : c->dst.bytes); - break; + goto mov; case 0xae ... 0xaf: /* scas */ DPRINTF("Urk! I don't handle SCAS.\n"); goto cannot_emulate; @@ -2277,7 +2834,7 @@ special_insn: break; case 0xcb: /* ret far */ rc = emulate_ret_far(ctxt, ops); - if (rc) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0xd0 ... 0xd1: /* Grp2 */ @@ -2290,14 +2847,10 @@ special_insn: break; case 0xe4: /* inb */ case 0xe5: /* in */ - port = c->src.val; - io_dir_in = 1; - goto do_io; + goto do_io_in; case 0xe6: /* outb */ case 0xe7: /* out */ - port = c->src.val; - io_dir_in = 0; - goto do_io; + goto do_io_out; case 0xe8: /* call (near) */ { long int rel = c->src.val; c->src.val = (unsigned long) c->eip; @@ -2308,8 +2861,9 @@ special_insn: case 0xe9: /* jmp rel */ goto jmp; case 0xea: /* jmp far */ - if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, - VCPU_SREG_CS)) + jump_far: + if (load_segment_descriptor(ctxt, ops, c->src2.val, + VCPU_SREG_CS)) goto done; c->eip = c->src.val; @@ -2321,25 +2875,29 @@ special_insn: break; case 0xec: /* in al,dx */ case 0xed: /* in (e/r)ax,dx */ - port = c->regs[VCPU_REGS_RDX]; - io_dir_in = 1; - goto do_io; + c->src.val = c->regs[VCPU_REGS_RDX]; + do_io_in: + c->dst.bytes = min(c->dst.bytes, 4u); + if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { + kvm_inject_gp(ctxt->vcpu, 0); + goto done; + } + if (!pio_in_emulated(ctxt, ops, c->dst.bytes, c->src.val, + &c->dst.val)) + goto done; /* IO is needed */ + break; case 0xee: /* out al,dx */ case 0xef: /* out (e/r)ax,dx */ - port = c->regs[VCPU_REGS_RDX]; - io_dir_in = 0; - do_io: - if (!emulator_io_permited(ctxt, ops, port, - (c->d & ByteOp) ? 1 : c->op_bytes)) { + c->src.val = c->regs[VCPU_REGS_RDX]; + do_io_out: + c->dst.bytes = min(c->dst.bytes, 4u); + if (!emulator_io_permited(ctxt, ops, c->src.val, c->dst.bytes)) { kvm_inject_gp(ctxt->vcpu, 0); goto done; } - if (kvm_emulate_pio(ctxt->vcpu, io_dir_in, - (c->d & ByteOp) ? 1 : c->op_bytes, - port) != 0) { - c->eip = saved_eip; - goto cannot_emulate; - } + ops->pio_out_emulated(c->dst.bytes, c->src.val, &c->dst.val, 1, + ctxt->vcpu); + c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf4: /* hlt */ ctxt->vcpu->arch.halt_request = 1; @@ -2350,16 +2908,15 @@ special_insn: c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xf6 ... 0xf7: /* Grp3 */ - rc = emulate_grp3(ctxt, ops); - if (rc != 0) - goto done; + if (!emulate_grp3(ctxt, ops)) + goto cannot_emulate; break; case 0xf8: /* clc */ ctxt->eflags &= ~EFLG_CF; c->dst.type = OP_NONE; /* Disable writeback. */ break; case 0xfa: /* cli */ - if (emulator_bad_iopl(ctxt)) + if (emulator_bad_iopl(ctxt, ops)) kvm_inject_gp(ctxt->vcpu, 0); else { ctxt->eflags &= ~X86_EFLAGS_IF; @@ -2367,10 +2924,10 @@ special_insn: } break; case 0xfb: /* sti */ - if (emulator_bad_iopl(ctxt)) + if (emulator_bad_iopl(ctxt, ops)) kvm_inject_gp(ctxt->vcpu, 0); else { - toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); + toggle_interruptibility(ctxt, KVM_X86_SHADOW_INT_STI); ctxt->eflags |= X86_EFLAGS_IF; c->dst.type = OP_NONE; /* Disable writeback. */ } @@ -2383,28 +2940,55 @@ special_insn: ctxt->eflags |= EFLG_DF; c->dst.type = OP_NONE; /* Disable writeback. */ break; - case 0xfe ... 0xff: /* Grp4/Grp5 */ + case 0xfe: /* Grp4 */ + grp45: rc = emulate_grp45(ctxt, ops); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; + case 0xff: /* Grp5 */ + if (c->modrm_reg == 5) + goto jump_far; + goto grp45; } writeback: rc = writeback(ctxt, ops); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; + /* + * restore dst type in case the decoding will be reused + * (happens for string instruction ) + */ + c->dst.type = saved_dst_type; + + if ((c->d & SrcMask) == SrcSI) + string_addr_inc(ctxt, seg_override_base(ctxt, c), VCPU_REGS_RSI, + &c->src); + + if ((c->d & DstMask) == DstDI) + string_addr_inc(ctxt, es_base(ctxt), VCPU_REGS_RDI, &c->dst); + + if (c->rep_prefix && (c->d & String)) { + struct read_cache *rc = &ctxt->decode.io_read; + register_address_increment(c, &c->regs[VCPU_REGS_RCX], -1); + /* + * Re-enter guest when pio read ahead buffer is empty or, + * if it is not used, after each 1024 iteration. + */ + if ((rc->end == 0 && !(c->regs[VCPU_REGS_RCX] & 0x3ff)) || + (rc->end != 0 && rc->end == rc->pos)) + ctxt->restart = false; + } + /* Commit shadow register state. */ memcpy(ctxt->vcpu->arch.regs, c->regs, sizeof c->regs); kvm_rip_write(ctxt->vcpu, c->eip); + ops->set_rflags(ctxt->vcpu, ctxt->eflags); done: - if (rc == X86EMUL_UNHANDLEABLE) { - c->eip = saved_eip; - return -1; - } - return 0; + return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; twobyte_insn: switch (c->b) { @@ -2418,18 +3002,18 @@ twobyte_insn: goto cannot_emulate; rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc) + if (rc != X86EMUL_CONTINUE) goto done; /* Let the processor re-execute the fixed hypercall */ - c->eip = kvm_rip_read(ctxt->vcpu); + c->eip = ctxt->eip; /* Disable writeback. */ c->dst.type = OP_NONE; break; case 2: /* lgdt */ rc = read_descriptor(ctxt, ops, c->src.ptr, &size, &address, c->op_bytes); - if (rc) + if (rc != X86EMUL_CONTINUE) goto done; realmode_lgdt(ctxt->vcpu, size, address); /* Disable writeback. */ @@ -2440,7 +3024,7 @@ twobyte_insn: switch (c->modrm_rm) { case 1: rc = kvm_fix_hypercall(ctxt->vcpu); - if (rc) + if (rc != X86EMUL_CONTINUE) goto done; break; default: @@ -2450,7 +3034,7 @@ twobyte_insn: rc = read_descriptor(ctxt, ops, c->src.ptr, &size, &address, c->op_bytes); - if (rc) + if (rc != X86EMUL_CONTINUE) goto done; realmode_lidt(ctxt->vcpu, size, address); } @@ -2459,15 +3043,18 @@ twobyte_insn: break; case 4: /* smsw */ c->dst.bytes = 2; - c->dst.val = realmode_get_cr(ctxt->vcpu, 0); + c->dst.val = ops->get_cr(0, ctxt->vcpu); break; case 6: /* lmsw */ - realmode_lmsw(ctxt->vcpu, (u16)c->src.val, - &ctxt->eflags); + ops->set_cr(0, (ops->get_cr(0, ctxt->vcpu) & ~0x0ful) | + (c->src.val & 0x0f), ctxt->vcpu); c->dst.type = OP_NONE; break; + case 5: /* not defined */ + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); + goto done; case 7: /* invlpg*/ - emulate_invlpg(ctxt->vcpu, memop); + emulate_invlpg(ctxt->vcpu, c->modrm_ea); /* Disable writeback. */ c->dst.type = OP_NONE; break; @@ -2493,54 +3080,54 @@ twobyte_insn: c->dst.type = OP_NONE; break; case 0x20: /* mov cr, reg */ - if (c->modrm_mod != 3) - goto cannot_emulate; - c->regs[c->modrm_rm] = - realmode_get_cr(ctxt->vcpu, c->modrm_reg); + switch (c->modrm_reg) { + case 1: + case 5 ... 7: + case 9 ... 15: + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); + goto done; + } + c->regs[c->modrm_rm] = ops->get_cr(c->modrm_reg, ctxt->vcpu); c->dst.type = OP_NONE; /* no writeback */ break; case 0x21: /* mov from dr to reg */ - if (c->modrm_mod != 3) - goto cannot_emulate; - rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); - if (rc) - goto cannot_emulate; + if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && + (c->modrm_reg == 4 || c->modrm_reg == 5)) { + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); + goto done; + } + emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]); c->dst.type = OP_NONE; /* no writeback */ break; case 0x22: /* mov reg, cr */ - if (c->modrm_mod != 3) - goto cannot_emulate; - realmode_set_cr(ctxt->vcpu, - c->modrm_reg, c->modrm_val, &ctxt->eflags); + ops->set_cr(c->modrm_reg, c->modrm_val, ctxt->vcpu); c->dst.type = OP_NONE; break; case 0x23: /* mov from reg to dr */ - if (c->modrm_mod != 3) - goto cannot_emulate; - rc = emulator_set_dr(ctxt, c->modrm_reg, - c->regs[c->modrm_rm]); - if (rc) - goto cannot_emulate; + if ((ops->get_cr(4, ctxt->vcpu) & X86_CR4_DE) && + (c->modrm_reg == 4 || c->modrm_reg == 5)) { + kvm_queue_exception(ctxt->vcpu, UD_VECTOR); + goto done; + } + emulator_set_dr(ctxt, c->modrm_reg, c->regs[c->modrm_rm]); c->dst.type = OP_NONE; /* no writeback */ break; case 0x30: /* wrmsr */ msr_data = (u32)c->regs[VCPU_REGS_RAX] | ((u64)c->regs[VCPU_REGS_RDX] << 32); - rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data); - if (rc) { + if (kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data)) { kvm_inject_gp(ctxt->vcpu, 0); - c->eip = kvm_rip_read(ctxt->vcpu); + goto done; } rc = X86EMUL_CONTINUE; c->dst.type = OP_NONE; break; case 0x32: /* rdmsr */ - rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data); - if (rc) { + if (kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data)) { kvm_inject_gp(ctxt->vcpu, 0); - c->eip = kvm_rip_read(ctxt->vcpu); + goto done; } else { c->regs[VCPU_REGS_RAX] = (u32)msr_data; c->regs[VCPU_REGS_RDX] = msr_data >> 32; @@ -2577,7 +3164,7 @@ twobyte_insn: break; case 0xa1: /* pop fs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_FS); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0xa3: @@ -2596,7 +3183,7 @@ twobyte_insn: break; case 0xa9: /* pop gs */ rc = emulate_pop_sreg(ctxt, ops, VCPU_SREG_GS); - if (rc != 0) + if (rc != X86EMUL_CONTINUE) goto done; break; case 0xab: @@ -2668,16 +3255,14 @@ twobyte_insn: (u64) c->src.val; break; case 0xc7: /* Grp9 (cmpxchg8b) */ - rc = emulate_grp9(ctxt, ops, memop); - if (rc != 0) + rc = emulate_grp9(ctxt, ops); + if (rc != X86EMUL_CONTINUE) goto done; - c->dst.type = OP_NONE; break; } goto writeback; cannot_emulate: DPRINTF("Cannot emulate %02x\n", c->b); - c->eip = saved_eip; return -1; } diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index a790fa1..93825ff 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -33,6 +33,29 @@ #include <linux/kvm_host.h> #include "trace.h" +static void pic_lock(struct kvm_pic *s) + __acquires(&s->lock) +{ + raw_spin_lock(&s->lock); +} + +static void pic_unlock(struct kvm_pic *s) + __releases(&s->lock) +{ + bool wakeup = s->wakeup_needed; + struct kvm_vcpu *vcpu; + + s->wakeup_needed = false; + + raw_spin_unlock(&s->lock); + + if (wakeup) { + vcpu = s->kvm->bsp_vcpu; + if (vcpu) + kvm_vcpu_kick(vcpu); + } +} + static void pic_clear_isr(struct kvm_kpic_state *s, int irq) { s->isr &= ~(1 << irq); @@ -45,19 +68,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq) * Other interrupt may be delivered to PIC while lock is dropped but * it should be safe since PIC state is already updated at this stage. */ - raw_spin_unlock(&s->pics_state->lock); + pic_unlock(s->pics_state); kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq); - raw_spin_lock(&s->pics_state->lock); + pic_lock(s->pics_state); } void kvm_pic_clear_isr_ack(struct kvm *kvm) { struct kvm_pic *s = pic_irqchip(kvm); - raw_spin_lock(&s->lock); + pic_lock(s); s->pics[0].isr_ack = 0xff; s->pics[1].isr_ack = 0xff; - raw_spin_unlock(&s->lock); + pic_unlock(s); } /* @@ -158,9 +181,9 @@ static void pic_update_irq(struct kvm_pic *s) void kvm_pic_update_irq(struct kvm_pic *s) { - raw_spin_lock(&s->lock); + pic_lock(s); pic_update_irq(s); - raw_spin_unlock(&s->lock); + pic_unlock(s); } int kvm_pic_set_irq(void *opaque, int irq, int level) @@ -168,14 +191,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level) struct kvm_pic *s = opaque; int ret = -1; - raw_spin_lock(&s->lock); + pic_lock(s); if (irq >= 0 && irq < PIC_NUM_PINS) { ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); pic_update_irq(s); trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr, s->pics[irq >> 3].imr, ret == 0); } - raw_spin_unlock(&s->lock); + pic_unlock(s); return ret; } @@ -205,7 +228,7 @@ int kvm_pic_read_irq(struct kvm *kvm) int irq, irq2, intno; struct kvm_pic *s = pic_irqchip(kvm); - raw_spin_lock(&s->lock); + pic_lock(s); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { pic_intack(&s->pics[0], irq); @@ -230,7 +253,7 @@ int kvm_pic_read_irq(struct kvm *kvm) intno = s->pics[0].irq_base + irq; } pic_update_irq(s); - raw_spin_unlock(&s->lock); + pic_unlock(s); return intno; } @@ -444,7 +467,7 @@ static int picdev_write(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte write\n"); return 0; } - raw_spin_lock(&s->lock); + pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -457,7 +480,7 @@ static int picdev_write(struct kvm_io_device *this, elcr_ioport_write(&s->pics[addr & 1], addr, data); break; } - raw_spin_unlock(&s->lock); + pic_unlock(s); return 0; } @@ -474,7 +497,7 @@ static int picdev_read(struct kvm_io_device *this, printk(KERN_ERR "PIC: non byte read\n"); return 0; } - raw_spin_lock(&s->lock); + pic_lock(s); switch (addr) { case 0x20: case 0x21: @@ -488,7 +511,7 @@ static int picdev_read(struct kvm_io_device *this, break; } *(unsigned char *)val = data; - raw_spin_unlock(&s->lock); + pic_unlock(s); return 0; } @@ -505,7 +528,7 @@ static void pic_irq_request(void *opaque, int level) s->output = level; if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { s->pics[0].isr_ack &= ~(1 << irq); - kvm_vcpu_kick(vcpu); + s->wakeup_needed = true; } } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 34b15915..cd1f362 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -63,6 +63,7 @@ struct kvm_kpic_state { struct kvm_pic { raw_spinlock_t lock; + bool wakeup_needed; unsigned pending_acks; struct kvm *kvm; struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 55c7524..64bc6ea 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h @@ -10,9 +10,7 @@ struct kvm_timer { }; struct kvm_timer_ops { - bool (*is_periodic)(struct kvm_timer *); + bool (*is_periodic)(struct kvm_timer *); }; - enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); - diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 48aeee8..7a17db1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644); #include <trace/events/kvm.h> -#undef TRACE_INCLUDE_FILE #define CREATE_TRACE_POINTS #include "mmutrace.h" @@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator { shadow_walk_okay(&(_walker)); \ shadow_walk_next(&(_walker))) - -struct kvm_unsync_walk { - int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk); -}; - -typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp); +typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp); static struct kmem_cache *pte_chain_cache; static struct kmem_cache *rmap_desc_cache; @@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache, page = alloc_page(GFP_KERNEL); if (!page) return -ENOMEM; - set_page_private(page, 0); cache->objects[cache->nobjs++] = page_address(page); } return 0; @@ -925,7 +918,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - INIT_LIST_HEAD(&sp->oos_link); bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); sp->multimapped = 0; sp->parent_pte = parent_pte; @@ -1009,8 +1001,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, } -static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - mmu_parent_walk_fn fn) +static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn) { struct kvm_pte_chain *pte_chain; struct hlist_node *node; @@ -1019,8 +1010,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (!sp->multimapped && sp->parent_pte) { parent_sp = page_header(__pa(sp->parent_pte)); - fn(vcpu, parent_sp); - mmu_parent_walk(vcpu, parent_sp, fn); + fn(parent_sp); + mmu_parent_walk(parent_sp, fn); return; } hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link) @@ -1028,8 +1019,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (!pte_chain->parent_ptes[i]) break; parent_sp = page_header(__pa(pte_chain->parent_ptes[i])); - fn(vcpu, parent_sp); - mmu_parent_walk(vcpu, parent_sp, fn); + fn(parent_sp); + mmu_parent_walk(parent_sp, fn); } } @@ -1066,16 +1057,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) } } -static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) +static int unsync_walk_fn(struct kvm_mmu_page *sp) { kvm_mmu_update_parents_unsync(sp); return 1; } -static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu, - struct kvm_mmu_page *sp) +static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { - mmu_parent_walk(vcpu, sp, unsync_walk_fn); + mmu_parent_walk(sp, unsync_walk_fn); kvm_mmu_update_parents_unsync(sp); } @@ -1209,7 +1199,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp); static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { - if (sp->role.glevels != vcpu->arch.mmu.root_level) { + if (sp->role.cr4_pae != !!is_pae(vcpu)) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } @@ -1331,6 +1321,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role = vcpu->arch.mmu.base_role; role.level = level; role.direct = direct; + if (role.direct) + role.cr4_pae = 0; role.access = access; if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) { quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level)); @@ -1351,7 +1343,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, mmu_page_add_parent_pte(vcpu, sp, parent_pte); if (sp->unsync_children) { set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); - kvm_mmu_mark_parents_unsync(vcpu, sp); + kvm_mmu_mark_parents_unsync(sp); } trace_kvm_mmu_get_page(sp, false); return sp; @@ -1490,8 +1482,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm, for_each_sp(pages, sp, parents, i) { kvm_mmu_zap_page(kvm, sp); mmu_pages_clear_parents(&parents); + zapped++; } - zapped += pages.nr; kvm_mmu_pages_init(parent, &parents, &pages); } @@ -1542,14 +1534,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) */ if (used_pages > kvm_nr_mmu_pages) { - while (used_pages > kvm_nr_mmu_pages) { + while (used_pages > kvm_nr_mmu_pages && + !list_empty(&kvm->arch.active_mmu_pages)) { struct kvm_mmu_page *page; page = container_of(kvm->arch.active_mmu_pages.prev, struct kvm_mmu_page, link); - kvm_mmu_zap_page(kvm, page); + used_pages -= kvm_mmu_zap_page(kvm, page); used_pages--; } + kvm_nr_mmu_pages = used_pages; kvm->arch.n_free_mmu_pages = 0; } else @@ -1571,13 +1565,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) r = 0; index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; +restart: hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) if (sp->gfn == gfn && !sp->role.direct) { pgprintk("%s: gfn %lx role %x\n", __func__, gfn, sp->role.word); r = 1; if (kvm_mmu_zap_page(kvm, sp)) - n = bucket->first; + goto restart; } return r; } @@ -1591,12 +1586,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn) index = kvm_page_table_hashfn(gfn); bucket = &kvm->arch.mmu_page_hash[index]; +restart: hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) { if (sp->gfn == gfn && !sp->role.direct && !sp->role.invalid) { pgprintk("%s: zap %lx %x\n", __func__, gfn, sp->role.word); - kvm_mmu_zap_page(kvm, sp); + if (kvm_mmu_zap_page(kvm, sp)) + goto restart; } } } @@ -1762,7 +1759,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ++vcpu->kvm->stat.mmu_unsync; sp->unsync = 1; - kvm_mmu_mark_parents_unsync(vcpu, sp); + kvm_mmu_mark_parents_unsync(sp); mmu_convert_notrap(sp); return 0; @@ -2296,13 +2293,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) /* no rsvd bits for 2 level 4K page table entries */ context->rsvd_bits_mask[0][1] = 0; context->rsvd_bits_mask[0][0] = 0; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; + + if (!is_pse(vcpu)) { + context->rsvd_bits_mask[1][1] = 0; + break; + } + if (is_cpuid_PSE36()) /* 36bits PSE 4MB page */ context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); else /* 32 bits PSE 4MB page */ context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; break; case PT32E_ROOT_LEVEL: context->rsvd_bits_mask[0][2] = @@ -2315,7 +2318,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) context->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 62) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; break; case PT64_ROOT_LEVEL: context->rsvd_bits_mask[0][3] = exb_bit_rsvd | @@ -2333,7 +2336,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) context->rsvd_bits_mask[1][1] = exb_bit_rsvd | rsvd_bits(maxphyaddr, 51) | rsvd_bits(13, 20); /* large page */ - context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0]; + context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0]; break; } } @@ -2435,7 +2438,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) else r = paging32_init_context(vcpu); - vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level; + vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); return r; } @@ -2524,7 +2527,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, } ++vcpu->kvm->stat.mmu_pte_updated; - if (sp->role.glevels == PT32_ROOT_LEVEL) + if (!sp->role.cr4_pae) paging32_update_pte(vcpu, sp, spte, new); else paging64_update_pte(vcpu, sp, spte, new); @@ -2559,36 +2562,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu) } static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, - const u8 *new, int bytes) + u64 gpte) { gfn_t gfn; - int r; - u64 gpte = 0; pfn_t pfn; - if (bytes != 4 && bytes != 8) - return; - - /* - * Assume that the pte write on a page table of the same type - * as the current vcpu paging mode. This is nearly always true - * (might be false while changing modes). Note it is verified later - * by update_pte(). - */ - if (is_pae(vcpu)) { - /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ - if ((bytes == 4) && (gpa % 4 == 0)) { - r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8); - if (r) - return; - memcpy((void *)&gpte + (gpa % 8), new, 4); - } else if ((bytes == 8) && (gpa % 8 == 0)) { - memcpy((void *)&gpte, new, 8); - } - } else { - if ((bytes == 4) && (gpa % 4 == 0)) - memcpy((void *)&gpte, new, 4); - } if (!is_present_gpte(gpte)) return; gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; @@ -2637,10 +2615,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, int flooded = 0; int npte; int r; + int invlpg_counter; pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); + + invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter); + + /* + * Assume that the pte write on a page table of the same type + * as the current vcpu paging mode. This is nearly always true + * (might be false while changing modes). Note it is verified later + * by update_pte(). + */ + if ((is_pae(vcpu) && bytes == 4) || !new) { + /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ + if (is_pae(vcpu)) { + gpa &= ~(gpa_t)7; + bytes = 8; + } + r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8)); + if (r) + gentry = 0; + new = (const u8 *)&gentry; + } + + switch (bytes) { + case 4: + gentry = *(const u32 *)new; + break; + case 8: + gentry = *(const u64 *)new; + break; + default: + gentry = 0; + break; + } + + mmu_guess_page_from_pte_write(vcpu, gpa, gentry); spin_lock(&vcpu->kvm->mmu_lock); + if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter) + gentry = 0; kvm_mmu_access_page(vcpu, gfn); kvm_mmu_free_some_pages(vcpu); ++vcpu->kvm->stat.mmu_pte_write; @@ -2659,10 +2673,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, } index = kvm_page_table_hashfn(gfn); bucket = &vcpu->kvm->arch.mmu_page_hash[index]; + +restart: hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) { if (sp->gfn != gfn || sp->role.direct || sp->role.invalid) continue; - pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8; + pte_size = sp->role.cr4_pae ? 8 : 4; misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1); misaligned |= bytes < 4; if (misaligned || flooded) { @@ -2679,14 +2695,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("misaligned: gpa %llx bytes %d role %x\n", gpa, bytes, sp->role.word); if (kvm_mmu_zap_page(vcpu->kvm, sp)) - n = bucket->first; + goto restart; ++vcpu->kvm->stat.mmu_flooded; continue; } page_offset = offset; level = sp->role.level; npte = 1; - if (sp->role.glevels == PT32_ROOT_LEVEL) { + if (!sp->role.cr4_pae) { page_offset <<= 1; /* 32->64 */ /* * A 32-bit pde maps 4MB while the shadow pdes map @@ -2704,20 +2720,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, continue; } spte = &sp->spt[page_offset / sizeof(*spte)]; - if ((gpa & (pte_size - 1)) || (bytes < pte_size)) { - gentry = 0; - r = kvm_read_guest_atomic(vcpu->kvm, - gpa & ~(u64)(pte_size - 1), - &gentry, pte_size); - new = (const void *)&gentry; - if (r < 0) - new = NULL; - } while (npte--) { entry = *spte; mmu_pte_write_zap_pte(vcpu, sp, spte); - if (new) - mmu_pte_write_new_pte(vcpu, sp, spte, new); + if (gentry) + mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); mmu_pte_write_flush_tlb(vcpu, entry, *spte); ++spte; } @@ -2897,10 +2904,11 @@ void kvm_mmu_zap_all(struct kvm *kvm) struct kvm_mmu_page *sp, *node; spin_lock(&kvm->mmu_lock); +restart: list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) if (kvm_mmu_zap_page(kvm, sp)) - node = container_of(kvm->arch.active_mmu_pages.next, - struct kvm_mmu_page, link); + goto restart; + spin_unlock(&kvm->mmu_lock); kvm_flush_remote_tlbs(kvm); @@ -3171,8 +3179,7 @@ static gva_t canonicalize(gva_t gva) } -typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp, - u64 *sptep); +typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep); static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, inspect_spte_fn fn) @@ -3188,7 +3195,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp, child = page_header(ent & PT64_BASE_ADDR_MASK); __mmu_spte_walk(kvm, child, fn); } else - fn(kvm, sp, &sp->spt[i]); + fn(kvm, &sp->spt[i]); } } } @@ -3279,6 +3286,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu) static int count_rmaps(struct kvm_vcpu *vcpu) { + struct kvm *kvm = vcpu->kvm; + struct kvm_memslots *slots; int nmaps = 0; int i, j, k, idx; @@ -3312,7 +3321,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu) return nmaps; } -void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) +void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) { unsigned long *rmapp; struct kvm_mmu_page *rev_sp; @@ -3328,14 +3337,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep) printk(KERN_ERR "%s: no memslot for gfn %ld\n", audit_msg, gfn); printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n", - audit_msg, sptep - rev_sp->spt, + audit_msg, (long int)(sptep - rev_sp->spt), rev_sp->gfn); dump_stack(); return; } rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt], - is_large_pte(*sptep)); + rev_sp->role.level); if (!*rmapp) { if (!printk_ratelimit()) return; @@ -3370,7 +3379,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu) continue; if (!(ent & PT_WRITABLE_MASK)) continue; - inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]); + inspect_spte_has_rmap(vcpu->kvm, &pt[i]); } } return; diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 3e4a5c6..3851f1f 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvmmmu -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE mmutrace #define KVM_MMU_PAGE_FIELDS \ __field(__u64, gfn) \ @@ -30,9 +28,10 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gfn %llx %u/%u q%u%s %s%s %spge" \ + trace_seq_printf(p, "sp gfn %llx %u%s q%u%s %s%s %spge" \ " %snxe root %u %s%c", \ - __entry->gfn, role.level, role.glevels, \ + __entry->gfn, role.level, \ + role.cr4_pae ? " pae" : "", \ role.quadrant, \ role.direct ? " direct" : "", \ access_str[role.access], \ @@ -216,5 +215,10 @@ TRACE_EVENT( #endif /* _TRACE_KVMMMU_H */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mmutrace + /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 81eab9a..d9dea28 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -170,7 +170,7 @@ walk: goto access_error; #if PTTYPE == 64 - if (fetch_fault && is_nx(vcpu) && (pte & PT64_NX_MASK)) + if (fetch_fault && (pte & PT64_NX_MASK)) goto access_error; #endif @@ -258,11 +258,17 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, pt_element_t gpte; unsigned pte_access; pfn_t pfn; + u64 new_spte; gpte = *(const pt_element_t *)pte; if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { - if (!is_present_gpte(gpte)) - __set_spte(spte, shadow_notrap_nonpresent_pte); + if (!is_present_gpte(gpte)) { + if (page->unsync) + new_spte = shadow_trap_nonpresent_pte; + else + new_spte = shadow_notrap_nonpresent_pte; + __set_spte(spte, new_spte); + } return; } pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); @@ -457,6 +463,7 @@ out_unlock: static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) { struct kvm_shadow_walk_iterator iterator; + gpa_t pte_gpa = -1; int level; u64 *sptep; int need_flush = 0; @@ -470,6 +477,10 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (level == PT_PAGE_TABLE_LEVEL || ((level == PT_DIRECTORY_LEVEL && is_large_pte(*sptep))) || ((level == PT_PDPE_LEVEL && is_large_pte(*sptep)))) { + struct kvm_mmu_page *sp = page_header(__pa(sptep)); + + pte_gpa = (sp->gfn << PAGE_SHIFT); + pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); if (is_shadow_present_pte(*sptep)) { rmap_remove(vcpu->kvm, sptep); @@ -487,7 +498,17 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) if (need_flush) kvm_flush_remote_tlbs(vcpu->kvm); + + atomic_inc(&vcpu->kvm->arch.invlpg_counter); + spin_unlock(&vcpu->kvm->mmu_lock); + + if (pte_gpa == -1) + return; + + if (mmu_topup_memory_caches(vcpu)) + return; + kvm_mmu_pte_write(vcpu, pte_gpa, NULL, sizeof(pt_element_t), 0); } static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 445c594..ab78eb8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -44,10 +44,11 @@ MODULE_LICENSE("GPL"); #define SEG_TYPE_LDT 2 #define SEG_TYPE_BUSY_TSS16 3 -#define SVM_FEATURE_NPT (1 << 0) -#define SVM_FEATURE_LBRV (1 << 1) -#define SVM_FEATURE_SVML (1 << 2) -#define SVM_FEATURE_PAUSE_FILTER (1 << 10) +#define SVM_FEATURE_NPT (1 << 0) +#define SVM_FEATURE_LBRV (1 << 1) +#define SVM_FEATURE_SVML (1 << 2) +#define SVM_FEATURE_NRIP (1 << 3) +#define SVM_FEATURE_PAUSE_FILTER (1 << 10) #define NESTED_EXIT_HOST 0 /* Exit handled on host level */ #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ @@ -70,6 +71,7 @@ struct kvm_vcpu; struct nested_state { struct vmcb *hsave; u64 hsave_msr; + u64 vm_cr_msr; u64 vmcb; /* These are the merged vectors */ @@ -77,6 +79,7 @@ struct nested_state { /* gpa pointers to the real vectors */ u64 vmcb_msrpm; + u64 vmcb_iopm; /* A VMEXIT is required but not yet emulated */ bool exit_required; @@ -91,6 +94,9 @@ struct nested_state { }; +#define MSRPM_OFFSETS 16 +static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; + struct vcpu_svm { struct kvm_vcpu vcpu; struct vmcb *vmcb; @@ -110,13 +116,39 @@ struct vcpu_svm { struct nested_state nested; bool nmi_singlestep; + + unsigned int3_injected; + unsigned long int3_rip; +}; + +#define MSR_INVALID 0xffffffffU + +static struct svm_direct_access_msrs { + u32 index; /* Index of the MSR */ + bool always; /* True if intercept is always on */ +} direct_access_msrs[] = { + { .index = MSR_K6_STAR, .always = true }, + { .index = MSR_IA32_SYSENTER_CS, .always = true }, +#ifdef CONFIG_X86_64 + { .index = MSR_GS_BASE, .always = true }, + { .index = MSR_FS_BASE, .always = true }, + { .index = MSR_KERNEL_GS_BASE, .always = true }, + { .index = MSR_LSTAR, .always = true }, + { .index = MSR_CSTAR, .always = true }, + { .index = MSR_SYSCALL_MASK, .always = true }, +#endif + { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, + { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, + { .index = MSR_IA32_LASTINTFROMIP, .always = false }, + { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_INVALID, .always = false }, }; /* enable NPT for AMD64 and X86 with PAE */ #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) static bool npt_enabled = true; #else -static bool npt_enabled = false; +static bool npt_enabled; #endif static int npt = 1; @@ -129,6 +161,7 @@ static void svm_flush_tlb(struct kvm_vcpu *vcpu); static void svm_complete_interrupts(struct vcpu_svm *svm); static int nested_svm_exit_handled(struct vcpu_svm *svm); +static int nested_svm_intercept(struct vcpu_svm *svm); static int nested_svm_vmexit(struct vcpu_svm *svm); static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); @@ -163,8 +196,8 @@ static unsigned long iopm_base; struct kvm_ldttss_desc { u16 limit0; u16 base0; - unsigned base1 : 8, type : 5, dpl : 2, p : 1; - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + unsigned base1:8, type:5, dpl:2, p:1; + unsigned limit1:4, zero0:3, g:1, base2:8; u32 base3; u32 zero1; } __attribute__((packed)); @@ -194,6 +227,27 @@ static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; #define MSRS_RANGE_SIZE 2048 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2) +static u32 svm_msrpm_offset(u32 msr) +{ + u32 offset; + int i; + + for (i = 0; i < NUM_MSR_MAPS; i++) { + if (msr < msrpm_ranges[i] || + msr >= msrpm_ranges[i] + MSRS_IN_RANGE) + continue; + + offset = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */ + offset += (i * MSRS_RANGE_SIZE); /* add range offset */ + + /* Now we have the u8 offset - but need the u32 offset */ + return offset / 4; + } + + /* MSR not in any range */ + return MSR_INVALID; +} + #define MAX_INST_SIZE 15 static inline u32 svm_has(u32 feat) @@ -213,7 +267,7 @@ static inline void stgi(void) static inline void invlpga(unsigned long addr, u32 asid) { - asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); + asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid)); } static inline void force_new_asid(struct kvm_vcpu *vcpu) @@ -235,23 +289,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) vcpu->arch.efer = efer; } -static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, - bool has_error_code, u32 error_code) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - /* If we are within a nested VM we'd better #VMEXIT and let the - guest handle the exception */ - if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) - return; - - svm->vmcb->control.event_inj = nr - | SVM_EVTINJ_VALID - | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) - | SVM_EVTINJ_TYPE_EXEPT; - svm->vmcb->control.event_inj_err = error_code; -} - static int is_external_interrupt(u32 info) { info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; @@ -264,7 +301,7 @@ static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) u32 ret = 0; if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) - ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; + ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS; return ret & mask; } @@ -283,6 +320,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (svm->vmcb->control.next_rip != 0) + svm->next_rip = svm->vmcb->control.next_rip; + if (!svm->next_rip) { if (emulate_instruction(vcpu, 0, 0, EMULTYPE_SKIP) != EMULATE_DONE) @@ -297,6 +337,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) svm_set_interrupt_shadow(vcpu, 0); } +static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, + bool has_error_code, u32 error_code) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + /* + * If we are within a nested VM we'd better #VMEXIT and let the guest + * handle the exception + */ + if (nested_svm_check_exception(svm, nr, has_error_code, error_code)) + return; + + if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) { + unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu); + + /* + * For guest debugging where we have to reinject #BP if some + * INT3 is guest-owned: + * Emulate nRIP by moving RIP forward. Will fail if injection + * raises a fault that is not intercepted. Still better than + * failing in all cases. + */ + skip_emulated_instruction(&svm->vcpu); + rip = kvm_rip_read(&svm->vcpu); + svm->int3_rip = rip + svm->vmcb->save.cs.base; + svm->int3_injected = rip - old_rip; + } + + svm->vmcb->control.event_inj = nr + | SVM_EVTINJ_VALID + | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0) + | SVM_EVTINJ_TYPE_EXEPT; + svm->vmcb->control.event_inj_err = error_code; +} + static int has_svm(void) { const char *msg; @@ -319,7 +394,7 @@ static int svm_hardware_enable(void *garbage) struct svm_cpu_data *sd; uint64_t efer; - struct descriptor_table gdt_descr; + struct desc_ptr gdt_descr; struct desc_struct *gdt; int me = raw_smp_processor_id(); @@ -344,8 +419,8 @@ static int svm_hardware_enable(void *garbage) sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; sd->next_asid = sd->max_asid + 1; - kvm_get_gdt(&gdt_descr); - gdt = (struct desc_struct *)gdt_descr.base; + native_store_gdt(&gdt_descr); + gdt = (struct desc_struct *)gdt_descr.address; sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); wrmsrl(MSR_EFER, efer | EFER_SVME); @@ -391,42 +466,98 @@ err_1: } +static bool valid_msr_intercept(u32 index) +{ + int i; + + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) + if (direct_access_msrs[i].index == index) + return true; + + return false; +} + static void set_msr_interception(u32 *msrpm, unsigned msr, int read, int write) { + u8 bit_read, bit_write; + unsigned long tmp; + u32 offset; + + /* + * If this warning triggers extend the direct_access_msrs list at the + * beginning of the file + */ + WARN_ON(!valid_msr_intercept(msr)); + + offset = svm_msrpm_offset(msr); + bit_read = 2 * (msr & 0x0f); + bit_write = 2 * (msr & 0x0f) + 1; + tmp = msrpm[offset]; + + BUG_ON(offset == MSR_INVALID); + + read ? clear_bit(bit_read, &tmp) : set_bit(bit_read, &tmp); + write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); + + msrpm[offset] = tmp; +} + +static void svm_vcpu_init_msrpm(u32 *msrpm) +{ int i; - for (i = 0; i < NUM_MSR_MAPS; i++) { - if (msr >= msrpm_ranges[i] && - msr < msrpm_ranges[i] + MSRS_IN_RANGE) { - u32 msr_offset = (i * MSRS_IN_RANGE + msr - - msrpm_ranges[i]) * 2; - - u32 *base = msrpm + (msr_offset / 32); - u32 msr_shift = msr_offset % 32; - u32 mask = ((write) ? 0 : 2) | ((read) ? 0 : 1); - *base = (*base & ~(0x3 << msr_shift)) | - (mask << msr_shift); + memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); + + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { + if (!direct_access_msrs[i].always) + continue; + + set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1); + } +} + +static void add_msr_offset(u32 offset) +{ + int i; + + for (i = 0; i < MSRPM_OFFSETS; ++i) { + + /* Offset already in list? */ + if (msrpm_offsets[i] == offset) return; - } + + /* Slot used by another offset? */ + if (msrpm_offsets[i] != MSR_INVALID) + continue; + + /* Add offset to list */ + msrpm_offsets[i] = offset; + + return; } + + /* + * If this BUG triggers the msrpm_offsets table has an overflow. Just + * increase MSRPM_OFFSETS in this case. + */ BUG(); } -static void svm_vcpu_init_msrpm(u32 *msrpm) +static void init_msrpm_offsets(void) { - memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER)); + int i; -#ifdef CONFIG_X86_64 - set_msr_interception(msrpm, MSR_GS_BASE, 1, 1); - set_msr_interception(msrpm, MSR_FS_BASE, 1, 1); - set_msr_interception(msrpm, MSR_KERNEL_GS_BASE, 1, 1); - set_msr_interception(msrpm, MSR_LSTAR, 1, 1); - set_msr_interception(msrpm, MSR_CSTAR, 1, 1); - set_msr_interception(msrpm, MSR_SYSCALL_MASK, 1, 1); -#endif - set_msr_interception(msrpm, MSR_K6_STAR, 1, 1); - set_msr_interception(msrpm, MSR_IA32_SYSENTER_CS, 1, 1); + memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets)); + + for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) { + u32 offset; + + offset = svm_msrpm_offset(direct_access_msrs[i].index); + BUG_ON(offset == MSR_INVALID); + + add_msr_offset(offset); + } } static void svm_enable_lbrv(struct vcpu_svm *svm) @@ -467,6 +598,8 @@ static __init int svm_hardware_setup(void) memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; + init_msrpm_offsets(); + if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); @@ -523,7 +656,7 @@ static void init_seg(struct vmcb_seg *seg) { seg->selector = 0; seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK | - SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ + SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */ seg->limit = 0xffff; seg->base = 0; } @@ -543,16 +676,16 @@ static void init_vmcb(struct vcpu_svm *svm) svm->vcpu.fpu_active = 1; - control->intercept_cr_read = INTERCEPT_CR0_MASK | + control->intercept_cr_read = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | INTERCEPT_CR4_MASK; - control->intercept_cr_write = INTERCEPT_CR0_MASK | + control->intercept_cr_write = INTERCEPT_CR0_MASK | INTERCEPT_CR3_MASK | INTERCEPT_CR4_MASK | INTERCEPT_CR8_MASK; - control->intercept_dr_read = INTERCEPT_DR0_MASK | + control->intercept_dr_read = INTERCEPT_DR0_MASK | INTERCEPT_DR1_MASK | INTERCEPT_DR2_MASK | INTERCEPT_DR3_MASK | @@ -561,7 +694,7 @@ static void init_vmcb(struct vcpu_svm *svm) INTERCEPT_DR6_MASK | INTERCEPT_DR7_MASK; - control->intercept_dr_write = INTERCEPT_DR0_MASK | + control->intercept_dr_write = INTERCEPT_DR0_MASK | INTERCEPT_DR1_MASK | INTERCEPT_DR2_MASK | INTERCEPT_DR3_MASK | @@ -575,7 +708,7 @@ static void init_vmcb(struct vcpu_svm *svm) (1 << MC_VECTOR); - control->intercept = (1ULL << INTERCEPT_INTR) | + control->intercept = (1ULL << INTERCEPT_INTR) | (1ULL << INTERCEPT_NMI) | (1ULL << INTERCEPT_SMI) | (1ULL << INTERCEPT_SELECTIVE_CR0) | @@ -636,7 +769,8 @@ static void init_vmcb(struct vcpu_svm *svm) save->rip = 0x0000fff0; svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip; - /* This is the guest-visible cr0 value. + /* + * This is the guest-visible cr0 value. * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0. */ svm->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET; @@ -706,30 +840,30 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) if (err) goto free_svm; + err = -ENOMEM; page = alloc_page(GFP_KERNEL); - if (!page) { - err = -ENOMEM; + if (!page) goto uninit; - } - err = -ENOMEM; msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!msrpm_pages) - goto uninit; + goto free_page1; nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER); if (!nested_msrpm_pages) - goto uninit; - - svm->msrpm = page_address(msrpm_pages); - svm_vcpu_init_msrpm(svm->msrpm); + goto free_page2; hsave_page = alloc_page(GFP_KERNEL); if (!hsave_page) - goto uninit; + goto free_page3; + svm->nested.hsave = page_address(hsave_page); + svm->msrpm = page_address(msrpm_pages); + svm_vcpu_init_msrpm(svm->msrpm); + svm->nested.msrpm = page_address(nested_msrpm_pages); + svm_vcpu_init_msrpm(svm->nested.msrpm); svm->vmcb = page_address(page); clear_page(svm->vmcb); @@ -744,6 +878,12 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) return &svm->vcpu; +free_page3: + __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER); +free_page2: + __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER); +free_page1: + __free_page(page); uninit: kvm_vcpu_uninit(&svm->vcpu); free_svm: @@ -877,7 +1017,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; - /* AMD's VMCB does not have an explicit unusable field, so emulate it + /* + * AMD's VMCB does not have an explicit unusable field, so emulate it * for cross vendor migration purposes by "not present" */ var->unusable = !var->present || (var->type == 0); @@ -913,7 +1054,8 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, var->type |= 0x1; break; case VCPU_SREG_SS: - /* On AMD CPUs sometimes the DB bit in the segment + /* + * On AMD CPUs sometimes the DB bit in the segment * descriptor is left as 1, although the whole segment has * been made unusable. Clear it here to pass an Intel VMX * entry check when cross vendor migrating. @@ -931,36 +1073,36 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) return save->cpl; } -static void svm_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { struct vcpu_svm *svm = to_svm(vcpu); - dt->limit = svm->vmcb->save.idtr.limit; - dt->base = svm->vmcb->save.idtr.base; + dt->size = svm->vmcb->save.idtr.limit; + dt->address = svm->vmcb->save.idtr.base; } -static void svm_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->save.idtr.limit = dt->limit; - svm->vmcb->save.idtr.base = dt->base ; + svm->vmcb->save.idtr.limit = dt->size; + svm->vmcb->save.idtr.base = dt->address ; } -static void svm_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { struct vcpu_svm *svm = to_svm(vcpu); - dt->limit = svm->vmcb->save.gdtr.limit; - dt->base = svm->vmcb->save.gdtr.base; + dt->size = svm->vmcb->save.gdtr.limit; + dt->address = svm->vmcb->save.gdtr.base; } -static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->save.gdtr.limit = dt->limit; - svm->vmcb->save.gdtr.base = dt->base ; + svm->vmcb->save.gdtr.limit = dt->size; + svm->vmcb->save.gdtr.base = dt->address ; } static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) @@ -973,6 +1115,7 @@ static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) static void update_cr0_intercept(struct vcpu_svm *svm) { + struct vmcb *vmcb = svm->vmcb; ulong gcr0 = svm->vcpu.arch.cr0; u64 *hcr0 = &svm->vmcb->save.cr0; @@ -984,11 +1127,25 @@ static void update_cr0_intercept(struct vcpu_svm *svm) if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { - svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; - svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; + vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; + vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; + if (is_nested(svm)) { + struct vmcb *hsave = svm->nested.hsave; + + hsave->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK; + hsave->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK; + vmcb->control.intercept_cr_read |= svm->nested.intercept_cr_read; + vmcb->control.intercept_cr_write |= svm->nested.intercept_cr_write; + } } else { svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK; svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK; + if (is_nested(svm)) { + struct vmcb *hsave = svm->nested.hsave; + + hsave->control.intercept_cr_read |= INTERCEPT_CR0_MASK; + hsave->control.intercept_cr_write |= INTERCEPT_CR0_MASK; + } } } @@ -996,6 +1153,27 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { struct vcpu_svm *svm = to_svm(vcpu); + if (is_nested(svm)) { + /* + * We are here because we run in nested mode, the host kvm + * intercepts cr0 writes but the l1 hypervisor does not. + * But the L1 hypervisor may intercept selective cr0 writes. + * This needs to be checked here. + */ + unsigned long old, new; + + /* Remove bits that would trigger a real cr0 write intercept */ + old = vcpu->arch.cr0 & SVM_CR0_SELECTIVE_MASK; + new = cr0 & SVM_CR0_SELECTIVE_MASK; + + if (old == new) { + /* cr0 write with ts and mp unchanged */ + svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) + return; + } + } + #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { @@ -1129,70 +1307,11 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) svm->vmcb->control.asid = sd->next_asid++; } -static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - switch (dr) { - case 0 ... 3: - *dest = vcpu->arch.db[dr]; - break; - case 4: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return EMULATE_FAIL; /* will re-inject UD */ - /* fall through */ - case 6: - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - *dest = vcpu->arch.dr6; - else - *dest = svm->vmcb->save.dr6; - break; - case 5: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return EMULATE_FAIL; /* will re-inject UD */ - /* fall through */ - case 7: - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) - *dest = vcpu->arch.dr7; - else - *dest = svm->vmcb->save.dr7; - break; - } - - return EMULATE_DONE; -} - -static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value) +static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) { struct vcpu_svm *svm = to_svm(vcpu); - switch (dr) { - case 0 ... 3: - vcpu->arch.db[dr] = value; - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) - vcpu->arch.eff_db[dr] = value; - break; - case 4: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return EMULATE_FAIL; /* will re-inject UD */ - /* fall through */ - case 6: - vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1; - break; - case 5: - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) - return EMULATE_FAIL; /* will re-inject UD */ - /* fall through */ - case 7: - vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1; - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { - svm->vmcb->save.dr7 = vcpu->arch.dr7; - vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK); - } - break; - } - - return EMULATE_DONE; + svm->vmcb->save.dr7 = value; } static int pf_interception(struct vcpu_svm *svm) @@ -1229,7 +1348,7 @@ static int db_interception(struct vcpu_svm *svm) } if (svm->vcpu.guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)){ + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { kvm_run->exit_reason = KVM_EXIT_DEBUG; kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; @@ -1263,7 +1382,22 @@ static int ud_interception(struct vcpu_svm *svm) static void svm_fpu_activate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); + u32 excp; + + if (is_nested(svm)) { + u32 h_excp, n_excp; + + h_excp = svm->nested.hsave->control.intercept_exceptions; + n_excp = svm->nested.intercept_exceptions; + h_excp &= ~(1 << NM_VECTOR); + excp = h_excp | n_excp; + } else { + excp = svm->vmcb->control.intercept_exceptions; + excp &= ~(1 << NM_VECTOR); + } + + svm->vmcb->control.intercept_exceptions = excp; + svm->vcpu.fpu_active = 1; update_cr0_intercept(svm); } @@ -1304,29 +1438,23 @@ static int shutdown_interception(struct vcpu_svm *svm) static int io_interception(struct vcpu_svm *svm) { + struct kvm_vcpu *vcpu = &svm->vcpu; u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */ int size, in, string; unsigned port; ++svm->vcpu.stat.io_exits; - - svm->next_rip = svm->vmcb->control.exit_info_2; - string = (io_info & SVM_IOIO_STR_MASK) != 0; - - if (string) { - if (emulate_instruction(&svm->vcpu, - 0, 0, 0) == EMULATE_DO_MMIO) - return 0; - return 1; - } - in = (io_info & SVM_IOIO_TYPE_MASK) != 0; + if (string || in) + return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); + port = io_info >> 16; size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; - + svm->next_rip = svm->vmcb->control.exit_info_2; skip_emulated_instruction(&svm->vcpu); - return kvm_emulate_pio(&svm->vcpu, in, size, port); + + return kvm_fast_pio_out(vcpu, size, port); } static int nmi_interception(struct vcpu_svm *svm) @@ -1379,6 +1507,8 @@ static int nested_svm_check_permissions(struct vcpu_svm *svm) static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code) { + int vmexit; + if (!is_nested(svm)) return 0; @@ -1387,21 +1517,28 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, svm->vmcb->control.exit_info_1 = error_code; svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; - return nested_svm_exit_handled(svm); + vmexit = nested_svm_intercept(svm); + if (vmexit == NESTED_EXIT_DONE) + svm->nested.exit_required = true; + + return vmexit; } -static inline int nested_svm_intr(struct vcpu_svm *svm) +/* This function returns true if it is save to enable the irq window */ +static inline bool nested_svm_intr(struct vcpu_svm *svm) { if (!is_nested(svm)) - return 0; + return true; if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) - return 0; + return true; if (!(svm->vcpu.arch.hflags & HF_HIF_MASK)) - return 0; + return false; - svm->vmcb->control.exit_code = SVM_EXIT_INTR; + svm->vmcb->control.exit_code = SVM_EXIT_INTR; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; if (svm->nested.intercept & 1ULL) { /* @@ -1412,21 +1549,40 @@ static inline int nested_svm_intr(struct vcpu_svm *svm) */ svm->nested.exit_required = true; trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); - return 1; + return false; } - return 0; + return true; +} + +/* This function returns true if it is save to enable the nmi window */ +static inline bool nested_svm_nmi(struct vcpu_svm *svm) +{ + if (!is_nested(svm)) + return true; + + if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) + return true; + + svm->vmcb->control.exit_code = SVM_EXIT_NMI; + svm->nested.exit_required = true; + + return false; } -static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx) +static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page) { struct page *page; + might_sleep(); + page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT); if (is_error_page(page)) goto error; - return kmap_atomic(page, idx); + *_page = page; + + return kmap(page); error: kvm_release_page_clean(page); @@ -1435,61 +1591,55 @@ error: return NULL; } -static void nested_svm_unmap(void *addr, enum km_type idx) +static void nested_svm_unmap(struct page *page) { - struct page *page; + kunmap(page); + kvm_release_page_dirty(page); +} - if (!addr) - return; +static int nested_svm_intercept_ioio(struct vcpu_svm *svm) +{ + unsigned port; + u8 val, bit; + u64 gpa; - page = kmap_atomic_to_page(addr); + if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) + return NESTED_EXIT_HOST; - kunmap_atomic(addr, idx); - kvm_release_page_dirty(page); + port = svm->vmcb->control.exit_info_1 >> 16; + gpa = svm->nested.vmcb_iopm + (port / 8); + bit = port % 8; + val = 0; + + if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1)) + val &= (1 << bit); + + return val ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; } -static bool nested_svm_exit_handled_msr(struct vcpu_svm *svm) +static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) { - u32 param = svm->vmcb->control.exit_info_1 & 1; - u32 msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; - bool ret = false; - u32 t0, t1; - u8 *msrpm; + u32 offset, msr, value; + int write, mask; if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) - return false; - - msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); + return NESTED_EXIT_HOST; - if (!msrpm) - goto out; + msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + offset = svm_msrpm_offset(msr); + write = svm->vmcb->control.exit_info_1 & 1; + mask = 1 << ((2 * (msr & 0xf)) + write); - switch (msr) { - case 0 ... 0x1fff: - t0 = (msr * 2) % 8; - t1 = msr / 8; - break; - case 0xc0000000 ... 0xc0001fff: - t0 = (8192 + msr - 0xc0000000) * 2; - t1 = (t0 / 8); - t0 %= 8; - break; - case 0xc0010000 ... 0xc0011fff: - t0 = (16384 + msr - 0xc0010000) * 2; - t1 = (t0 / 8); - t0 %= 8; - break; - default: - ret = true; - goto out; - } + if (offset == MSR_INVALID) + return NESTED_EXIT_DONE; - ret = msrpm[t1] & ((1 << param) << t0); + /* Offset is in 32 bit units but need in 8 bit units */ + offset *= 4; -out: - nested_svm_unmap(msrpm, KM_USER0); + if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4)) + return NESTED_EXIT_DONE; - return ret; + return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; } static int nested_svm_exit_special(struct vcpu_svm *svm) @@ -1500,16 +1650,19 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) case SVM_EXIT_INTR: case SVM_EXIT_NMI: return NESTED_EXIT_HOST; - /* For now we are always handling NPFs when using them */ case SVM_EXIT_NPF: + /* For now we are always handling NPFs when using them */ if (npt_enabled) return NESTED_EXIT_HOST; break; - /* When we're shadowing, trap PFs */ case SVM_EXIT_EXCP_BASE + PF_VECTOR: + /* When we're shadowing, trap PFs */ if (!npt_enabled) return NESTED_EXIT_HOST; break; + case SVM_EXIT_EXCP_BASE + NM_VECTOR: + nm_interception(svm); + break; default: break; } @@ -1520,7 +1673,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) /* * If this function returns true, this #vmexit was already handled */ -static int nested_svm_exit_handled(struct vcpu_svm *svm) +static int nested_svm_intercept(struct vcpu_svm *svm) { u32 exit_code = svm->vmcb->control.exit_code; int vmexit = NESTED_EXIT_HOST; @@ -1529,6 +1682,9 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) case SVM_EXIT_MSR: vmexit = nested_svm_exit_handled_msr(svm); break; + case SVM_EXIT_IOIO: + vmexit = nested_svm_intercept_ioio(svm); + break; case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR8: { u32 cr_bits = 1 << (exit_code - SVM_EXIT_READ_CR0); if (svm->nested.intercept_cr_read & cr_bits) @@ -1566,9 +1722,17 @@ static int nested_svm_exit_handled(struct vcpu_svm *svm) } } - if (vmexit == NESTED_EXIT_DONE) { + return vmexit; +} + +static int nested_svm_exit_handled(struct vcpu_svm *svm) +{ + int vmexit; + + vmexit = nested_svm_intercept(svm); + + if (vmexit == NESTED_EXIT_DONE) nested_svm_vmexit(svm); - } return vmexit; } @@ -1610,6 +1774,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *nested_vmcb; struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; + struct page *page; trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, vmcb->control.exit_info_1, @@ -1617,10 +1782,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) vmcb->control.exit_int_info, vmcb->control.exit_int_info_err); - nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, KM_USER0); + nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page); if (!nested_vmcb) return 1; + /* Exit nested SVM mode */ + svm->nested.vmcb = 0; + /* Give the current vmcb to the guest */ disable_gif(svm); @@ -1630,9 +1798,13 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.ds = vmcb->save.ds; nested_vmcb->save.gdtr = vmcb->save.gdtr; nested_vmcb->save.idtr = vmcb->save.idtr; + nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu); if (npt_enabled) nested_vmcb->save.cr3 = vmcb->save.cr3; + else + nested_vmcb->save.cr3 = svm->vcpu.arch.cr3; nested_vmcb->save.cr2 = vmcb->save.cr2; + nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; nested_vmcb->save.rflags = vmcb->save.rflags; nested_vmcb->save.rip = vmcb->save.rip; nested_vmcb->save.rsp = vmcb->save.rsp; @@ -1704,10 +1876,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) svm->vmcb->save.cpl = 0; svm->vmcb->control.exit_int_info = 0; - /* Exit nested SVM mode */ - svm->nested.vmcb = 0; - - nested_svm_unmap(nested_vmcb, KM_USER0); + nested_svm_unmap(page); kvm_mmu_reset_context(&svm->vcpu); kvm_mmu_load(&svm->vcpu); @@ -1717,19 +1886,33 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { - u32 *nested_msrpm; + /* + * This function merges the msr permission bitmaps of kvm and the + * nested vmcb. It is omptimized in that it only merges the parts where + * the kvm msr permission bitmap may contain zero bits + */ int i; - nested_msrpm = nested_svm_map(svm, svm->nested.vmcb_msrpm, KM_USER0); - if (!nested_msrpm) - return false; + if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + return true; - for (i=0; i< PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER) / 4; i++) - svm->nested.msrpm[i] = svm->msrpm[i] | nested_msrpm[i]; + for (i = 0; i < MSRPM_OFFSETS; i++) { + u32 value, p; + u64 offset; - svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); + if (msrpm_offsets[i] == 0xffffffff) + break; + + p = msrpm_offsets[i]; + offset = svm->nested.vmcb_msrpm + (p * 4); + + if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4)) + return false; + + svm->nested.msrpm[p] = svm->msrpm[p] | value; + } - nested_svm_unmap(nested_msrpm, KM_USER0); + svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm); return true; } @@ -1739,26 +1922,34 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) struct vmcb *nested_vmcb; struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; + struct page *page; + u64 vmcb_gpa; + + vmcb_gpa = svm->vmcb->save.rax; - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return false; - /* nested_vmcb is our indicator if nested SVM is activated */ - svm->nested.vmcb = svm->vmcb->save.rax; - - trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, svm->nested.vmcb, + trace_kvm_nested_vmrun(svm->vmcb->save.rip - 3, vmcb_gpa, nested_vmcb->save.rip, nested_vmcb->control.int_ctl, nested_vmcb->control.event_inj, nested_vmcb->control.nested_ctl); + trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr_read, + nested_vmcb->control.intercept_cr_write, + nested_vmcb->control.intercept_exceptions, + nested_vmcb->control.intercept); + /* Clear internal status */ kvm_clear_exception_queue(&svm->vcpu); kvm_clear_interrupt_queue(&svm->vcpu); - /* Save the old vmcb, so we don't need to pick what we save, but - can restore everything when a VMEXIT occurs */ + /* + * Save the old vmcb, so we don't need to pick what we save, but can + * restore everything when a VMEXIT occurs + */ hsave->save.es = vmcb->save.es; hsave->save.cs = vmcb->save.cs; hsave->save.ss = vmcb->save.ss; @@ -1798,14 +1989,17 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) if (npt_enabled) { svm->vmcb->save.cr3 = nested_vmcb->save.cr3; svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; - } else { + } else kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); - kvm_mmu_reset_context(&svm->vcpu); - } + + /* Guest paging mode is active - reset mmu */ + kvm_mmu_reset_context(&svm->vcpu); + svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax); kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp); kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip); + /* In case we don't even reach vcpu_run, the fields are not updated */ svm->vmcb->save.rax = nested_vmcb->save.rax; svm->vmcb->save.rsp = nested_vmcb->save.rsp; @@ -1814,22 +2008,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) svm->vmcb->save.dr6 = nested_vmcb->save.dr6; svm->vmcb->save.cpl = nested_vmcb->save.cpl; - /* We don't want a nested guest to be more powerful than the guest, - so all intercepts are ORed */ - svm->vmcb->control.intercept_cr_read |= - nested_vmcb->control.intercept_cr_read; - svm->vmcb->control.intercept_cr_write |= - nested_vmcb->control.intercept_cr_write; - svm->vmcb->control.intercept_dr_read |= - nested_vmcb->control.intercept_dr_read; - svm->vmcb->control.intercept_dr_write |= - nested_vmcb->control.intercept_dr_write; - svm->vmcb->control.intercept_exceptions |= - nested_vmcb->control.intercept_exceptions; - - svm->vmcb->control.intercept |= nested_vmcb->control.intercept; - - svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa; + svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; + svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; /* cache intercepts */ svm->nested.intercept_cr_read = nested_vmcb->control.intercept_cr_read; @@ -1846,13 +2026,40 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) else svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; + if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { + /* We only want the cr8 intercept bits of the guest */ + svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR8_MASK; + svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; + } + + /* + * We don't want a nested guest to be more powerful than the guest, so + * all intercepts are ORed + */ + svm->vmcb->control.intercept_cr_read |= + nested_vmcb->control.intercept_cr_read; + svm->vmcb->control.intercept_cr_write |= + nested_vmcb->control.intercept_cr_write; + svm->vmcb->control.intercept_dr_read |= + nested_vmcb->control.intercept_dr_read; + svm->vmcb->control.intercept_dr_write |= + nested_vmcb->control.intercept_dr_write; + svm->vmcb->control.intercept_exceptions |= + nested_vmcb->control.intercept_exceptions; + + svm->vmcb->control.intercept |= nested_vmcb->control.intercept; + + svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl; svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; svm->vmcb->control.int_state = nested_vmcb->control.int_state; svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset; svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; - nested_svm_unmap(nested_vmcb, KM_USER0); + nested_svm_unmap(page); + + /* nested_vmcb is our indicator if nested SVM is activated */ + svm->nested.vmcb = vmcb_gpa; enable_gif(svm); @@ -1878,6 +2085,7 @@ static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) static int vmload_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; + struct page *page; if (nested_svm_check_permissions(svm)) return 1; @@ -1885,12 +2093,12 @@ static int vmload_interception(struct vcpu_svm *svm) svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; nested_svm_vmloadsave(nested_vmcb, svm->vmcb); - nested_svm_unmap(nested_vmcb, KM_USER0); + nested_svm_unmap(page); return 1; } @@ -1898,6 +2106,7 @@ static int vmload_interception(struct vcpu_svm *svm) static int vmsave_interception(struct vcpu_svm *svm) { struct vmcb *nested_vmcb; + struct page *page; if (nested_svm_check_permissions(svm)) return 1; @@ -1905,12 +2114,12 @@ static int vmsave_interception(struct vcpu_svm *svm) svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); - nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, KM_USER0); + nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page); if (!nested_vmcb) return 1; nested_svm_vmloadsave(svm->vmcb, nested_vmcb); - nested_svm_unmap(nested_vmcb, KM_USER0); + nested_svm_unmap(page); return 1; } @@ -2013,6 +2222,8 @@ static int task_switch_interception(struct vcpu_svm *svm) svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; uint32_t idt_v = svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; + bool has_error_code = false; + u32 error_code = 0; tss_selector = (u16)svm->vmcb->control.exit_info_1; @@ -2033,6 +2244,12 @@ static int task_switch_interception(struct vcpu_svm *svm) svm->vcpu.arch.nmi_injected = false; break; case SVM_EXITINTINFO_TYPE_EXEPT: + if (svm->vmcb->control.exit_info_2 & + (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) { + has_error_code = true; + error_code = + (u32)svm->vmcb->control.exit_info_2; + } kvm_clear_exception_queue(&svm->vcpu); break; case SVM_EXITINTINFO_TYPE_INTR: @@ -2049,7 +2266,14 @@ static int task_switch_interception(struct vcpu_svm *svm) (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) skip_emulated_instruction(&svm->vcpu); - return kvm_task_switch(&svm->vcpu, tss_selector, reason); + if (kvm_task_switch(&svm->vcpu, tss_selector, reason, + has_error_code, error_code) == EMULATE_FAIL) { + svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + svm->vcpu.run->internal.ndata = 0; + return 0; + } + return 1; } static int cpuid_interception(struct vcpu_svm *svm) @@ -2140,9 +2364,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) case MSR_IA32_SYSENTER_ESP: *data = svm->sysenter_esp; break; - /* Nobody will change the following 5 values in the VMCB so - we can safely return them on rdmsr. They will always be 0 - until LBRV is implemented. */ + /* + * Nobody will change the following 5 values in the VMCB so we can + * safely return them on rdmsr. They will always be 0 until LBRV is + * implemented. + */ case MSR_IA32_DEBUGCTLMSR: *data = svm->vmcb->save.dbgctl; break; @@ -2162,7 +2388,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) *data = svm->nested.hsave_msr; break; case MSR_VM_CR: - *data = 0; + *data = svm->nested.vm_cr_msr; break; case MSR_IA32_UCODE_REV: *data = 0x01000065; @@ -2192,6 +2418,31 @@ static int rdmsr_interception(struct vcpu_svm *svm) return 1; } +static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data) +{ + struct vcpu_svm *svm = to_svm(vcpu); + int svm_dis, chg_mask; + + if (data & ~SVM_VM_CR_VALID_MASK) + return 1; + + chg_mask = SVM_VM_CR_VALID_MASK; + + if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK) + chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK); + + svm->nested.vm_cr_msr &= ~chg_mask; + svm->nested.vm_cr_msr |= (data & chg_mask); + + svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK; + + /* check for svm_disable while efer.svme is set */ + if (svm_dis && (vcpu->arch.efer & EFER_SVME)) + return 1; + + return 0; +} + static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) { struct vcpu_svm *svm = to_svm(vcpu); @@ -2258,6 +2509,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) svm->nested.hsave_msr = data; break; case MSR_VM_CR: + return svm_set_vm_cr(vcpu, data); case MSR_VM_IGNNE: pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data); break; @@ -2321,16 +2573,16 @@ static int pause_interception(struct vcpu_svm *svm) } static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { - [SVM_EXIT_READ_CR0] = emulate_on_interception, - [SVM_EXIT_READ_CR3] = emulate_on_interception, - [SVM_EXIT_READ_CR4] = emulate_on_interception, - [SVM_EXIT_READ_CR8] = emulate_on_interception, + [SVM_EXIT_READ_CR0] = emulate_on_interception, + [SVM_EXIT_READ_CR3] = emulate_on_interception, + [SVM_EXIT_READ_CR4] = emulate_on_interception, + [SVM_EXIT_READ_CR8] = emulate_on_interception, [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, - [SVM_EXIT_WRITE_CR0] = emulate_on_interception, - [SVM_EXIT_WRITE_CR3] = emulate_on_interception, - [SVM_EXIT_WRITE_CR4] = emulate_on_interception, - [SVM_EXIT_WRITE_CR8] = cr8_write_interception, - [SVM_EXIT_READ_DR0] = emulate_on_interception, + [SVM_EXIT_WRITE_CR0] = emulate_on_interception, + [SVM_EXIT_WRITE_CR3] = emulate_on_interception, + [SVM_EXIT_WRITE_CR4] = emulate_on_interception, + [SVM_EXIT_WRITE_CR8] = cr8_write_interception, + [SVM_EXIT_READ_DR0] = emulate_on_interception, [SVM_EXIT_READ_DR1] = emulate_on_interception, [SVM_EXIT_READ_DR2] = emulate_on_interception, [SVM_EXIT_READ_DR3] = emulate_on_interception, @@ -2349,15 +2601,14 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception, [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, - [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, - [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, - [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, - [SVM_EXIT_INTR] = intr_interception, + [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, + [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, + [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, + [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, [SVM_EXIT_INIT] = nop_on_interception, [SVM_EXIT_VINTR] = interrupt_window_interception, - /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_IRET] = iret_interception, [SVM_EXIT_INVD] = emulate_on_interception, @@ -2365,7 +2616,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, [SVM_EXIT_INVLPGA] = invlpga_interception, - [SVM_EXIT_IOIO] = io_interception, + [SVM_EXIT_IOIO] = io_interception, [SVM_EXIT_MSR] = msr_interception, [SVM_EXIT_TASK_SWITCH] = task_switch_interception, [SVM_EXIT_SHUTDOWN] = shutdown_interception, @@ -2388,7 +2639,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) struct kvm_run *kvm_run = vcpu->run; u32 exit_code = svm->vmcb->control.exit_code; - trace_kvm_exit(exit_code, svm->vmcb->save.rip); + trace_kvm_exit(exit_code, vcpu); if (unlikely(svm->nested.exit_required)) { nested_svm_vmexit(svm); @@ -2506,6 +2757,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); + if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) + return; + if (irr == -1) return; @@ -2563,13 +2817,13 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - nested_svm_intr(svm); - - /* In case GIF=0 we can't rely on the CPU to tell us when - * GIF becomes 1, because that's a separate STGI/VMRUN intercept. - * The next time we get that intercept, this function will be - * called again though and we'll get the vintr intercept. */ - if (gif_set(svm)) { + /* + * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes + * 1, because that's a separate STGI/VMRUN intercept. The next time we + * get that intercept, this function will be called again though and + * we'll get the vintr intercept. + */ + if (gif_set(svm) && nested_svm_intr(svm)) { svm_set_vintr(svm); svm_inject_irq(svm, 0x0); } @@ -2583,12 +2837,15 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) == HF_NMI_MASK) return; /* IRET will cause a vm exit */ - /* Something prevents NMI from been injected. Single step over - possible problem (IRET or exception injection or interrupt - shadow) */ - svm->nmi_singlestep = true; - svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); - update_db_intercept(vcpu); + /* + * Something prevents NMI from been injected. Single step over possible + * problem (IRET or exception injection or interrupt shadow) + */ + if (gif_set(svm) && nested_svm_nmi(svm)) { + svm->nmi_singlestep = true; + svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); + update_db_intercept(vcpu); + } } static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -2609,6 +2866,9 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) + return; + if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; kvm_set_cr8(vcpu, cr8); @@ -2620,6 +2880,9 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); u64 cr8; + if (is_nested(svm) && (vcpu->arch.hflags & HF_VINTR_MASK)) + return; + cr8 = kvm_get_cr8(vcpu); svm->vmcb->control.int_ctl &= ~V_TPR_MASK; svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; @@ -2630,6 +2893,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) u8 vector; int type; u32 exitintinfo = svm->vmcb->control.exit_int_info; + unsigned int3_injected = svm->int3_injected; + + svm->int3_injected = 0; if (svm->vcpu.arch.hflags & HF_IRET_MASK) svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); @@ -2649,12 +2915,21 @@ static void svm_complete_interrupts(struct vcpu_svm *svm) svm->vcpu.arch.nmi_injected = true; break; case SVM_EXITINTINFO_TYPE_EXEPT: - /* In case of software exception do not reinject an exception - vector, but re-execute and instruction instead */ if (is_nested(svm)) break; - if (kvm_exception_is_soft(vector)) + /* + * In case of software exceptions, do not reinject the vector, + * but re-execute the instruction instead. Rewind RIP first + * if we emulated INT3 before. + */ + if (kvm_exception_is_soft(vector)) { + if (vector == BP_VECTOR && int3_injected && + kvm_is_linear_rip(&svm->vcpu, svm->int3_rip)) + kvm_rip_write(&svm->vcpu, + kvm_rip_read(&svm->vcpu) - + int3_injected); break; + } if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { u32 err = svm->vmcb->control.exit_int_info_err; kvm_queue_exception_e(&svm->vcpu, vector, err); @@ -2875,24 +3150,24 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) } static const struct trace_print_flags svm_exit_reasons_str[] = { - { SVM_EXIT_READ_CR0, "read_cr0" }, - { SVM_EXIT_READ_CR3, "read_cr3" }, - { SVM_EXIT_READ_CR4, "read_cr4" }, - { SVM_EXIT_READ_CR8, "read_cr8" }, - { SVM_EXIT_WRITE_CR0, "write_cr0" }, - { SVM_EXIT_WRITE_CR3, "write_cr3" }, - { SVM_EXIT_WRITE_CR4, "write_cr4" }, - { SVM_EXIT_WRITE_CR8, "write_cr8" }, - { SVM_EXIT_READ_DR0, "read_dr0" }, - { SVM_EXIT_READ_DR1, "read_dr1" }, - { SVM_EXIT_READ_DR2, "read_dr2" }, - { SVM_EXIT_READ_DR3, "read_dr3" }, - { SVM_EXIT_WRITE_DR0, "write_dr0" }, - { SVM_EXIT_WRITE_DR1, "write_dr1" }, - { SVM_EXIT_WRITE_DR2, "write_dr2" }, - { SVM_EXIT_WRITE_DR3, "write_dr3" }, - { SVM_EXIT_WRITE_DR5, "write_dr5" }, - { SVM_EXIT_WRITE_DR7, "write_dr7" }, + { SVM_EXIT_READ_CR0, "read_cr0" }, + { SVM_EXIT_READ_CR3, "read_cr3" }, + { SVM_EXIT_READ_CR4, "read_cr4" }, + { SVM_EXIT_READ_CR8, "read_cr8" }, + { SVM_EXIT_WRITE_CR0, "write_cr0" }, + { SVM_EXIT_WRITE_CR3, "write_cr3" }, + { SVM_EXIT_WRITE_CR4, "write_cr4" }, + { SVM_EXIT_WRITE_CR8, "write_cr8" }, + { SVM_EXIT_READ_DR0, "read_dr0" }, + { SVM_EXIT_READ_DR1, "read_dr1" }, + { SVM_EXIT_READ_DR2, "read_dr2" }, + { SVM_EXIT_READ_DR3, "read_dr3" }, + { SVM_EXIT_WRITE_DR0, "write_dr0" }, + { SVM_EXIT_WRITE_DR1, "write_dr1" }, + { SVM_EXIT_WRITE_DR2, "write_dr2" }, + { SVM_EXIT_WRITE_DR3, "write_dr3" }, + { SVM_EXIT_WRITE_DR5, "write_dr5" }, + { SVM_EXIT_WRITE_DR7, "write_dr7" }, { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, @@ -2941,8 +3216,10 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - update_cr0_intercept(svm); svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR; + if (is_nested(svm)) + svm->nested.hsave->control.intercept_exceptions |= 1 << NM_VECTOR; + update_cr0_intercept(svm); } static struct kvm_x86_ops svm_x86_ops = { @@ -2981,8 +3258,7 @@ static struct kvm_x86_ops svm_x86_ops = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, - .get_dr = svm_get_dr, - .set_dr = svm_set_dr, + .set_dr7 = svm_set_dr7, .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c index eea4043..4ddadb1 100644 --- a/arch/x86/kvm/timer.c +++ b/arch/x86/kvm/timer.c @@ -12,7 +12,8 @@ static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) /* * There is a race window between reading and incrementing, but we do * not care about potentially loosing timer events in the !reinject - * case anyway. + * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked + * in vcpu_enter_guest. */ if (ktimer->reinject || !atomic_read(&ktimer->pending)) { atomic_inc(&ktimer->pending); diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 6ad30a2..a6544b8 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -5,8 +5,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -#define TRACE_INCLUDE_PATH arch/x86/kvm -#define TRACE_INCLUDE_FILE trace /* * Tracepoint for guest mode entry. @@ -184,8 +182,8 @@ TRACE_EVENT(kvm_apic, * Tracepoint for kvm guest exit: */ TRACE_EVENT(kvm_exit, - TP_PROTO(unsigned int exit_reason, unsigned long guest_rip), - TP_ARGS(exit_reason, guest_rip), + TP_PROTO(unsigned int exit_reason, struct kvm_vcpu *vcpu), + TP_ARGS(exit_reason, vcpu), TP_STRUCT__entry( __field( unsigned int, exit_reason ) @@ -194,7 +192,7 @@ TRACE_EVENT(kvm_exit, TP_fast_assign( __entry->exit_reason = exit_reason; - __entry->guest_rip = guest_rip; + __entry->guest_rip = kvm_rip_read(vcpu); ), TP_printk("reason %s rip 0x%lx", @@ -221,6 +219,38 @@ TRACE_EVENT(kvm_inj_virq, TP_printk("irq %u", __entry->irq) ); +#define EXS(x) { x##_VECTOR, "#" #x } + +#define kvm_trace_sym_exc \ + EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ + EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ + EXS(MF), EXS(MC) + +/* + * Tracepoint for kvm interrupt injection: + */ +TRACE_EVENT(kvm_inj_exception, + TP_PROTO(unsigned exception, bool has_error, unsigned error_code), + TP_ARGS(exception, has_error, error_code), + + TP_STRUCT__entry( + __field( u8, exception ) + __field( u8, has_error ) + __field( u32, error_code ) + ), + + TP_fast_assign( + __entry->exception = exception; + __entry->has_error = has_error; + __entry->error_code = error_code; + ), + + TP_printk("%s (0x%x)", + __print_symbolic(__entry->exception, kvm_trace_sym_exc), + /* FIXME: don't print error_code if not present */ + __entry->has_error ? __entry->error_code : 0) +); + /* * Tracepoint for page fault. */ @@ -413,12 +443,34 @@ TRACE_EVENT(kvm_nested_vmrun, ), TP_printk("rip: 0x%016llx vmcb: 0x%016llx nrip: 0x%016llx int_ctl: 0x%08x " - "event_inj: 0x%08x npt: %s\n", + "event_inj: 0x%08x npt: %s", __entry->rip, __entry->vmcb, __entry->nested_rip, __entry->int_ctl, __entry->event_inj, __entry->npt ? "on" : "off") ); +TRACE_EVENT(kvm_nested_intercepts, + TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u64 intercept), + TP_ARGS(cr_read, cr_write, exceptions, intercept), + + TP_STRUCT__entry( + __field( __u16, cr_read ) + __field( __u16, cr_write ) + __field( __u32, exceptions ) + __field( __u64, intercept ) + ), + + TP_fast_assign( + __entry->cr_read = cr_read; + __entry->cr_write = cr_write; + __entry->exceptions = exceptions; + __entry->intercept = intercept; + ), + + TP_printk("cr_read: %04x cr_write: %04x excp: %08x intercept: %016llx", + __entry->cr_read, __entry->cr_write, __entry->exceptions, + __entry->intercept) +); /* * Tracepoint for #VMEXIT while nested */ @@ -447,7 +499,7 @@ TRACE_EVENT(kvm_nested_vmexit, __entry->exit_int_info_err = exit_int_info_err; ), TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " - "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", __entry->rip, ftrace_print_symbols_seq(p, __entry->exit_code, kvm_x86_ops->exit_reasons_str), @@ -482,7 +534,7 @@ TRACE_EVENT(kvm_nested_vmexit_inject, ), TP_printk("reason: %s ext_inf1: 0x%016llx " - "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x\n", + "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", ftrace_print_symbols_seq(p, __entry->exit_code, kvm_x86_ops->exit_reasons_str), __entry->exit_info1, __entry->exit_info2, @@ -504,7 +556,7 @@ TRACE_EVENT(kvm_nested_intr_vmexit, __entry->rip = rip ), - TP_printk("rip: 0x%016llx\n", __entry->rip) + TP_printk("rip: 0x%016llx", __entry->rip) ); /* @@ -526,7 +578,7 @@ TRACE_EVENT(kvm_invlpga, __entry->address = address; ), - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx\n", + TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); @@ -547,11 +599,102 @@ TRACE_EVENT(kvm_skinit, __entry->slb = slb; ), - TP_printk("rip: 0x%016llx slb: 0x%08x\n", + TP_printk("rip: 0x%016llx slb: 0x%08x", __entry->rip, __entry->slb) ); +#define __print_insn(insn, ilen) ({ \ + int i; \ + const char *ret = p->buffer + p->len; \ + \ + for (i = 0; i < ilen; ++i) \ + trace_seq_printf(p, " %02x", insn[i]); \ + trace_seq_printf(p, "%c", 0); \ + ret; \ + }) + +#define KVM_EMUL_INSN_F_CR0_PE (1 << 0) +#define KVM_EMUL_INSN_F_EFL_VM (1 << 1) +#define KVM_EMUL_INSN_F_CS_D (1 << 2) +#define KVM_EMUL_INSN_F_CS_L (1 << 3) + +#define kvm_trace_symbol_emul_flags \ + { 0, "real" }, \ + { KVM_EMUL_INSN_F_CR0_PE \ + | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ + { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ + { KVM_EMUL_INSN_F_CR0_PE \ + | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ + { KVM_EMUL_INSN_F_CR0_PE \ + | KVM_EMUL_INSN_F_CS_L, "prot64" } + +#define kei_decode_mode(mode) ({ \ + u8 flags = 0xff; \ + switch (mode) { \ + case X86EMUL_MODE_REAL: \ + flags = 0; \ + break; \ + case X86EMUL_MODE_VM86: \ + flags = KVM_EMUL_INSN_F_EFL_VM; \ + break; \ + case X86EMUL_MODE_PROT16: \ + flags = KVM_EMUL_INSN_F_CR0_PE; \ + break; \ + case X86EMUL_MODE_PROT32: \ + flags = KVM_EMUL_INSN_F_CR0_PE \ + | KVM_EMUL_INSN_F_CS_D; \ + break; \ + case X86EMUL_MODE_PROT64: \ + flags = KVM_EMUL_INSN_F_CR0_PE \ + | KVM_EMUL_INSN_F_CS_L; \ + break; \ + } \ + flags; \ + }) + +TRACE_EVENT(kvm_emulate_insn, + TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), + TP_ARGS(vcpu, failed), + + TP_STRUCT__entry( + __field( __u64, rip ) + __field( __u32, csbase ) + __field( __u8, len ) + __array( __u8, insn, 15 ) + __field( __u8, flags ) + __field( __u8, failed ) + ), + + TP_fast_assign( + __entry->rip = vcpu->arch.emulate_ctxt.decode.fetch.start; + __entry->csbase = kvm_x86_ops->get_segment_base(vcpu, VCPU_SREG_CS); + __entry->len = vcpu->arch.emulate_ctxt.decode.eip + - vcpu->arch.emulate_ctxt.decode.fetch.start; + memcpy(__entry->insn, + vcpu->arch.emulate_ctxt.decode.fetch.data, + 15); + __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt.mode); + __entry->failed = failed; + ), + + TP_printk("%x:%llx:%s (%s)%s", + __entry->csbase, __entry->rip, + __print_insn(__entry->insn, __entry->len), + __print_symbolic(__entry->flags, + kvm_trace_symbol_emul_flags), + __entry->failed ? " failed" : "" + ) + ); + +#define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) +#define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) + #endif /* _TRACE_KVM_H */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH arch/x86/kvm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + /* This part must be outside protection */ #include <trace/define_trace.h> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 82be6da..0b896ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -77,6 +77,8 @@ module_param(emulate_invalid_guest_state, bool, S_IRUGO); #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE) #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE) +#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) + /* * These 2 parameters are used to config the controls for Pause-Loop Exiting: * ple_gap: upper bound on the amount of time between two successive @@ -131,7 +133,7 @@ struct vcpu_vmx { } host_state; struct { int vm86_active; - u8 save_iopl; + ulong save_rflags; struct kvm_save_segment { u16 selector; unsigned long base; @@ -232,56 +234,56 @@ static const u32 vmx_msr_index[] = { }; #define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index) -static inline int is_page_fault(u32 intr_info) +static inline bool is_page_fault(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_HARD_EXCEPTION | PF_VECTOR | INTR_INFO_VALID_MASK); } -static inline int is_no_device(u32 intr_info) +static inline bool is_no_device(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_HARD_EXCEPTION | NM_VECTOR | INTR_INFO_VALID_MASK); } -static inline int is_invalid_opcode(u32 intr_info) +static inline bool is_invalid_opcode(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_HARD_EXCEPTION | UD_VECTOR | INTR_INFO_VALID_MASK); } -static inline int is_external_interrupt(u32 intr_info) +static inline bool is_external_interrupt(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); } -static inline int is_machine_check(u32 intr_info) +static inline bool is_machine_check(u32 intr_info) { return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | INTR_INFO_VALID_MASK)) == (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); } -static inline int cpu_has_vmx_msr_bitmap(void) +static inline bool cpu_has_vmx_msr_bitmap(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; } -static inline int cpu_has_vmx_tpr_shadow(void) +static inline bool cpu_has_vmx_tpr_shadow(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; } -static inline int vm_need_tpr_shadow(struct kvm *kvm) +static inline bool vm_need_tpr_shadow(struct kvm *kvm) { return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); } -static inline int cpu_has_secondary_exec_ctrls(void) +static inline bool cpu_has_secondary_exec_ctrls(void) { return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; @@ -301,80 +303,80 @@ static inline bool cpu_has_vmx_flexpriority(void) static inline bool cpu_has_vmx_ept_execute_only(void) { - return !!(vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT); + return vmx_capability.ept & VMX_EPT_EXECUTE_ONLY_BIT; } static inline bool cpu_has_vmx_eptp_uncacheable(void) { - return !!(vmx_capability.ept & VMX_EPTP_UC_BIT); + return vmx_capability.ept & VMX_EPTP_UC_BIT; } static inline bool cpu_has_vmx_eptp_writeback(void) { - return !!(vmx_capability.ept & VMX_EPTP_WB_BIT); + return vmx_capability.ept & VMX_EPTP_WB_BIT; } static inline bool cpu_has_vmx_ept_2m_page(void) { - return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT); + return vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT; } static inline bool cpu_has_vmx_ept_1g_page(void) { - return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT); + return vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT; } -static inline int cpu_has_vmx_invept_individual_addr(void) +static inline bool cpu_has_vmx_invept_individual_addr(void) { - return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); + return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT; } -static inline int cpu_has_vmx_invept_context(void) +static inline bool cpu_has_vmx_invept_context(void) { - return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); + return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT; } -static inline int cpu_has_vmx_invept_global(void) +static inline bool cpu_has_vmx_invept_global(void) { - return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); + return vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT; } -static inline int cpu_has_vmx_ept(void) +static inline bool cpu_has_vmx_ept(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_EPT; } -static inline int cpu_has_vmx_unrestricted_guest(void) +static inline bool cpu_has_vmx_unrestricted_guest(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_UNRESTRICTED_GUEST; } -static inline int cpu_has_vmx_ple(void) +static inline bool cpu_has_vmx_ple(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_PAUSE_LOOP_EXITING; } -static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) +static inline bool vm_need_virtualize_apic_accesses(struct kvm *kvm) { return flexpriority_enabled && irqchip_in_kernel(kvm); } -static inline int cpu_has_vmx_vpid(void) +static inline bool cpu_has_vmx_vpid(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_ENABLE_VPID; } -static inline int cpu_has_vmx_rdtscp(void) +static inline bool cpu_has_vmx_rdtscp(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & SECONDARY_EXEC_RDTSCP; } -static inline int cpu_has_virtual_nmis(void) +static inline bool cpu_has_virtual_nmis(void) { return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; } @@ -598,11 +600,11 @@ static void reload_tss(void) /* * VT restores TR but not its size. Useless. */ - struct descriptor_table gdt; + struct desc_ptr gdt; struct desc_struct *descs; - kvm_get_gdt(&gdt); - descs = (void *)gdt.base; + native_store_gdt(&gdt); + descs = (void *)gdt.address; descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ load_TR_desc(); } @@ -632,6 +634,43 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) return true; } +static unsigned long segment_base(u16 selector) +{ + struct desc_ptr gdt; + struct desc_struct *d; + unsigned long table_base; + unsigned long v; + + if (!(selector & ~3)) + return 0; + + native_store_gdt(&gdt); + table_base = gdt.address; + + if (selector & 4) { /* from ldt */ + u16 ldt_selector = kvm_read_ldt(); + + if (!(ldt_selector & ~3)) + return 0; + + table_base = segment_base(ldt_selector); + } + d = (struct desc_struct *)(table_base + (selector & ~7)); + v = get_desc_base(d); +#ifdef CONFIG_X86_64 + if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) + v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; +#endif + return v; +} + +static inline unsigned long kvm_read_tr_base(void) +{ + u16 tr; + asm("str %0" : "=g"(tr)); + return segment_base(tr); +} + static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -756,7 +795,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } if (vcpu->cpu != cpu) { - struct descriptor_table dt; + struct desc_ptr dt; unsigned long sysenter_esp; vcpu->cpu = cpu; @@ -765,8 +804,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) * processors. */ vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ - kvm_get_gdt(&dt); - vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ + native_store_gdt(&dt); + vmcs_writel(HOST_GDTR_BASE, dt.address); /* 22.2.4 */ rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ @@ -818,18 +857,23 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { - unsigned long rflags; + unsigned long rflags, save_rflags; rflags = vmcs_readl(GUEST_RFLAGS); - if (to_vmx(vcpu)->rmode.vm86_active) - rflags &= ~(unsigned long)(X86_EFLAGS_IOPL | X86_EFLAGS_VM); + if (to_vmx(vcpu)->rmode.vm86_active) { + rflags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + save_rflags = to_vmx(vcpu)->rmode.save_rflags; + rflags |= save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; + } return rflags; } static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { - if (to_vmx(vcpu)->rmode.vm86_active) + if (to_vmx(vcpu)->rmode.vm86_active) { + to_vmx(vcpu)->rmode.save_rflags = rflags; rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; + } vmcs_writel(GUEST_RFLAGS, rflags); } @@ -839,9 +883,9 @@ static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) int ret = 0; if (interruptibility & GUEST_INTR_STATE_STI) - ret |= X86_SHADOW_INT_STI; + ret |= KVM_X86_SHADOW_INT_STI; if (interruptibility & GUEST_INTR_STATE_MOV_SS) - ret |= X86_SHADOW_INT_MOV_SS; + ret |= KVM_X86_SHADOW_INT_MOV_SS; return ret & mask; } @@ -853,9 +897,9 @@ static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); - if (mask & X86_SHADOW_INT_MOV_SS) + if (mask & KVM_X86_SHADOW_INT_MOV_SS) interruptibility |= GUEST_INTR_STATE_MOV_SS; - if (mask & X86_SHADOW_INT_STI) + else if (mask & KVM_X86_SHADOW_INT_STI) interruptibility |= GUEST_INTR_STATE_STI; if ((interruptibility != interruptibility_old)) @@ -1483,8 +1527,8 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar); flags = vmcs_readl(GUEST_RFLAGS); - flags &= ~(X86_EFLAGS_IOPL | X86_EFLAGS_VM); - flags |= (vmx->rmode.save_iopl << IOPL_SHIFT); + flags &= RMODE_GUEST_OWNED_EFLAGS_BITS; + flags |= vmx->rmode.save_rflags & ~RMODE_GUEST_OWNED_EFLAGS_BITS; vmcs_writel(GUEST_RFLAGS, flags); vmcs_writel(GUEST_CR4, (vmcs_readl(GUEST_CR4) & ~X86_CR4_VME) | @@ -1557,8 +1601,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmcs_write32(GUEST_TR_AR_BYTES, 0x008b); flags = vmcs_readl(GUEST_RFLAGS); - vmx->rmode.save_iopl - = (flags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + vmx->rmode.save_rflags = flags; flags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; @@ -1928,28 +1971,28 @@ static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) *l = (ar >> 13) & 1; } -static void vmx_get_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +static void vmx_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { - dt->limit = vmcs_read32(GUEST_IDTR_LIMIT); - dt->base = vmcs_readl(GUEST_IDTR_BASE); + dt->size = vmcs_read32(GUEST_IDTR_LIMIT); + dt->address = vmcs_readl(GUEST_IDTR_BASE); } -static void vmx_set_idt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +static void vmx_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { - vmcs_write32(GUEST_IDTR_LIMIT, dt->limit); - vmcs_writel(GUEST_IDTR_BASE, dt->base); + vmcs_write32(GUEST_IDTR_LIMIT, dt->size); + vmcs_writel(GUEST_IDTR_BASE, dt->address); } -static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +static void vmx_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { - dt->limit = vmcs_read32(GUEST_GDTR_LIMIT); - dt->base = vmcs_readl(GUEST_GDTR_BASE); + dt->size = vmcs_read32(GUEST_GDTR_LIMIT); + dt->address = vmcs_readl(GUEST_GDTR_BASE); } -static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt) +static void vmx_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { - vmcs_write32(GUEST_GDTR_LIMIT, dt->limit); - vmcs_writel(GUEST_GDTR_BASE, dt->base); + vmcs_write32(GUEST_GDTR_LIMIT, dt->size); + vmcs_writel(GUEST_GDTR_BASE, dt->address); } static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) @@ -2328,7 +2371,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) u32 junk; u64 host_pat, tsc_this, tsc_base; unsigned long a; - struct descriptor_table dt; + struct desc_ptr dt; int i; unsigned long kvm_vmx_return; u32 exec_control; @@ -2409,8 +2452,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ - kvm_get_idt(&dt); - vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ + native_store_idt(&dt); + vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */ asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); vmcs_writel(HOST_RIP, kvm_vmx_return); /* 22.2.5 */ @@ -2942,22 +2985,20 @@ static int handle_io(struct kvm_vcpu *vcpu) int size, in, string; unsigned port; - ++vcpu->stat.io_exits; exit_qualification = vmcs_readl(EXIT_QUALIFICATION); string = (exit_qualification & 16) != 0; + in = (exit_qualification & 8) != 0; - if (string) { - if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO) - return 0; - return 1; - } + ++vcpu->stat.io_exits; - size = (exit_qualification & 7) + 1; - in = (exit_qualification & 8) != 0; - port = exit_qualification >> 16; + if (string || in) + return !(emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO); + port = exit_qualification >> 16; + size = (exit_qualification & 7) + 1; skip_emulated_instruction(vcpu); - return kvm_emulate_pio(vcpu, in, size, port); + + return kvm_fast_pio_out(vcpu, size, port); } static void @@ -3048,19 +3089,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) return 0; } -static int check_dr_alias(struct kvm_vcpu *vcpu) -{ - if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return -1; - } - return 0; -} - static int handle_dr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification; - unsigned long val; int dr, reg; /* Do not handle if the CPL > 0, will trigger GP on re-entry */ @@ -3095,67 +3126,20 @@ static int handle_dr(struct kvm_vcpu *vcpu) dr = exit_qualification & DEBUG_REG_ACCESS_NUM; reg = DEBUG_REG_ACCESS_REG(exit_qualification); if (exit_qualification & TYPE_MOV_FROM_DR) { - switch (dr) { - case 0 ... 3: - val = vcpu->arch.db[dr]; - break; - case 4: - if (check_dr_alias(vcpu) < 0) - return 1; - /* fall through */ - case 6: - val = vcpu->arch.dr6; - break; - case 5: - if (check_dr_alias(vcpu) < 0) - return 1; - /* fall through */ - default: /* 7 */ - val = vcpu->arch.dr7; - break; - } - kvm_register_write(vcpu, reg, val); - } else { - val = vcpu->arch.regs[reg]; - switch (dr) { - case 0 ... 3: - vcpu->arch.db[dr] = val; - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) - vcpu->arch.eff_db[dr] = val; - break; - case 4: - if (check_dr_alias(vcpu) < 0) - return 1; - /* fall through */ - case 6: - if (val & 0xffffffff00000000ULL) { - kvm_inject_gp(vcpu, 0); - return 1; - } - vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; - break; - case 5: - if (check_dr_alias(vcpu) < 0) - return 1; - /* fall through */ - default: /* 7 */ - if (val & 0xffffffff00000000ULL) { - kvm_inject_gp(vcpu, 0); - return 1; - } - vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; - if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { - vmcs_writel(GUEST_DR7, vcpu->arch.dr7); - vcpu->arch.switch_db_regs = - (val & DR7_BP_EN_MASK); - } - break; - } - } + unsigned long val; + if (!kvm_get_dr(vcpu, dr, &val)) + kvm_register_write(vcpu, reg, val); + } else + kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); skip_emulated_instruction(vcpu); return 1; } +static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) +{ + vmcs_writel(GUEST_DR7, val); +} + static int handle_cpuid(struct kvm_vcpu *vcpu) { kvm_emulate_cpuid(vcpu); @@ -3287,6 +3271,8 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long exit_qualification; + bool has_error_code = false; + u32 error_code = 0; u16 tss_selector; int reason, type, idt_v; @@ -3309,6 +3295,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) kvm_clear_interrupt_queue(vcpu); break; case INTR_TYPE_HARD_EXCEPTION: + if (vmx->idt_vectoring_info & + VECTORING_INFO_DELIVER_CODE_MASK) { + has_error_code = true; + error_code = + vmcs_read32(IDT_VECTORING_ERROR_CODE); + } + /* fall through */ case INTR_TYPE_SOFT_EXCEPTION: kvm_clear_exception_queue(vcpu); break; @@ -3323,8 +3316,13 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) type != INTR_TYPE_NMI_INTR)) skip_emulated_instruction(vcpu); - if (!kvm_task_switch(vcpu, tss_selector, reason)) + if (kvm_task_switch(vcpu, tss_selector, reason, + has_error_code, error_code) == EMULATE_FAIL) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; return 0; + } /* clear all local breakpoint enable flags */ vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~55); @@ -3569,7 +3567,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; - trace_kvm_exit(exit_reason, kvm_rip_read(vcpu)); + trace_kvm_exit(exit_reason, vcpu); /* If guest state is invalid, start emulating */ if (vmx->emulation_required && emulate_invalid_guest_state) @@ -4149,6 +4147,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, + .set_dr7 = vmx_set_dr7, .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c3a33b2..58a96e6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -42,7 +42,7 @@ #include <linux/slab.h> #include <linux/perf_event.h> #include <trace/events/kvm.h> -#undef TRACE_INCLUDE_FILE + #define CREATE_TRACE_POINTS #include "trace.h" @@ -224,34 +224,6 @@ static void drop_user_return_notifiers(void *ignore) kvm_on_user_return(&smsr->urn); } -unsigned long segment_base(u16 selector) -{ - struct descriptor_table gdt; - struct desc_struct *d; - unsigned long table_base; - unsigned long v; - - if (selector == 0) - return 0; - - kvm_get_gdt(&gdt); - table_base = gdt.base; - - if (selector & 4) { /* from ldt */ - u16 ldt_selector = kvm_read_ldt(); - - table_base = segment_base(ldt_selector); - } - d = (struct desc_struct *)(table_base + (selector & ~7)); - v = get_desc_base(d); -#ifdef CONFIG_X86_64 - if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) - v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; -#endif - return v; -} -EXPORT_SYMBOL_GPL(segment_base); - u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) { if (irqchip_in_kernel(vcpu->kvm)) @@ -434,8 +406,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) #ifdef CONFIG_X86_64 if (cr0 & 0xffffffff00000000UL) { - printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n", - cr0, kvm_read_cr0(vcpu)); kvm_inject_gp(vcpu, 0); return; } @@ -444,14 +414,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) cr0 &= ~CR0_RESERVED_BITS; if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) { - printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n"); kvm_inject_gp(vcpu, 0); return; } if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) { - printk(KERN_DEBUG "set_cr0: #GP, set PG flag " - "and a clear PE flag\n"); kvm_inject_gp(vcpu, 0); return; } @@ -462,15 +429,11 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) int cs_db, cs_l; if (!is_pae(vcpu)) { - printk(KERN_DEBUG "set_cr0: #GP, start paging " - "in long mode while PAE is disabled\n"); kvm_inject_gp(vcpu, 0); return; } kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); if (cs_l) { - printk(KERN_DEBUG "set_cr0: #GP, start paging " - "in long mode while CS.L == 1\n"); kvm_inject_gp(vcpu, 0); return; @@ -478,8 +441,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } else #endif if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { - printk(KERN_DEBUG "set_cr0: #GP, pdptrs " - "reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } @@ -487,7 +448,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) } kvm_x86_ops->set_cr0(vcpu, cr0); - vcpu->arch.cr0 = cr0; kvm_mmu_reset_context(vcpu); return; @@ -506,28 +466,23 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE; if (cr4 & CR4_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr4: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) { - printk(KERN_DEBUG "set_cr4: #GP, clearing PAE while " - "in long mode\n"); kvm_inject_gp(vcpu, 0); return; } } else if (is_paging(vcpu) && (cr4 & X86_CR4_PAE) && ((cr4 ^ old_cr4) & pdptr_bits) && !load_pdptrs(vcpu, vcpu->arch.cr3)) { - printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (cr4 & X86_CR4_VMXE) { - printk(KERN_DEBUG "set_cr4: #GP, setting VMXE\n"); kvm_inject_gp(vcpu, 0); return; } @@ -548,21 +503,16 @@ void kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (is_long_mode(vcpu)) { if (cr3 & CR3_L_MODE_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr3: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } } else { if (is_pae(vcpu)) { if (cr3 & CR3_PAE_RESERVED_BITS) { - printk(KERN_DEBUG - "set_cr3: #GP, reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } if (is_paging(vcpu) && !load_pdptrs(vcpu, cr3)) { - printk(KERN_DEBUG "set_cr3: #GP, pdptrs " - "reserved bits\n"); kvm_inject_gp(vcpu, 0); return; } @@ -594,7 +544,6 @@ EXPORT_SYMBOL_GPL(kvm_set_cr3); void kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8) { if (cr8 & CR8_RESERVED_BITS) { - printk(KERN_DEBUG "set_cr8: #GP, reserved bits 0x%lx\n", cr8); kvm_inject_gp(vcpu, 0); return; } @@ -614,6 +563,80 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) +{ + switch (dr) { + case 0 ... 3: + vcpu->arch.db[dr] = val; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + vcpu->arch.eff_db[dr] = val; + break; + case 4: + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + /* fall through */ + case 6: + if (val & 0xffffffff00000000ULL) { + kvm_inject_gp(vcpu, 0); + return 1; + } + vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; + break; + case 5: + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + /* fall through */ + default: /* 7 */ + if (val & 0xffffffff00000000ULL) { + kvm_inject_gp(vcpu, 0); + return 1; + } + vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1; + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { + kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7); + vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK); + } + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_set_dr); + +int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) +{ + switch (dr) { + case 0 ... 3: + *val = vcpu->arch.db[dr]; + break; + case 4: + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + /* fall through */ + case 6: + *val = vcpu->arch.dr6; + break; + case 5: + if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + /* fall through */ + default: /* 7 */ + *val = vcpu->arch.dr7; + break; + } + + return 0; +} +EXPORT_SYMBOL_GPL(kvm_get_dr); + static inline u32 bit(int bitno) { return 1 << (bitno & 31); @@ -650,15 +673,12 @@ static u32 emulated_msrs[] = { static void set_efer(struct kvm_vcpu *vcpu, u64 efer) { if (efer & efer_reserved_bits) { - printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n", - efer); kvm_inject_gp(vcpu, 0); return; } if (is_paging(vcpu) && (vcpu->arch.efer & EFER_LME) != (efer & EFER_LME)) { - printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n"); kvm_inject_gp(vcpu, 0); return; } @@ -668,7 +688,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->edx & bit(X86_FEATURE_FXSR_OPT))) { - printk(KERN_DEBUG "set_efer: #GP, enable FFXSR w/o CPUID capability\n"); kvm_inject_gp(vcpu, 0); return; } @@ -679,7 +698,6 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) feat = kvm_find_cpuid_entry(vcpu, 0x80000001, 0); if (!feat || !(feat->ecx & bit(X86_FEATURE_SVM))) { - printk(KERN_DEBUG "set_efer: #GP, enable SVM w/o SVM\n"); kvm_inject_gp(vcpu, 0); return; } @@ -968,9 +986,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 data) if (msr >= MSR_IA32_MC0_CTL && msr < MSR_IA32_MC0_CTL + 4 * bank_num) { u32 offset = msr - MSR_IA32_MC0_CTL; - /* only 0 or all 1s can be written to IA32_MCi_CTL */ + /* only 0 or all 1s can be written to IA32_MCi_CTL + * some Linux kernels though clear bit 10 in bank 4 to + * workaround a BIOS/GART TBL issue on AMD K8s, ignore + * this to avoid an uncatched #GP in the guest + */ if ((offset & 0x3) == 0 && - data != 0 && data != ~(u64)0) + data != 0 && (data | (1 << 10)) != ~(u64)0) return -1; vcpu->arch.mce_banks[offset] = data; break; @@ -1114,6 +1136,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case MSR_K7_HWCR: data &= ~(u64)0x40; /* ignore flush filter disable */ + data &= ~(u64)0x100; /* ignore ignne emulation enable */ if (data != 0) { pr_unimpl(vcpu, "unimplemented HWCR wrmsr: 0x%llx\n", data); @@ -1572,6 +1595,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: case KVM_CAP_PCI_SEGMENT: + case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: r = 1; break; @@ -2124,14 +2148,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, { vcpu_load(vcpu); - events->exception.injected = vcpu->arch.exception.pending; + events->exception.injected = + vcpu->arch.exception.pending && + !kvm_exception_is_soft(vcpu->arch.exception.nr); events->exception.nr = vcpu->arch.exception.nr; events->exception.has_error_code = vcpu->arch.exception.has_error_code; events->exception.error_code = vcpu->arch.exception.error_code; - events->interrupt.injected = vcpu->arch.interrupt.pending; + events->interrupt.injected = + vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft; events->interrupt.nr = vcpu->arch.interrupt.nr; - events->interrupt.soft = vcpu->arch.interrupt.soft; + events->interrupt.soft = 0; + events->interrupt.shadow = + kvm_x86_ops->get_interrupt_shadow(vcpu, + KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI); events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending; @@ -2140,7 +2170,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->sipi_vector = vcpu->arch.sipi_vector; events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR); + | KVM_VCPUEVENT_VALID_SIPI_VECTOR + | KVM_VCPUEVENT_VALID_SHADOW); vcpu_put(vcpu); } @@ -2149,7 +2180,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING - | KVM_VCPUEVENT_VALID_SIPI_VECTOR)) + | KVM_VCPUEVENT_VALID_SIPI_VECTOR + | KVM_VCPUEVENT_VALID_SHADOW)) return -EINVAL; vcpu_load(vcpu); @@ -2164,6 +2196,9 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.interrupt.soft = events->interrupt.soft; if (vcpu->arch.interrupt.pending && irqchip_in_kernel(vcpu->kvm)) kvm_pic_clear_isr_ack(vcpu->kvm); + if (events->flags & KVM_VCPUEVENT_VALID_SHADOW) + kvm_x86_ops->set_interrupt_shadow(vcpu, + events->interrupt.shadow); vcpu->arch.nmi_injected = events->nmi.injected; if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) @@ -2178,6 +2213,36 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, return 0; } +static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, + struct kvm_debugregs *dbgregs) +{ + vcpu_load(vcpu); + + memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); + dbgregs->dr6 = vcpu->arch.dr6; + dbgregs->dr7 = vcpu->arch.dr7; + dbgregs->flags = 0; + + vcpu_put(vcpu); +} + +static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, + struct kvm_debugregs *dbgregs) +{ + if (dbgregs->flags) + return -EINVAL; + + vcpu_load(vcpu); + + memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); + vcpu->arch.dr6 = dbgregs->dr6; + vcpu->arch.dr7 = dbgregs->dr7; + + vcpu_put(vcpu); + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2356,6 +2421,29 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events); break; } + case KVM_GET_DEBUGREGS: { + struct kvm_debugregs dbgregs; + + kvm_vcpu_ioctl_x86_get_debugregs(vcpu, &dbgregs); + + r = -EFAULT; + if (copy_to_user(argp, &dbgregs, + sizeof(struct kvm_debugregs))) + break; + r = 0; + break; + } + case KVM_SET_DEBUGREGS: { + struct kvm_debugregs dbgregs; + + r = -EFAULT; + if (copy_from_user(&dbgregs, argp, + sizeof(struct kvm_debugregs))) + break; + + r = kvm_vcpu_ioctl_x86_set_debugregs(vcpu, &dbgregs); + break; + } default: r = -EINVAL; } @@ -2636,8 +2724,9 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm, int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { - int r, n, i; + int r, i; struct kvm_memory_slot *memslot; + unsigned long n; unsigned long is_dirty = 0; unsigned long *dirty_bitmap = NULL; @@ -2652,7 +2741,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, if (!memslot->dirty_bitmap) goto out; - n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + n = kvm_dirty_bitmap_bytes(memslot); r = -ENOMEM; dirty_bitmap = vmalloc(n); @@ -2822,11 +2911,13 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&irq_event, argp, sizeof irq_event)) goto out; + r = -ENXIO; if (irqchip_in_kernel(kvm)) { __s32 status; status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irq_event.irq, irq_event.level); if (ioctl == KVM_IRQ_LINE_STATUS) { + r = -EFAULT; irq_event.status = status; if (copy_to_user(argp, &irq_event, sizeof irq_event)) @@ -3042,6 +3133,18 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) return kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, len, v); } +static void kvm_set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + kvm_x86_ops->set_segment(vcpu, var, seg); +} + +void kvm_get_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) +{ + kvm_x86_ops->get_segment(vcpu, var, seg); +} + gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; @@ -3122,14 +3225,17 @@ static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error); } -static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes, - struct kvm_vcpu *vcpu, u32 *error) +static int kvm_write_guest_virt_system(gva_t addr, void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu, + u32 *error) { void *data = val; int r = X86EMUL_CONTINUE; while (bytes) { - gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error); + gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, + PFERR_WRITE_MASK, error); unsigned offset = addr & (PAGE_SIZE-1); unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset); int ret; @@ -3152,7 +3258,6 @@ out: return r; } - static int emulator_read_emulated(unsigned long addr, void *val, unsigned int bytes, @@ -3255,9 +3360,9 @@ mmio: } int emulator_write_emulated(unsigned long addr, - const void *val, - unsigned int bytes, - struct kvm_vcpu *vcpu) + const void *val, + unsigned int bytes, + struct kvm_vcpu *vcpu) { /* Crossing a page boundary? */ if (((addr + bytes - 1) ^ addr) & PAGE_MASK) { @@ -3275,45 +3380,150 @@ int emulator_write_emulated(unsigned long addr, } EXPORT_SYMBOL_GPL(emulator_write_emulated); +#define CMPXCHG_TYPE(t, ptr, old, new) \ + (cmpxchg((t *)(ptr), *(t *)(old), *(t *)(new)) == *(t *)(old)) + +#ifdef CONFIG_X86_64 +# define CMPXCHG64(ptr, old, new) CMPXCHG_TYPE(u64, ptr, old, new) +#else +# define CMPXCHG64(ptr, old, new) \ + (cmpxchg64((u64 *)(ptr), *(u64 *)(old), *(u64 *)(new)) == *(u64 *)(old)) +#endif + static int emulator_cmpxchg_emulated(unsigned long addr, const void *old, const void *new, unsigned int bytes, struct kvm_vcpu *vcpu) { - printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); -#ifndef CONFIG_X86_64 - /* guests cmpxchg8b have to be emulated atomically */ - if (bytes == 8) { - gpa_t gpa; - struct page *page; - char *kaddr; - u64 val; + gpa_t gpa; + struct page *page; + char *kaddr; + bool exchanged; - gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); + /* guests cmpxchg8b have to be emulated atomically */ + if (bytes > 8 || (bytes & (bytes - 1))) + goto emul_write; - if (gpa == UNMAPPED_GVA || - (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) - goto emul_write; + gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); - if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) - goto emul_write; + if (gpa == UNMAPPED_GVA || + (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) + goto emul_write; - val = *(u64 *)new; + if (((gpa + bytes - 1) & PAGE_MASK) != (gpa & PAGE_MASK)) + goto emul_write; - page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); - kaddr = kmap_atomic(page, KM_USER0); - set_64bit((u64 *)(kaddr + offset_in_page(gpa)), val); - kunmap_atomic(kaddr, KM_USER0); - kvm_release_page_dirty(page); + kaddr = kmap_atomic(page, KM_USER0); + kaddr += offset_in_page(gpa); + switch (bytes) { + case 1: + exchanged = CMPXCHG_TYPE(u8, kaddr, old, new); + break; + case 2: + exchanged = CMPXCHG_TYPE(u16, kaddr, old, new); + break; + case 4: + exchanged = CMPXCHG_TYPE(u32, kaddr, old, new); + break; + case 8: + exchanged = CMPXCHG64(kaddr, old, new); + break; + default: + BUG(); } + kunmap_atomic(kaddr, KM_USER0); + kvm_release_page_dirty(page); + + if (!exchanged) + return X86EMUL_CMPXCHG_FAILED; + + kvm_mmu_pte_write(vcpu, gpa, new, bytes, 1); + + return X86EMUL_CONTINUE; + emul_write: -#endif + printk_once(KERN_WARNING "kvm: emulating exchange as write\n"); return emulator_write_emulated(addr, new, bytes, vcpu); } +static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) +{ + /* TODO: String I/O for in kernel device */ + int r; + + if (vcpu->arch.pio.in) + r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, + vcpu->arch.pio.size, pd); + else + r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, + vcpu->arch.pio.port, vcpu->arch.pio.size, + pd); + return r; +} + + +static int emulator_pio_in_emulated(int size, unsigned short port, void *val, + unsigned int count, struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.pio.count) + goto data_avail; + + trace_kvm_pio(1, port, size, 1); + + vcpu->arch.pio.port = port; + vcpu->arch.pio.in = 1; + vcpu->arch.pio.count = count; + vcpu->arch.pio.size = size; + + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { + data_avail: + memcpy(val, vcpu->arch.pio_data, size * count); + vcpu->arch.pio.count = 0; + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_IO; + vcpu->run->io.direction = KVM_EXIT_IO_IN; + vcpu->run->io.size = size; + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; + vcpu->run->io.count = count; + vcpu->run->io.port = port; + + return 0; +} + +static int emulator_pio_out_emulated(int size, unsigned short port, + const void *val, unsigned int count, + struct kvm_vcpu *vcpu) +{ + trace_kvm_pio(0, port, size, 1); + + vcpu->arch.pio.port = port; + vcpu->arch.pio.in = 0; + vcpu->arch.pio.count = count; + vcpu->arch.pio.size = size; + + memcpy(vcpu->arch.pio_data, val, size * count); + + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { + vcpu->arch.pio.count = 0; + return 1; + } + + vcpu->run->exit_reason = KVM_EXIT_IO; + vcpu->run->io.direction = KVM_EXIT_IO_OUT; + vcpu->run->io.size = size; + vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; + vcpu->run->io.count = count; + vcpu->run->io.port = port; + + return 0; +} + static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) { return kvm_x86_ops->get_segment_base(vcpu, seg); @@ -3334,14 +3544,14 @@ int emulate_clts(struct kvm_vcpu *vcpu) int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) { - return kvm_x86_ops->get_dr(ctxt->vcpu, dr, dest); + return kvm_get_dr(ctxt->vcpu, dr, dest); } int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) { unsigned long mask = (ctxt->mode == X86EMUL_MODE_PROT64) ? ~0ULL : ~0U; - return kvm_x86_ops->set_dr(ctxt->vcpu, dr, value & mask); + return kvm_set_dr(ctxt->vcpu, dr, value & mask); } void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) @@ -3362,12 +3572,167 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) } EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); +static u64 mk_cr_64(u64 curr_cr, u32 new_val) +{ + return (curr_cr & ~((1ULL << 32) - 1)) | new_val; +} + +static unsigned long emulator_get_cr(int cr, struct kvm_vcpu *vcpu) +{ + unsigned long value; + + switch (cr) { + case 0: + value = kvm_read_cr0(vcpu); + break; + case 2: + value = vcpu->arch.cr2; + break; + case 3: + value = vcpu->arch.cr3; + break; + case 4: + value = kvm_read_cr4(vcpu); + break; + case 8: + value = kvm_get_cr8(vcpu); + break; + default: + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); + return 0; + } + + return value; +} + +static void emulator_set_cr(int cr, unsigned long val, struct kvm_vcpu *vcpu) +{ + switch (cr) { + case 0: + kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); + break; + case 2: + vcpu->arch.cr2 = val; + break; + case 3: + kvm_set_cr3(vcpu, val); + break; + case 4: + kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); + break; + case 8: + kvm_set_cr8(vcpu, val & 0xfUL); + break; + default: + vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); + } +} + +static int emulator_get_cpl(struct kvm_vcpu *vcpu) +{ + return kvm_x86_ops->get_cpl(vcpu); +} + +static void emulator_get_gdt(struct desc_ptr *dt, struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->get_gdt(vcpu, dt); +} + +static bool emulator_get_cached_descriptor(struct desc_struct *desc, int seg, + struct kvm_vcpu *vcpu) +{ + struct kvm_segment var; + + kvm_get_segment(vcpu, &var, seg); + + if (var.unusable) + return false; + + if (var.g) + var.limit >>= 12; + set_desc_limit(desc, var.limit); + set_desc_base(desc, (unsigned long)var.base); + desc->type = var.type; + desc->s = var.s; + desc->dpl = var.dpl; + desc->p = var.present; + desc->avl = var.avl; + desc->l = var.l; + desc->d = var.db; + desc->g = var.g; + + return true; +} + +static void emulator_set_cached_descriptor(struct desc_struct *desc, int seg, + struct kvm_vcpu *vcpu) +{ + struct kvm_segment var; + + /* needed to preserve selector */ + kvm_get_segment(vcpu, &var, seg); + + var.base = get_desc_base(desc); + var.limit = get_desc_limit(desc); + if (desc->g) + var.limit = (var.limit << 12) | 0xfff; + var.type = desc->type; + var.present = desc->p; + var.dpl = desc->dpl; + var.db = desc->d; + var.s = desc->s; + var.l = desc->l; + var.g = desc->g; + var.avl = desc->avl; + var.present = desc->p; + var.unusable = !var.present; + var.padding = 0; + + kvm_set_segment(vcpu, &var, seg); + return; +} + +static u16 emulator_get_segment_selector(int seg, struct kvm_vcpu *vcpu) +{ + struct kvm_segment kvm_seg; + + kvm_get_segment(vcpu, &kvm_seg, seg); + return kvm_seg.selector; +} + +static void emulator_set_segment_selector(u16 sel, int seg, + struct kvm_vcpu *vcpu) +{ + struct kvm_segment kvm_seg; + + kvm_get_segment(vcpu, &kvm_seg, seg); + kvm_seg.selector = sel; + kvm_set_segment(vcpu, &kvm_seg, seg); +} + +static void emulator_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) +{ + kvm_x86_ops->set_rflags(vcpu, rflags); +} + static struct x86_emulate_ops emulate_ops = { .read_std = kvm_read_guest_virt_system, + .write_std = kvm_write_guest_virt_system, .fetch = kvm_fetch_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, .cmpxchg_emulated = emulator_cmpxchg_emulated, + .pio_in_emulated = emulator_pio_in_emulated, + .pio_out_emulated = emulator_pio_out_emulated, + .get_cached_descriptor = emulator_get_cached_descriptor, + .set_cached_descriptor = emulator_set_cached_descriptor, + .get_segment_selector = emulator_get_segment_selector, + .set_segment_selector = emulator_set_segment_selector, + .get_gdt = emulator_get_gdt, + .get_cr = emulator_get_cr, + .set_cr = emulator_set_cr, + .cpl = emulator_get_cpl, + .set_rflags = emulator_set_rflags, }; static void cache_all_regs(struct kvm_vcpu *vcpu) @@ -3398,14 +3763,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu, cache_all_regs(vcpu); vcpu->mmio_is_write = 0; - vcpu->arch.pio.string = 0; if (!(emulation_type & EMULTYPE_NO_DECODE)) { int cs_db, cs_l; kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); vcpu->arch.emulate_ctxt.vcpu = vcpu; - vcpu->arch.emulate_ctxt.eflags = kvm_get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); vcpu->arch.emulate_ctxt.mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) @@ -3414,6 +3779,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); + trace_kvm_emulate_insn_start(vcpu); /* Only allow emulation of specific instructions on #UD * (namely VMMCALL, sysenter, sysexit, syscall)*/ @@ -3446,6 +3812,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, ++vcpu->stat.insn_emulation; if (r) { ++vcpu->stat.insn_emulation_fail; + trace_kvm_emulate_insn_failed(vcpu); if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) return EMULATE_DONE; return EMULATE_FAIL; @@ -3457,16 +3824,20 @@ int emulate_instruction(struct kvm_vcpu *vcpu, return EMULATE_DONE; } +restart: r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; if (r == 0) kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); - if (vcpu->arch.pio.string) + if (vcpu->arch.pio.count) { + if (!vcpu->arch.pio.in) + vcpu->arch.pio.count = 0; return EMULATE_DO_MMIO; + } - if ((r || vcpu->mmio_is_write) && run) { + if (r || vcpu->mmio_is_write) { run->exit_reason = KVM_EXIT_MMIO; run->mmio.phys_addr = vcpu->mmio_phys_addr; memcpy(run->mmio.data, vcpu->mmio_data, 8); @@ -3476,222 +3847,41 @@ int emulate_instruction(struct kvm_vcpu *vcpu, if (r) { if (kvm_mmu_unprotect_page_virt(vcpu, cr2)) - return EMULATE_DONE; + goto done; if (!vcpu->mmio_needed) { + ++vcpu->stat.insn_emulation_fail; + trace_kvm_emulate_insn_failed(vcpu); kvm_report_emulation_failure(vcpu, "mmio"); return EMULATE_FAIL; } return EMULATE_DO_MMIO; } - kvm_set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); - if (vcpu->mmio_is_write) { vcpu->mmio_needed = 0; return EMULATE_DO_MMIO; } - return EMULATE_DONE; -} -EXPORT_SYMBOL_GPL(emulate_instruction); - -static int pio_copy_data(struct kvm_vcpu *vcpu) -{ - void *p = vcpu->arch.pio_data; - gva_t q = vcpu->arch.pio.guest_gva; - unsigned bytes; - int ret; - u32 error_code; - - bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count; - if (vcpu->arch.pio.in) - ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code); - else - ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code); - - if (ret == X86EMUL_PROPAGATE_FAULT) - kvm_inject_page_fault(vcpu, q, error_code); - - return ret; -} - -int complete_pio(struct kvm_vcpu *vcpu) -{ - struct kvm_pio_request *io = &vcpu->arch.pio; - long delta; - int r; - unsigned long val; - - if (!io->string) { - if (io->in) { - val = kvm_register_read(vcpu, VCPU_REGS_RAX); - memcpy(&val, vcpu->arch.pio_data, io->size); - kvm_register_write(vcpu, VCPU_REGS_RAX, val); - } - } else { - if (io->in) { - r = pio_copy_data(vcpu); - if (r) - goto out; - } - - delta = 1; - if (io->rep) { - delta *= io->cur_count; - /* - * The size of the register should really depend on - * current address size. - */ - val = kvm_register_read(vcpu, VCPU_REGS_RCX); - val -= delta; - kvm_register_write(vcpu, VCPU_REGS_RCX, val); - } - if (io->down) - delta = -delta; - delta *= io->size; - if (io->in) { - val = kvm_register_read(vcpu, VCPU_REGS_RDI); - val += delta; - kvm_register_write(vcpu, VCPU_REGS_RDI, val); - } else { - val = kvm_register_read(vcpu, VCPU_REGS_RSI); - val += delta; - kvm_register_write(vcpu, VCPU_REGS_RSI, val); - } - } -out: - io->count -= io->cur_count; - io->cur_count = 0; - - return 0; -} +done: + if (vcpu->arch.exception.pending) + vcpu->arch.emulate_ctxt.restart = false; -static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) -{ - /* TODO: String I/O for in kernel device */ - int r; + if (vcpu->arch.emulate_ctxt.restart) + goto restart; - if (vcpu->arch.pio.in) - r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, - vcpu->arch.pio.size, pd); - else - r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, - vcpu->arch.pio.port, vcpu->arch.pio.size, - pd); - return r; -} - -static int pio_string_write(struct kvm_vcpu *vcpu) -{ - struct kvm_pio_request *io = &vcpu->arch.pio; - void *pd = vcpu->arch.pio_data; - int i, r = 0; - - for (i = 0; i < io->cur_count; i++) { - if (kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, - io->port, io->size, pd)) { - r = -EOPNOTSUPP; - break; - } - pd += io->size; - } - return r; -} - -int kvm_emulate_pio(struct kvm_vcpu *vcpu, int in, int size, unsigned port) -{ - unsigned long val; - - trace_kvm_pio(!in, port, size, 1); - - vcpu->run->exit_reason = KVM_EXIT_IO; - vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; - vcpu->run->io.size = vcpu->arch.pio.size = size; - vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; - vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = 1; - vcpu->run->io.port = vcpu->arch.pio.port = port; - vcpu->arch.pio.in = in; - vcpu->arch.pio.string = 0; - vcpu->arch.pio.down = 0; - vcpu->arch.pio.rep = 0; - - if (!vcpu->arch.pio.in) { - val = kvm_register_read(vcpu, VCPU_REGS_RAX); - memcpy(vcpu->arch.pio_data, &val, 4); - } - - if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { - complete_pio(vcpu); - return 1; - } - return 0; + return EMULATE_DONE; } -EXPORT_SYMBOL_GPL(kvm_emulate_pio); +EXPORT_SYMBOL_GPL(emulate_instruction); -int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, int in, - int size, unsigned long count, int down, - gva_t address, int rep, unsigned port) +int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { - unsigned now, in_page; - int ret = 0; - - trace_kvm_pio(!in, port, size, count); - - vcpu->run->exit_reason = KVM_EXIT_IO; - vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; - vcpu->run->io.size = vcpu->arch.pio.size = size; - vcpu->run->io.data_offset = KVM_PIO_PAGE_OFFSET * PAGE_SIZE; - vcpu->run->io.count = vcpu->arch.pio.count = vcpu->arch.pio.cur_count = count; - vcpu->run->io.port = vcpu->arch.pio.port = port; - vcpu->arch.pio.in = in; - vcpu->arch.pio.string = 1; - vcpu->arch.pio.down = down; - vcpu->arch.pio.rep = rep; - - if (!count) { - kvm_x86_ops->skip_emulated_instruction(vcpu); - return 1; - } - - if (!down) - in_page = PAGE_SIZE - offset_in_page(address); - else - in_page = offset_in_page(address) + size; - now = min(count, (unsigned long)in_page / size); - if (!now) - now = 1; - if (down) { - /* - * String I/O in reverse. Yuck. Kill the guest, fix later. - */ - pr_unimpl(vcpu, "guest string pio down\n"); - kvm_inject_gp(vcpu, 0); - return 1; - } - vcpu->run->io.count = now; - vcpu->arch.pio.cur_count = now; - - if (vcpu->arch.pio.cur_count == vcpu->arch.pio.count) - kvm_x86_ops->skip_emulated_instruction(vcpu); - - vcpu->arch.pio.guest_gva = address; - - if (!vcpu->arch.pio.in) { - /* string PIO write */ - ret = pio_copy_data(vcpu); - if (ret == X86EMUL_PROPAGATE_FAULT) - return 1; - if (ret == 0 && !pio_string_write(vcpu)) { - complete_pio(vcpu); - if (vcpu->arch.pio.count == 0) - ret = 1; - } - } - /* no string PIO read support yet */ - + unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); + int ret = emulator_pio_out_emulated(size, port, &val, 1, vcpu); + /* do not return to emulator after return from userspace */ + vcpu->arch.pio.count = 0; return ret; } -EXPORT_SYMBOL_GPL(kvm_emulate_pio_string); +EXPORT_SYMBOL_GPL(kvm_fast_pio_out); static void bounce_off(void *info) { @@ -4010,85 +4200,20 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu) return emulator_write_emulated(rip, instruction, 3, vcpu); } -static u64 mk_cr_64(u64 curr_cr, u32 new_val) -{ - return (curr_cr & ~((1ULL << 32) - 1)) | new_val; -} - void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) { - struct descriptor_table dt = { limit, base }; + struct desc_ptr dt = { limit, base }; kvm_x86_ops->set_gdt(vcpu, &dt); } void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base) { - struct descriptor_table dt = { limit, base }; + struct desc_ptr dt = { limit, base }; kvm_x86_ops->set_idt(vcpu, &dt); } -void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, - unsigned long *rflags) -{ - kvm_lmsw(vcpu, msw); - *rflags = kvm_get_rflags(vcpu); -} - -unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) -{ - unsigned long value; - - switch (cr) { - case 0: - value = kvm_read_cr0(vcpu); - break; - case 2: - value = vcpu->arch.cr2; - break; - case 3: - value = vcpu->arch.cr3; - break; - case 4: - value = kvm_read_cr4(vcpu); - break; - case 8: - value = kvm_get_cr8(vcpu); - break; - default: - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); - return 0; - } - - return value; -} - -void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, - unsigned long *rflags) -{ - switch (cr) { - case 0: - kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); - *rflags = kvm_get_rflags(vcpu); - break; - case 2: - vcpu->arch.cr2 = val; - break; - case 3: - kvm_set_cr3(vcpu, val); - break; - case 4: - kvm_set_cr4(vcpu, mk_cr_64(kvm_read_cr4(vcpu), val)); - break; - case 8: - kvm_set_cr8(vcpu, val & 0xfUL); - break; - default: - vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); - } -} - static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) { struct kvm_cpuid_entry2 *e = &vcpu->arch.cpuid_entries[i]; @@ -4152,9 +4277,13 @@ int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; + best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); + if (!best || best->eax < 0x80000008) + goto not_found; best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); if (best) return best->eax & 0xff; +not_found: return 36; } @@ -4268,6 +4397,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) { /* try to reinject previous events if any */ if (vcpu->arch.exception.pending) { + trace_kvm_inj_exception(vcpu->arch.exception.nr, + vcpu->arch.exception.has_error_code, + vcpu->arch.exception.error_code); kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr, vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); @@ -4528,24 +4660,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) if (!irqchip_in_kernel(vcpu->kvm)) kvm_set_cr8(vcpu, kvm_run->cr8); - if (vcpu->arch.pio.cur_count) { - r = complete_pio(vcpu); - if (r) - goto out; - } - if (vcpu->mmio_needed) { - memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); - vcpu->mmio_read_completed = 1; - vcpu->mmio_needed = 0; - + if (vcpu->arch.pio.count || vcpu->mmio_needed || + vcpu->arch.emulate_ctxt.restart) { + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + } vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = emulate_instruction(vcpu, vcpu->arch.mmio_fault_cr2, 0, - EMULTYPE_NO_DECODE); + r = emulate_instruction(vcpu, 0, 0, EMULTYPE_NO_DECODE); srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); if (r == EMULATE_DO_MMIO) { - /* - * Read-modify-write. Back to userspace. - */ r = 0; goto out; } @@ -4628,12 +4753,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return 0; } -void kvm_get_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) -{ - kvm_x86_ops->get_segment(vcpu, var, seg); -} - void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) { struct kvm_segment cs; @@ -4647,7 +4766,7 @@ EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits); int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { - struct descriptor_table dt; + struct desc_ptr dt; vcpu_load(vcpu); @@ -4662,11 +4781,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); kvm_x86_ops->get_idt(vcpu, &dt); - sregs->idt.limit = dt.limit; - sregs->idt.base = dt.base; + sregs->idt.limit = dt.size; + sregs->idt.base = dt.address; kvm_x86_ops->get_gdt(vcpu, &dt); - sregs->gdt.limit = dt.limit; - sregs->gdt.base = dt.base; + sregs->gdt.limit = dt.size; + sregs->gdt.base = dt.address; sregs->cr0 = kvm_read_cr0(vcpu); sregs->cr2 = vcpu->arch.cr2; @@ -4705,559 +4824,33 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, return 0; } -static void kvm_set_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) -{ - kvm_x86_ops->set_segment(vcpu, var, seg); -} - -static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, - struct kvm_segment *kvm_desct) -{ - kvm_desct->base = get_desc_base(seg_desc); - kvm_desct->limit = get_desc_limit(seg_desc); - if (seg_desc->g) { - kvm_desct->limit <<= 12; - kvm_desct->limit |= 0xfff; - } - kvm_desct->selector = selector; - kvm_desct->type = seg_desc->type; - kvm_desct->present = seg_desc->p; - kvm_desct->dpl = seg_desc->dpl; - kvm_desct->db = seg_desc->d; - kvm_desct->s = seg_desc->s; - kvm_desct->l = seg_desc->l; - kvm_desct->g = seg_desc->g; - kvm_desct->avl = seg_desc->avl; - if (!selector) - kvm_desct->unusable = 1; - else - kvm_desct->unusable = 0; - kvm_desct->padding = 0; -} - -static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, - u16 selector, - struct descriptor_table *dtable) -{ - if (selector & 1 << 2) { - struct kvm_segment kvm_seg; - - kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); - - if (kvm_seg.unusable) - dtable->limit = 0; - else - dtable->limit = kvm_seg.limit; - dtable->base = kvm_seg.base; - } - else - kvm_x86_ops->get_gdt(vcpu, dtable); -} - -/* allowed just for 8 bytes segments */ -static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, - struct desc_struct *seg_desc) -{ - struct descriptor_table dtable; - u16 index = selector >> 3; - int ret; - u32 err; - gva_t addr; - - get_segment_descriptor_dtable(vcpu, selector, &dtable); - - if (dtable.limit < index * 8 + 7) { - kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); - return X86EMUL_PROPAGATE_FAULT; - } - addr = dtable.base + index * 8; - ret = kvm_read_guest_virt_system(addr, seg_desc, sizeof(*seg_desc), - vcpu, &err); - if (ret == X86EMUL_PROPAGATE_FAULT) - kvm_inject_page_fault(vcpu, addr, err); - - return ret; -} - -/* allowed just for 8 bytes segments */ -static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, - struct desc_struct *seg_desc) -{ - struct descriptor_table dtable; - u16 index = selector >> 3; - - get_segment_descriptor_dtable(vcpu, selector, &dtable); - - if (dtable.limit < index * 8 + 7) - return 1; - return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL); -} - -static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc) -{ - u32 base_addr = get_desc_base(seg_desc); - - return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL); -} - -static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc) -{ - u32 base_addr = get_desc_base(seg_desc); - - return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL); -} - -static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) -{ - struct kvm_segment kvm_seg; - - kvm_get_segment(vcpu, &kvm_seg, seg); - return kvm_seg.selector; -} - -static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg) -{ - struct kvm_segment segvar = { - .base = selector << 4, - .limit = 0xffff, - .selector = selector, - .type = 3, - .present = 1, - .dpl = 3, - .db = 0, - .s = 1, - .l = 0, - .g = 0, - .avl = 0, - .unusable = 0, - }; - kvm_x86_ops->set_segment(vcpu, &segvar, seg); - return X86EMUL_CONTINUE; -} - -static int is_vm86_segment(struct kvm_vcpu *vcpu, int seg) +int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason, + bool has_error_code, u32 error_code) { - return (seg != VCPU_SREG_LDTR) && - (seg != VCPU_SREG_TR) && - (kvm_get_rflags(vcpu) & X86_EFLAGS_VM); -} - -int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg) -{ - struct kvm_segment kvm_seg; - struct desc_struct seg_desc; - u8 dpl, rpl, cpl; - unsigned err_vec = GP_VECTOR; - u32 err_code = 0; - bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */ - int ret; + int cs_db, cs_l, ret; + cache_all_regs(vcpu); - if (is_vm86_segment(vcpu, seg) || !is_protmode(vcpu)) - return kvm_load_realmode_segment(vcpu, selector, seg); + kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); - /* NULL selector is not valid for TR, CS and SS */ - if ((seg == VCPU_SREG_CS || seg == VCPU_SREG_SS || seg == VCPU_SREG_TR) - && null_selector) - goto exception; + vcpu->arch.emulate_ctxt.vcpu = vcpu; + vcpu->arch.emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu); + vcpu->arch.emulate_ctxt.eip = kvm_rip_read(vcpu); + vcpu->arch.emulate_ctxt.mode = + (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL : + (vcpu->arch.emulate_ctxt.eflags & X86_EFLAGS_VM) + ? X86EMUL_MODE_VM86 : cs_l + ? X86EMUL_MODE_PROT64 : cs_db + ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; - /* TR should be in GDT only */ - if (seg == VCPU_SREG_TR && (selector & (1 << 2))) - goto exception; + ret = emulator_task_switch(&vcpu->arch.emulate_ctxt, &emulate_ops, + tss_selector, reason, has_error_code, + error_code); - ret = load_guest_segment_descriptor(vcpu, selector, &seg_desc); if (ret) - return ret; - - seg_desct_to_kvm_desct(&seg_desc, selector, &kvm_seg); - - if (null_selector) { /* for NULL selector skip all following checks */ - kvm_seg.unusable = 1; - goto load; - } - - err_code = selector & 0xfffc; - err_vec = GP_VECTOR; - - /* can't load system descriptor into segment selecor */ - if (seg <= VCPU_SREG_GS && !kvm_seg.s) - goto exception; - - if (!kvm_seg.present) { - err_vec = (seg == VCPU_SREG_SS) ? SS_VECTOR : NP_VECTOR; - goto exception; - } - - rpl = selector & 3; - dpl = kvm_seg.dpl; - cpl = kvm_x86_ops->get_cpl(vcpu); - - switch (seg) { - case VCPU_SREG_SS: - /* - * segment is not a writable data segment or segment - * selector's RPL != CPL or segment selector's RPL != CPL - */ - if (rpl != cpl || (kvm_seg.type & 0xa) != 0x2 || dpl != cpl) - goto exception; - break; - case VCPU_SREG_CS: - if (!(kvm_seg.type & 8)) - goto exception; - - if (kvm_seg.type & 4) { - /* conforming */ - if (dpl > cpl) - goto exception; - } else { - /* nonconforming */ - if (rpl > cpl || dpl != cpl) - goto exception; - } - /* CS(RPL) <- CPL */ - selector = (selector & 0xfffc) | cpl; - break; - case VCPU_SREG_TR: - if (kvm_seg.s || (kvm_seg.type != 1 && kvm_seg.type != 9)) - goto exception; - break; - case VCPU_SREG_LDTR: - if (kvm_seg.s || kvm_seg.type != 2) - goto exception; - break; - default: /* DS, ES, FS, or GS */ - /* - * segment is not a data or readable code segment or - * ((segment is a data or nonconforming code segment) - * and (both RPL and CPL > DPL)) - */ - if ((kvm_seg.type & 0xa) == 0x8 || - (((kvm_seg.type & 0xc) != 0xc) && (rpl > dpl && cpl > dpl))) - goto exception; - break; - } - - if (!kvm_seg.unusable && kvm_seg.s) { - /* mark segment as accessed */ - kvm_seg.type |= 1; - seg_desc.type |= 1; - save_guest_segment_descriptor(vcpu, selector, &seg_desc); - } -load: - kvm_set_segment(vcpu, &kvm_seg, seg); - return X86EMUL_CONTINUE; -exception: - kvm_queue_exception_e(vcpu, err_vec, err_code); - return X86EMUL_PROPAGATE_FAULT; -} - -static void save_state_to_tss32(struct kvm_vcpu *vcpu, - struct tss_segment_32 *tss) -{ - tss->cr3 = vcpu->arch.cr3; - tss->eip = kvm_rip_read(vcpu); - tss->eflags = kvm_get_rflags(vcpu); - tss->eax = kvm_register_read(vcpu, VCPU_REGS_RAX); - tss->ecx = kvm_register_read(vcpu, VCPU_REGS_RCX); - tss->edx = kvm_register_read(vcpu, VCPU_REGS_RDX); - tss->ebx = kvm_register_read(vcpu, VCPU_REGS_RBX); - tss->esp = kvm_register_read(vcpu, VCPU_REGS_RSP); - tss->ebp = kvm_register_read(vcpu, VCPU_REGS_RBP); - tss->esi = kvm_register_read(vcpu, VCPU_REGS_RSI); - tss->edi = kvm_register_read(vcpu, VCPU_REGS_RDI); - tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); - tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); - tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); - tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); - tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); - tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); - tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); -} - -static void kvm_load_segment_selector(struct kvm_vcpu *vcpu, u16 sel, int seg) -{ - struct kvm_segment kvm_seg; - kvm_get_segment(vcpu, &kvm_seg, seg); - kvm_seg.selector = sel; - kvm_set_segment(vcpu, &kvm_seg, seg); -} - -static int load_state_from_tss32(struct kvm_vcpu *vcpu, - struct tss_segment_32 *tss) -{ - kvm_set_cr3(vcpu, tss->cr3); - - kvm_rip_write(vcpu, tss->eip); - kvm_set_rflags(vcpu, tss->eflags | 2); - - kvm_register_write(vcpu, VCPU_REGS_RAX, tss->eax); - kvm_register_write(vcpu, VCPU_REGS_RCX, tss->ecx); - kvm_register_write(vcpu, VCPU_REGS_RDX, tss->edx); - kvm_register_write(vcpu, VCPU_REGS_RBX, tss->ebx); - kvm_register_write(vcpu, VCPU_REGS_RSP, tss->esp); - kvm_register_write(vcpu, VCPU_REGS_RBP, tss->ebp); - kvm_register_write(vcpu, VCPU_REGS_RSI, tss->esi); - kvm_register_write(vcpu, VCPU_REGS_RDI, tss->edi); - - /* - * SDM says that segment selectors are loaded before segment - * descriptors - */ - kvm_load_segment_selector(vcpu, tss->ldt_selector, VCPU_SREG_LDTR); - kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); - kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); - kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); - kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); - kvm_load_segment_selector(vcpu, tss->fs, VCPU_SREG_FS); - kvm_load_segment_selector(vcpu, tss->gs, VCPU_SREG_GS); - - /* - * Now load segment descriptors. If fault happenes at this stage - * it is handled in a context of new task - */ - if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, VCPU_SREG_LDTR)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->fs, VCPU_SREG_FS)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->gs, VCPU_SREG_GS)) - return 1; - return 0; -} - -static void save_state_to_tss16(struct kvm_vcpu *vcpu, - struct tss_segment_16 *tss) -{ - tss->ip = kvm_rip_read(vcpu); - tss->flag = kvm_get_rflags(vcpu); - tss->ax = kvm_register_read(vcpu, VCPU_REGS_RAX); - tss->cx = kvm_register_read(vcpu, VCPU_REGS_RCX); - tss->dx = kvm_register_read(vcpu, VCPU_REGS_RDX); - tss->bx = kvm_register_read(vcpu, VCPU_REGS_RBX); - tss->sp = kvm_register_read(vcpu, VCPU_REGS_RSP); - tss->bp = kvm_register_read(vcpu, VCPU_REGS_RBP); - tss->si = kvm_register_read(vcpu, VCPU_REGS_RSI); - tss->di = kvm_register_read(vcpu, VCPU_REGS_RDI); - - tss->es = get_segment_selector(vcpu, VCPU_SREG_ES); - tss->cs = get_segment_selector(vcpu, VCPU_SREG_CS); - tss->ss = get_segment_selector(vcpu, VCPU_SREG_SS); - tss->ds = get_segment_selector(vcpu, VCPU_SREG_DS); - tss->ldt = get_segment_selector(vcpu, VCPU_SREG_LDTR); -} - -static int load_state_from_tss16(struct kvm_vcpu *vcpu, - struct tss_segment_16 *tss) -{ - kvm_rip_write(vcpu, tss->ip); - kvm_set_rflags(vcpu, tss->flag | 2); - kvm_register_write(vcpu, VCPU_REGS_RAX, tss->ax); - kvm_register_write(vcpu, VCPU_REGS_RCX, tss->cx); - kvm_register_write(vcpu, VCPU_REGS_RDX, tss->dx); - kvm_register_write(vcpu, VCPU_REGS_RBX, tss->bx); - kvm_register_write(vcpu, VCPU_REGS_RSP, tss->sp); - kvm_register_write(vcpu, VCPU_REGS_RBP, tss->bp); - kvm_register_write(vcpu, VCPU_REGS_RSI, tss->si); - kvm_register_write(vcpu, VCPU_REGS_RDI, tss->di); - - /* - * SDM says that segment selectors are loaded before segment - * descriptors - */ - kvm_load_segment_selector(vcpu, tss->ldt, VCPU_SREG_LDTR); - kvm_load_segment_selector(vcpu, tss->es, VCPU_SREG_ES); - kvm_load_segment_selector(vcpu, tss->cs, VCPU_SREG_CS); - kvm_load_segment_selector(vcpu, tss->ss, VCPU_SREG_SS); - kvm_load_segment_selector(vcpu, tss->ds, VCPU_SREG_DS); - - /* - * Now load segment descriptors. If fault happenes at this stage - * it is handled in a context of new task - */ - if (kvm_load_segment_descriptor(vcpu, tss->ldt, VCPU_SREG_LDTR)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->es, VCPU_SREG_ES)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->cs, VCPU_SREG_CS)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->ss, VCPU_SREG_SS)) - return 1; - - if (kvm_load_segment_descriptor(vcpu, tss->ds, VCPU_SREG_DS)) - return 1; - return 0; -} - -static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, - u16 old_tss_sel, u32 old_tss_base, - struct desc_struct *nseg_desc) -{ - struct tss_segment_16 tss_segment_16; - int ret = 0; - - if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, - sizeof tss_segment_16)) - goto out; - - save_state_to_tss16(vcpu, &tss_segment_16); - - if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, - sizeof tss_segment_16)) - goto out; - - if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), - &tss_segment_16, sizeof tss_segment_16)) - goto out; - - if (old_tss_sel != 0xffff) { - tss_segment_16.prev_task_link = old_tss_sel; - - if (kvm_write_guest(vcpu->kvm, - get_tss_base_addr_write(vcpu, nseg_desc), - &tss_segment_16.prev_task_link, - sizeof tss_segment_16.prev_task_link)) - goto out; - } - - if (load_state_from_tss16(vcpu, &tss_segment_16)) - goto out; - - ret = 1; -out: - return ret; -} - -static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, - u16 old_tss_sel, u32 old_tss_base, - struct desc_struct *nseg_desc) -{ - struct tss_segment_32 tss_segment_32; - int ret = 0; - - if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, - sizeof tss_segment_32)) - goto out; - - save_state_to_tss32(vcpu, &tss_segment_32); - - if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, - sizeof tss_segment_32)) - goto out; - - if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc), - &tss_segment_32, sizeof tss_segment_32)) - goto out; - - if (old_tss_sel != 0xffff) { - tss_segment_32.prev_task_link = old_tss_sel; - - if (kvm_write_guest(vcpu->kvm, - get_tss_base_addr_write(vcpu, nseg_desc), - &tss_segment_32.prev_task_link, - sizeof tss_segment_32.prev_task_link)) - goto out; - } - - if (load_state_from_tss32(vcpu, &tss_segment_32)) - goto out; - - ret = 1; -out: - return ret; -} - -int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) -{ - struct kvm_segment tr_seg; - struct desc_struct cseg_desc; - struct desc_struct nseg_desc; - int ret = 0; - u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); - u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); - - old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL); - - /* FIXME: Handle errors. Failure to read either TSS or their - * descriptors should generate a pagefault. - */ - if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) - goto out; - - if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) - goto out; - - if (reason != TASK_SWITCH_IRET) { - int cpl; - - cpl = kvm_x86_ops->get_cpl(vcpu); - if ((tss_selector & 3) > nseg_desc.dpl || cpl > nseg_desc.dpl) { - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - } + return EMULATE_FAIL; - if (!nseg_desc.p || get_desc_limit(&nseg_desc) < 0x67) { - kvm_queue_exception_e(vcpu, TS_VECTOR, tss_selector & 0xfffc); - return 1; - } - - if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { - cseg_desc.type &= ~(1 << 1); //clear the B flag - save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); - } - - if (reason == TASK_SWITCH_IRET) { - u32 eflags = kvm_get_rflags(vcpu); - kvm_set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); - } - - /* set back link to prev task only if NT bit is set in eflags - note that old_tss_sel is not used afetr this point */ - if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) - old_tss_sel = 0xffff; - - if (nseg_desc.type & 8) - ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, - old_tss_base, &nseg_desc); - else - ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, - old_tss_base, &nseg_desc); - - if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { - u32 eflags = kvm_get_rflags(vcpu); - kvm_set_rflags(vcpu, eflags | X86_EFLAGS_NT); - } - - if (reason != TASK_SWITCH_IRET) { - nseg_desc.type |= (1 << 1); - save_guest_segment_descriptor(vcpu, tss_selector, - &nseg_desc); - } - - kvm_x86_ops->set_cr0(vcpu, kvm_read_cr0(vcpu) | X86_CR0_TS); - seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); - tr_seg.type = 11; - kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); -out: - return ret; + kvm_x86_ops->set_rflags(vcpu, vcpu->arch.emulate_ctxt.eflags); + return EMULATE_DONE; } EXPORT_SYMBOL_GPL(kvm_task_switch); @@ -5266,15 +4859,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { int mmu_reset_needed = 0; int pending_vec, max_bits; - struct descriptor_table dt; + struct desc_ptr dt; vcpu_load(vcpu); - dt.limit = sregs->idt.limit; - dt.base = sregs->idt.base; + dt.size = sregs->idt.limit; + dt.address = sregs->idt.base; kvm_x86_ops->set_idt(vcpu, &dt); - dt.limit = sregs->gdt.limit; - dt.base = sregs->gdt.base; + dt.size = sregs->gdt.limit; + dt.address = sregs->gdt.base; kvm_x86_ops->set_gdt(vcpu, &dt); vcpu->arch.cr2 = sregs->cr2; @@ -5373,11 +4966,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK); } - if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) { - vcpu->arch.singlestep_cs = - get_segment_selector(vcpu, VCPU_SREG_CS); - vcpu->arch.singlestep_rip = kvm_rip_read(vcpu); - } + if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) + vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) + + get_segment_base(vcpu, VCPU_SREG_CS); /* * Trigger an rflags update that will inject or remove the trace @@ -5868,13 +5459,22 @@ int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) return kvm_x86_ops->interrupt_allowed(vcpu); } +bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip) +{ + unsigned long current_rip = kvm_rip_read(vcpu) + + get_segment_base(vcpu, VCPU_SREG_CS); + + return current_rip == linear_rip; +} +EXPORT_SYMBOL_GPL(kvm_is_linear_rip); + unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags; rflags = kvm_x86_ops->get_rflags(vcpu); if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) - rflags &= ~(unsigned long)(X86_EFLAGS_TF | X86_EFLAGS_RF); + rflags &= ~X86_EFLAGS_TF; return rflags; } EXPORT_SYMBOL_GPL(kvm_get_rflags); @@ -5882,10 +5482,8 @@ EXPORT_SYMBOL_GPL(kvm_get_rflags); void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && - vcpu->arch.singlestep_cs == - get_segment_selector(vcpu, VCPU_SREG_CS) && - vcpu->arch.singlestep_rip == kvm_rip_read(vcpu)) - rflags |= X86_EFLAGS_TF | X86_EFLAGS_RF; + kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip)) + rflags |= X86_EFLAGS_TF; kvm_x86_ops->set_rflags(vcpu, rflags); } EXPORT_SYMBOL_GPL(kvm_set_rflags); @@ -5901,3 +5499,4 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_vmexit_inject); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intr_vmexit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_invlpga); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_skinit); +EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_nested_intercepts); diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 7e59dc1..2bdf628 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -115,7 +115,7 @@ static void async_hcall(unsigned long call, unsigned long arg1, local_irq_save(flags); if (lguest_data.hcall_status[next_call] != 0xFF) { /* Table full, so do normal hcall which will flush table. */ - kvm_hypercall4(call, arg1, arg2, arg3, arg4); + hcall(call, arg1, arg2, arg3, arg4); } else { lguest_data.hcalls[next_call].arg0 = call; lguest_data.hcalls[next_call].arg1 = arg1; @@ -145,46 +145,45 @@ static void async_hcall(unsigned long call, unsigned long arg1, * So, when we're in lazy mode, we call async_hcall() to store the call for * future processing: */ -static void lazy_hcall1(unsigned long call, - unsigned long arg1) +static void lazy_hcall1(unsigned long call, unsigned long arg1) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall1(call, arg1); + hcall(call, arg1, 0, 0, 0); else async_hcall(call, arg1, 0, 0, 0); } /* You can imagine what lazy_hcall2, 3 and 4 look like. :*/ static void lazy_hcall2(unsigned long call, - unsigned long arg1, - unsigned long arg2) + unsigned long arg1, + unsigned long arg2) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall2(call, arg1, arg2); + hcall(call, arg1, arg2, 0, 0); else async_hcall(call, arg1, arg2, 0, 0); } static void lazy_hcall3(unsigned long call, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3) + unsigned long arg1, + unsigned long arg2, + unsigned long arg3) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall3(call, arg1, arg2, arg3); + hcall(call, arg1, arg2, arg3, 0); else async_hcall(call, arg1, arg2, arg3, 0); } #ifdef CONFIG_X86_PAE static void lazy_hcall4(unsigned long call, - unsigned long arg1, - unsigned long arg2, - unsigned long arg3, - unsigned long arg4) + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4) { if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) - kvm_hypercall4(call, arg1, arg2, arg3, arg4); + hcall(call, arg1, arg2, arg3, arg4); else async_hcall(call, arg1, arg2, arg3, arg4); } @@ -196,13 +195,13 @@ static void lazy_hcall4(unsigned long call, :*/ static void lguest_leave_lazy_mmu_mode(void) { - kvm_hypercall0(LHCALL_FLUSH_ASYNC); + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); paravirt_leave_lazy_mmu(); } static void lguest_end_context_switch(struct task_struct *next) { - kvm_hypercall0(LHCALL_FLUSH_ASYNC); + hcall(LHCALL_FLUSH_ASYNC, 0, 0, 0, 0); paravirt_end_context_switch(next); } @@ -286,7 +285,7 @@ static void lguest_write_idt_entry(gate_desc *dt, /* Keep the local copy up to date. */ native_write_idt_entry(dt, entrynum, g); /* Tell Host about this new entry. */ - kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1]); + hcall(LHCALL_LOAD_IDT_ENTRY, entrynum, desc[0], desc[1], 0); } /* @@ -300,7 +299,7 @@ static void lguest_load_idt(const struct desc_ptr *desc) struct desc_struct *idt = (void *)desc->address; for (i = 0; i < (desc->size+1)/8; i++) - kvm_hypercall3(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b); + hcall(LHCALL_LOAD_IDT_ENTRY, i, idt[i].a, idt[i].b, 0); } /* @@ -321,7 +320,7 @@ static void lguest_load_gdt(const struct desc_ptr *desc) struct desc_struct *gdt = (void *)desc->address; for (i = 0; i < (desc->size+1)/8; i++) - kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b); + hcall(LHCALL_LOAD_GDT_ENTRY, i, gdt[i].a, gdt[i].b, 0); } /* @@ -334,8 +333,8 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, { native_write_gdt_entry(dt, entrynum, desc, type); /* Tell Host about this new entry. */ - kvm_hypercall3(LHCALL_LOAD_GDT_ENTRY, entrynum, - dt[entrynum].a, dt[entrynum].b); + hcall(LHCALL_LOAD_GDT_ENTRY, entrynum, + dt[entrynum].a, dt[entrynum].b, 0); } /* @@ -931,7 +930,7 @@ static int lguest_clockevent_set_next_event(unsigned long delta, } /* Please wake us this far in the future. */ - kvm_hypercall1(LHCALL_SET_CLOCKEVENT, delta); + hcall(LHCALL_SET_CLOCKEVENT, delta, 0, 0, 0); return 0; } @@ -942,7 +941,7 @@ static void lguest_clockevent_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_UNUSED: case CLOCK_EVT_MODE_SHUTDOWN: /* A 0 argument shuts the clock down. */ - kvm_hypercall0(LHCALL_SET_CLOCKEVENT); + hcall(LHCALL_SET_CLOCKEVENT, 0, 0, 0, 0); break; case CLOCK_EVT_MODE_ONESHOT: /* This is what we expect. */ @@ -1100,7 +1099,7 @@ static void set_lguest_basic_apic_ops(void) /* STOP! Until an interrupt comes in. */ static void lguest_safe_halt(void) { - kvm_hypercall0(LHCALL_HALT); + hcall(LHCALL_HALT, 0, 0, 0, 0); } /* @@ -1112,8 +1111,8 @@ static void lguest_safe_halt(void) */ static void lguest_power_off(void) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa("Power down"), - LGUEST_SHUTDOWN_POWEROFF); + hcall(LHCALL_SHUTDOWN, __pa("Power down"), + LGUEST_SHUTDOWN_POWEROFF, 0, 0); } /* @@ -1123,7 +1122,7 @@ static void lguest_power_off(void) */ static int lguest_panic(struct notifier_block *nb, unsigned long l, void *p) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF); + hcall(LHCALL_SHUTDOWN, __pa(p), LGUEST_SHUTDOWN_POWEROFF, 0, 0); /* The hcall won't return, but to keep gcc happy, we're "done". */ return NOTIFY_DONE; } @@ -1162,7 +1161,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) len = sizeof(scratch) - 1; scratch[len] = '\0'; memcpy(scratch, buf, len); - kvm_hypercall1(LHCALL_NOTIFY, __pa(scratch)); + hcall(LHCALL_NOTIFY, __pa(scratch), 0, 0, 0); /* This routine returns the number of bytes actually written. */ return len; @@ -1174,7 +1173,7 @@ static __init int early_put_chars(u32 vtermno, const char *buf, int count) */ static void lguest_restart(char *reason) { - kvm_hypercall2(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART); + hcall(LHCALL_SHUTDOWN, __pa(reason), LGUEST_SHUTDOWN_RESTART, 0, 0); } /*G:050 diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 27eac0f..4f420c2f 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -32,7 +32,7 @@ ENTRY(lguest_entry) */ movl $LHCALL_LGUEST_INIT, %eax movl $lguest_data - __PAGE_OFFSET, %ebx - .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ + int $LGUEST_TRAP_ENTRY /* Set up the initial stack so we can run C code. */ movl $(init_thread_union+THREAD_SIZE),%esp |