From fb50b020c5331c8c4bee0eb875865f5f8be6c03a Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 16 Nov 2012 13:53:09 -0800
Subject: x86: Move some contents of page_64_types.h into pgtable_64.h and page_64.h

This patch is meant to clean up the fact that we have several functions in page_64_types.h which really don't belong there. I found this issue when I tried to replace __phys_addr with an inline function. It resulted in the realmode bits generating compile warnings about types. In order to resolve that I am relocating the address translation to page_64.h, since this is in keeping with where these functions are located on 32 bit.

In addition I have relocated several functions defined in init_64.c to pgtable_64.h, as this seems to be where most of the functions related to memory initialization were already located.

[ hpa: added missing #include to apic_numachip.c, as reported by Yinghai Lu. ]

Signed-off-by: Alexander Duyck
Link: http://lkml.kernel.org/r/20121116215244.8521.31505.stgit@ahduyck-cp1.jf.intel.com
Signed-off-by: H. Peter Anvin
Cc: Yinghai Lu
Cc: Daniel J Blueman
---
 arch/x86/include/asm/page_64.h       | 19 +++++++++++++++++++
 arch/x86/include/asm/page_64_types.h | 22 ----------------------
 arch/x86/include/asm/pgtable_64.h    |  5 +++++
 arch/x86/kernel/apic/apic_numachip.c |  1 +
 4 files changed, 25 insertions(+), 22 deletions(-)

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 072694e..4150999 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -3,4 +3,23 @@ #include +#ifndef __ASSEMBLY__ + +/* duplicated to the one in bootmem.h */ +extern unsigned long max_pfn; +extern unsigned long phys_base; + +extern unsigned long __phys_addr(unsigned long); + +#define __phys_reloc_hide(x) (x) + +#ifdef CONFIG_FLATMEM +#define pfn_valid(pfn) ((pfn) < max_pfn) +#endif + +void clear_page(void *page); +void copy_page(void *to, void *from); + +#endif /* !__ASSEMBLY__ */ + #endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 320f7bb..8b491e6 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -50,26 +50,4 @@ #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) #define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) -#ifndef __ASSEMBLY__ -void clear_page(void *page); -void copy_page(void *to, void *from); - -/* duplicated to the one in bootmem.h */ -extern unsigned long max_pfn; -extern unsigned long phys_base; - -extern unsigned long __phys_addr(unsigned long); -#define __phys_reloc_hide(x) (x) - -#define vmemmap ((struct page *)VMEMMAP_START) - -extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); -extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); - -#endif /* !__ASSEMBLY__ */ - -#ifdef CONFIG_FLATMEM -#define pfn_valid(pfn) ((pfn) < max_pfn) -#endif - #endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9..b5d30ad 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -183,6 +183,11 @@ extern void cleanup_highmap(void); #define __HAVE_ARCH_PTE_SAME +#define vmemmap ((struct page *)VMEMMAP_START) + +extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); +extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index a65829a..ae9196f 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -27,6 +27,7 @@ #include #include #include +#include static int numachip_system __read_mostly;

From 0bdf525f04afd3a32c14e5a8778771f9c9e0f074 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 16 Nov 2012 13:53:51 -0800
Subject: x86: Improve __phys_addr performance by making use of carry flags and inlining

This patch is meant to improve overall system performance when making use of the __phys_addr call. To do this I have implemented several changes.

First, if CONFIG_DEBUG_VIRTUAL is not defined, __phys_addr is made an inline, similar to how this is currently handled on 32 bit. However, in order to do this it is required to export phys_base so that it is available if __phys_addr is used in kernel modules.

The second change was to streamline the code by making use of the carry flag on an add operation instead of performing a compare on a 64 bit value. The advantage to this is that it allows us to significantly reduce the overall size of the call. On my Xeon E5 system the entire __phys_addr inline call consumes a little less than 32 bytes and 5 instructions. I also applied similar logic to the debug version of the function. My testing shows that the debug version of the function with this patch applied is slightly faster than the non-debug version without the patch.

Finally, I also applied the same logic changes to __virt_addr_valid since it used the same general code flow as __phys_addr and could achieve similar gains through these changes.

Signed-off-by: Alexander Duyck
Link: http://lkml.kernel.org/r/20121116215315.8521.46270.stgit@ahduyck-cp1.jf.intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/page_64.h   | 14 ++++++++++++++
 arch/x86/kernel/x8664_ksyms_64.c |  3 +++
 arch/x86/mm/physaddr.c           | 40 +++++++++++++++++++++++++---------------
 3 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 4150999..5138174 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -9,7 +9,21 @@ extern unsigned long max_pfn; extern unsigned long phys_base; +static inline unsigned long __phys_addr_nodebug(unsigned long x) +{ + unsigned long y = x - __START_KERNEL_map; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + x = y + ((x > y) ? phys_base : (__START_KERNEL_map - PAGE_OFFSET)); + + return x; +} + +#ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); +#else +#define __phys_addr(x) __phys_addr_nodebug(x) +#endif #define __phys_reloc_hide(x) (x)
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 1330dd1..b014d94 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -59,6 +59,9 @@ EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__memcpy); EXPORT_SYMBOL(memmove); +#ifndef CONFIG_DEBUG_VIRTUAL +EXPORT_SYMBOL(phys_base); +#endif EXPORT_SYMBOL(empty_zero_page); #ifndef CONFIG_PARAVIRT EXPORT_SYMBOL(native_load_gs_index);
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index d2e2735..fd40d75 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -8,33 +8,43 @@ #ifdef CONFIG_X86_64 +#ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE); - x += phys_base; + unsigned long y = x - __START_KERNEL_map; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + if (unlikely(x > y)) { + x = y + phys_base; + + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); } else { - VIRTUAL_BUG_ON(x < PAGE_OFFSET); - x -= PAGE_OFFSET; - VIRTUAL_BUG_ON(!phys_addr_valid(x)); + x = y + (__START_KERNEL_map - PAGE_OFFSET); + + /* carry flag will be set if starting x was >= PAGE_OFFSET */ + VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); } + return x; } EXPORT_SYMBOL(__phys_addr); +#endif bool __virt_addr_valid(unsigned long x) { - if (x >= __START_KERNEL_map) { - x -= __START_KERNEL_map; - if (x >= KERNEL_IMAGE_SIZE) + unsigned long y = x - __START_KERNEL_map; + + /* use the carry flag to determine if x was < __START_KERNEL_map */ + if (unlikely(x > y)) { + x = y + phys_base; + + if (y >= KERNEL_IMAGE_SIZE) return false; - x += phys_base; } else { - if (x < PAGE_OFFSET) - return false; - x -= PAGE_OFFSET; - if (!phys_addr_valid(x)) + x = y + (__START_KERNEL_map - PAGE_OFFSET); + + /* carry flag will be set if starting x was >= PAGE_OFFSET */ + if ((x > y) || !phys_addr_valid(x)) return false; }
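The carry-flag trick above can be reproduced in plain userspace C. The following is a minimal sketch, not kernel code: the TOY_* constants merely stand in for __START_KERNEL_map, PAGE_OFFSET and phys_base, and the unsigned-wraparound comparison "x > y" plays the role of the carry flag the commit message describes.

#include <assert.h>
#include <stdio.h>

#define TOY_KERNEL_MAP  0xffffffff80000000UL /* stand-in for __START_KERNEL_map */
#define TOY_PAGE_OFFSET 0xffff880000000000UL /* stand-in for PAGE_OFFSET */
#define TOY_PHYS_BASE   0x1000000UL          /* stand-in for phys_base */

static unsigned long toy_phys_addr(unsigned long x)
{
	unsigned long y = x - TOY_KERNEL_MAP;

	/*
	 * If x < TOY_KERNEL_MAP the subtraction wraps, so y > x.
	 * Comparing x > y therefore recovers the borrow (carry) bit
	 * without a second 64-bit compare against the constant.
	 */
	return y + ((x > y) ? TOY_PHYS_BASE
			    : (TOY_KERNEL_MAP - TOY_PAGE_OFFSET));
}

int main(void)
{
	/* a kernel-text style address translates through phys_base ... */
	assert(toy_phys_addr(TOY_KERNEL_MAP + 0x2000) == TOY_PHYS_BASE + 0x2000);
	/* ... while a direct-map style address just drops PAGE_OFFSET */
	assert(toy_phys_addr(TOY_PAGE_OFFSET + 0x5000) == 0x5000);
	puts("carry-flag translation ok");
	return 0;
}

Both asserts hold on an LP64 target, which is the case the 64-bit kernel cares about.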
From 7d74275d39def4d3ccc8cf4725388bf79ef13861 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 16 Nov 2012 13:55:46 -0800
Subject: x86: Make it so that __pa_symbol can only process kernel symbols on x86_64

I submitted an earlier patch that makes __phys_addr an inline. This obviously results in an increase in the code size. One step I can take to reduce that is to make it so that the __pa_symbol call does a direct translation for kernel addresses instead of covering all of virtual memory.

On my system this reduced the size for __pa_symbol from 5 instructions totalling 30 bytes to 3 instructions totalling 16 bytes.

Signed-off-by: Alexander Duyck
Link: http://lkml.kernel.org/r/20121116215356.8521.92472.stgit@ahduyck-cp1.jf.intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/page.h    |  3 ++-
 arch/x86/include/asm/page_32.h |  1 +
 arch/x86/include/asm/page_64.h |  3 +++
 arch/x86/mm/physaddr.c         | 11 +++++++++++
 4 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 8ca8283..3698a6a 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -44,7 +44,8 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, * case properly. Once all supported versions of gcc understand it, we can * remove this Voodoo magic stuff. (i.e. once gcc3.x is deprecated) */ -#define __pa_symbol(x) __pa(__phys_reloc_hide((unsigned long)(x))) +#define __pa_symbol(x) \ + __phys_addr_symbol(__phys_reloc_hide((unsigned long)(x))) #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index da4e762..4d550d0 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -15,6 +15,7 @@ extern unsigned long __phys_addr(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) #endif +#define __phys_addr_symbol(x) __phys_addr(x) #define __phys_reloc_hide(x) RELOC_HIDE((x), 0) #ifdef CONFIG_FLATMEM
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5138174..0f1ddee 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -21,8 +21,11 @@ static inline unsigned long __phys_addr_nodebug(unsigned long x) #ifdef CONFIG_DEBUG_VIRTUAL extern unsigned long __phys_addr(unsigned long); +extern unsigned long __phys_addr_symbol(unsigned long); #else #define __phys_addr(x) __phys_addr_nodebug(x) +#define __phys_addr_symbol(x) \ + ((unsigned long)(x) - __START_KERNEL_map + phys_base) #endif #define __phys_reloc_hide(x) (x)
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index fd40d75..c73fedd 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -28,6 +28,17 @@ unsigned long __phys_addr(unsigned long x) return x; } EXPORT_SYMBOL(__phys_addr); + +unsigned long __phys_addr_symbol(unsigned long x) +{ + unsigned long y = x - __START_KERNEL_map; + + /* only check upper bounds since lower bounds will trigger carry */ + VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE); + + return y + phys_base; +} +EXPORT_SYMBOL(__phys_addr_symbol); #endif bool __virt_addr_valid(unsigned long x)

From 05a476b6e3795f205806662bf09ab95774266292 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 16 Nov 2012 13:56:35 -0800
Subject: x86: Drop 4 unnecessary calls to __pa_symbol

While debugging the __pa_symbol inline patch I found that there were a couple of spots where __pa_symbol was used as follows:

	__pa_symbol(x) - __pa_symbol(y)

The compiler had reduced them to:

	x - y

Since we also support a debug case where __pa_symbol is a function call, it would probably be useful to just change the two cases I found so that they are always just treated as "x - y". As such I am casting the values to phys_addr_t and then doing simple subtraction so that the correct type and value are returned.

Signed-off-by: Alexander Duyck
Link: http://lkml.kernel.org/r/20121116215552.8521.68085.stgit@ahduyck-cp1.jf.intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/head32.c | 4 ++--
 arch/x86/kernel/head64.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c18f59d..f15db0c 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -30,8 +30,8 @@ static void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { - memblock_reserve(__pa_symbol(&_text), - __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); + memblock_reserve(__pa_symbol(_text), + (phys_addr_t)__bss_stop - (phys_addr_t)_text); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 037df57..42f5df1 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -97,8 +97,8 @@ void __init x86_64_start_reservations(char *real_mode_data) { copy_bootdata(__va(real_mode_data)); - memblock_reserve(__pa_symbol(&_text), - __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); + memblock_reserve(__pa_symbol(_text), + (phys_addr_t)__bss_stop - (phys_addr_t)_text); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */
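Why the two forms are interchangeable is just arithmetic: the relocation offset cancels out of any difference of two translated symbols. A minimal sketch, again with made-up TOY_* constants standing in for the real kernel values:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t phys_addr_t;

#define TOY_KERNEL_MAP 0xffffffff80000000UL /* stand-in for __START_KERNEL_map */
#define TOY_PHYS_BASE  0x1000000UL          /* stand-in for phys_base */

/* toy model of __pa_symbol(): v - __START_KERNEL_map + phys_base */
static phys_addr_t toy_pa_symbol(unsigned long v)
{
	return v - TOY_KERNEL_MAP + TOY_PHYS_BASE;
}

int main(void)
{
	unsigned long text = TOY_KERNEL_MAP + 0x1000;     /* fake _text */
	unsigned long bss_stop = TOY_KERNEL_MAP + 0x900000; /* fake __bss_stop */

	/* the relocation offset cancels, so a plain virtual-address
	 * difference gives the same length as subtracting two
	 * translated physical addresses */
	printf("%d\n", (toy_pa_symbol(bss_stop) - toy_pa_symbol(text)) ==
		       ((phys_addr_t)bss_stop - (phys_addr_t)text)); /* 1 */
	return 0;
}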
From fc8d782677f163dee76427fdd8a92bebd2b50b23 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 16 Nov 2012 13:57:13 -0800
Subject: x86: Use __pa_symbol instead of __pa on C visible symbols

When I made an attempt at separating __pa_symbol and __pa I found that there were a number of cases where __pa was used on an obvious symbol. I also caught one non-obvious case, as _brk_start and _brk_end are based on the address of __brk_base, which is a C visible symbol.

In mark_rodata_ro I was able to reduce the overhead of kernel symbol to virtual memory translation by using a combination of __va(__pa_symbol()) instead of page_address(virt_to_page()).

Signed-off-by: Alexander Duyck
Link: http://lkml.kernel.org/r/20121116215640.8521.80483.stgit@ahduyck-cp1.jf.intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/cpu/intel.c |  2 +-
 arch/x86/kernel/setup.c     | 16 ++++++++--------
 arch/x86/mm/init_64.c       | 18 ++++++++----------
 arch/x86/mm/pageattr.c      |  8 ++++----
 arch/x86/platform/efi/efi.c |  4 ++--
 arch/x86/realmode/init.c    |  8 ++++----
 6 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 198e019..2249e7e 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -168,7 +168,7 @@ int __cpuinit ppro_with_ram_bug(void) #ifdef CONFIG_X86_F00F_BUG static void __cpuinit trap_init_f00f_bug(void) { - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); + __set_fixmap(FIX_F00F_IDT, __pa_symbol(idt_table), PAGE_KERNEL_RO); /* * Update the IDT descriptor and reload the IDT so that
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ca45696..2702c5d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -300,8 +300,8 @@ static void __init cleanup_highmap(void) static void __init reserve_brk(void) { if (_brk_end > _brk_start) - memblock_reserve(__pa(_brk_start), - __pa(_brk_end) - __pa(_brk_start)); + memblock_reserve(__pa_symbol(_brk_start), + _brk_end - _brk_start); /* Mark brk area as locked down and no longer taking any new allocations */ @@ -761,12 +761,12 @@ void __init setup_arch(char **cmdline_p) init_mm.end_data = (unsigned long) _edata; init_mm.brk = _brk_end; - code_resource.start = virt_to_phys(_text); - code_resource.end = virt_to_phys(_etext)-1; - data_resource.start = virt_to_phys(_etext); - data_resource.end = virt_to_phys(_edata)-1; - bss_resource.start = virt_to_phys(&__bss_start); - bss_resource.end = virt_to_phys(&__bss_stop)-1; + code_resource.start = __pa_symbol(_text); + code_resource.end = __pa_symbol(_etext)-1; + data_resource.start = __pa_symbol(_etext); + data_resource.end = __pa_symbol(_edata)-1; + bss_resource.start = __pa_symbol(__bss_start); + bss_resource.end = __pa_symbol(__bss_stop)-1; #ifdef CONFIG_CMDLINE_BOOL #ifdef CONFIG_CMDLINE_OVERRIDE
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3baff25..0374a10 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -770,12 +770,10 @@ void set_kernel_text_ro(void) void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long rodata_start = - ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; + unsigned long rodata_start = PFN_ALIGN(__start_rodata); unsigned long end = (unsigned long) &__end_rodata_hpage_align; - unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); - unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); - unsigned long data_start = (unsigned long) &_sdata; + unsigned long text_end = PFN_ALIGN(&__stop___ex_table); + unsigned long rodata_end = PFN_ALIGN(&__end_rodata); printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -800,12 +798,12 @@ void mark_rodata_ro(void) #endif free_init_pages("unused kernel memory", - (unsigned long) page_address(virt_to_page(text_end)), - (unsigned long) - page_address(virt_to_page(rodata_start))); + (unsigned long) __va(__pa_symbol(text_end)), + (unsigned long) __va(__pa_symbol(rodata_start))); + free_init_pages("unused kernel memory", - (unsigned long) page_address(virt_to_page(rodata_end)), - (unsigned long) page_address(virt_to_page(data_start))); + (unsigned long) __va(__pa_symbol(rodata_end)), + (unsigned long) __va(__pa_symbol(_sdata))); } #endif
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d..40f92f3 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -94,12 +94,12 @@ static inline void split_page_count(int level) { } static inline unsigned long highmap_start_pfn(void) { - return __pa(_text) >> PAGE_SHIFT; + return __pa_symbol(_text) >> PAGE_SHIFT; } static inline unsigned long highmap_end_pfn(void) { - return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; + return __pa_symbol(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; } #endif @@ -276,8 +276,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, * The .rodata section needs to be read-only. Using the pfn * catches all aliases. */ - if (within(pfn, __pa((unsigned long)__start_rodata) >> PAGE_SHIFT, - __pa((unsigned long)__end_rodata) >> PAGE_SHIFT)) + if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, + __pa_symbol(__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad443914..1b60026 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -410,8 +410,8 @@ void __init efi_reserve_boot_services(void) * - Not within any part of the kernel * - Not the bios reserved area */ - if ((start+size >= virt_to_phys(_text) - && start <= virt_to_phys(_end)) || + if ((start+size >= __pa_symbol(_text) + && start <= __pa_symbol(_end)) || !e820_all_mapped(start, start+size, E820_RAM) || memblock_is_region_reserved(start, size)) { /* Could not reserve, skip it */
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index cbca565..8045026 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -62,9 +62,9 @@ void __init setup_real_mode(void) __va(real_mode_header->trampoline_header); #ifdef CONFIG_X86_32 - trampoline_header->start = __pa(startup_32_smp); + trampoline_header->start = __pa_symbol(startup_32_smp); trampoline_header->gdt_limit = __BOOT_DS + 7; - trampoline_header->gdt_base = __pa(boot_gdt); + trampoline_header->gdt_base = __pa_symbol(boot_gdt); #else /* * Some AMD processors will #GP(0) if EFER.LMA is set in WRMSR @@ -78,8 +78,8 @@ void __init setup_real_mode(void) *trampoline_cr4_features = read_cr4(); trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); - trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; - trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; + trampoline_pgd[0] = __pa_symbol(level3_ident_pgt) + _KERNPG_TABLE; + trampoline_pgd[511] = __pa_symbol(level3_kernel_pgt) + _KERNPG_TABLE; #endif }
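The PFN_ALIGN() swapped in by the mark_rodata_ro() cleanup is exactly the open-coded rounding it replaces. A minimal standalone sketch (PAGE_SIZE here is a stand-in for the kernel constant, not the real definition):

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PFN_ALIGN(x)	(((unsigned long)(x) + PAGE_SIZE - 1) & PAGE_MASK)

int main(void)
{
	unsigned long rodata_start = 0x1000123;

	/* both forms round up to the next page boundary: 0x1001000 */
	printf("%#lx %#lx\n", PFN_ALIGN(rodata_start),
	       (rodata_start + PAGE_SIZE - 1) & PAGE_MASK);
	return 0;
}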
From 217f155e9fc68bf2a6c58a7b47e0d1ce68d78818 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 16 Nov 2012 13:57:32 -0800
Subject: x86/ftrace: Use __pa_symbol instead of __pa on C visible symbols

Instead of using __pa, which is meant to be a general function for converting virtual addresses to physical addresses, we can use __pa_symbol, which is the preferred way of decoding kernel text virtual addresses to physical addresses. In this case we are not directly converting C visible symbols; however, if we know that the instruction pointer is somewhere between _text and _etext, we know that we are going to be translating an address from the kernel text space.

Cc: Steven Rostedt
Cc: Frederic Weisbecker
Signed-off-by: Alexander Duyck
Link: http://lkml.kernel.org/r/20121116215718.8521.24026.stgit@ahduyck-cp1.jf.intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1d41402..42a392a 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -89,7 +89,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) * kernel identity mapping to modify code. */ if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa(ip)); + ip = (unsigned long)__va(__pa_symbol(ip)); return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); } @@ -279,7 +279,7 @@ static int ftrace_write(unsigned long ip, const char *val, int size) * kernel identity mapping to modify code. */ if (within(ip, (unsigned long)_text, (unsigned long)_etext)) - ip = (unsigned long)__va(__pa(ip)); + ip = (unsigned long)__va(__pa_symbol(ip)); return probe_kernel_write((void *)ip, val, size); }

From afd51a0e32cd79261f0e823400886ed322a355ac Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 16 Nov 2012 13:57:43 -0800
Subject: x86/acpi: Use __pa_symbol instead of __pa on C visible symbols

This change just updates one spot where __pa was being used when __pa_symbol should have been used. By using __pa_symbol we are able to drop a few extra lines of code, as we don't have to test to see if the virtual pointer is a part of the kernel text or just standard virtual memory.

Cc: Len Brown
Cc: Pavel Machek
Acked-by: "Rafael J. Wysocki"
Signed-off-by: Alexander Duyck
Link: http://lkml.kernel.org/r/20121116215737.8521.51167.stgit@ahduyck-cp1.jf.intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/acpi/sleep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 11676cf..f146a3c 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -69,7 +69,7 @@ int acpi_suspend_lowlevel(void) #ifndef CONFIG_64BIT header->pmode_entry = (u32)&wakeup_pmode_return; - header->pmode_cr3 = (u32)__pa(&initial_page_table); + header->pmode_cr3 = (u32)__pa_symbol(initial_page_table); saved_magic = 0x12345678; #else /* CONFIG_64BIT */ #ifdef CONFIG_SMP

From 6a3956bd242926f8956992f6ed7805b0811be003 Mon Sep 17 00:00:00 2001
From: Alexander Duyck
Date: Fri, 16 Nov 2012 13:58:12 -0800
Subject: x86/lguest: Use __pa_symbol instead of __pa on C visible symbols

The function lguest_write_cr3 is using __pa to convert swapper_pg_dir and initial_page_table from virtual addresses to physical. The correct function to use for these values is __pa_symbol since they are C visible symbols.

Cc: Rusty Russell
Signed-off-by: Alexander Duyck
Link: http://lkml.kernel.org/r/20121116215748.8521.83556.stgit@ahduyck-cp1.jf.intel.com
Signed-off-by: H. Peter Anvin
---
 arch/x86/lguest/boot.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 642d880..139dd35 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -552,7 +552,8 @@ static void lguest_write_cr3(unsigned long cr3) current_cr3 = cr3; /* These two page tables are simple, linear, and used during boot */ - if (cr3 != __pa(swapper_pg_dir) && cr3 != __pa(initial_page_table)) + if (cr3 != __pa_symbol(swapper_pg_dir) && + cr3 != __pa_symbol(initial_page_table)) cr3_changed = true; }
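The ftrace patch above hinges on a within() half-open range check: an ip inside [_text, _etext) is a kernel-text alias that must be rewritten through the identity mapping. A minimal standalone sketch of that check, with made-up addresses:

#include <stdbool.h>
#include <stdio.h>

static bool within(unsigned long addr, unsigned long start, unsigned long end)
{
	return addr >= start && addr < end;
}

int main(void)
{
	unsigned long text = 0xffffffff81000000UL;  /* fake _text */
	unsigned long etext = 0xffffffff81800000UL; /* fake _etext */

	printf("%d %d\n", within(text + 0x10, text, etext),   /* 1 */
	       within(etext + 0x10, text, etext));            /* 0 */
	return 0;
}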
From 6147a9d8070e1c9d16d57eb53a14942b95b28dc4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker
Date: Fri, 19 Oct 2012 16:53:18 -0400
Subject: irq_work: Remove CONFIG_HAVE_IRQ_WORK

irq work can run on any arch even without IPI support because of the hook on update_process_times(). So let's remove HAVE_IRQ_WORK because it doesn't reflect any backend requirement.

Signed-off-by: Frederic Weisbecker
Acked-by: Steven Rostedt
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: Ingo Molnar
Cc: Andrew Morton
Cc: Paul Gortmaker
---
 arch/x86/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 46c3bff..c13e07a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -26,7 +26,6 @@ config X86 select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS - select HAVE_IRQ_WORK select HAVE_IOREMAP_PROT select HAVE_KPROBES select HAVE_MEMBLOCK

From fa62aafea9e415cd1efd8c4054106112fe809f19 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 16 Nov 2012 19:38:38 -0800
Subject: x86, mm: Add global page_size_mask and probe one time only

Now we pass around use_gbpages and use_pse for calculating page table size. Later we will need to call init_memory_mapping for every ram range one by one; that means those calculations will be done several times. That information is the same for all ram ranges, so it can be stored in page_size_mask and probed one time only.

Move that probing code out of init_memory_mapping into a separate function, probe_page_size_mask(), and call it before all init_memory_mapping calls.

Suggested-by: Ingo Molnar
Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-2-git-send-email-yinghai@kernel.org
Reviewed-by: Pekka Enberg
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/pgtable.h |  1 +
 arch/x86/kernel/setup.c        |  1 +
 arch/x86/mm/init.c             | 55 +++++++++++++++++++-----------------------
 3 files changed, 27 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a1f780d..98ac76d 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -602,6 +602,7 @@ static inline int pgd_none(pgd_t pgd) #ifndef __ASSEMBLY__ extern int direct_gbpages; +void probe_page_size_mask(void); /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ca45696..01fb5f9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -913,6 +913,7 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); init_gbpages(); + probe_page_size_mask(); /* max_pfn_mapped is updated here */ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<

Date: Fri, 16 Nov 2012 19:38:39 -0800
Subject: x86, mm: Split out split_mem_range from init_memory_mapping

Make init_memory_mapping smaller and more readable.

-v2: use 0 instead of nr_range as input parameter, found by Yasuaki Ishimatsu.

Suggested-by: Ingo Molnar
Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-3-git-send-email-yinghai@kernel.org
Reviewed-by: Pekka Enberg
Cc: Yasuaki Ishimatsu
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/init.c | 41 +++++++++++++++++++++++++----------------
 1 file changed, 25 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index aa5b0da..6368b86 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -146,25 +146,13 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, return nr_range; } -/* - * Setup the direct mapping of the physical memory at PAGE_OFFSET. - * This runs before bootmem is initialized and gets pages directly from - * the physical memory. To access them they are temporarily mapped. - */ -unsigned long __init_refok init_memory_mapping(unsigned long start, - unsigned long end) +static int __meminit split_mem_range(struct map_range *mr, int nr_range, + unsigned long start, + unsigned long end) { unsigned long start_pfn, end_pfn; - unsigned long ret = 0; unsigned long pos; - struct map_range mr[NR_RANGE_MR]; - int nr_range, i; - - printk(KERN_INFO "init_memory_mapping: [mem %#010lx-%#010lx]\n", - start, end - 1); - - memset(mr, 0, sizeof(mr)); - nr_range = 0; + int i; /* head if not big page alignment ? */ start_pfn = start >> PAGE_SHIFT; @@ -258,6 +246,27 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, (mr[i].page_size_mask & (1<
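The head/body/tail split that split_mem_range() performs can be illustrated outside the kernel. The following is a hedged sketch under simplifying assumptions: it only handles 4k versus 2M pages (the real code also deals with 1G pages and 32-bit quirks), and the struct and function names are made up for the example.

#include <stdio.h>

#define PMD_SHIFT 21 /* 2M pages */

struct toy_range { unsigned long start, end; int big; };

/* unaligned edges get 4k pages, the 2M-aligned middle gets big pages */
static int toy_split(unsigned long start, unsigned long end,
		     struct toy_range *mr)
{
	unsigned long head = ((start + (1UL << PMD_SHIFT) - 1)
				>> PMD_SHIFT) << PMD_SHIFT;
	unsigned long tail = (end >> PMD_SHIFT) << PMD_SHIFT;
	int n = 0;

	if (head > end)
		head = end;
	if (tail < head)
		tail = head;
	if (start < head)
		mr[n++] = (struct toy_range){ start, head, 0 };
	if (head < tail)
		mr[n++] = (struct toy_range){ head, tail, 1 };
	if (tail < end)
		mr[n++] = (struct toy_range){ tail, end, 0 };
	return n;
}

int main(void)
{
	struct toy_range mr[3];
	int i, n = toy_split(0x1000, 0x40400000, mr);

	for (i = 0; i < n; i++)
		printf("[%#lx-%#lx) %s\n", mr[i].start, mr[i].end,
		       mr[i].big ? "2M" : "4k");
	return 0;
}

For the sample range this prints a 4k head [0x1000-0x200000) followed by a 2M body, which is the shape init_memory_mapping feeds to kernel_physical_mapping_init.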
Date: Fri, 16 Nov 2012 19:38:40 -0800
Subject: x86, mm: Move down find_early_table_space()

It will need to call split_mem_range(). Move it down after that to avoid an extra declaration.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-4-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/init.c | 117 +++++++++++++++++++++++++++--------------------------
 1 file changed, 59 insertions(+), 58 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 6368b86..701abbc 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -36,64 +36,6 @@ struct map_range { }; static int page_size_mask; -/* - * First calculate space needed for kernel direct mapping page tables to cover - * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB - * pages. Then find enough contiguous space for those page tables. - */ -static void __init find_early_table_space(struct map_range *mr, int nr_range) -{ - int i; - unsigned long puds = 0, pmds = 0, ptes = 0, tables; - unsigned long start = 0, good_end; - phys_addr_t base; - - for (i = 0; i < nr_range; i++) { - unsigned long range, extra; - - range = mr[i].end - mr[i].start; - puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - - if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { - extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); - pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else { - pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; - } - - if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { - extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); -#ifdef CONFIG_X86_32 - extra += PMD_SIZE; -#endif - ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else { - ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; - } - } - - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - -#ifdef CONFIG_X86_32 - /* for fixmap */ - tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif - good_end = max_pfn_mapped << PAGE_SHIFT; - - base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); - if (!base) - panic("Cannot find space for the kernel page tables"); - - pgt_buf_start = base >> PAGE_SHIFT; - pgt_buf_end = pgt_buf_start; - pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", - mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, - (pgt_buf_top << PAGE_SHIFT) - 1); -} void probe_page_size_mask(void) { @@ -250,6 +192,65 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, } /* + * First calculate space needed for kernel direct mapping page tables to cover + * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB + * pages. Then find enough contiguous space for those page tables. + */ +static void __init find_early_table_space(struct map_range *mr, int nr_range) +{ + int i; + unsigned long puds = 0, pmds = 0, ptes = 0, tables; + unsigned long start = 0, good_end; + phys_addr_t base; + + for (i = 0; i < nr_range; i++) { + unsigned long range, extra; + + range = mr[i].end - mr[i].start; + puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; + + if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { + extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); + pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; + } else { + pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; + } + + if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { + extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); +#ifdef CONFIG_X86_32 + extra += PMD_SIZE; +#endif + ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + } + + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); + +#ifdef CONFIG_X86_32 + /* for fixmap */ + tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); +#endif + good_end = max_pfn_mapped << PAGE_SHIFT; + + base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); + if (!base) + panic("Cannot find space for the kernel page tables"); + + pgt_buf_start = base >> PAGE_SHIFT; + pgt_buf_end = pgt_buf_start; + pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); + + printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", + mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, + (pgt_buf_top << PAGE_SHIFT) - 1); +} + +/* * Setup the direct mapping of the physical memory at PAGE_OFFSET. * This runs before bootmem is initialized and gets pages directly from * the physical memory. To access them they are temporarily mapped.
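The sizing arithmetic in the function moved above is worth seeing in isolation: one pte per 4k page, one pmd entry per 2M, one pud entry per 1G, each entry 8 bytes, rounded up to whole pages. A back-of-the-envelope sketch (the 8-byte entry size and 4G figure are illustrative assumptions, not taken from the patch):

#include <stdio.h>

#define PAGE_SIZE 4096UL

static unsigned long roundup_page(unsigned long x)
{
	return (x + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

int main(void)
{
	unsigned long range = 4UL << 30;   /* map 4G with 4k pages */
	unsigned long ptes = range >> 12;  /* one pte per 4k page */
	unsigned long pmds = range >> 21;  /* one pmd per 2M */
	unsigned long puds = range >> 30;  /* one pud per 1G */
	unsigned long tables = roundup_page(ptes * 8) +
			       roundup_page(pmds * 8) +
			       roundup_page(puds * 8);

	printf("page tables need %lu KiB\n", tables >> 10); /* ~8212 KiB */
	return 0;
}

This also shows why using 2M or 1G leaf pages shrinks the bill so dramatically: the dominant pte term disappears for the aligned middle of each range.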
From 22ddfcaa0dbae992332381d41b8a1fbc72269a13 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 16 Nov 2012 19:38:41 -0800
Subject: x86, mm: Move init_memory_mapping calling out of setup.c

Now init_memory_mapping is called two times; later it will be called for every ram range. This lets us put all the related init_mem calls together and move them out of setup.c.

Actually, this reverts commit 1bbbbe7 ("x86: Exclude E820_RESERVED regions and memory holes above 4 GB from direct mapping."). We will address that later with a complete solution that includes handling the hole under 4g.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-5-git-send-email-yinghai@kernel.org
Reviewed-by: Pekka Enberg
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/init.h    |  1 -
 arch/x86/include/asm/pgtable.h |  2 +-
 arch/x86/kernel/setup.c        | 27 +--------------------------
 arch/x86/mm/init.c             | 19 ++++++++++++++++++-
 4 files changed, 20 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index adcc0ae..4f13998 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -12,7 +12,6 @@ kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask); - extern unsigned long __initdata pgt_buf_start; extern unsigned long __meminitdata pgt_buf_end; extern unsigned long __meminitdata pgt_buf_top;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 98ac76d..dd1a888 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -602,7 +602,7 @@ static inline int pgd_none(pgd_t pgd) #ifndef __ASSEMBLY__ extern int direct_gbpages; -void probe_page_size_mask(void); +void init_mem_mapping(void); /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 01fb5f9..23b079f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -913,34 +913,9 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); init_gbpages(); - probe_page_size_mask(); - /* max_pfn_mapped is updated here */ - max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< max_low_pfn) { - int i; - unsigned long start, end; - unsigned long start_pfn, end_pfn; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, - NULL) { - - end = PFN_PHYS(end_pfn); - if (end <= (1UL<<32)) - continue; - - start = PFN_PHYS(start_pfn); - max_pfn_mapped = init_memory_mapping( - max((1UL<<32), start), end); - } - - /* can we preseve max_low_pfn ?*/ - max_low_pfn = max_pfn; - } -#endif memblock.current_limit = get_max_mapped(); dma_contiguous_reserve(0);
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 701abbc..9e17f9e 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -37,7 +37,7 @@ struct map_range { static int page_size_mask; -void probe_page_size_mask(void) +static void __init probe_page_size_mask(void) { #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) /* @@ -315,6 +315,23 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, return ret >> PAGE_SHIFT; } +void __init init_mem_mapping(void) +{ + probe_page_size_mask(); + + /* max_pfn_mapped is updated here */ + max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< max_low_pfn) { + max_pfn_mapped = init_memory_mapping(1UL<<32, + max_pfn<

Date: Fri, 16 Nov 2012 19:38:42 -0800
Subject: x86, mm: Revert back good_end setting for 64bit

After

| commit 8548c84da2f47e71bbbe300f55edb768492575f7
| Author: Takashi Iwai
| Date: Sun Oct 23 23:19:12 2011 +0200
|
|    x86: Fix S4 regression
|
|    Commit 4b239f458 ("x86-64, mm: Put early page table high") causes a S4
|    regression since 2.6.39, namely the machine reboots occasionally at S4
|    resume. It doesn't happen always, overall rate is about 1/20. But,
|    like other bugs, once when this happens, it continues to happen.
|
|    This patch fixes the problem by essentially reverting the memory
|    assignment in the older way.

we have some page tables around 512M again, and that will prevent kdump from finding 512M under 768M.

We need to revert that revert, so we can put page tables high again for 64bit.

Takashi agreed that the S4 regression could be something else.

https://lkml.org/lkml/2012/6/15/182

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-6-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 9e17f9e..dbef4ff 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -234,8 +234,8 @@ static void __init find_early_table_space(struct map_range *mr, int nr_range) #ifdef CONFIG_X86_32 /* for fixmap */ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif good_end = max_pfn_mapped << PAGE_SHIFT; +#endif base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); if (!base)

From 84f1ae30bb68d8da98bca7ff2c2b825b2ac8c9a5 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 16 Nov 2012 19:38:43 -0800
Subject: x86, mm: Change find_early_table_space() parameters

Call split_mem_range() inside the function.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-7-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/init.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index dbef4ff..51f919f 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -196,12 +196,18 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB * pages. Then find enough contiguous space for those page tables. */ -static void __init find_early_table_space(struct map_range *mr, int nr_range) +static void __init find_early_table_space(unsigned long start, unsigned long end) { int i; unsigned long puds = 0, pmds = 0, ptes = 0, tables; - unsigned long start = 0, good_end; + unsigned long good_end; phys_addr_t base; + struct map_range mr[NR_RANGE_MR]; + int nr_range; + + memset(mr, 0, sizeof(mr)); + nr_range = 0; + nr_range = split_mem_range(mr, nr_range, start, end); for (i = 0; i < nr_range; i++) { unsigned long range, extra; @@ -276,7 +282,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * nodes are discovered. */ if (!after_bootmem) - find_early_table_space(mr, nr_range); + find_early_table_space(start, end); for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, -- 
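The orchestration that the "Move init_memory_mapping calling out of setup.c" patch introduces, and that later patches keep reshaping, is simple to model. A hedged toy (this mirrors the shape of the new init_mem_mapping(), parts of which are truncated in the diff above; all values and the print are made up, and it assumes an LP64 target):

#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long max_low_pfn = 0x100000; /* toy: 4G worth of pfns */
static unsigned long max_pfn = 0x180000;     /* toy: 6G worth of pfns */

/* stand-in: pretend to map [start, end) and return the last mapped pfn */
static unsigned long init_memory_mapping(unsigned long start, unsigned long end)
{
	printf("map [%#lx-%#lx)\n", start, end);
	return end >> PAGE_SHIFT;
}

static void init_mem_mapping(void)
{
	unsigned long max_low_pfn_mapped, max_pfn_mapped;

	/* low range first, then the >4G tail on 64-bit */
	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn << PAGE_SHIFT);
	max_pfn_mapped = max_low_pfn_mapped;
	if (max_pfn > max_low_pfn)
		max_pfn_mapped = init_memory_mapping(1UL << 32,
						     max_pfn << PAGE_SHIFT);
	(void)max_pfn_mapped;
}

int main(void)
{
	init_mem_mapping();
	return 0;
}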
From c14fa0b63b5b4234667c03fdc3314c0881caa514 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 16 Nov 2012 19:38:44 -0800
Subject: x86, mm: Find early page table buffer together

We should not do that in every call of init_memory_mapping. At the same time we need to move down early_memtest, and can remove the after_bootmem check.

-v2: fix one early_memtest with 32bit by passing max_pfn_mapped instead.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-8-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/init.c | 66 ++++++++++++++++++++++++++++--------------------------
 1 file changed, 34 insertions(+), 32 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 51f919f..1ce0d03 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -274,16 +274,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, memset(mr, 0, sizeof(mr)); nr_range = split_mem_range(mr, 0, start, end); - /* - * Find space for the kernel direct mapping tables. - * - * Later we should allocate these tables in the local node of the - * memory mapped. Unfortunately this is done currently before the - * nodes are discovered. - */ - if (!after_bootmem) - find_early_table_space(start, end); - for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, mr[i].page_size_mask); @@ -296,6 +286,36 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __flush_tlb_all(); + return ret >> PAGE_SHIFT; +} + +void __init init_mem_mapping(void) +{ + probe_page_size_mask(); + + /* + * Find space for the kernel direct mapping tables. + * + * Later we should allocate these tables in the local node of the + * memory mapped. Unfortunately this is done currently before the + * nodes are discovered. + */ +#ifdef CONFIG_X86_64 + find_early_table_space(0, max_pfn< max_low_pfn) { + max_pfn_mapped = init_memory_mapping(1UL<<32, + max_pfn< pgt_buf_start) + if (pgt_buf_end > pgt_buf_start) x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), PFN_PHYS(pgt_buf_end)); - if (!after_bootmem) - early_memtest(start, end); + /* stop the wrong using */ + pgt_buf_top = 0; - return ret >> PAGE_SHIFT; -} - -void __init init_mem_mapping(void) -{ - probe_page_size_mask(); - - /* max_pfn_mapped is updated here */ - max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< max_low_pfn) { - max_pfn_mapped = init_memory_mapping(1UL<<32, - max_pfn<

Date: Fri, 16 Nov 2012 19:38:45 -0800
Subject: x86, mm: Separate out calculate_table_space_size()

It should take the physical address range that will need to be mapped; find_early_table_space() should take the range that the pgt buffer should be in. Separate the page table size calculation from finding the early page table to reduce confusion.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-9-git-send-email-yinghai@kernel.org
Reviewed-by: Pekka Enberg
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/init.c | 38 +++++++++++++++++++++++++-----------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1ce0d03..7b961d0 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -196,12 +196,10 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB * pages. Then find enough contiguous space for those page tables. */ -static void __init find_early_table_space(unsigned long start, unsigned long end) +static unsigned long __init calculate_table_space_size(unsigned long start, unsigned long end) { int i; unsigned long puds = 0, pmds = 0, ptes = 0, tables; - unsigned long good_end; - phys_addr_t base; struct map_range mr[NR_RANGE_MR]; int nr_range; @@ -240,9 +238,17 @@ static void __init find_early_table_space(unsigned long start, unsigned long end #ifdef CONFIG_X86_32 /* for fixmap */ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); - good_end = max_pfn_mapped << PAGE_SHIFT; #endif + return tables; +} + +static void __init find_early_table_space(unsigned long start, + unsigned long good_end, + unsigned long tables) +{ + phys_addr_t base; + base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); if (!base) panic("Cannot find space for the kernel page tables"); @@ -250,10 +256,6 @@ static void __init find_early_table_space(unsigned long start, unsigned long end pgt_buf_start = base >> PAGE_SHIFT; pgt_buf_end = pgt_buf_start; pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); - - printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx]\n", - mr[nr_range - 1].end - 1, pgt_buf_start << PAGE_SHIFT, - (pgt_buf_top << PAGE_SHIFT) - 1); } /* @@ -291,6 +293,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, void __init init_mem_mapping(void) { + unsigned long tables, good_end, end; + probe_page_size_mask(); /* @@ -301,10 +305,18 @@ void __init init_mem_mapping(void) * nodes are discovered. */ #ifdef CONFIG_X86_64 - find_early_table_space(0, max_pfn< pgt_buf_start) + if (pgt_buf_end > pgt_buf_start) { + printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx] final\n", + end - 1, pgt_buf_start << PAGE_SHIFT, + (pgt_buf_end << PAGE_SHIFT) - 1); x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), PFN_PHYS(pgt_buf_end)); + } /* stop the wrong using */ pgt_buf_top = 0;

From dd7dfad7fb297b1746bcdbebbdc970d723a635bd Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 16 Nov 2012 19:38:46 -0800
Subject: x86, mm: Set memblock initial limit to 1M

memblock_x86_fill() could double the memory array. If we set memblock.current_limit to 512M, the memory array could be around 512M, so kdump would not get a big range (like 512M) under 1024M.

Try to put it down under 1M; it would use about 4k or so, and that is limited.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-10-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 23b079f..4bd8921 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -890,7 +890,7 @@ void __init setup_arch(char **cmdline_p) cleanup_highmap(); - memblock.current_limit = get_max_mapped(); + memblock.current_limit = ISA_END_ADDRESS; memblock_x86_fill(); /*

From 4eea6aa581abfeb2695ebe9f9d4672597e1bdd4b Mon Sep 17 00:00:00 2001
From: Jacob Shin
Date: Fri, 16 Nov 2012 19:38:47 -0800
Subject: x86, mm: if kernel .text .data .bss are not marked as E820_RAM, complain and fix

There could be cases where user-supplied memmap=exactmap memory mappings do not mark the region where the kernel .text .data and .bss reside as E820_RAM, as reported here: https://lkml.org/lkml/2012/8/14/86

Handle it by complaining, and adding the range back into the e820.
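The pgt_buf_{start,end,top} bookkeeping the two patches above juggle is a simple bump allocator: a chunk is carved out once, pages are handed out from it, and zeroing pgt_buf_top afterwards disarms further use. A hedged sketch, loosely modeled on that scheme (the function name and pfn values are invented for the example):

#include <stdio.h>
#include <stdlib.h>

static unsigned long pgt_buf_start, pgt_buf_end, pgt_buf_top;

/* hand out the next pfn from the reserved early page-table buffer */
static unsigned long toy_alloc_low_page(void)
{
	if (pgt_buf_end >= pgt_buf_top) {
		fputs("page table buffer exhausted\n", stderr);
		exit(1);
	}
	return pgt_buf_end++;
}

int main(void)
{
	pgt_buf_start = 0x100; /* pretend pfns 0x100..0x103 were reserved */
	pgt_buf_end = pgt_buf_start;
	pgt_buf_top = pgt_buf_start + 4;

	printf("got pfn %#lx\n", toy_alloc_low_page());
	/* only [pgt_buf_start, pgt_buf_end) was actually consumed */
	printf("%lu page(s) used\n", pgt_buf_end - pgt_buf_start);

	pgt_buf_top = 0; /* "stop the wrong using": later allocs now trap */
	return 0;
}

The final report of [pgt_buf_start, pgt_buf_end) is also why the printk in the patch above moved: only after mapping is done is the actually-used extent known.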
Signed-off-by: Jacob Shin
Link: http://lkml.kernel.org/r/1353123563-3103-11-git-send-email-yinghai@kernel.org
Signed-off-by: Yinghai Lu
Reviewed-by: Pekka Enberg
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/setup.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 4bd8921..d85cbd9 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -832,6 +832,20 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); + /* + * Complain if .text .data and .bss are not marked as E820_RAM and + * attempt to fix it by adding the range. We may have a confused BIOS, + * or the user may have incorrectly supplied it via memmap=exactmap. If + * we really are running on top non-RAM, we will crash later anyways. + */ + if (!e820_all_mapped(code_resource.start, __pa(__brk_limit), E820_RAM)) { + pr_warn(".text .data .bss are not marked as E820_RAM!\n"); + + e820_add_region(code_resource.start, + __pa(__brk_limit) - code_resource.start + 1, + E820_RAM); + } + trim_bios_range(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) {
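The e820_all_mapped(start, end, E820_RAM) test the patch above relies on asks whether every byte of [start, end) falls inside some RAM entry. A toy flavor of that walk, assuming sorted, non-overlapping entries (the real e820 code is more defensive); types and values here are made up:

#include <stdbool.h>
#include <stdio.h>

struct toy_e820entry { unsigned long start, end; int type; };

enum { E820_RAM = 1 };

static bool toy_all_mapped(const struct toy_e820entry *map, int n,
			   unsigned long start, unsigned long end, int type)
{
	int i;

	for (i = 0; i < n; i++) {
		if (map[i].type != type)
			continue;
		/* advance past any entry that covers the current start */
		if (map[i].start <= start && start < map[i].end)
			start = map[i].end;
	}
	return start >= end;
}

int main(void)
{
	struct toy_e820entry map[] = {
		{ 0x000000, 0x09f000, E820_RAM },
		{ 0x100000, 0x400000, E820_RAM },
	};

	printf("%d %d\n",
	       toy_all_mapped(map, 2, 0x100000, 0x200000, E820_RAM), /* 1 */
	       toy_all_mapped(map, 2, 0x090000, 0x110000, E820_RAM)); /* 0 */
	return 0;
}

The second query straddles the hole below 1M, which is exactly the memmap=exactmap failure mode the patch guards against.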
From dda56e134059b840631fdfd034784056b627c2a6 Mon Sep 17 00:00:00 2001
From: Jacob Shin
Date: Fri, 16 Nov 2012 19:38:48 -0800
Subject: x86, mm: Fixup code testing if a pfn is direct mapped

Update code that previously assumed pfns [ 0 - max_low_pfn_mapped ) and [ 4GB - max_pfn_mapped ) were always direct mapped, to now look up pfn_mapped ranges instead.

-v2: change the applying sequence to keep git bisecting working, so add a dummy pfn_range_is_mapped(). - Yinghai Lu

Signed-off-by: Jacob Shin
Link: http://lkml.kernel.org/r/1353123563-3103-12-git-send-email-yinghai@kernel.org
Signed-off-by: Yinghai Lu
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/page_types.h | 8 ++++++++
 arch/x86/kernel/cpu/amd.c         | 8 +++-----
 arch/x86/platform/efi/efi.c       | 7 +++----
 3 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index e21fdd1..45aae6e 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,6 +51,14 @@ static inline phys_addr_t get_max_mapped(void) return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; } +static inline bool pfn_range_is_mapped(unsigned long start_pfn, + unsigned long end_pfn) +{ + return end_pfn <= max_low_pfn_mapped || + (end_pfn > (1UL << (32 - PAGE_SHIFT)) && + end_pfn <= max_pfn_mapped); +} + extern unsigned long init_memory_mapping(unsigned long start, unsigned long end);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f7e98a2..9619ba6 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -676,12 +676,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * benefit in doing so. */ if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { + unsigned long pfn = tseg >> PAGE_SHIFT; + printk(KERN_DEBUG "tseg: %010llx\n", tseg); - if ((tseg>>PMD_SHIFT) < - (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || - ((tseg>>PMD_SHIFT) < - (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && - (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) + if (pfn_range_is_mapped(pfn, pfn + 1)) set_memory_4k((unsigned long)__va(tseg), 1); } }
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad443914..36e53f0 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -835,7 +835,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md, *prev_md = NULL; efi_status_t status; unsigned long size; - u64 end, systab, end_pfn; + u64 end, systab, start_pfn, end_pfn; void *p, *va, *new_memmap = NULL; int count = 0; @@ -888,10 +888,9 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; + start_pfn = PFN_DOWN(md->phys_addr); end_pfn = PFN_UP(end); - if (end_pfn <= max_low_pfn_mapped - || (end_pfn > (1UL << (32 - PAGE_SHIFT)) - && end_pfn <= max_pfn_mapped)) { + if (pfn_range_is_mapped(start_pfn, end_pfn)) { va = __va(md->phys_addr); if (!(md->attribute & EFI_MEMORY_WB))

From 8eb5779f6b9c7e390c92f451edaafc039e06e743 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 16 Nov 2012 19:38:49 -0800
Subject: x86, mm: use pfn_range_is_mapped() with CPA

We are going to map RAM only, so being under max_low_pfn_mapped, or between 4g and max_pfn_mapped, no longer means mapped at all. Use pfn_range_is_mapped() directly.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-13-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin
---
 arch/x86/mm/pageattr.c | 16 +++------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index a718e0d..44acfcd 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -551,16 +551,10 @@ static int split_large_page(pte_t *kpte, unsigned long address) for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); - if (address >= (unsigned long)__va(0) && - address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) + if (pfn_range_is_mapped(PFN_DOWN(__pa(address)), + PFN_DOWN(__pa(address)) + 1)) split_page_count(level); -#ifdef CONFIG_X86_64 - if (address >= (unsigned long)__va(1UL<<32) && - address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) - split_page_count(level); -#endif - /* * Install the new, split up pagetable. * @@ -729,13 +723,9 @@ static int cpa_process_alias(struct cpa_data *cpa) unsigned long vaddr; int ret; - if (cpa->pfn >= max_pfn_mapped) + if (!pfn_range_is_mapped(cpa->pfn, cpa->pfn + 1)) return 0; -#ifdef CONFIG_X86_64 - if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) - return 0; -#endif /* * No need to redo, when the primary call touched the direct * mapping already:

From 5101730cb0613b91d40b9bb7be6bb023d2f6aa24 Mon Sep 17 00:00:00 2001
From: Yinghai Lu
Date: Fri, 16 Nov 2012 19:38:50 -0800
Subject: x86, mm: use pfn_range_is_mapped() with gart

We are going to map RAM only, so being under max_low_pfn_mapped, or between 4g and max_pfn_mapped, no longer means mapped at all. Use pfn_range_is_mapped() directly.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-14-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/amd_gart_64.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index e6631120..b574b29 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -768,10 +768,9 @@ int __init gart_iommu_init(void) aper_base = info.aper_base; end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); - if (end_pfn > max_low_pfn_mapped) { - start_pfn = (aper_base>>PAGE_SHIFT); + start_pfn = PFN_DOWN(aper_base); + if (!pfn_range_is_mapped(start_pfn, end_pfn)) init_memory_mapping(start_pfn<
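The interim pfn_range_is_mapped() helper introduced above is small enough to exercise in a standalone harness. The helper body is taken from the patch; the surrounding scaffolding and the global values (3.5G mapped low, 6G total) are made up:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12

static unsigned long max_low_pfn_mapped = 0xe0000;  /* toy: 3.5G */
static unsigned long max_pfn_mapped = 0x180000;     /* toy: 6G */

/* mapped = below max_low_pfn_mapped, or within [4G, max_pfn_mapped) */
static bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn)
{
	return end_pfn <= max_low_pfn_mapped ||
	       (end_pfn > (1UL << (32 - PAGE_SHIFT)) &&
		end_pfn <= max_pfn_mapped);
}

int main(void)
{
	printf("%d %d %d\n",
	       pfn_range_is_mapped(0x1000, 0x2000),      /* low: 1 */
	       pfn_range_is_mapped(0xd0000, 0xf0000),    /* ends in the hole: 0 */
	       pfn_range_is_mapped(0x100000, 0x140000)); /* high, mapped: 1 */
	return 0;
}

The middle case is the one the old open-coded checks got wrong in spirit: an end_pfn between max_low_pfn_mapped and 4G lands in the unmapped gap.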
Date: Fri, 16 Nov 2012 19:38:51 -0800
Subject: x86, mm: use pfn_range_is_mapped() with reserve_initrd

We are going to map RAM only, so being under max_low_pfn_mapped, or between 4g and max_pfn_mapped, no longer means mapped at all. Use pfn_range_is_mapped() to find out if the range is mapped for the initrd.

That could happen when the bootloader put the initrd in the range, but the user used memmap to carve some of the range out.

Also, during copying we need to use early_memmap to map the original initrd for access.

Signed-off-by: Yinghai Lu
Link: http://lkml.kernel.org/r/1353123563-3103-15-git-send-email-yinghai@kernel.org
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/setup.c | 52 ++++++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d85cbd9..bd52f9d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -317,20 +317,19 @@ static void __init relocate_initrd(void) u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; u64 area_size = PAGE_ALIGN(ramdisk_size); - u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; u64 ramdisk_here; unsigned long slop, clen, mapaddr; char *p, *q; - /* We need to move the initrd down into lowmem */ - ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size, - PAGE_SIZE); + /* We need to move the initrd down into directly mapped mem */ + ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_low_pfn_mapped), + area_size, PAGE_SIZE); if (!ramdisk_here) panic("Cannot find place for new RAMDISK of size %lld\n", ramdisk_size); - /* Note: this includes all the lowmem currently occupied by + /* Note: this includes all the mem currently occupied by the initrd, we rely on that fact to keep the data intact. */ memblock_reserve(ramdisk_here, area_size); initrd_start = ramdisk_here + PAGE_OFFSET; @@ -340,17 +339,7 @@ static void __init relocate_initrd(void) q = (char *)initrd_start; - /* Copy any lowmem portion of the initrd */ - if (ramdisk_image < end_of_lowmem) { - clen = end_of_lowmem - ramdisk_image; - p = (char *)__va(ramdisk_image); - memcpy(q, p, clen); - q += clen; - ramdisk_image += clen; - ramdisk_size -= clen; - } - - /* Copy the highmem portion of the initrd */ + /* Copy the initrd */ while (ramdisk_size) { slop = ramdisk_image & ~PAGE_MASK; clen = ramdisk_size; @@ -364,7 +353,7 @@ static void __init relocate_initrd(void) ramdisk_image += clen; ramdisk_size -= clen; } - /* high pages is not converted by early_res_to_bootmem */ + ramdisk_image = boot_params.hdr.ramdisk_image; ramdisk_size = boot_params.hdr.ramdisk_size; printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" @@ -373,13 +362,27 @@ static void __init relocate_initrd(void) ramdisk_here, ramdisk_here + ramdisk_size - 1); } +static u64 __init get_mem_size(unsigned long limit_pfn) +{ + int i; + u64 mapped_pages = 0; + unsigned long start_pfn, end_pfn; + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { + start_pfn = min_t(unsigned long, start_pfn, limit_pfn); + end_pfn = min_t(unsigned long, end_pfn, limit_pfn); + mapped_pages += end_pfn - start_pfn; + } + + return mapped_pages << PAGE_SHIFT; +} static void __init reserve_initrd(void) { /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; + u64 mapped_size; if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) @@ -387,18 +390,19 @@ static void __init reserve_initrd(void) initrd_start = 0; - if (ramdisk_size >= (end_of_lowmem>>1)) { + mapped_size = get_mem_size(max_low_pfn_mapped); + if (ramdisk_size >= (mapped_size>>1)) panic("initrd too large to handle, " "disabling initrd (%lld needed, %lld available)\n", - ramdisk_size, end_of_lowmem>>1); - } + ramdisk_size, mapped_size>>1); printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, ramdisk_end - 1); - if (ramdisk_end <= end_of_lowmem) { - /* All in lowmem, easy case */ + if (ramdisk_end <= (max_low_pfn_mapped<
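The new get_mem_size() added above sums only the pfns that are actually present below a limit, rather than assuming everything under max_low_pfn_mapped is usable. A hedged sketch of that logic, with an explicit range array standing in for for_each_mem_pfn_range() and invented values:

#include <stdio.h>

#define PAGE_SHIFT 12

struct toy_pfn_range { unsigned long start, end; };

static unsigned long long toy_get_mem_size(const struct toy_pfn_range *r,
					   int n, unsigned long limit_pfn)
{
	unsigned long long pages = 0;
	int i;

	for (i = 0; i < n; i++) {
		/* clamp each present range to the limit, then sum */
		unsigned long s = r[i].start < limit_pfn ? r[i].start : limit_pfn;
		unsigned long e = r[i].end < limit_pfn ? r[i].end : limit_pfn;

		pages += e - s;
	}
	return pages << PAGE_SHIFT;
}

int main(void)
{
	struct toy_pfn_range map[] = { { 0x10, 0x9f }, { 0x100, 0x800 } };

	printf("%llu bytes mapped below pfn 0x400\n",
	       toy_get_mem_size(map, 2, 0x400));
	return 0;
}

Because holes between the ranges contribute nothing, the "initrd too large" check above compares against real mapped memory instead of a flat end_of_lowmem figure.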
Peter Anvin --- arch/x86/include/asm/page_types.h | 8 +-- arch/x86/kernel/setup.c | 8 ++- arch/x86/mm/init.c | 120 ++++++++++++++++++++++++++++++++++---- arch/x86/mm/init_64.c | 6 +- 4 files changed, 117 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 45aae6e..54c9787 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -51,13 +51,7 @@ static inline phys_addr_t get_max_mapped(void) return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT; } -static inline bool pfn_range_is_mapped(unsigned long start_pfn, - unsigned long end_pfn) -{ - return end_pfn <= max_low_pfn_mapped || - (end_pfn > (1UL << (32 - PAGE_SHIFT)) && - end_pfn <= max_pfn_mapped); -} +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn); extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bd52f9d..68dffec 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -116,9 +116,11 @@ #include /* - * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. - * The direct mapping extends to max_pfn_mapped, so that we can directly access - * apertures, ACPI and other tables without having to play with fixmaps. + * max_low_pfn_mapped: highest direct mapped pfn under 4GB + * max_pfn_mapped: highest direct mapped pfn over 4GB + * + * The direct mapping only covers E820_RAM regions, so the ranges and gaps are + * represented by pfn_mapped */ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 7b961d0..bb44e9f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -243,6 +243,38 @@ static unsigned long __init calculate_table_space_size(unsigned long start, unsi return tables; } +static unsigned long __init calculate_all_table_space_size(void) +{ + unsigned long start_pfn, end_pfn; + unsigned long tables; + int i; + + /* the ISA range is always mapped regardless of memory holes */ + tables = calculate_table_space_size(0, ISA_END_ADDRESS); + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { + u64 start = start_pfn << PAGE_SHIFT; + u64 end = end_pfn << PAGE_SHIFT; + + if (end <= ISA_END_ADDRESS) + continue; + + if (start < ISA_END_ADDRESS) + start = ISA_END_ADDRESS; +#ifdef CONFIG_X86_32 + /* on 32 bit, we only map up to max_low_pfn */ + if ((start >> PAGE_SHIFT) >= max_low_pfn) + continue; + + if ((end >> PAGE_SHIFT) > max_low_pfn) + end = max_low_pfn << PAGE_SHIFT; +#endif + tables += calculate_table_space_size(start, end); + } + + return tables; +} + static void __init find_early_table_space(unsigned long start, unsigned long good_end, unsigned long tables) @@ -258,6 +290,34 @@ static void __init find_early_table_space(unsigned long start, pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); } +static struct range pfn_mapped[E820_X_MAX]; +static int nr_pfn_mapped; + +static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) +{ + nr_pfn_mapped = add_range_with_merge(pfn_mapped, E820_X_MAX, + nr_pfn_mapped, start_pfn, end_pfn); + nr_pfn_mapped = clean_sort_range(pfn_mapped, E820_X_MAX); + + max_pfn_mapped = max(max_pfn_mapped, end_pfn); + + if (start_pfn < (1UL<<(32-PAGE_SHIFT))) + max_low_pfn_mapped = max(max_low_pfn_mapped, + min(end_pfn, 1UL<<(32-PAGE_SHIFT))); +} + +bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn) +{ + int 
i; + + for (i = 0; i < nr_pfn_mapped; i++) + if ((start_pfn >= pfn_mapped[i].start) && + (end_pfn <= pfn_mapped[i].end)) + return true; + + return false; +} + /* * Setup the direct mapping of the physical memory at PAGE_OFFSET. * This runs before bootmem is initialized and gets pages directly from @@ -288,9 +348,55 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, __flush_tlb_all(); + add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); + return ret >> PAGE_SHIFT; } +/* + * Iterate through E820 memory map and create direct mappings for only E820_RAM + * regions. We cannot simply create direct mappings for all pfns from + * [0 to max_low_pfn) and [4GB to max_pfn) because of possible memory holes in + * high addresses that cannot be marked as UC by fixed/variable range MTRRs. + * Depending on the alignment of E820 ranges, this may possibly result in using + * smaller size (i.e. 4K instead of 2M or 1G) page tables. + */ +static void __init init_all_memory_mapping(void) +{ + unsigned long start_pfn, end_pfn; + int i; + + /* the ISA range is always mapped regardless of memory holes */ + init_memory_mapping(0, ISA_END_ADDRESS); + + for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { + u64 start = (u64)start_pfn << PAGE_SHIFT; + u64 end = (u64)end_pfn << PAGE_SHIFT; + + if (end <= ISA_END_ADDRESS) + continue; + + if (start < ISA_END_ADDRESS) + start = ISA_END_ADDRESS; +#ifdef CONFIG_X86_32 + /* on 32 bit, we only map up to max_low_pfn */ + if ((start >> PAGE_SHIFT) >= max_low_pfn) + continue; + + if ((end >> PAGE_SHIFT) > max_low_pfn) + end = max_low_pfn << PAGE_SHIFT; +#endif + init_memory_mapping(start, end); + } + +#ifdef CONFIG_X86_64 + if (max_pfn > max_low_pfn) { + /* can we preseve max_low_pfn ?*/ + max_low_pfn = max_pfn; + } +#endif +} + void __init init_mem_mapping(void) { unsigned long tables, good_end, end; @@ -311,23 +417,15 @@ void __init init_mem_mapping(void) end = max_low_pfn << PAGE_SHIFT; good_end = max_pfn_mapped << PAGE_SHIFT; #endif - tables = calculate_table_space_size(0, end); + tables = calculate_all_table_space_size(); find_early_table_space(0, good_end, tables); printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx] prealloc\n", end - 1, pgt_buf_start << PAGE_SHIFT, (pgt_buf_top << PAGE_SHIFT) - 1); - max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn< max_low_pfn) { - max_pfn_mapped = init_memory_mapping(1UL<<32, - max_pfn<node_zones + ZONE_NORMAL; - unsigned long last_mapped_pfn, start_pfn = start >> PAGE_SHIFT; + unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - last_mapped_pfn = init_memory_mapping(start, start + size); - if (last_mapped_pfn > max_pfn_mapped) - max_pfn_mapped = last_mapped_pfn; + init_memory_mapping(start, start + size); ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); -- cgit v1.1 From 74f27655dda84604d8bab47872020dcce5c88731 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:38:53 -0800 Subject: x86, mm: relocate initrd under all mem for 64bit instead of under 4g. For 64bit, we can use any mapped mem instead of low mem. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-17-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/setup.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 68dffec..94f922a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -324,7 +324,7 @@ static void __init relocate_initrd(void) char *p, *q; /* We need to move the initrd down into directly mapped mem */ - ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_low_pfn_mapped), + ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), area_size, PAGE_SIZE); if (!ramdisk_here) @@ -392,7 +392,7 @@ static void __init reserve_initrd(void) initrd_start = 0; - mapped_size = get_mem_size(max_low_pfn_mapped); + mapped_size = get_mem_size(max_pfn_mapped); if (ramdisk_size >= (mapped_size>>1)) panic("initrd too large to handle, " "disabling initrd (%lld needed, %lld available)\n", @@ -401,8 +401,7 @@ static void __init reserve_initrd(void) printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, ramdisk_end - 1); - if (ramdisk_end <= (max_low_pfn_mapped< Date: Fri, 16 Nov 2012 19:38:54 -0800 Subject: x86, mm: Align start address to correct big page size We are going to use a buffer in the BRK to map a small range just under the memory top, and then use that newly mapped RAM to map the RAM range below it. The range mapped first may be only page aligned, but the ranges around it are RAM too, so a bigger page size could be used for it instead of breaking it down into small pages. page_size_mask is adjusted accordingly in the follow-on patch, x86, mm: Use big page size for small memory range. Ahead of that patch, this one makes sure the start address is aligned down to the bigger page size; otherwise the page table entry will not get the correct value. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-18-git-send-email-yinghai@kernel.org Signed-off-by: H.
Peter Anvin --- arch/x86/mm/init_32.c | 1 + arch/x86/mm/init_64.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 11a5800..27f7fc6 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -310,6 +310,7 @@ repeat: __pgprot(PTE_IDENT_ATTR | _PAGE_PSE); + pfn &= PMD_MASK >> PAGE_SHIFT; addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 32c7e38..869372a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -464,7 +464,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pages++; spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pmd, - pfn_pte(address >> PAGE_SHIFT, + pfn_pte((address & PMD_MASK) >> PAGE_SHIFT, __pgprot(pgprot_val(prot) | _PAGE_PSE))); spin_unlock(&init_mm.page_table_lock); last_map_addr = next; @@ -541,7 +541,8 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, pages++; spin_lock(&init_mm.page_table_lock); set_pte((pte_t *)pud, - pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + pfn_pte((addr & PUD_MASK) >> PAGE_SHIFT, + PAGE_KERNEL_LARGE)); spin_unlock(&init_mm.page_table_lock); last_map_addr = next; continue; -- cgit v1.1 From aeebe84cc96cde4181807bc67c300c550d0ef123 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:38:55 -0800 Subject: x86, mm: Use big page size for small memory range A small range in the middle of a big range may be mapped first, so we should use the big page size for it from the start instead of letting small pages break down the page table. The big page bit can only be set when the range nearby is RAM too. -v2: fix 32bit boundary checking. We cannot count RAM above max_low_pfn on 32 bit. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-19-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index bb44e9f..da591eb 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -88,6 +88,40 @@ static int __meminit save_mr(struct map_range *mr, int nr_range, return nr_range; } +/* + * adjust the page_size_mask for small range to go with + * big page size instead small one if nearby are ram too. + */ +static void __init_refok adjust_range_page_size_mask(struct map_range *mr, + int nr_range) +{ + int i; + + for (i = 0; i < nr_range; i++) { + if ((page_size_mask & (1<<PG_LEVEL_2M)) && + !(mr[i].page_size_mask & (1<<PG_LEVEL_2M))) { + unsigned long start = round_down(mr[i].start, PMD_SIZE); + unsigned long end = round_up(mr[i].end, PMD_SIZE); + +#ifdef CONFIG_X86_32 + if ((end >> PAGE_SHIFT) > max_low_pfn) + continue; +#endif + + if (memblock_is_region_memory(start, end - start)) + mr[i].page_size_mask |= 1< Date: Fri, 16 Nov 2012 19:38:56 -0800 Subject: x86, mm: Don't clear page table if range is ram After the following patch, x86, mm: setup page table in top-down, adds code that uses a buffer in the BRK to pre-map pages for the page table, it should be safe to remove early_memmap for page table accessing. Instead, doing so produced a panic. It turns out that we were wrongly clearing the initial page table for the next range when ranges are separated by holes, and that only happens when we try to map the RAM ranges one by one. We need to check whether the range is RAM before clearing its page table entries. The loop structure is changed to drop the extra little loop and use one loop only; in that loop we calculate next first, then check whether [addr,next) is covered by E820_RAM. -v2: E820_RESERVED_KERN is treated as E820_RAM.
EFI one change some E820_RAM to that, so next kernel by kexec will know that range is used already. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-20-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_64.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 869372a..fa28e3e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -363,20 +363,20 @@ static unsigned long __meminit phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, pgprot_t prot) { - unsigned pages = 0; + unsigned long pages = 0, next; unsigned long last_map_addr = end; int i; pte_t *pte = pte_page + pte_index(addr); - for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { - + for (i = pte_index(addr); i < PTRS_PER_PTE; i++, addr = next, pte++) { + next = (addr & PAGE_MASK) + PAGE_SIZE; if (addr >= end) { - if (!after_bootmem) { - for(; i < PTRS_PER_PTE; i++, pte++) - set_pte(pte, __pte(0)); - } - break; + if (!after_bootmem && + !e820_any_mapped(addr & PAGE_MASK, next, E820_RAM) && + !e820_any_mapped(addr & PAGE_MASK, next, E820_RESERVED_KERN)) + set_pte(pte, __pte(0)); + continue; } /* @@ -419,16 +419,15 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pte_t *pte; pgprot_t new_prot = prot; + next = (address & PMD_MASK) + PMD_SIZE; if (address >= end) { - if (!after_bootmem) { - for (; i < PTRS_PER_PMD; i++, pmd++) - set_pmd(pmd, __pmd(0)); - } - break; + if (!after_bootmem && + !e820_any_mapped(address & PMD_MASK, next, E820_RAM) && + !e820_any_mapped(address & PMD_MASK, next, E820_RESERVED_KERN)) + set_pmd(pmd, __pmd(0)); + continue; } - next = (address & PMD_MASK) + PMD_SIZE; - if (pmd_val(*pmd)) { if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); @@ -497,13 +496,12 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, pmd_t *pmd; pgprot_t prot = PAGE_KERNEL; - if (addr >= end) - break; - next = (addr & PUD_MASK) + PUD_SIZE; - - if (!after_bootmem && !e820_any_mapped(addr, next, 0)) { - set_pud(pud, __pud(0)); + if (addr >= end) { + if (!after_bootmem && + !e820_any_mapped(addr & PUD_MASK, next, E820_RAM) && + !e820_any_mapped(addr & PUD_MASK, next, E820_RESERVED_KERN)) + set_pud(pud, __pud(0)); continue; } -- cgit v1.1 From f763ad1d3870abb811ec7520b4c1adc56471a3a4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:38:57 -0800 Subject: x86, mm: Break down init_all_memory_mapping Will replace that with top-down page table initialization. New API need to take range: init_range_memory_mapping() Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-21-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index da591eb..c688ea3 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -398,40 +398,30 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * Depending on the alignment of E820 ranges, this may possibly result in using * smaller size (i.e. 4K instead of 2M or 1G) page tables. 
*/ -static void __init init_all_memory_mapping(void) +static void __init init_range_memory_mapping(unsigned long range_start, + unsigned long range_end) { unsigned long start_pfn, end_pfn; int i; - /* the ISA range is always mapped regardless of memory holes */ - init_memory_mapping(0, ISA_END_ADDRESS); - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { u64 start = (u64)start_pfn << PAGE_SHIFT; u64 end = (u64)end_pfn << PAGE_SHIFT; - if (end <= ISA_END_ADDRESS) + if (end <= range_start) continue; - if (start < ISA_END_ADDRESS) - start = ISA_END_ADDRESS; -#ifdef CONFIG_X86_32 - /* on 32 bit, we only map up to max_low_pfn */ - if ((start >> PAGE_SHIFT) >= max_low_pfn) + if (start < range_start) + start = range_start; + + if (start >= range_end) continue; - if ((end >> PAGE_SHIFT) > max_low_pfn) - end = max_low_pfn << PAGE_SHIFT; -#endif - init_memory_mapping(start, end); - } + if (end > range_end) + end = range_end; -#ifdef CONFIG_X86_64 - if (max_pfn > max_low_pfn) { - /* can we preseve max_low_pfn ?*/ - max_low_pfn = max_pfn; + init_memory_mapping(start, end); } -#endif } void __init init_mem_mapping(void) @@ -461,8 +451,15 @@ void __init init_mem_mapping(void) (pgt_buf_top << PAGE_SHIFT) - 1); max_pfn_mapped = 0; /* will get exact value next */ - init_all_memory_mapping(); - + /* the ISA range is always mapped regardless of memory holes */ + init_memory_mapping(0, ISA_END_ADDRESS); + init_range_memory_mapping(ISA_END_ADDRESS, end); +#ifdef CONFIG_X86_64 + if (max_pfn > max_low_pfn) { + /* can we preseve max_low_pfn ?*/ + max_low_pfn = max_pfn; + } +#endif /* * Reserve the kernel pagetable pages we used (pgt_buf_start - * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) -- cgit v1.1 From 8d57470d8f859635deffe3919d7d4867b488b85a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:38:58 -0800 Subject: x86, mm: setup page table in top-down Get pgt_buf early from BRK, and use it to map PMD_SIZE from top at first. Then use mapped pages to map more ranges below, and keep looping until all pages get mapped. alloc_low_page will use page from BRK at first, after that buffer is used up, will use memblock to find and reserve pages for page table usage. Introduce min_pfn_mapped to make sure find new pages from mapped ranges, that will be updated when lower pages get mapped. Also add step_size to make sure that don't try to map too big range with limited mapped pages initially, and increase the step_size when we have more mapped pages on hand. We don't need to call pagetable_reserve anymore, reserve work is done in alloc_low_page() directly. At last we can get rid of calculation and find early pgt related code. -v2: update to after fix_xen change, also use MACRO for initial pgt_buf size and add comments with it. -v3: skip big reserved range in memblock.reserved near end. -v4: don't need fix_xen change now. -v5: add changelog about moving about reserving pagetable to alloc_low_page. Suggested-by: "H. Peter Anvin" Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-22-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/page_types.h | 1 + arch/x86/include/asm/pgtable.h | 1 + arch/x86/kernel/setup.c | 3 + arch/x86/mm/init.c | 210 +++++++++++--------------------------- arch/x86/mm/init_32.c | 17 ++- arch/x86/mm/init_64.c | 17 ++- 6 files changed, 94 insertions(+), 155 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 54c9787..9f6f3e6 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -45,6 +45,7 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; +extern unsigned long min_pfn_mapped; static inline phys_addr_t get_max_mapped(void) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index dd1a888..6991a3e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -603,6 +603,7 @@ static inline int pgd_none(pgd_t pgd) extern int direct_gbpages; void init_mem_mapping(void); +void early_alloc_pgt_buf(void); /* local pte updates need not use xchg for locking */ static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 94f922a..f7634092 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -124,6 +124,7 @@ */ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; +unsigned long min_pfn_mapped; #ifdef CONFIG_DMI RESERVE_BRK(dmi_alloc, 65536); @@ -900,6 +901,8 @@ void __init setup_arch(char **cmdline_p) reserve_ibft_region(); + early_alloc_pgt_buf(); + /* * Need to conclude brk, before memblock_x86_fill() * it could use memblock_find_in_range, could overlap with diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c688ea3..2393d00 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -21,6 +21,21 @@ unsigned long __initdata pgt_buf_start; unsigned long __meminitdata pgt_buf_end; unsigned long __meminitdata pgt_buf_top; +/* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ +#define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) +RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); +void __init early_alloc_pgt_buf(void) +{ + unsigned long tables = INIT_PGT_BUF_SIZE; + phys_addr_t base; + + base = __pa(extend_brk(tables, PAGE_SIZE)); + + pgt_buf_start = base >> PAGE_SHIFT; + pgt_buf_end = pgt_buf_start; + pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); +} + int after_bootmem; int direct_gbpages @@ -228,105 +243,6 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, return nr_range; } -/* - * First calculate space needed for kernel direct mapping page tables to cover - * mr[0].start to mr[nr_range - 1].end, while accounting for possible 2M and 1GB - * pages. Then find enough contiguous space for those page tables. 
- */ -static unsigned long __init calculate_table_space_size(unsigned long start, unsigned long end) -{ - int i; - unsigned long puds = 0, pmds = 0, ptes = 0, tables; - struct map_range mr[NR_RANGE_MR]; - int nr_range; - - memset(mr, 0, sizeof(mr)); - nr_range = 0; - nr_range = split_mem_range(mr, nr_range, start, end); - - for (i = 0; i < nr_range; i++) { - unsigned long range, extra; - - range = mr[i].end - mr[i].start; - puds += (range + PUD_SIZE - 1) >> PUD_SHIFT; - - if (mr[i].page_size_mask & (1 << PG_LEVEL_1G)) { - extra = range - ((range >> PUD_SHIFT) << PUD_SHIFT); - pmds += (extra + PMD_SIZE - 1) >> PMD_SHIFT; - } else { - pmds += (range + PMD_SIZE - 1) >> PMD_SHIFT; - } - - if (mr[i].page_size_mask & (1 << PG_LEVEL_2M)) { - extra = range - ((range >> PMD_SHIFT) << PMD_SHIFT); -#ifdef CONFIG_X86_32 - extra += PMD_SIZE; -#endif - ptes += (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; - } else { - ptes += (range + PAGE_SIZE - 1) >> PAGE_SHIFT; - } - } - - tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); - tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); - tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); - -#ifdef CONFIG_X86_32 - /* for fixmap */ - tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); -#endif - - return tables; -} - -static unsigned long __init calculate_all_table_space_size(void) -{ - unsigned long start_pfn, end_pfn; - unsigned long tables; - int i; - - /* the ISA range is always mapped regardless of memory holes */ - tables = calculate_table_space_size(0, ISA_END_ADDRESS); - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { - u64 start = start_pfn << PAGE_SHIFT; - u64 end = end_pfn << PAGE_SHIFT; - - if (end <= ISA_END_ADDRESS) - continue; - - if (start < ISA_END_ADDRESS) - start = ISA_END_ADDRESS; -#ifdef CONFIG_X86_32 - /* on 32 bit, we only map up to max_low_pfn */ - if ((start >> PAGE_SHIFT) >= max_low_pfn) - continue; - - if ((end >> PAGE_SHIFT) > max_low_pfn) - end = max_low_pfn << PAGE_SHIFT; -#endif - tables += calculate_table_space_size(start, end); - } - - return tables; -} - -static void __init find_early_table_space(unsigned long start, - unsigned long good_end, - unsigned long tables) -{ - phys_addr_t base; - - base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE); - if (!base) - panic("Cannot find space for the kernel page tables"); - - pgt_buf_start = base >> PAGE_SHIFT; - pgt_buf_end = pgt_buf_start; - pgt_buf_top = pgt_buf_start + (tables >> PAGE_SHIFT); -} - static struct range pfn_mapped[E820_X_MAX]; static int nr_pfn_mapped; @@ -391,17 +307,14 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, } /* - * Iterate through E820 memory map and create direct mappings for only E820_RAM - * regions. We cannot simply create direct mappings for all pfns from - * [0 to max_low_pfn) and [4GB to max_pfn) because of possible memory holes in - * high addresses that cannot be marked as UC by fixed/variable range MTRRs. - * Depending on the alignment of E820 ranges, this may possibly result in using - * smaller size (i.e. 4K instead of 2M or 1G) page tables. + * would have hole in the middle or ends, and only ram parts will be mapped. 
*/ -static void __init init_range_memory_mapping(unsigned long range_start, +static unsigned long __init init_range_memory_mapping( + unsigned long range_start, unsigned long range_end) { unsigned long start_pfn, end_pfn; + unsigned long mapped_ram_size = 0; int i; for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { @@ -421,71 +334,70 @@ static void __init init_range_memory_mapping(unsigned long range_start, end = range_end; init_memory_mapping(start, end); + + mapped_ram_size += end - start; } + + return mapped_ram_size; } +/* (PUD_SHIFT-PMD_SHIFT)/2 */ +#define STEP_SIZE_SHIFT 5 void __init init_mem_mapping(void) { - unsigned long tables, good_end, end; + unsigned long end, real_end, start, last_start; + unsigned long step_size; + unsigned long addr; + unsigned long mapped_ram_size = 0; + unsigned long new_mapped_ram_size; probe_page_size_mask(); - /* - * Find space for the kernel direct mapping tables. - * - * Later we should allocate these tables in the local node of the - * memory mapped. Unfortunately this is done currently before the - * nodes are discovered. - */ #ifdef CONFIG_X86_64 end = max_pfn << PAGE_SHIFT; - good_end = end; #else end = max_low_pfn << PAGE_SHIFT; - good_end = max_pfn_mapped << PAGE_SHIFT; #endif - tables = calculate_all_table_space_size(); - find_early_table_space(0, good_end, tables); - printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx] prealloc\n", - end - 1, pgt_buf_start << PAGE_SHIFT, - (pgt_buf_top << PAGE_SHIFT) - 1); - max_pfn_mapped = 0; /* will get exact value next */ /* the ISA range is always mapped regardless of memory holes */ init_memory_mapping(0, ISA_END_ADDRESS); - init_range_memory_mapping(ISA_END_ADDRESS, end); + + /* xen has big range in reserved near end of ram, skip it at first */ + addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, + PAGE_SIZE); + real_end = addr + PMD_SIZE; + + /* step_size need to be small so pgt_buf from BRK could cover it */ + step_size = PMD_SIZE; + max_pfn_mapped = 0; /* will get exact value next */ + min_pfn_mapped = real_end >> PAGE_SHIFT; + last_start = start = real_end; + while (last_start > ISA_END_ADDRESS) { + if (last_start > step_size) { + start = round_down(last_start - 1, step_size); + if (start < ISA_END_ADDRESS) + start = ISA_END_ADDRESS; + } else + start = ISA_END_ADDRESS; + new_mapped_ram_size = init_range_memory_mapping(start, + last_start); + last_start = start; + min_pfn_mapped = last_start >> PAGE_SHIFT; + /* only increase step_size after big range get mapped */ + if (new_mapped_ram_size > mapped_ram_size) + step_size <<= STEP_SIZE_SHIFT; + mapped_ram_size += new_mapped_ram_size; + } + + if (real_end < end) + init_range_memory_mapping(real_end, end); + #ifdef CONFIG_X86_64 if (max_pfn > max_low_pfn) { /* can we preseve max_low_pfn ?*/ max_low_pfn = max_pfn; } #endif - /* - * Reserve the kernel pagetable pages we used (pgt_buf_start - - * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top) - * so that they can be reused for other purposes. - * - * On native it just means calling memblock_reserve, on Xen it also - * means marking RW the pagetable pages that we allocated before - * but that haven't been used. - * - * In fact on xen we mark RO the whole range pgt_buf_start - - * pgt_buf_top, because we have to make sure that when - * init_memory_mapping reaches the pagetable pages area, it maps - * RO all the pagetable pages, including the ones that are beyond - * pgt_buf_end at that time. 
- */ - if (pgt_buf_end > pgt_buf_start) { - printk(KERN_DEBUG "kernel direct mapping tables up to %#lx @ [mem %#010lx-%#010lx] final\n", - end - 1, pgt_buf_start << PAGE_SHIFT, - (pgt_buf_end << PAGE_SHIFT) - 1); - x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start), - PFN_PHYS(pgt_buf_end)); - } - - /* stop the wrong using */ - pgt_buf_top = 0; - early_memtest(0, max_pfn_mapped << PAGE_SHIFT); } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 27f7fc6..7bb1106 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -61,11 +61,22 @@ bool __read_mostly __vmalloc_start_set = false; static __init void *alloc_low_page(void) { - unsigned long pfn = pgt_buf_end++; + unsigned long pfn; void *adr; - if (pfn >= pgt_buf_top) - panic("alloc_low_page: ran out of memory"); + if ((pgt_buf_end + 1) >= pgt_buf_top) { + unsigned long ret; + if (min_pfn_mapped >= max_pfn_mapped) + panic("alloc_low_page: ran out of memory"); + ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, + max_pfn_mapped << PAGE_SHIFT, + PAGE_SIZE, PAGE_SIZE); + if (!ret) + panic("alloc_low_page: can not alloc memory"); + memblock_reserve(ret, PAGE_SIZE); + pfn = ret >> PAGE_SHIFT; + } else + pfn = pgt_buf_end++; adr = __va(pfn * PAGE_SIZE); clear_page(adr); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index fa28e3e..eefaea6 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -316,7 +316,7 @@ void __init cleanup_highmap(void) static __ref void *alloc_low_page(unsigned long *phys) { - unsigned long pfn = pgt_buf_end++; + unsigned long pfn; void *adr; if (after_bootmem) { @@ -326,8 +326,19 @@ static __ref void *alloc_low_page(unsigned long *phys) return adr; } - if (pfn >= pgt_buf_top) - panic("alloc_low_page: ran out of memory"); + if ((pgt_buf_end + 1) >= pgt_buf_top) { + unsigned long ret; + if (min_pfn_mapped >= max_pfn_mapped) + panic("alloc_low_page: ran out of memory"); + ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, + max_pfn_mapped << PAGE_SHIFT, + PAGE_SIZE, PAGE_SIZE); + if (!ret) + panic("alloc_low_page: can not alloc memory"); + memblock_reserve(ret, PAGE_SIZE); + pfn = ret >> PAGE_SHIFT; + } else + pfn = pgt_buf_end++; adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); clear_page(adr); -- cgit v1.1 From 973dc4f3fad5890bc7b694148ad4c825b9af6dc1 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:38:59 -0800 Subject: x86, mm: Remove early_memremap workaround for page table accessing on 64bit We try to put page table high to make room for kdump, and at that time those ranges are not mapped yet, and have to use ioremap to access it. Now after patch that pre-map page table top down. x86, mm: setup page table in top-down We do not need that workaround anymore. Just use __va to return directly mapping address. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-23-git-send-email-yinghai@kernel.org Acked-by: Stefano Stabellini Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/init_64.c | 38 ++++---------------------------------- 1 file changed, 4 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index eefaea6..5ee9242 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -340,36 +340,12 @@ static __ref void *alloc_low_page(unsigned long *phys) } else pfn = pgt_buf_end++; - adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); + adr = __va(pfn * PAGE_SIZE); clear_page(adr); *phys = pfn * PAGE_SIZE; return adr; } -static __ref void *map_low_page(void *virt) -{ - void *adr; - unsigned long phys, left; - - if (after_bootmem) - return virt; - - phys = __pa(virt); - left = phys & (PAGE_SIZE - 1); - adr = early_memremap(phys & PAGE_MASK, PAGE_SIZE); - adr = (void *)(((unsigned long)adr) | left); - - return adr; -} - -static __ref void unmap_low_page(void *adr) -{ - if (after_bootmem) - return; - - early_iounmap((void *)((unsigned long)adr & PAGE_MASK), PAGE_SIZE); -} - static unsigned long __meminit phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, pgprot_t prot) @@ -442,10 +418,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, if (pmd_val(*pmd)) { if (!pmd_large(*pmd)) { spin_lock(&init_mm.page_table_lock); - pte = map_low_page((pte_t *)pmd_page_vaddr(*pmd)); + pte = (pte_t *)pmd_page_vaddr(*pmd); last_map_addr = phys_pte_init(pte, address, end, prot); - unmap_low_page(pte); spin_unlock(&init_mm.page_table_lock); continue; } @@ -483,7 +458,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, pte = alloc_low_page(&pte_phys); last_map_addr = phys_pte_init(pte, address, end, new_prot); - unmap_low_page(pte); spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); @@ -518,10 +492,9 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (pud_val(*pud)) { if (!pud_large(*pud)) { - pmd = map_low_page(pmd_offset(pud, 0)); + pmd = pmd_offset(pud, 0); last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, prot); - unmap_low_page(pmd); __flush_tlb_all(); continue; } @@ -560,7 +533,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, pmd = alloc_low_page(&pmd_phys); last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, prot); - unmap_low_page(pmd); spin_lock(&init_mm.page_table_lock); pud_populate(&init_mm, pud, __va(pmd_phys)); @@ -596,17 +568,15 @@ kernel_physical_mapping_init(unsigned long start, next = end; if (pgd_val(*pgd)) { - pud = map_low_page((pud_t *)pgd_page_vaddr(*pgd)); + pud = (pud_t *)pgd_page_vaddr(*pgd); last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), page_size_mask); - unmap_low_page(pud); continue; } pud = alloc_low_page(&pud_phys); last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), page_size_mask); - unmap_low_page(pud); spin_lock(&init_mm.page_table_lock); pgd_populate(&init_mm, pgd, __va(pud_phys)); -- cgit v1.1 From 868bf4d6b94c980d3ad87f892a5e528b8ee2c320 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:00 -0800 Subject: x86, mm: Remove parameter in alloc_low_page for 64bit Now all page table buf are pre-mapped, and could use virtual address directly. So don't need to remember physical address anymore. Remove that phys pointer in alloc_low_page(), and that will allow us to merge alloc_low_page between 64bit and 32bit. 
Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-24-git-send-email-yinghai@kernel.org Acked-by: Stefano Stabellini Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_64.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 5ee9242..1960820 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -314,14 +314,13 @@ void __init cleanup_highmap(void) } } -static __ref void *alloc_low_page(unsigned long *phys) +static __ref void *alloc_low_page(void) { unsigned long pfn; void *adr; if (after_bootmem) { adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); - *phys = __pa(adr); return adr; } @@ -342,7 +341,6 @@ static __ref void *alloc_low_page(unsigned long *phys) adr = __va(pfn * PAGE_SIZE); clear_page(adr); - *phys = pfn * PAGE_SIZE; return adr; } @@ -401,7 +399,6 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, int i = pmd_index(address); for (; i < PTRS_PER_PMD; i++, address = next) { - unsigned long pte_phys; pmd_t *pmd = pmd_page + pmd_index(address); pte_t *pte; pgprot_t new_prot = prot; @@ -456,11 +453,11 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, continue; } - pte = alloc_low_page(&pte_phys); + pte = alloc_low_page(); last_map_addr = phys_pte_init(pte, address, end, new_prot); spin_lock(&init_mm.page_table_lock); - pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); + pmd_populate_kernel(&init_mm, pmd, pte); spin_unlock(&init_mm.page_table_lock); } update_page_count(PG_LEVEL_2M, pages); @@ -476,7 +473,6 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, int i = pud_index(addr); for (; i < PTRS_PER_PUD; i++, addr = next) { - unsigned long pmd_phys; pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; pgprot_t prot = PAGE_KERNEL; @@ -530,12 +526,12 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, continue; } - pmd = alloc_low_page(&pmd_phys); + pmd = alloc_low_page(); last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask, prot); spin_lock(&init_mm.page_table_lock); - pud_populate(&init_mm, pud, __va(pmd_phys)); + pud_populate(&init_mm, pud, pmd); spin_unlock(&init_mm.page_table_lock); } __flush_tlb_all(); @@ -560,7 +556,6 @@ kernel_physical_mapping_init(unsigned long start, for (; start < end; start = next) { pgd_t *pgd = pgd_offset_k(start); - unsigned long pud_phys; pud_t *pud; next = (start + PGDIR_SIZE) & PGDIR_MASK; @@ -574,12 +569,12 @@ kernel_physical_mapping_init(unsigned long start, continue; } - pud = alloc_low_page(&pud_phys); + pud = alloc_low_page(); last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), page_size_mask); spin_lock(&init_mm.page_table_lock); - pgd_populate(&init_mm, pgd, __va(pud_phys)); + pgd_populate(&init_mm, pgd, pud); spin_unlock(&init_mm.page_table_lock); pgd_changed = true; } -- cgit v1.1 From 5c51bdbe4c74dce7996d0bbfa39974775cc3f13c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:01 -0800 Subject: x86, mm: Merge alloc_low_page between 64bit and 32bit They are almost same except 64 bit need to handle after_bootmem case. Add mm_internal.h to make that alloc_low_page() only to be accessible from arch/x86/mm/init*.c Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-25-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/init.c | 34 ++++++++++++++++++++++++++++++++++ arch/x86/mm/init_32.c | 26 ++------------------------ arch/x86/mm/init_64.c | 32 ++------------------------------ arch/x86/mm/mm_internal.h | 6 ++++++ 4 files changed, 44 insertions(+), 54 deletions(-) create mode 100644 arch/x86/mm/mm_internal.h (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 2393d00..8481892 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -17,10 +17,44 @@ #include #include /* for MAX_DMA_PFN */ +#include "mm_internal.h" + unsigned long __initdata pgt_buf_start; unsigned long __meminitdata pgt_buf_end; unsigned long __meminitdata pgt_buf_top; +__ref void *alloc_low_page(void) +{ + unsigned long pfn; + void *adr; + +#ifdef CONFIG_X86_64 + if (after_bootmem) { + adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); + + return adr; + } +#endif + + if ((pgt_buf_end + 1) >= pgt_buf_top) { + unsigned long ret; + if (min_pfn_mapped >= max_pfn_mapped) + panic("alloc_low_page: ran out of memory"); + ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, + max_pfn_mapped << PAGE_SHIFT, + PAGE_SIZE, PAGE_SIZE); + if (!ret) + panic("alloc_low_page: can not alloc memory"); + memblock_reserve(ret, PAGE_SIZE); + pfn = ret >> PAGE_SHIFT; + } else + pfn = pgt_buf_end++; + + adr = __va(pfn * PAGE_SIZE); + clear_page(adr); + return adr; +} + /* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ #define INIT_PGT_BUF_SIZE (5 * PAGE_SIZE) RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE); diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 7bb1106..a7f2df1 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -53,36 +53,14 @@ #include #include +#include "mm_internal.h" + unsigned long highstart_pfn, highend_pfn; static noinline int do_test_wp_bit(void); bool __read_mostly __vmalloc_start_set = false; -static __init void *alloc_low_page(void) -{ - unsigned long pfn; - void *adr; - - if ((pgt_buf_end + 1) >= pgt_buf_top) { - unsigned long ret; - if (min_pfn_mapped >= max_pfn_mapped) - panic("alloc_low_page: ran out of memory"); - ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, - max_pfn_mapped << PAGE_SHIFT, - PAGE_SIZE, PAGE_SIZE); - if (!ret) - panic("alloc_low_page: can not alloc memory"); - memblock_reserve(ret, PAGE_SIZE); - pfn = ret >> PAGE_SHIFT; - } else - pfn = pgt_buf_end++; - - adr = __va(pfn * PAGE_SIZE); - clear_page(adr); - return adr; -} - /* * Creates a middle page table and puts a pointer to it in the * given global directory entry. 
This only returns the gd entry diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1960820..1d53def 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -54,6 +54,8 @@ #include #include +#include "mm_internal.h" + static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; @@ -314,36 +316,6 @@ void __init cleanup_highmap(void) } } -static __ref void *alloc_low_page(void) -{ - unsigned long pfn; - void *adr; - - if (after_bootmem) { - adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); - - return adr; - } - - if ((pgt_buf_end + 1) >= pgt_buf_top) { - unsigned long ret; - if (min_pfn_mapped >= max_pfn_mapped) - panic("alloc_low_page: ran out of memory"); - ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, - max_pfn_mapped << PAGE_SHIFT, - PAGE_SIZE, PAGE_SIZE); - if (!ret) - panic("alloc_low_page: can not alloc memory"); - memblock_reserve(ret, PAGE_SIZE); - pfn = ret >> PAGE_SHIFT; - } else - pfn = pgt_buf_end++; - - adr = __va(pfn * PAGE_SIZE); - clear_page(adr); - return adr; -} - static unsigned long __meminit phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, pgprot_t prot) diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h new file mode 100644 index 0000000..b3f993a --- /dev/null +++ b/arch/x86/mm/mm_internal.h @@ -0,0 +1,6 @@ +#ifndef __X86_MM_INTERNAL_H +#define __X86_MM_INTERNAL_H + +void *alloc_low_page(void); + +#endif /* __X86_MM_INTERNAL_H */ -- cgit v1.1 From 9985b4c6fa7d660f685918a58282275e9e35d8e0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:02 -0800 Subject: x86, mm: Move min_pfn_mapped back to mm/init.c Also change it to static. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-26-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/page_types.h | 1 - arch/x86/kernel/setup.c | 1 - arch/x86/mm/init.c | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 9f6f3e6..54c9787 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -45,7 +45,6 @@ extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; extern unsigned long max_pfn_mapped; -extern unsigned long min_pfn_mapped; static inline phys_addr_t get_max_mapped(void) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f7634092..2015194 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -124,7 +124,6 @@ */ unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; -unsigned long min_pfn_mapped; #ifdef CONFIG_DMI RESERVE_BRK(dmi_alloc, 65536); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 8481892..6392bf9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -23,6 +23,8 @@ unsigned long __initdata pgt_buf_start; unsigned long __meminitdata pgt_buf_end; unsigned long __meminitdata pgt_buf_top; +static unsigned long min_pfn_mapped; + __ref void *alloc_low_page(void) { unsigned long pfn; -- cgit v1.1 From 6f80b68e9e515547edbacb0c37491730bf766db5 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:03 -0800 Subject: x86, mm, Xen: Remove mapping_pagetable_reserve() Page table area are pre-mapped now after x86, mm: setup page table in top-down x86, mm: Remove early_memremap workaround for page table accessing on 64bit mapping_pagetable_reserve is not used anymore, so remove it. 
Also remove operation in mask_rw_pte(), as modified allow_low_page always return pages that are already mapped, moreover xen_alloc_pte_init, xen_alloc_pmd_init, etc, will mark the page RO before hooking it into the pagetable automatically. -v2: add changelog about mask_rw_pte() from Stefano. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-27-git-send-email-yinghai@kernel.org Cc: Stefano Stabellini Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable_types.h | 1 - arch/x86/include/asm/x86_init.h | 12 ------------ arch/x86/kernel/x86_init.c | 4 ---- arch/x86/mm/init.c | 4 ---- arch/x86/xen/mmu.c | 28 ---------------------------- 5 files changed, 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index ec8a1fc..79738f2 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -301,7 +301,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, /* Install a pte for a particular vaddr in kernel space. */ void set_pte_vaddr(unsigned long vaddr, pte_t pte); -extern void native_pagetable_reserve(u64 start, u64 end); #ifdef CONFIG_X86_32 extern void native_pagetable_init(void); #else diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 5769349..3b2ce8f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -69,17 +69,6 @@ struct x86_init_oem { }; /** - * struct x86_init_mapping - platform specific initial kernel pagetable setup - * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage - * - * For more details on the purpose of this hook, look in - * init_memory_mapping and the commit that added it. - */ -struct x86_init_mapping { - void (*pagetable_reserve)(u64 start, u64 end); -}; - -/** * struct x86_init_paging - platform specific paging functions * @pagetable_init: platform specific paging initialization call to setup * the kernel pagetables and prepare accessors functions. 
@@ -136,7 +125,6 @@ struct x86_init_ops { struct x86_init_mpparse mpparse; struct x86_init_irqs irqs; struct x86_init_oem oem; - struct x86_init_mapping mapping; struct x86_init_paging paging; struct x86_init_timers timers; struct x86_init_iommu iommu; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7a3d075..50cf83e 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -62,10 +62,6 @@ struct x86_init_ops x86_init __initdata = { .banner = default_banner, }, - .mapping = { - .pagetable_reserve = native_pagetable_reserve, - }, - .paging = { .pagetable_init = native_pagetable_init, }, diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6392bf9..21173fc 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -112,10 +112,6 @@ static void __init probe_page_size_mask(void) __supported_pte_mask |= _PAGE_GLOBAL; } } -void __init native_pagetable_reserve(u64 start, u64 end) -{ - memblock_reserve(start, end - start); -} #ifdef CONFIG_X86_32 #define NR_RANGE_MR 3 diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index dcf5f2d..bbb883f 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1178,20 +1178,6 @@ static void xen_exit_mmap(struct mm_struct *mm) static void xen_post_allocator_init(void); -static __init void xen_mapping_pagetable_reserve(u64 start, u64 end) -{ - /* reserve the range used */ - native_pagetable_reserve(start, end); - - /* set as RW the rest */ - printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end, - PFN_PHYS(pgt_buf_top)); - while (end < PFN_PHYS(pgt_buf_top)) { - make_lowmem_page_readwrite(__va(end)); - end += PAGE_SIZE; - } -} - #ifdef CONFIG_X86_64 static void __init xen_cleanhighmap(unsigned long vaddr, unsigned long vaddr_end) @@ -1503,19 +1489,6 @@ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) #else /* CONFIG_X86_64 */ static pte_t __init mask_rw_pte(pte_t *ptep, pte_t pte) { - unsigned long pfn = pte_pfn(pte); - - /* - * If the new pfn is within the range of the newly allocated - * kernel pagetable, and it isn't being mapped into an - * early_ioremap fixmap slot as a freshly allocated page, make sure - * it is RO. - */ - if (((!is_early_ioremap_ptep(ptep) && - pfn >= pgt_buf_start && pfn < pgt_buf_top)) || - (is_early_ioremap_ptep(ptep) && pfn != (pgt_buf_end - 1))) - pte = pte_wrprotect(pte); - return pte; } #endif /* CONFIG_X86_64 */ @@ -2197,7 +2170,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { void __init xen_init_mmu_ops(void) { - x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve; x86_init.paging.pagetable_init = xen_pagetable_init; pv_mmu_ops = xen_mmu_ops; -- cgit v1.1 From 22c8ca2ac256bb681be791858b35502b5d37e73b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:04 -0800 Subject: x86, mm: Add alloc_low_pages(num) The 32bit kmap mapping needs its pages handed out from low to high. At this point those pages still come from pgt_buf_* in the BRK, so the ordering holds for now. But we want to move early_ioremap_page_table_range_init() out of init_memory_mapping() and call it only one time later, which will make page_table_range_init/page_table_kmap_check/alloc_low_page get their pages from memblock instead. memblock allocates pages from high to low, so that would trip the BUG_ON in page_table_kmap_check() that checks the ordering. This patch adds alloc_low_pages() to make it possible to allocate several pages at once up front, and then hand them out one by one from low to high.
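For illustration, the usage pattern this enables looks roughly as follows; this sketch mirrors the page_table_range_init() conversion made later in this series (names as in those patches, not a verbatim hunk): count the page tables needed first, fetch them as one pre-mapped chunk, then consume them in ascending order:

	/* count how many kmap page table pages the range will need */
	unsigned long count = page_table_range_init_count(start, end);
	void *adr = NULL;

	if (count)
		adr = alloc_low_pages(count);	/* one zeroed, directly mapped chunk */

	/* ... later, hand the pages out one at a time, low to high ... */
	newpte = adr;
	adr = (void *)((unsigned long)adr + PAGE_SIZE);

This satisfies the low-to-high ordering that page_table_kmap_check() asserts, even though the underlying memblock allocation itself was served top-down.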
Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-28-git-send-email-yinghai@kernel.org Cc: Andrew Morton Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 33 +++++++++++++++++++++------------ arch/x86/mm/mm_internal.h | 6 +++++- 2 files changed, 26 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 21173fc..02cea14 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -25,36 +25,45 @@ unsigned long __meminitdata pgt_buf_top; static unsigned long min_pfn_mapped; -__ref void *alloc_low_page(void) +__ref void *alloc_low_pages(unsigned int num) { unsigned long pfn; - void *adr; + int i; #ifdef CONFIG_X86_64 if (after_bootmem) { - adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); + unsigned int order; - return adr; + order = get_order((unsigned long)num << PAGE_SHIFT); + return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | + __GFP_ZERO, order); } #endif - if ((pgt_buf_end + 1) >= pgt_buf_top) { + if ((pgt_buf_end + num) >= pgt_buf_top) { unsigned long ret; if (min_pfn_mapped >= max_pfn_mapped) panic("alloc_low_page: ran out of memory"); ret = memblock_find_in_range(min_pfn_mapped << PAGE_SHIFT, max_pfn_mapped << PAGE_SHIFT, - PAGE_SIZE, PAGE_SIZE); + PAGE_SIZE * num , PAGE_SIZE); if (!ret) panic("alloc_low_page: can not alloc memory"); - memblock_reserve(ret, PAGE_SIZE); + memblock_reserve(ret, PAGE_SIZE * num); pfn = ret >> PAGE_SHIFT; - } else - pfn = pgt_buf_end++; + } else { + pfn = pgt_buf_end; + pgt_buf_end += num; + } + + for (i = 0; i < num; i++) { + void *adr; + + adr = __va((pfn + i) << PAGE_SHIFT); + clear_page(adr); + } - adr = __va(pfn * PAGE_SIZE); - clear_page(adr); - return adr; + return __va(pfn << PAGE_SHIFT); } /* need 4 4k for initial PMD_SIZE, 4k for 0-ISA_END_ADDRESS */ diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index b3f993a..7e3b88e 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -1,6 +1,10 @@ #ifndef __X86_MM_INTERNAL_H #define __X86_MM_INTERNAL_H -void *alloc_low_page(void); +void *alloc_low_pages(unsigned int num); +static inline void *alloc_low_page(void) +{ + return alloc_low_pages(1); +} #endif /* __X86_MM_INTERNAL_H */ -- cgit v1.1 From ddd3509df8f8d4f1cf4784f559d702ce00dc8846 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Fri, 16 Nov 2012 19:39:05 -0800 Subject: x86, mm: Add pointer about Xen mmu requirement for alloc_low_pages Add link for more information 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve -v2: updated to commets from hpa to include commit name. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-29-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 02cea14..cb4f8ba 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -25,6 +25,15 @@ unsigned long __meminitdata pgt_buf_top; static unsigned long min_pfn_mapped; +/* + * Pages returned are already directly mapped. + * + * Changing that is likely to break Xen, see commit: + * + * 279b706 x86,xen: introduce x86_init.mapping.pagetable_reserve + * + * for detailed information. 
+ */ __ref void *alloc_low_pages(unsigned int num) { unsigned long pfn; -- cgit v1.1 From 719272c45b821d38608fc333700bde1a89c56c59 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:06 -0800 Subject: x86, mm: only call early_ioremap_page_table_range_init() once On 32bit, before patcheset that only set page table for ram, we only call that one time. Now, we are calling that during every init_memory_mapping if we have holes under max_low_pfn. We should only call it one time after all ranges under max_low_page get mapped just like we did before. Also that could avoid the risk to run out of pgt_buf in BRK. Need to update page_table_range_init() to count the pages for kmap page table at first, and use new added alloc_low_pages() to get pages in sequence. That will conform to the requirement that pages need to be in low to high order. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-30-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 13 +++++-------- arch/x86/mm/init_32.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 46 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index cb4f8ba..bed4888 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -343,14 +343,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, mr[i].page_size_mask); -#ifdef CONFIG_X86_32 - early_ioremap_page_table_range_init(); - - load_cr3(swapper_pg_dir); -#endif - - __flush_tlb_all(); - add_pfn_range_mapped(start >> PAGE_SHIFT, ret >> PAGE_SHIFT); return ret >> PAGE_SHIFT; @@ -447,7 +439,12 @@ void __init init_mem_mapping(void) /* can we preseve max_low_pfn ?*/ max_low_pfn = max_pfn; } +#else + early_ioremap_page_table_range_init(); + load_cr3(swapper_pg_dir); + __flush_tlb_all(); #endif + early_memtest(0, max_pfn_mapped << PAGE_SHIFT); } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index a7f2df1..0ae1ba8 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -135,8 +135,39 @@ pte_t * __init populate_extra_pte(unsigned long vaddr) return one_page_table_init(pmd) + pte_idx; } +static unsigned long __init +page_table_range_init_count(unsigned long start, unsigned long end) +{ + unsigned long count = 0; +#ifdef CONFIG_HIGHMEM + int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT; + int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT; + int pgd_idx, pmd_idx; + unsigned long vaddr; + + if (pmd_idx_kmap_begin == pmd_idx_kmap_end) + return 0; + + vaddr = start; + pgd_idx = pgd_index(vaddr); + + for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { + for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); + pmd_idx++) { + if ((vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin && + (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end) + count++; + vaddr += PMD_SIZE; + } + pmd_idx = 0; + } +#endif + return count; +} + static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, - unsigned long vaddr, pte_t *lastpte) + unsigned long vaddr, pte_t *lastpte, + void **adr) { #ifdef CONFIG_HIGHMEM /* @@ -150,16 +181,15 @@ static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, if (pmd_idx_kmap_begin != pmd_idx_kmap_end && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin - && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end - && ((__pa(pte) >> PAGE_SHIFT) < pgt_buf_start - || (__pa(pte) >> PAGE_SHIFT) >= pgt_buf_end)) { + && (vaddr >> PMD_SHIFT) <= 
pmd_idx_kmap_end) { pte_t *newpte; int i; BUG_ON(after_bootmem); - newpte = alloc_low_page(); + newpte = *adr; for (i = 0; i < PTRS_PER_PTE; i++) set_pte(newpte + i, pte[i]); + *adr = (void *)(((unsigned long)(*adr)) + PAGE_SIZE); paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE)); @@ -193,6 +223,11 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) pgd_t *pgd; pmd_t *pmd; pte_t *pte = NULL; + unsigned long count = page_table_range_init_count(start, end); + void *adr = NULL; + + if (count) + adr = alloc_low_pages(count); vaddr = start; pgd_idx = pgd_index(vaddr); @@ -205,7 +240,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base) for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { pte = page_table_kmap_check(one_page_table_init(pmd), - pmd, vaddr, pte); + pmd, vaddr, pte, &adr); vaddr += PMD_SIZE; } -- cgit v1.1 From cf47065961b48727b4e47bc3e2e67f4996878437 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:07 -0800 Subject: x86, mm: Move back pgt_buf_* to mm/init.c Also change them to static. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-31-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/init.h | 4 ---- arch/x86/mm/init.c | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 4f13998..626ea8d 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -12,8 +12,4 @@ kernel_physical_mapping_init(unsigned long start, unsigned long end, unsigned long page_size_mask); -extern unsigned long __initdata pgt_buf_start; -extern unsigned long __meminitdata pgt_buf_end; -extern unsigned long __meminitdata pgt_buf_top; - #endif /* _ASM_X86_INIT_32_H */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index bed4888..3cadf10 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -19,9 +19,9 @@ #include "mm_internal.h" -unsigned long __initdata pgt_buf_start; -unsigned long __meminitdata pgt_buf_end; -unsigned long __meminitdata pgt_buf_top; +static unsigned long __initdata pgt_buf_start; +static unsigned long __initdata pgt_buf_end; +static unsigned long __initdata pgt_buf_top; static unsigned long min_pfn_mapped; -- cgit v1.1 From 148b20989e0b83cb301e1fcd9e987c7abde05333 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:08 -0800 Subject: x86, mm: Move init_gbpages() out of setup.c Put it in mm/init.c, and call it from probe_page_mask(). init_mem_mapping is calling probe_page_mask at first. So calling sequence is not changed. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-32-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/setup.c | 15 +-------------- arch/x86/mm/init.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2015194..85b62f1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -282,18 +282,7 @@ void * __init extend_brk(size_t size, size_t align) return ret; } -#ifdef CONFIG_X86_64 -static void __init init_gbpages(void) -{ - if (direct_gbpages && cpu_has_gbpages) - printk(KERN_INFO "Using GB pages for direct mapping\n"); - else - direct_gbpages = 0; -} -#else -static inline void init_gbpages(void) -{ -} +#ifdef CONFIG_X86_32 static void __init cleanup_highmap(void) { } @@ -933,8 +922,6 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); - init_gbpages(); - init_mem_mapping(); memblock.current_limit = get_max_mapped(); diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 3cadf10..8168bf8 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -98,6 +98,16 @@ int direct_gbpages #endif ; +static void __init init_gbpages(void) +{ +#ifdef CONFIG_X86_64 + if (direct_gbpages && cpu_has_gbpages) + printk(KERN_INFO "Using GB pages for direct mapping\n"); + else + direct_gbpages = 0; +#endif +} + struct map_range { unsigned long start; unsigned long end; @@ -108,6 +118,8 @@ static int page_size_mask; static void __init probe_page_size_mask(void) { + init_gbpages(); + #if !defined(CONFIG_DEBUG_PAGEALLOC) && !defined(CONFIG_KMEMCHECK) /* * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. -- cgit v1.1 From f836e35a98ab3b2f0d4c8730610e4a4a7f533505 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:09 -0800 Subject: x86, mm: change low/highmem_pfn_init to static on 32bit Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-33-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 0ae1ba8..322ee56 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -575,7 +575,7 @@ early_param("highmem", parse_highmem); * artificially via the highmem=x boot parameter then create * it: */ -void __init lowmem_pfn_init(void) +static void __init lowmem_pfn_init(void) { /* max_low_pfn is 0, we already have early_res support */ max_low_pfn = max_pfn; @@ -611,7 +611,7 @@ void __init lowmem_pfn_init(void) * We have more RAM than fits into lowmem - we try to put it into * highmem, also taking the highmem=x boot parameter into account: */ -void __init highmem_pfn_init(void) +static void __init highmem_pfn_init(void) { max_low_pfn = MAXMEM_PFN; -- cgit v1.1 From c8dcdb9ce463ad4a660099a74a850f4f6fc81c41 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:10 -0800 Subject: x86, mm: Move function declarations into mm_internal.h They are only used by mm/init*.c. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-34-git-send-email-yinghai@kernel.org Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/init.h | 16 +++------------- arch/x86/mm/mm_internal.h | 7 +++++++ 2 files changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index 626ea8d..bac770b 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -1,15 +1,5 @@ -#ifndef _ASM_X86_INIT_32_H -#define _ASM_X86_INIT_32_H +#ifndef _ASM_X86_INIT_H +#define _ASM_X86_INIT_H -#ifdef CONFIG_X86_32 -extern void __init early_ioremap_page_table_range_init(void); -#endif -extern void __init zone_sizes_init(void); - -extern unsigned long __init -kernel_physical_mapping_init(unsigned long start, - unsigned long end, - unsigned long page_size_mask); - -#endif /* _ASM_X86_INIT_32_H */ +#endif /* _ASM_X86_INIT_H */ diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index 7e3b88e..dc79ac1 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -7,4 +7,11 @@ static inline void *alloc_low_page(void) return alloc_low_pages(1); } +void early_ioremap_page_table_range_init(void); + +unsigned long kernel_physical_mapping_init(unsigned long start, + unsigned long end, + unsigned long page_size_mask); +void zone_sizes_init(void); + #endif /* __X86_MM_INTERNAL_H */ -- cgit v1.1 From 11ed9e927d573d78beda6e6a166612666ae97064 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:11 -0800 Subject: x86, mm: Add check before clear pte above max_low_pfn on 32bit While testing a patch that adjusts page_size_mask to map small RAM ranges with big pages, I found the page table was set up wrongly for 32-bit, and that native_pagetable_init() wrongly clears ptes for pmds with large page support. 1. Add more comments about why we are expecting a pte. 2. Add BUG checking, so that next time we can find the problem earlier if we mess up the page table setup again. 3. max_low_pfn is not an inclusive boundary for the low memory mapping, so we should check from max_low_pfn instead of max_low_pfn + 1. 4. Print out when some pte really gets cleared, or use WARN() to find out why anything above max_low_pfn got mapped, so we can fix it. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-35-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_32.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 322ee56..19ef9f0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -480,9 +480,14 @@ void __init native_pagetable_init(void) /* * Remove any mappings which extend past the end of physical - * memory from the boot time page table: + * memory from the boot time page table. + * In virtual address space, we should have at least two pages + * from VMALLOC_END to pkmap or fixmap according to VMALLOC_END + * definition. And max_low_pfn is set to VMALLOC_END physical + * address. If initial memory mapping is doing right job, we + * should have pte used near max_low_pfn or one pmd is not present. */ - for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) { + for (pfn = max_low_pfn; pfn < 1<<(32-PAGE_SHIFT); pfn++) { va = PAGE_OFFSET + (pfn<> PAGE_SHIFT); -- cgit v1.1 From 5a0d3aeeeffbd1534a510fc10c4ab7c99c45afce Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:12 -0800 Subject: x86, mm: use round_up/down in split_mem_range() Replace the open-coded inline versions of rounding up and down with round_up()/round_down(); a short equivalence sketch follows below.
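A minimal userspace sketch (not part of the patch) of the equivalence the conversion relies on. The round_up()/round_down() macros below are simplified stand-ins for the kernel's power-of-two versions, and the value of pos is arbitrary:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PMD_SHIFT  21
#define PMD_SIZE   (1UL << PMD_SHIFT)

/* simplified stand-ins; valid only when y is a power of two */
#define round_up(x, y)   (((x) + (y) - 1) & ~((unsigned long)(y) - 1))
#define round_down(x, y) ((x) & ~((unsigned long)(y) - 1))

int main(void)
{
	unsigned long pos = 0x345678UL;		/* arbitrary physical address */

	/* the open-coded form this patch removes */
	unsigned long old_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
				<< (PMD_SHIFT - PAGE_SHIFT);
	/* the replacement */
	unsigned long new_pfn = round_up(pos, PMD_SIZE) >> PAGE_SHIFT;

	/* prints the same pfn twice (0x400 here) */
	printf("%#lx %#lx\n", old_pfn, new_pfn);
	return 0;
}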
Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-36-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 8168bf8..0e625e6 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -218,13 +218,11 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, * slowdowns. */ if (pos == 0) - end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); + end_pfn = PMD_SIZE >> PAGE_SHIFT; else - end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = round_up(pos, PMD_SIZE) >> PAGE_SHIFT; #else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = round_up(pos, PMD_SIZE) >> PAGE_SHIFT; #endif if (end_pfn > (end >> PAGE_SHIFT)) end_pfn = end >> PAGE_SHIFT; @@ -234,15 +232,13 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, } /* big page (2M) range */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); + start_pfn = round_up(pos, PMD_SIZE) >> PAGE_SHIFT; #ifdef CONFIG_X86_32 - end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = round_down(end, PMD_SIZE) >> PAGE_SHIFT; #else /* CONFIG_X86_64 */ - end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); + end_pfn = round_up(pos, PUD_SIZE) >> PAGE_SHIFT; + if (end_pfn > (round_down(end, PMD_SIZE) >> PAGE_SHIFT)) + end_pfn = round_down(end, PMD_SIZE) >> PAGE_SHIFT; #endif if (start_pfn < end_pfn) { @@ -253,9 +249,8 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, #ifdef CONFIG_X86_64 /* big page (1G) range */ - start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) - << (PUD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + start_pfn = round_up(pos, PUD_SIZE) >> PAGE_SHIFT; + end_pfn = round_down(end, PUD_SIZE) >> PAGE_SHIFT; if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & @@ -264,9 +259,8 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, } /* tail is not big page (1G) alignment */ - start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) - << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + start_pfn = round_up(pos, PMD_SIZE) >> PAGE_SHIFT; + end_pfn = round_down(end, PMD_SIZE) >> PAGE_SHIFT; if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & (1< Date: Fri, 16 Nov 2012 19:39:13 -0800 Subject: x86, mm: use PFN_DOWN in split_mem_range() to replace own inline version for shifting. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-37-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 0e625e6..1cca052b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -208,8 +208,8 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, int i; /* head if not big page alignment ? 
*/ - start_pfn = start >> PAGE_SHIFT; - pos = start_pfn << PAGE_SHIFT; + start_pfn = PFN_DOWN(start); + pos = PFN_PHYS(start_pfn); #ifdef CONFIG_X86_32 /* * Don't use a large page for the first 2/4MB of memory @@ -218,59 +218,59 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, * slowdowns. */ if (pos == 0) - end_pfn = PMD_SIZE >> PAGE_SHIFT; + end_pfn = PFN_DOWN(PMD_SIZE); else - end_pfn = round_up(pos, PMD_SIZE) >> PAGE_SHIFT; + end_pfn = PFN_DOWN(round_up(pos, PMD_SIZE)); #else /* CONFIG_X86_64 */ - end_pfn = round_up(pos, PMD_SIZE) >> PAGE_SHIFT; + end_pfn = PFN_DOWN(round_up(pos, PMD_SIZE)); #endif - if (end_pfn > (end >> PAGE_SHIFT)) - end_pfn = end >> PAGE_SHIFT; + if (end_pfn > PFN_DOWN(end)) + end_pfn = PFN_DOWN(end); if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - pos = end_pfn << PAGE_SHIFT; + pos = PFN_PHYS(end_pfn); } /* big page (2M) range */ - start_pfn = round_up(pos, PMD_SIZE) >> PAGE_SHIFT; + start_pfn = PFN_DOWN(round_up(pos, PMD_SIZE)); #ifdef CONFIG_X86_32 - end_pfn = round_down(end, PMD_SIZE) >> PAGE_SHIFT; + end_pfn = PFN_DOWN(round_down(end, PMD_SIZE)); #else /* CONFIG_X86_64 */ - end_pfn = round_up(pos, PUD_SIZE) >> PAGE_SHIFT; - if (end_pfn > (round_down(end, PMD_SIZE) >> PAGE_SHIFT)) - end_pfn = round_down(end, PMD_SIZE) >> PAGE_SHIFT; + end_pfn = PFN_DOWN(round_up(pos, PUD_SIZE)); + if (end_pfn > PFN_DOWN(round_down(end, PMD_SIZE))) + end_pfn = PFN_DOWN(round_down(end, PMD_SIZE)); #endif if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & (1<> PAGE_SHIFT; - end_pfn = round_down(end, PUD_SIZE) >> PAGE_SHIFT; + start_pfn = PFN_DOWN(round_up(pos, PUD_SIZE)); + end_pfn = PFN_DOWN(round_down(end, PUD_SIZE)); if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & ((1<> PAGE_SHIFT; - end_pfn = round_down(end, PMD_SIZE) >> PAGE_SHIFT; + start_pfn = PFN_DOWN(round_up(pos, PMD_SIZE)); + end_pfn = PFN_DOWN(round_down(end, PMD_SIZE)); if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & (1<>PAGE_SHIFT; - end_pfn = end>>PAGE_SHIFT; + start_pfn = PFN_DOWN(pos); + end_pfn = PFN_DOWN(end); nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); /* try to merge same page size and continuous */ -- cgit v1.1 From 1829ae9ad7380bf17333ab9ad1610631d9cb8664 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:14 -0800 Subject: x86, mm: use pfn instead of pos in split_mem_range could save some bit shifting operations. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-38-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1cca052b..4bf1c53 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -204,12 +204,11 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, unsigned long end) { unsigned long start_pfn, end_pfn; - unsigned long pos; + unsigned long pfn; int i; /* head if not big page alignment ? 
*/ - start_pfn = PFN_DOWN(start); - pos = PFN_PHYS(start_pfn); + pfn = start_pfn = PFN_DOWN(start); #ifdef CONFIG_X86_32 /* * Don't use a large page for the first 2/4MB of memory @@ -217,26 +216,26 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, * and overlapping MTRRs into large pages can cause * slowdowns. */ - if (pos == 0) + if (pfn == 0) end_pfn = PFN_DOWN(PMD_SIZE); else - end_pfn = PFN_DOWN(round_up(pos, PMD_SIZE)); + end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); #else /* CONFIG_X86_64 */ - end_pfn = PFN_DOWN(round_up(pos, PMD_SIZE)); + end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); #endif if (end_pfn > PFN_DOWN(end)) end_pfn = PFN_DOWN(end); if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); - pos = PFN_PHYS(end_pfn); + pfn = end_pfn; } /* big page (2M) range */ - start_pfn = PFN_DOWN(round_up(pos, PMD_SIZE)); + start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); #ifdef CONFIG_X86_32 end_pfn = PFN_DOWN(round_down(end, PMD_SIZE)); #else /* CONFIG_X86_64 */ - end_pfn = PFN_DOWN(round_up(pos, PUD_SIZE)); + end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); if (end_pfn > PFN_DOWN(round_down(end, PMD_SIZE))) end_pfn = PFN_DOWN(round_down(end, PMD_SIZE)); #endif @@ -244,32 +243,32 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & (1< Date: Fri, 16 Nov 2012 19:39:15 -0800 Subject: x86, mm: use limit_pfn for end pfn instead of shifting end to get that. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-39-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4bf1c53..f410dc6 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -203,10 +203,12 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, unsigned long start, unsigned long end) { - unsigned long start_pfn, end_pfn; + unsigned long start_pfn, end_pfn, limit_pfn; unsigned long pfn; int i; + limit_pfn = PFN_DOWN(end); + /* head if not big page alignment ? 
*/ pfn = start_pfn = PFN_DOWN(start); #ifdef CONFIG_X86_32 @@ -223,8 +225,8 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, #else /* CONFIG_X86_64 */ end_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); #endif - if (end_pfn > PFN_DOWN(end)) - end_pfn = PFN_DOWN(end); + if (end_pfn > limit_pfn) + end_pfn = limit_pfn; if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); pfn = end_pfn; @@ -233,11 +235,11 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, /* big page (2M) range */ start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); #ifdef CONFIG_X86_32 - end_pfn = PFN_DOWN(round_down(end, PMD_SIZE)); + end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); #else /* CONFIG_X86_64 */ end_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); - if (end_pfn > PFN_DOWN(round_down(end, PMD_SIZE))) - end_pfn = PFN_DOWN(round_down(end, PMD_SIZE)); + if (end_pfn > round_down(limit_pfn, PFN_DOWN(PMD_SIZE))) + end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); #endif if (start_pfn < end_pfn) { @@ -249,7 +251,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, #ifdef CONFIG_X86_64 /* big page (1G) range */ start_pfn = round_up(pfn, PFN_DOWN(PUD_SIZE)); - end_pfn = PFN_DOWN(round_down(end, PUD_SIZE)); + end_pfn = round_down(limit_pfn, PFN_DOWN(PUD_SIZE)); if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & @@ -259,7 +261,7 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, /* tail is not big page (1G) alignment */ start_pfn = round_up(pfn, PFN_DOWN(PMD_SIZE)); - end_pfn = PFN_DOWN(round_down(end, PMD_SIZE)); + end_pfn = round_down(limit_pfn, PFN_DOWN(PMD_SIZE)); if (start_pfn < end_pfn) { nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & (1< Date: Fri, 16 Nov 2012 19:39:16 -0800 Subject: x86, mm: Unifying after_bootmem for 32bit and 64bit after_bootmem has different meanings on 32-bit and 64-bit. 32-bit: after bootmem is ready. 64-bit: after bootmem is destroyed. Let's merge them and make 32-bit behave the same as 64-bit. On 32-bit, the code mixes alloc_bootmem_pages() and alloc_low_page() depending on whether after_bootmem is set. alloc_bootmem() is just a wrapper around memblock on x86, and we now have alloc_low_page() backed by memblock too, so we can drop the bootmem path and use alloc_low_page() exclusively. At the same time, make alloc_low_page() handle the real after_bootmem case for 32-bit, since alloc_bootmem_pages() could fall back to slab too. Finally, move the point where after_bootmem is set on 32-bit to match 64-bit. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-40-git-send-email-yinghai@kernel.org Signed-off-by: H.
Peter Anvin --- arch/x86/mm/init.c | 2 -- arch/x86/mm/init_32.c | 21 ++++----------------- 2 files changed, 4 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index f410dc6..2a27e5a 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -39,7 +39,6 @@ __ref void *alloc_low_pages(unsigned int num) unsigned long pfn; int i; -#ifdef CONFIG_X86_64 if (after_bootmem) { unsigned int order; @@ -47,7 +46,6 @@ __ref void *alloc_low_pages(unsigned int num) return (void *)__get_free_pages(GFP_ATOMIC | __GFP_NOTRACK | __GFP_ZERO, order); } -#endif if ((pgt_buf_end + num) >= pgt_buf_top) { unsigned long ret; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 19ef9f0..f4fc4a2 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -73,10 +73,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) #ifdef CONFIG_X86_PAE if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { - if (after_bootmem) - pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE); - else - pmd_table = (pmd_t *)alloc_low_page(); + pmd_table = (pmd_t *)alloc_low_page(); paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); pud = pud_offset(pgd, 0); @@ -98,17 +95,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) static pte_t * __init one_page_table_init(pmd_t *pmd) { if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { - pte_t *page_table = NULL; - - if (after_bootmem) { -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) - page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); -#endif - if (!page_table) - page_table = - (pte_t *)alloc_bootmem_pages(PAGE_SIZE); - } else - page_table = (pte_t *)alloc_low_page(); + pte_t *page_table = (pte_t *)alloc_low_page(); paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); @@ -708,8 +695,6 @@ void __init setup_bootmem_allocator(void) printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped< Date: Fri, 16 Nov 2012 19:39:17 -0800 Subject: x86, mm: Move after_bootmem to mm_internel.h it is only used in arch/x86/mm/init*.c Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-41-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/mm_internal.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/mm_internal.h b/arch/x86/mm/mm_internal.h index dc79ac1..6b563a1 100644 --- a/arch/x86/mm/mm_internal.h +++ b/arch/x86/mm/mm_internal.h @@ -14,4 +14,6 @@ unsigned long kernel_physical_mapping_init(unsigned long start, unsigned long page_size_mask); void zone_sizes_init(void); +extern int after_bootmem; + #endif /* __X86_MM_INTERNAL_H */ -- cgit v1.1 From b8fd39c036ab982aa087b7ee671f86e2574d31f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:18 -0800 Subject: x86, mm: Use clamp_t() in init_range_memory_mapping save some lines, and make code more readable. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-42-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/init.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 2a27e5a..6f85de8 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -357,31 +357,20 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, * would have hole in the middle or ends, and only ram parts will be mapped. */ static unsigned long __init init_range_memory_mapping( - unsigned long range_start, - unsigned long range_end) + unsigned long r_start, + unsigned long r_end) { unsigned long start_pfn, end_pfn; unsigned long mapped_ram_size = 0; int i; for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { - u64 start = (u64)start_pfn << PAGE_SHIFT; - u64 end = (u64)end_pfn << PAGE_SHIFT; - - if (end <= range_start) - continue; - - if (start < range_start) - start = range_start; - - if (start >= range_end) + u64 start = clamp_val(PFN_PHYS(start_pfn), r_start, r_end); + u64 end = clamp_val(PFN_PHYS(end_pfn), r_start, r_end); + if (start >= end) continue; - if (end > range_end) - end = range_end; - init_memory_mapping(start, end); - mapped_ram_size += end - start; } -- cgit v1.1 From 94b43c3d86dddf95064fc83e9087448b35f985ff Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:19 -0800 Subject: x86, mm: kill numa_free_all_bootmem() The NO_BOOTMEM version of free_all_bootmem_node() does not really free bootmem at all; it only calls register_page_bootmem_info_node() instead. That is confusing, so free_all_bootmem_node() should be killed. As a first step, this patch removes numa_free_all_bootmem(); that function can be replaced with register_page_bootmem_info() and free_all_bootmem(). Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-43-git-send-email-yinghai@kernel.org Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/numa_64.h | 2 -- arch/x86/mm/init_64.c | 15 +++++++++++---- arch/x86/mm/numa_64.c | 13 ------------- 3 files changed, 11 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 0c05f7a..fe4d2d4 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h @@ -1,6 +1,4 @@ #ifndef _ASM_X86_NUMA_64_H #define _ASM_X86_NUMA_64_H -extern unsigned long numa_free_all_bootmem(void); - #endif /* _ASM_X86_NUMA_64_H */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1d53def..4178530 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -629,6 +629,16 @@ EXPORT_SYMBOL_GPL(arch_add_memory); static struct kcore_list kcore_vsyscall; +static void __init register_page_bootmem_info(void) +{ +#ifdef CONFIG_NUMA + int i; + + for_each_online_node(i) + register_page_bootmem_info_node(NODE_DATA(i)); +#endif +} + void __init mem_init(void) { long codesize, reservedpages, datasize, initsize; @@ -641,11 +651,8 @@ void __init mem_init(void) reservedpages = 0; /* this will put all low memory onto the freelists */ -#ifdef CONFIG_NUMA - totalram_pages = numa_free_all_bootmem(); -#else + register_page_bootmem_info(); totalram_pages = free_all_bootmem(); -#endif absent_pages = absent_pages_in_range(0, max_pfn); reservedpages = max_pfn - totalram_pages - absent_pages; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 92e2711..9405ffc 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -10,16 +10,3 @@ void __init initmem_init(void) { x86_numa_init(); } - -unsigned long __init numa_free_all_bootmem(void) -{ - unsigned long pages = 0; - int i; - - for_each_online_node(i) - pages += free_all_bootmem_node(NODE_DATA(i)); - - pages += free_low_memory_core_early(MAX_NUMNODES); - - return pages; -} -- cgit v1.1 From c074eaac2ab264c94520efff7e896b771de885ae Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:20 -0800 Subject: x86, mm: kill numa_64.h Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-44-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/numa.h | 2 -- arch/x86/include/asm/numa_64.h | 4 ---- arch/x86/kernel/acpi/boot.c | 1 - arch/x86/kernel/cpu/amd.c | 1 - arch/x86/kernel/cpu/intel.c | 1 - arch/x86/kernel/setup.c | 3 --- 6 files changed, 12 deletions(-) delete mode 100644 arch/x86/include/asm/numa_64.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 49119fc..52560a2 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -54,8 +54,6 @@ static inline int numa_cpu_node(int cpu) #ifdef CONFIG_X86_32 # include -#else -# include #endif #ifdef CONFIG_NUMA diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h deleted file mode 100644 index fe4d2d4..0000000 --- a/arch/x86/include/asm/numa_64.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef _ASM_X86_NUMA_64_H -#define _ASM_X86_NUMA_64_H - -#endif /* _ASM_X86_NUMA_64_H */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index e651f7a..4b23aa1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -51,7 +51,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 # include -# include #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9619ba6..913f94f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -12,7 +12,6 @@ #include #ifdef CONFIG_X86_64 -# include # include # include #endif diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 198e019..3b547cc 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -17,7 +17,6 @@ #ifdef CONFIG_X86_64 #include -#include #endif #include "cpu.h" diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 85b62f1..6d29d1f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -108,9 +108,6 @@ #include #include #include -#ifdef CONFIG_X86_64 -#include -#endif #include #include #include -- cgit v1.1 From 9710f581bb4c35589ac046b0cfc0deb7f369fc85 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 16 Nov 2012 19:39:23 -0800 Subject: x86, mm: Let "memmap=" take more entries one time Currently "memmap=" can take only one entry at a time; when we have more entries, we have to pass a separate memmap= for each of them. For PXE booting the command line length is limited, and those extra "memmap=" prefixes waste too much space. This patch lets memmap= take several entries at once, with the entries separated by ','. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1353123563-3103-47-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/e820.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index df06ade..d32abea 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -835,7 +835,7 @@ static int __init parse_memopt(char *p) } early_param("mem", parse_memopt); -static int __init parse_memmap_opt(char *p) +static int __init parse_memmap_one(char *p) { char *oldp; u64 start_at, mem_size; @@ -877,6 +877,20 @@ static int __init parse_memmap_opt(char *p) return *p == '\0' ? 
0 : -EINVAL; } +static int __init parse_memmap_opt(char *str) +{ + while (str) { + char *k = strchr(str, ','); + + if (k) + *k++ = 0; + + parse_memmap_one(str); + str = k; + } + + return 0; +} early_param("memmap", parse_memmap_opt); void __init finish_e820_parsing(void) -- cgit v1.1 From bbee3aec3472fc2ca10b6b1020aec84567ea25ce Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 19 Nov 2012 10:31:37 -0800 Subject: x86: Fix warning about cast from pointer to integer of different size This patch fixes a warning reported by the kbuild test robot where we were casting a pointer to a physical address which represents an integer of a different size. Per the suggestion of Peter Anvin I am replacing it and one other spot where I made a similar cast with an unsigned long. Signed-off-by: Alexander Duyck Link: http://lkml.kernel.org/r/20121119182927.3655.7641.stgit@ahduyck-cp1.jf.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head32.c | 2 +- arch/x86/kernel/head64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index f15db0c..e175548 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -31,7 +31,7 @@ static void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { memblock_reserve(__pa_symbol(_text), - (phys_addr_t)__bss_stop - (phys_addr_t)_text); + (unsigned long)__bss_stop - (unsigned long)_text); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 42f5df1..7b215a5 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -98,7 +98,7 @@ void __init x86_64_start_reservations(char *real_mode_data) copy_bootdata(__va(real_mode_data)); memblock_reserve(__pa_symbol(_text), - (phys_addr_t)__bss_stop - (phys_addr_t)_text); + (unsigned long)__bss_stop - (unsigned long)_text); #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ -- cgit v1.1 From 5e4bf1a55da976a5ed60901bb8801f1024ef9774 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 20 Nov 2012 13:02:51 +0100 Subject: x86/mm: Don't flush the TLB on #WP pmd fixups If we have a write protection #PF and fix up the pmd then the hugetlb code [the only user of pmdp_set_access_flags], in its do_huge_pmd_wp_page() page fault resolution function calls pmdp_set_access_flags() to mark the pmd permissive again, and flushes the TLB. This TLB flush is unnecessary: a flush on #PF is guaranteed on most (all?) x86 CPUs, and even in the worst-case we'll generate a spurious fault. So remove it. Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Paul Turner Cc: Lee Schermerhorn Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Johannes Weiner Cc: Christoph Lameter Cc: Mel Gorman Cc: Hugh Dickins Link: http://lkml.kernel.org/r/20121120120251.GA15742@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pgtable.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8573b83..8a828d7 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -328,7 +328,12 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, if (changed && dirty) { *pmdp = entry; pmd_update_defer(vma->vm_mm, address, pmdp); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + /* + * We had a write-protection fault here and changed the pmd + * to to more permissive. 
No need to flush the TLB for that, + * #PF is architecturally guaranteed to do that and in the + * worst-case we'll generate a spurious fault. + */ } return changed; -- cgit v1.1 From 7f662273e476e2d7ff44f411fa9f17c946480100 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 10 Dec 2012 11:42:30 +0200 Subject: KVM: emulator: implement AAD instruction Windows2000 uses it during boot. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=50921 Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a27e763..47d62e1 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2852,6 +2852,27 @@ static int em_das(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } +static int em_aad(struct x86_emulate_ctxt *ctxt) +{ + u8 al = ctxt->dst.val & 0xff; + u8 ah = (ctxt->dst.val >> 8) & 0xff; + + al = (al + (ah * ctxt->src.val)) & 0xff; + + ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al; + + ctxt->eflags &= ~(X86_EFLAGS_PF | X86_EFLAGS_SF | X86_EFLAGS_ZF); + + if (!al) + ctxt->eflags |= X86_EFLAGS_ZF; + if (!(al & 1)) + ctxt->eflags |= X86_EFLAGS_PF; + if (al & 0x80) + ctxt->eflags |= X86_EFLAGS_SF; + + return X86EMUL_CONTINUE; +} + static int em_call(struct x86_emulate_ctxt *ctxt) { long rel = ctxt->src.val; @@ -3801,7 +3822,7 @@ static const struct opcode opcode_table[256] = { D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), /* 0xD0 - 0xD7 */ D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), - N, N, N, N, + N, I(DstAcc | SrcImmByte | No64, em_aad), N, N, /* 0xD8 - 0xDF */ N, N, N, N, N, N, N, N, /* 0xE0 - 0xE7 */ -- cgit v1.1 From 5e2c688351f4aee9981918661b6c1679f4155f06 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 6 Dec 2012 21:55:10 -0200 Subject: KVM: x86: fix mov immediate emulation for 64-bit operands MOV immediate instruction (opcodes 0xB8-0xBF) may take 64-bit operand. The previous emulation implementation assumes the operand is no longer than 32. Adding OpImm64 for this matter. Fixes https://bugzilla.redhat.com/show_bug.cgi?id=881579 Signed-off-by: Nadav Amit Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 47d62e1..c7547b3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -43,7 +43,7 @@ #define OpCL 9ull /* CL register (for shifts) */ #define OpImmByte 10ull /* 8-bit sign extended immediate */ #define OpOne 11ull /* Implied 1 */ -#define OpImm 12ull /* Sign extended immediate */ +#define OpImm 12ull /* Sign extended up to 32-bit immediate */ #define OpMem16 13ull /* Memory operand (16-bit). */ #define OpMem32 14ull /* Memory operand (32-bit). 
*/ #define OpImmU 15ull /* Immediate operand, zero extended */ @@ -58,6 +58,7 @@ #define OpFS 24ull /* FS */ #define OpGS 25ull /* GS */ #define OpMem8 26ull /* 8-bit zero extended memory operand */ +#define OpImm64 27ull /* Sign extended 16/32/64-bit immediate */ #define OpBits 5 /* Width of operand field */ #define OpMask ((1ull << OpBits) - 1) @@ -101,6 +102,7 @@ #define SrcMemFAddr (OpMemFAddr << SrcShift) #define SrcAcc (OpAcc << SrcShift) #define SrcImmU16 (OpImmU16 << SrcShift) +#define SrcImm64 (OpImm64 << SrcShift) #define SrcDX (OpDX << SrcShift) #define SrcMem8 (OpMem8 << SrcShift) #define SrcMask (OpMask << SrcShift) @@ -3807,7 +3809,7 @@ static const struct opcode opcode_table[256] = { /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ - X8(I(DstReg | SrcImm | Mov, em_mov)), + X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ D2bv(DstMem | SrcImmByte | ModRM), I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), @@ -3971,6 +3973,9 @@ static int decode_imm(struct x86_emulate_ctxt *ctxt, struct operand *op, case 4: op->val = insn_fetch(s32, ctxt); break; + case 8: + op->val = insn_fetch(s64, ctxt); + break; } if (!sign_extension) { switch (op->bytes) { @@ -4049,6 +4054,9 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, case OpImm: rc = decode_imm(ctxt, op, imm_size(ctxt), true); break; + case OpImm64: + rc = decode_imm(ctxt, op, ctxt->op_bytes, true); + break; case OpMem8: ctxt->memop.bytes = 1; goto mem_common; -- cgit v1.1 From f3200d00ea42e485772ff92d6d649aa8eeb640c0 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 10 Dec 2012 14:05:55 +0200 Subject: KVM: inject ExtINT interrupt before APIC interrupts According to Intel SDM Volume 3 Section 10.8.1 "Interrupt Handling with the Pentium 4 and Intel Xeon Processors" and Section 10.8.2 "Interrupt Handling with the P6 Family and Pentium Processors" ExtINT interrupts are sent directly to the processor core for handling. Currently KVM checks APIC before it considers ExtINT interrupts for injection which is backwards from the spec. Make code behave according to the SDM. 
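A hedged condensation of the corrected priority, taken from the kvm_cpu_has_interrupt() hunk below; the !irqchip_in_kernel() path is omitted and the helper name is made up for illustration:

/* Consult the PIC's ExtINT output first, then fall back to the LAPIC. */
static int has_injectable_interrupt(struct kvm_vcpu *v)
{
	if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output)
		return pic_irqchip(v->kvm)->output;	/* ExtINT via PIC */

	return kvm_apic_has_interrupt(v) != -1;		/* LAPIC */
}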
Signed-off-by: Gleb Natapov Acked-by: "Zhang, Yang Z" Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8259.c | 2 ++ arch/x86/kvm/irq.c | 26 ++++++++------------------ 2 files changed, 10 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 848206d..cc31f7c 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -241,6 +241,8 @@ int kvm_pic_read_irq(struct kvm *kvm) int irq, irq2, intno; struct kvm_pic *s = pic_irqchip(kvm); + s->output = 0; + pic_lock(s); irq = pic_get_irq(&s->pics[0]); if (irq >= 0) { diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 7e06ba1..ebd98d0 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -48,14 +48,10 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.pending; - if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ - if (kvm_apic_accept_pic_intr(v)) { - s = pic_irqchip(v->kvm); /* PIC */ - return s->output; - } else - return 0; - } - return 1; + if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output) + return pic_irqchip(v->kvm)->output; /* PIC */ + + return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ } EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); @@ -65,20 +61,14 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { struct kvm_pic *s; - int vector; if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.nr; - vector = kvm_get_apic_interrupt(v); /* APIC */ - if (vector == -1) { - if (kvm_apic_accept_pic_intr(v)) { - s = pic_irqchip(v->kvm); - s->output = 0; /* PIC */ - vector = kvm_pic_read_irq(v->kvm); - } - } - return vector; + if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output) + return kvm_pic_read_irq(v->kvm); /* PIC */ + + return kvm_get_apic_interrupt(v); /* APIC */ } EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); -- cgit v1.1 From bbacc0c111c3c5d1f3192b8cc1642b9c3954f80d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 10 Dec 2012 10:33:09 -0700 Subject: KVM: Rename KVM_MEMORY_SLOTS -> KVM_USER_MEM_SLOTS It's easy to confuse KVM_MEMORY_SLOTS and KVM_MEM_SLOTS_NUM. One is the user accessible slots and the other is user + private. Make this more obvious. 
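A hedged illustration of the confusion being removed, using the x86.c hunk below: a bounds check spelled if (log->slot >= KVM_MEMORY_SLOTS) reads as though it covered every slot, yet it only guards the user-visible ones; after the rename the same check, if (log->slot >= KVM_USER_MEM_SLOTS), says exactly what it tests, while KVM_MEM_SLOTS_NUM keeps naming the user plus private total.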
Reviewed-by: Gleb Natapov Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/include/asm/vmx.h | 6 +++--- arch/x86/kvm/x86.c | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dc87b65..c7df6ff 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -33,10 +33,10 @@ #define KVM_MAX_VCPUS 254 #define KVM_SOFT_MAX_VCPUS 160 -#define KVM_MEMORY_SLOTS 32 +#define KVM_USER_MEM_SLOTS 32 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 4 -#define KVM_MEM_SLOTS_NUM (KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) +#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) #define KVM_MMIO_SIZE 16 diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index c2d56b3..e385df9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -427,9 +427,9 @@ enum vmcs_field { #define AR_RESERVD_MASK 0xfffe0f00 -#define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) -#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) -#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) +#define TSS_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 0) +#define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 1) +#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_USER_MEM_SLOTS + 2) #define VMX_NR_VPIDS (1 << 16) #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 76f5446..8160747 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2518,7 +2518,7 @@ int kvm_dev_ioctl_check_extension(long ext) r = KVM_MAX_VCPUS; break; case KVM_CAP_NR_MEMSLOTS: - r = KVM_MEMORY_SLOTS; + r = KVM_USER_MEM_SLOTS; break; case KVM_CAP_PV_MMU: /* obsolete */ r = 0; @@ -3435,7 +3435,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= KVM_USER_MEM_SLOTS) goto out; memslot = id_to_memslot(kvm->memslots, log->slot); @@ -6845,7 +6845,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; /* Prevent internal slot pages from being moved by fork()/COW. */ - if (memslot->id >= KVM_MEMORY_SLOTS) + if (memslot->id >= KVM_USER_MEM_SLOTS) map_flags = MAP_SHARED | MAP_ANONYMOUS; /*To keep backward compatibility with older userspace, -- cgit v1.1 From 0743247fbf0c4a27185b2aa1fdda91d0745dfed1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 10 Dec 2012 10:33:15 -0700 Subject: KVM: Make KVM_PRIVATE_MEM_SLOTS optional Seems like everyone copied x86 and defined 4 private memory slots that never actually get used. Even x86 only uses 3 of the 4. These aren't exposed so there's no need to add padding. 
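(For the "only uses 3 of the 4" claim: the three private slots actually in use are visible in the vmx.h hunk above, namely TSS_PRIVATE_MEMSLOT, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT and IDENTITY_PAGETABLE_PRIVATE_MEMSLOT.)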
Reviewed-by: Gleb Natapov Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c7df6ff..51d5210 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -34,8 +34,8 @@ #define KVM_MAX_VCPUS 254 #define KVM_SOFT_MAX_VCPUS 160 #define KVM_USER_MEM_SLOTS 32 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 4 +/* memory slots that are not exposed to userspace */ +#define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) #define KVM_MMIO_SIZE 16 -- cgit v1.1 From f82a8cfe9354f5cdea55ebeceba3fd19051d3ee8 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 10 Dec 2012 10:33:21 -0700 Subject: KVM: struct kvm_memory_slot.user_alloc -> bool There's no need for this to be an int, it holds a boolean. Move to the end of the struct for alignment. Reviewed-by: Gleb Natapov Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 6 +++--- arch/x86/kvm/x86.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9120ae1..b3101e3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3667,7 +3667,7 @@ static int alloc_apic_access_page(struct kvm *kvm) kvm_userspace_mem.flags = 0; kvm_userspace_mem.guest_phys_addr = 0xfee00000ULL; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); if (r) goto out; @@ -3697,7 +3697,7 @@ static int alloc_identity_pagetable(struct kvm *kvm) kvm_userspace_mem.guest_phys_addr = kvm->arch.ept_identity_map_addr; kvm_userspace_mem.memory_size = PAGE_SIZE; - r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0); + r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, false); if (r) goto out; @@ -4251,7 +4251,7 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) .flags = 0, }; - ret = kvm_set_memory_region(kvm, &tss_mem, 0); + ret = kvm_set_memory_region(kvm, &tss_mem, false); if (ret) return ret; kvm->arch.tss_addr = addr; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8160747..1c9c834 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6839,7 +6839,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { int npages = memslot->npages; int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; @@ -6875,7 +6875,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc) + bool user_alloc) { int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; -- cgit v1.1 From 0f888f5acd0cd806d4fd9f4067276b3855a13309 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 10 Dec 2012 10:33:38 -0700 Subject: KVM: Increase user memory slots on x86 to 125 With the 3 private slots, this gives us a nice round 128 slots total. The primary motivation for this is to support more assigned devices. 
Each assigned device can theoretically use up to 8 slots (6 MMIO BARs, 1 ROM BAR, 1 spare for a split MSI-X table mapping) though it's far more typical for a device to use 3-4 slots. If we assume a typical VM uses a dozen slots for non-assigned devices purposes, we should always be able to support 14 worst case assigned devices or 28 to 37 typical devices. Reviewed-by: Gleb Natapov Signed-off-by: Alex Williamson Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 51d5210..c431b33 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -33,7 +33,7 @@ #define KVM_MAX_VCPUS 254 #define KVM_SOFT_MAX_VCPUS 160 -#define KVM_USER_MEM_SLOTS 32 +#define KVM_USER_MEM_SLOTS 125 /* memory slots that are not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 3 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS) -- cgit v1.1 From e11ae1a102b46f76441e328a2743ae5d6e201423 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Fri, 14 Dec 2012 15:23:16 +0200 Subject: KVM: remove unused variable. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/irq.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index ebd98d0..b111aee 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -43,8 +43,6 @@ EXPORT_SYMBOL(kvm_cpu_has_pending_timer); */ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) { - struct kvm_pic *s; - if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.pending; @@ -60,8 +58,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { - struct kvm_pic *s; - if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.nr; -- cgit v1.1 From d4b06c2d4cce466e2d62163c0a954e1b2ce96f8b Mon Sep 17 00:00:00 2001 From: Nickolai Zeldovich Date: Sat, 15 Dec 2012 06:34:37 -0500 Subject: kvm: fix i8254 counter 0 wraparound The kvm i8254 emulation for counter 0 (but not for counters 1 and 2) has at least two bugs in mode 0: 1. The OUT bit, computed by pit_get_out(), is never set high. 2. The counter value, computed by pit_get_count(), wraps back around to the initial counter value, rather than wrapping back to 0xFFFF (which is the behavior described in the comment in __kpit_elapsed, the behavior implemented by qemu, and the behavior observed on AMD hardware). The bug stems from __kpit_elapsed computing the elapsed time mod the initial counter value (stored as nanoseconds in ps->period). This is both unnecessary (none of the callers of kpit_elapsed expect the value to be at most the initial counter value) and incorrect (it causes pit_get_count to appear to wrap around to the initial counter value rather than 0xFFFF). Removing this mod from __kpit_elapsed fixes both of the above bugs. 
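A worked example with illustrative numbers (hedged; the real pit_get_count() conversion between nanoseconds and ticks is more involved): program counter 0 in mode 0 with an initial count of 1000 and let 1500 ticks elapse. The old code reduced the elapsed time mod the period, 1500 mod 1000 = 500, so the guest read back roughly 1000 - 500 = 500, as if the counter had reloaded its initial value. Without the mod, the count computes as 1000 - 1500 = -500, which truncates to 0xFE0C in 16 bits: the counter has wrapped past zero toward 0xFFFF and keeps counting down, matching the hardware behavior described above.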
Signed-off-by: Nickolai Zeldovich Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/i8254.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 11300d2..c1d30b2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -122,7 +122,6 @@ static s64 __kpit_elapsed(struct kvm *kvm) */ remaining = hrtimer_get_remaining(&ps->timer); elapsed = ps->period - ktime_to_ns(remaining); - elapsed = mod_64(elapsed, ps->period); return elapsed; } -- cgit v1.1 From 6444174548f6556fcf26c84d5296defd295914c4 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Dec 2012 14:11:34 -0500 Subject: arch/x86/platform/uv: use ARRAY_SIZE where possible Signed-off-by: Sasha Levin Link: http://lkml.kernel.org/r/1356030701-16284-26-git-send-email-sasha.levin@oracle.com Cc: Cliff Wickman Cc: Alex Shi Signed-off-by: H. Peter Anvin --- arch/x86/platform/uv/tlb_uv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index b8b3a37..933bea5 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1463,7 +1463,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user, } if (input_arg == 0) { - elements = sizeof(stat_description)/sizeof(*stat_description); + elements = ARRAY_SIZE(stat_description); printk(KERN_DEBUG "# cpu: cpu number\n"); printk(KERN_DEBUG "Sender statistics:\n"); for (i = 0; i < elements; i++) @@ -1504,7 +1504,7 @@ static int parse_tunables_write(struct bau_control *bcp, char *instr, char *q; int cnt = 0; int val; - int e = sizeof(tunables) / sizeof(*tunables); + int e = ARRAY_SIZE(tunables); p = instr + strspn(instr, WHITESPACE); q = p; -- cgit v1.1 From 7be0b06520e8219e28f54cd05e1310e7e2d921ee Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Dec 2012 14:11:35 -0500 Subject: um: don't compare a pointer to 0 Signed-off-by: Sasha Levin Link: http://lkml.kernel.org/r/1356030701-16284-27-git-send-email-sasha.levin@oracle.com Cc: Richard Weinberger Signed-off-by: H. Peter Anvin --- arch/x86/um/fault.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c index 8784ab3..84ac7f7 100644 --- a/arch/x86/um/fault.c +++ b/arch/x86/um/fault.c @@ -20,7 +20,7 @@ int arch_fixup(unsigned long address, struct uml_pt_regs *regs) const struct exception_table_entry *fixup; fixup = search_exception_tables(address); - if (fixup != 0) { + if (fixup) { UPT_IP(regs) = fixup->fixup; return 1; } -- cgit v1.1 From 8f170faeb458532282dbfa870f456e42c11d1ebb Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 20 Dec 2012 14:11:36 -0500 Subject: x86, apb_timer: remove unused variable percpu_timer Signed-off-by: Sasha Levin Link: http://lkml.kernel.org/r/1356030701-16284-28-git-send-email-sasha.levin@oracle.com Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/apb_timer.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index afdc3f75..cc74fd0 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -311,7 +311,6 @@ void __init apbt_time_init(void) #ifdef CONFIG_SMP int i; struct sfi_timer_table_entry *p_mtmr; - unsigned int percpu_timer; struct apbt_dev *adev; #endif @@ -346,13 +345,10 @@ void __init apbt_time_init(void) return; } pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); - if (num_possible_cpus() <= sfi_mtimer_num) { - percpu_timer = 1; + if (num_possible_cpus() <= sfi_mtimer_num) apbt_num_timers_used = num_possible_cpus(); - } else { - percpu_timer = 0; + else apbt_num_timers_used = 1; - } pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); /* here we set up per CPU timer data structure */ -- cgit v1.1 From ffee0de411fd4f74f3b788892eeb075abbf26c52 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 20 Dec 2012 21:51:55 +0000 Subject: x86: Default to ARCH=x86 to avoid overriding CONFIG_64BIT It is easy to waste a bunch of time when one takes a 32-bit .config from a test machine and try to build it on a faster 64-bit system, and its existing setting of CONFIG_64BIT=n gets *changed* to match the build host. Similarly, if one has an existing build tree it is easy to trash an entire build tree that way. This is because the default setting for $ARCH when discovered from 'uname' is one of the legacy pre-x86-merge values (i386 or x86_64), which effectively force the setting of CONFIG_64BIT to match. We should default to ARCH=x86 instead, finally completing the merge that we started so long ago. This patch preserves the behaviour of the legacy ARCH settings for commands such as: make ARCH=x86_64 randconfig make ARCH=i386 randconfig ... since making the value of CONFIG_64BIT actually random in that situation is not desirable. In time, perhaps we can retire this legacy use of the old ARCH= values. We already have a way to override values for *any* config option, using $KCONFIG_ALLCONFIG, so it could be argued that we don't necessarily need to keep ARCH={i386,x86_64} around as a special case just for overriding CONFIG_64BIT. We'd probably at least want to add a way to override config options from the command line ('make CONFIG_FOO=y oldconfig') before we talk about doing that though. Signed-off-by: David Woodhouse Link: http://lkml.kernel.org/r/1356040315.3198.51.camel@shinybook.infradead.org Signed-off-by: H. 
Peter Anvin --- arch/x86/Kconfig | 2 +- arch/x86/Makefile | 4 ++++ arch/x86/configs/i386_defconfig | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46c3bff..9084c7b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1,7 +1,7 @@ # Select 32 or 64 bit config 64BIT bool "64-bit kernel" if ARCH = "x86" - default ARCH = "x86_64" + default ARCH != "i386" ---help--- Say yes to build a 64-bit kernel - formerly known as x86_64 Say no to build a 32-bit kernel - formerly known as i386 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 05afcca..fa981ca 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -2,7 +2,11 @@ # select defconfig based on actual architecture ifeq ($(ARCH),x86) + ifeq ($(shell uname -m),x86_64) + KBUILD_DEFCONFIG := x86_64_defconfig + else KBUILD_DEFCONFIG := i386_defconfig + endif else KBUILD_DEFCONFIG := $(ARCH)_defconfig endif diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 5598547..9444708 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,3 +1,4 @@ +# CONFIG_64BIT is not set CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set CONFIG_SYSVIPC=y -- cgit v1.1 From 07f42f5f25dc214a33214159fc8b62b984b713eb Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:49 +0200 Subject: KVM: VMX: cleanup rmode_segment_valid() Set segment fields explicitly instead of using binary operations. No behaviour changes. Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b3101e3..265fdd3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3380,13 +3380,16 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) u32 ar; vmx_get_segment(vcpu, &var, seg); + var.dpl = 0x3; + var.g = 0; + var.db = 0; ar = vmx_segment_access_rights(&var); if (var.base != (var.selector << 4)) return false; if (var.limit < 0xffff) return false; - if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3) + if (ar != 0xf3) return false; return true; -- cgit v1.1 From 0647f4aa8c58a7e5adb873b485c83e0c93d9c6d1 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:50 +0200 Subject: KVM: VMX: relax check for CS register in rmode_segment_valid() rmode_segment_valid() checks if segment descriptor can be used to enter vm86 mode. VMX spec mandates that in vm86 mode CS register will be of type data, not code. Lets allow guest entry with vm86 mode if the only problem with CS register is incorrect type. Otherwise entire real mode will be emulated. 
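A decoding note for the 0xf3 constant in the surrounding rmode_segment_valid() hunks (a hedged reading of the Intel SDM access-rights layout): 0xf3 is P = 1, DPL = 3, S = 1, type = 3, i.e. an accessed read/write data segment at privilege level 3, exactly the vm86-compatible shape the message describes. Forcing var.type = 0x3 for CS before computing the access rights is what lets an otherwise-valid code segment pass the same 0xf3 comparison.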
Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 265fdd3..7c2c054 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3383,6 +3383,8 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) var.dpl = 0x3; var.g = 0; var.db = 0; + if (seg == VCPU_SREG_CS) + var.type = 0x3; ar = vmx_segment_access_rights(&var); if (var.base != (var.selector << 4)) -- cgit v1.1 From c6ad115348035f474d6fb81005bb4764bdd128ef Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:51 +0200 Subject: KVM: VMX: return correct segment limit and flags for CS/SS registers in real mode VMX without unrestricted mode cannot virtualize real mode, so if emulate_invalid_guest_state=0 kvm uses vm86 mode to approximate it. Sometimes, when guest moves from protected mode to real mode, it leaves segment descriptors in a state not suitable for use by vm86 mode virtualization, so we keep shadow copy of segment descriptors for internal use and load fake register to VMCS for guest entry to succeed. Till now we kept shadow for all segments except SS and CS (for SS and CS we returned parameters directly from VMCS), but since commit a5625189f6810 emulator enforces segment limits in real mode. This causes #GP during move from protected mode to real mode when emulator fetches first instruction after moving to real mode since it uses incorrect CS base and limit to linearize the %rip. Fix by keeping shadow for SS and CS too. Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7c2c054..792d9cc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2856,6 +2856,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); vmx->emulation_required = 1; vmx->rmode.vm86_active = 1; @@ -3171,10 +3173,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx = to_vmx(vcpu); u32 ar; - if (vmx->rmode.vm86_active - && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES - || seg == VCPU_SREG_DS || seg == VCPU_SREG_FS - || seg == VCPU_SREG_GS)) { + if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { *var = vmx->rmode.segs[seg]; if (seg == VCPU_SREG_TR || var->selector == vmx_read_guest_seg_selector(vmx, seg)) -- cgit v1.1 From beb853ffeccbfa626f30038e816d61187103c455 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:52 +0200 Subject: KVM: VMX: use fix_rmode_seg() to fix all code/data segments The code for SS and CS does the same thing fix_rmode_seg() is doing. Use it instead of hand crafted code. 
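[ ed. note: illustrative arithmetic, not from the patch — fix_rmode_seg() restores the classic real-mode tie between selector and base (a selector addresses the 64KiB window starting at selector * 16). A hedged standalone sketch of that fixup, with invented parameter names:

	#include <stdint.h>

	/* Derive a vm86-compatible selector/base/limit from the base the
	 * guest left behind, e.g. base 0xf0000 -> selector 0xf000. */
	static void rmode_seg_fixup(uint32_t base, uint16_t *selector,
				    uint32_t *fixed_base, uint32_t *limit)
	{
		*selector = base >> 4;		/* paragraph index */
		*fixed_base = base & 0xffff0;	/* keep paragraph alignment */
		*limit = 0xffff;		/* vm86 segments span 64KiB */
	}

A base with any of its low four bits set cannot be represented this way, which is why fix_rmode_seg() warns about bases that are not paragraph aligned. ]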
Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 792d9cc..9e784c2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3315,30 +3315,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, * unrestricted guest like Westmere to older host that don't have * unrestricted guest like Nehelem. */ - if (vmx->rmode.vm86_active) { - switch (seg) { - case VCPU_SREG_CS: - vmcs_write32(GUEST_CS_AR_BYTES, 0xf3); - vmcs_write32(GUEST_CS_LIMIT, 0xffff); - if (vmcs_readl(GUEST_CS_BASE) == 0xffff0000) - vmcs_writel(GUEST_CS_BASE, 0xf0000); - vmcs_write16(GUEST_CS_SELECTOR, - vmcs_readl(GUEST_CS_BASE) >> 4); - break; - case VCPU_SREG_ES: - case VCPU_SREG_DS: - case VCPU_SREG_GS: - case VCPU_SREG_FS: - fix_rmode_seg(seg, &vmx->rmode.segs[seg]); - break; - case VCPU_SREG_SS: - vmcs_write16(GUEST_SS_SELECTOR, - vmcs_readl(GUEST_SS_BASE) >> 4); - vmcs_write32(GUEST_SS_LIMIT, 0xffff); - vmcs_write32(GUEST_SS_AR_BYTES, 0xf3); - break; - } - } + if (vmx->rmode.vm86_active && var->s) + fix_rmode_seg(seg, &vmx->rmode.segs[seg]); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit v1.1 From 39dcfb95def4646ecdb76ea7a7491e8420dce7a7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:53 +0200 Subject: KVM: VMX: remove redundant code from vmx_set_segment() Segment descriptor's base is fixed by a call to fix_rmode_seg(). No need to do it twice. Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9e784c2..9b8edd2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3268,7 +3268,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - u32 ar; + u32 ar = 0; vmx_segment_cache_clear(vmx); @@ -3280,15 +3280,9 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - if (vmx->rmode.vm86_active && var->s) { + if (vmx->rmode.vm86_active && var->s) vmx->rmode.segs[seg] = *var; - /* - * Hack real-mode segments into vm86 compatibility. - */ - if (var->base == 0xffff0000 && var->selector == 0xf000) - vmcs_writel(sf->base, 0xf0000); - ar = 0xf3; - } else + else ar = vmx_segment_access_rights(var); /* -- cgit v1.1 From 1ecd50a9474c4bfa3129d90cfcf1c1eb4fbf19e7 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:54 +0200 Subject: KVM: VMX: clean-up vmx_set_segment() Move all vm86_active logic into one place.
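[ ed. note: the magic access-rights constants recurring in these hunks (0xf3, 0x9b, 0x93) are packed VMX AR values. A small standalone decoder, illustrative only, assuming the low-byte layout from the SDM: type in bits 3:0, s in bit 4, dpl in bits 6:5, present in bit 7:

	#include <stdio.h>

	static void decode_ar(unsigned int ar)
	{
		printf("ar=%#x: type=%u s=%u dpl=%u present=%u\n",
		       ar, ar & 0xf, (ar >> 4) & 1, (ar >> 5) & 3,
		       (ar >> 7) & 1);
	}

	int main(void)
	{
		decode_ar(0xf3);	/* rw data, accessed; dpl=3: vm86 segment */
		decode_ar(0x9b);	/* exec/read code, accessed; dpl=0 */
		decode_ar(0x93);	/* rw data, accessed; dpl=0 */
		return 0;
	}
]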
Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9b8edd2..4bd1c22 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3271,19 +3271,21 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, u32 ar = 0; vmx_segment_cache_clear(vmx); + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); - if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) { - vmcs_write16(sf->selector, var->selector); - vmx->rmode.segs[VCPU_SREG_TR] = *var; + if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { + vmx->rmode.segs[seg] = *var; + if (seg == VCPU_SREG_TR) + vmcs_write16(sf->selector, var->selector); + else if (var->s) + fix_rmode_seg(seg, &vmx->rmode.segs[seg]); return; } + vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - if (vmx->rmode.vm86_active && var->s) - vmx->rmode.segs[seg] = *var; - else - ar = vmx_segment_access_rights(var); + ar = vmx_segment_access_rights(var); /* * Fix the "Accessed" bit in AR field of segment registers for older @@ -3300,17 +3302,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, ar |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, ar); - __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); - - /* - * Fix segments for real mode guest in hosts that don't have - * "unrestricted_mode" or it was disabled. - * This is done to allow migration of the guests from hosts with - * unrestricted guest like Westmere to older host that don't have - * unrestricted guest like Nehelem. - */ - if (vmx->rmode.vm86_active && var->s) - fix_rmode_seg(seg, &vmx->rmode.segs[seg]); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit v1.1 From f924d66d278d5da890f3098805b0450a4ef66c32 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 12 Dec 2012 19:10:55 +0200 Subject: KVM: VMX: remove unneeded temporary variable from vmx_set_segment() Reviewed-by: Marcelo Tosatti Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4bd1c22..23d5aec 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3268,7 +3268,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - u32 ar = 0; vmx_segment_cache_clear(vmx); __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); @@ -3285,7 +3284,6 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_writel(sf->base, var->base); vmcs_write32(sf->limit, var->limit); vmcs_write16(sf->selector, var->selector); - ar = vmx_segment_access_rights(var); /* * Fix the "Accessed" bit in AR field of segment registers for older @@ -3299,9 +3297,9 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, * kvm hack. 
*/ if (enable_unrestricted_guest && (seg != VCPU_SREG_LDTR)) - ar |= 0x1; /* Accessed */ + var->type |= 0x1; /* Accessed */ - vmcs_write32(sf->ar_bytes, ar); + vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit v1.1 From 4d899be584d4b4c5d6b49d655176b25cebf6ff1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 21 Dec 2012 17:57:05 -0800 Subject: x86/mce: don't use [delayed_]work_pending() There's no need to test whether a (delayed) work item is pending before queueing, flushing or cancelling it. Most uses are unnecessary and quite a few of them are buggy. Remove unnecessary pending tests from x86/mce. Only compile tested. v2: Local var work removed from mce_schedule_work() as suggested by Borislav. Signed-off-by: Tejun Heo Acked-by: Borislav Petkov Cc: Tony Luck Cc: linux-edac@vger.kernel.org --- arch/x86/kernel/cpu/mcheck/mce.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 80dbda8..fc7608a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -512,11 +512,8 @@ int mce_available(struct cpuinfo_x86 *c) static void mce_schedule_work(void) { - if (!mce_ring_empty()) { - struct work_struct *work = &__get_cpu_var(mce_work); - if (!work_pending(work)) - schedule_work(work); - } + if (!mce_ring_empty()) + schedule_work(&__get_cpu_var(mce_work)); } DEFINE_PER_CPU(struct irq_work, mce_irq_work); @@ -1351,12 +1348,7 @@ int mce_notify_irq(void) /* wake processes polling /dev/mcelog */ wake_up_interruptible(&mce_chrdev_wait); - /* - * There is no risk of missing notifications because - * work_pending is always cleared before the function is - * executed. - */ - if (mce_helper[0] && !work_pending(&mce_trigger_work)) + if (mce_helper[0]) schedule_work(&mce_trigger_work); if (__ratelimit(&ratelimit)) -- cgit v1.1 From 11393a077dcfa7fb827d957f0305fc369d402a5e Mon Sep 17 00:00:00 2001 From: Jesse Larrew Date: Mon, 10 Dec 2012 15:31:51 -0600 Subject: x86: kvm_para: fix typo in hypercall comments Correct a typo in the comment explaining hypercalls. Signed-off-by: Jesse Larrew Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_para.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index eb3e9d8..f49c16d 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -122,7 +122,7 @@ static inline bool kvm_check_and_clear_guest_paused(void) * * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively. * The hypercall number should be placed in rax and the return value will be - * placed in rax. No other registers will be clobbered unless explicited + * placed in rax. No other registers will be clobbered unless explicitly * noted by the particular hypercall. */ -- cgit v1.1 From 3a78a4f46302bfc83602a53dfa4dcbe76a7a1f5f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:42 +0200 Subject: KVM: emulator: drop RPL check from linearize() function According to Intel SDM Vol3 Section 5.5 "Privilege Levels" and 5.6 "Privilege Level Checking When Accessing Data Segments" RPL checking is done during loading of a segment selector, not during data access. We already do checking during segment selector loading, so drop the check during data access.
Checking RPL during data access triggers #GP if after transition from real mode to protected mode RPL bits in a segment selector are set. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c7547b3..a3d31e3 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -665,7 +665,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, ulong la; u32 lim; u16 sel; - unsigned cpl, rpl; + unsigned cpl; la = seg_base(ctxt, addr.seg) + addr.ea; switch (ctxt->mode) { @@ -699,11 +699,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt, goto bad; } cpl = ctxt->ops->cpl(ctxt); - if (ctxt->mode == X86EMUL_MODE_REAL) - rpl = 0; - else - rpl = sel & 3; - cpl = max(cpl, rpl); if (!(desc.type & 8)) { /* data segment */ if (cpl > desc.dpl) -- cgit v1.1 From 045a282ca41505184e8fc805335d1f5aae0c8a03 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:43 +0200 Subject: KVM: emulator: implement fninit, fnstsw, fnstcw Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 125 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a3d31e3..53c5ad6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -115,6 +115,7 @@ #define GroupDual (2<<15) /* Alternate decoding of mod == 3 */ #define Prefix (3<<15) /* Instruction varies with 66/f2/f3 prefix */ #define RMExt (4<<15) /* Opcode extension in ModRM r/m if mod == 3 */ +#define Escape (5<<15) /* Escape to coprocessor instruction */ #define Sse (1<<18) /* SSE Vector instruction */ /* Generic ModRM decode. */ #define ModRM (1<<19) @@ -166,6 +167,7 @@ struct opcode { const struct opcode *group; const struct group_dual *gdual; const struct gprefix *gprefix; + const struct escape *esc; } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -182,6 +184,11 @@ struct gprefix { struct opcode pfx_f3; }; +struct escape { + struct opcode op[8]; + struct opcode high[64]; +}; + /* EFLAGS bit definitions. 
*/ #define EFLG_ID (1<<21) #define EFLG_VIP (1<<20) @@ -991,6 +998,53 @@ static void write_mmx_reg(struct x86_emulate_ctxt *ctxt, u64 *data, int reg) ctxt->ops->put_fpu(ctxt); } +static int em_fninit(struct x86_emulate_ctxt *ctxt) +{ + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + + ctxt->ops->get_fpu(ctxt); + asm volatile("fninit"); + ctxt->ops->put_fpu(ctxt); + return X86EMUL_CONTINUE; +} + +static int em_fnstcw(struct x86_emulate_ctxt *ctxt) +{ + u16 fcw; + + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + + ctxt->ops->get_fpu(ctxt); + asm volatile("fnstcw %0": "+m"(fcw)); + ctxt->ops->put_fpu(ctxt); + + /* force 2 byte destination */ + ctxt->dst.bytes = 2; + ctxt->dst.val = fcw; + + return X86EMUL_CONTINUE; +} + +static int em_fnstsw(struct x86_emulate_ctxt *ctxt) +{ + u16 fsw; + + if (ctxt->ops->get_cr(ctxt, 0) & (X86_CR0_TS | X86_CR0_EM)) + return emulate_nm(ctxt); + + ctxt->ops->get_fpu(ctxt); + asm volatile("fnstsw %0": "+m"(fsw)); + ctxt->ops->put_fpu(ctxt); + + /* force 2 byte destination */ + ctxt->dst.bytes = 2; + ctxt->dst.val = fsw; + + return X86EMUL_CONTINUE; +} + static void decode_register_operand(struct x86_emulate_ctxt *ctxt, struct operand *op) { @@ -3590,6 +3644,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) } #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } +#define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } @@ -3725,6 +3780,69 @@ static const struct gprefix pfx_vmovntpx = { I(0, em_mov), N, N, N, }; +static const struct escape escape_d9 = { { + N, N, N, N, N, N, N, I(DstMem, em_fnstcw), +}, { + /* 0xC0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xC8 - 0xCF */ + N, N, N, N, N, N, N, N, + /* 0xD0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xD8 - 0xDF */ + N, N, N, N, N, N, N, N, + /* 0xE0 - 0xE7 */ + N, N, N, N, N, N, N, N, + /* 0xE8 - 0xEF */ + N, N, N, N, N, N, N, N, + /* 0xF0 - 0xF7 */ + N, N, N, N, N, N, N, N, + /* 0xF8 - 0xFF */ + N, N, N, N, N, N, N, N, +} }; + +static const struct escape escape_db = { { + N, N, N, N, N, N, N, N, +}, { + /* 0xC0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xC8 - 0xCF */ + N, N, N, N, N, N, N, N, + /* 0xD0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xD8 - 0xDF */ + N, N, N, N, N, N, N, N, + /* 0xE0 - 0xE7 */ + N, N, N, I(ImplicitOps, em_fninit), N, N, N, N, + /* 0xE8 - 0xEF */ + N, N, N, N, N, N, N, N, + /* 0xF0 - 0xF7 */ + N, N, N, N, N, N, N, N, + /* 0xF8 - 0xFF */ + N, N, N, N, N, N, N, N, +} }; + +static const struct escape escape_dd = { { + N, N, N, N, N, N, N, I(DstMem, em_fnstsw), +}, { + /* 0xC0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xC8 - 0xCF */ + N, N, N, N, N, N, N, N, + /* 0xD0 - 0xC7 */ + N, N, N, N, N, N, N, N, + /* 0xD8 - 0xDF */ + N, N, N, N, N, N, N, N, + /* 0xE0 - 0xE7 */ + N, N, N, N, N, N, N, N, + /* 0xE8 - 0xEF */ + N, N, N, N, N, N, N, N, + /* 0xF0 - 0xF7 */ + N, N, N, N, N, N, N, N, + /* 0xF8 - 0xFF */ + N, N, N, N, N, N, N, N, +} }; + static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ I6ALU(Lock, em_add), @@ -3821,7 +3939,7 @@ static const struct opcode opcode_table[256] = { D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), N, I(DstAcc | SrcImmByte | No64, em_aad), N, N, 
/* 0xD8 - 0xDF */ - N, N, N, N, N, N, N, N, + N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, /* 0xE0 - 0xE7 */ X3(I(SrcImmByte, em_loop)), I(SrcImmByte, em_jcxz), @@ -4246,6 +4364,12 @@ done_prefixes: case 0xf3: opcode = opcode.u.gprefix->pfx_f3; break; } break; + case Escape: + if (ctxt->modrm > 0xbf) + opcode = opcode.u.esc->high[ctxt->modrm - 0xc0]; + else + opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7]; + break; default: return EMULATION_FAILED; } -- cgit v1.1 From 89efbed02cfd7e9ce3324de0b44a70ee1c716fac Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:44 +0200 Subject: KVM: VMX: make rmode_segment_valid() more strict. Currently it allows entering vm86 mode if the segment limit is greater than 0xffff or the db bit is set. Either of those can cause incorrect execution of instructions by the cpu, since in vm86 mode the limit will be set to 0xffff and db will be forced to 0. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 23d5aec..7ebcac2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3341,15 +3341,13 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg) vmx_get_segment(vcpu, &var, seg); var.dpl = 0x3; - var.g = 0; - var.db = 0; if (seg == VCPU_SREG_CS) var.type = 0x3; ar = vmx_segment_access_rights(&var); if (var.base != (var.selector << 4)) return false; - if (var.limit < 0xffff) + if (var.limit != 0xffff) return false; if (ar != 0xf3) return false; -- cgit v1.1 From d99e415275dd3f757b75981adad8645cdc26da45 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:45 +0200 Subject: KVM: VMX: fix emulation of invalid guest state. Currently when emulation of invalid guest state is enabled (emulate_invalid_guest_state=1) segment registers are still sometimes fixed for entry to vm86 mode. Segment register fixing is avoided in enter_rmode(), but vmx_set_segment() still does it unconditionally. The patch fixes it.
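[ ed. note: a hedged sketch, not from the patch, of the policy this change establishes when a segment register is written while the vcpu is in vm86 mode; the names below are stand-ins for vmx->rmode.segs[], fix_rmode_seg() and the VMCS fields:

	#include <stdbool.h>

	struct seg { unsigned int base, limit, selector, ar; };

	static bool emulate_invalid_guest_state;

	static void rmode_seg_write(struct seg *shadow, struct seg *vmcs,
				    const struct seg *written)
	{
		*shadow = *written;	/* always remember what the guest wrote */
		*vmcs = *written;
		if (!emulate_invalid_guest_state) {
			/* coerce only the VMCS copy into a vm86-loadable shape */
			vmcs->selector = vmcs->base >> 4;
			vmcs->base &= 0xffff0;
			vmcs->limit = 0xffff;
			vmcs->ar = 0xf3;
		}
		/* else: load the guest values and emulate until the guest
		 * state becomes valid again */
	}
]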
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 122 +++++++++++++++++++++++++++++------------------------ 1 file changed, 68 insertions(+), 54 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7ebcac2..9dff310 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -624,6 +624,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +static bool guest_state_valid(struct kvm_vcpu *vcpu); +static u32 vmx_segment_access_rights(struct kvm_segment *var); static DEFINE_PER_CPU(struct vmcs *, vmxarea); static DEFINE_PER_CPU(struct vmcs *, current_vmcs); @@ -2758,18 +2760,23 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } -static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) +static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, + struct kvm_segment *save) { - const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; - struct kvm_segment tmp = *save; - - if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) { - tmp.base = vmcs_readl(sf->base); - tmp.selector = vmcs_read16(sf->selector); - tmp.dpl = tmp.selector & SELECTOR_RPL_MASK; - tmp.s = 1; + if (!emulate_invalid_guest_state) { + /* + * CS and SS RPL should be equal during guest entry according + * to VMX spec, but in reality it is not always so. Since vcpu + * is in the middle of the transition from real mode to + * protected mode it is safe to assume that RPL 0 is a good + * default value. + */ + if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) + save->selector &= ~SELECTOR_RPL_MASK; + save->dpl = save->selector & SELECTOR_RPL_MASK; + save->s = 1; } - vmx_set_segment(vcpu, &tmp, seg); + vmx_set_segment(vcpu, save, seg); } static void enter_pmode(struct kvm_vcpu *vcpu) @@ -2777,6 +2784,17 @@ static void enter_pmode(struct kvm_vcpu *vcpu) unsigned long flags; struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * Update real mode segment cache. It may be not up-to-date if sement + * register was written while vcpu was in a guest mode. 
+ */ + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); + vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); + vmx->emulation_required = 1; vmx->rmode.vm86_active = 0; @@ -2794,22 +2812,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu) update_exception_bitmap(vcpu); - if (emulate_invalid_guest_state) - return; - + fix_pmode_dataseg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); + fix_pmode_dataseg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); - - vmx_segment_cache_clear(vmx); - - vmcs_write16(GUEST_SS_SELECTOR, 0); - vmcs_write32(GUEST_SS_AR_BYTES, 0x93); - - vmcs_write16(GUEST_CS_SELECTOR, - vmcs_read16(GUEST_CS_SELECTOR) & ~SELECTOR_RPL_MASK); - vmcs_write32(GUEST_CS_AR_BYTES, 0x9b); } static gva_t rmode_tss_base(struct kvm *kvm) @@ -2831,22 +2839,40 @@ static gva_t rmode_tss_base(struct kvm *kvm) static void fix_rmode_seg(int seg, struct kvm_segment *save) { const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; + struct kvm_segment var = *save; - vmcs_write16(sf->selector, save->base >> 4); - vmcs_write32(sf->base, save->base & 0xffff0); - vmcs_write32(sf->limit, 0xffff); - vmcs_write32(sf->ar_bytes, 0xf3); - if (save->base & 0xf) - printk_once(KERN_WARNING "kvm: segment base is not paragraph" - " aligned when entering protected mode (seg=%d)", - seg); + var.dpl = 0x3; + if (seg == VCPU_SREG_CS) + var.type = 0x3; + + if (!emulate_invalid_guest_state) { + var.selector = var.base >> 4; + var.base = var.base & 0xffff0; + var.limit = 0xffff; + var.g = 0; + var.db = 0; + var.present = 1; + var.s = 1; + var.l = 0; + var.unusable = 0; + var.type = 0x3; + var.avl = 0; + if (save->base & 0xf) + printk_once(KERN_WARNING "kvm: segment base is not " + "paragraph aligned when entering " + "protected mode (seg=%d)", seg); + } + + vmcs_write16(sf->selector, var.selector); + vmcs_write32(sf->base, var.base); + vmcs_write32(sf->limit, var.limit); + vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); } static void enter_rmode(struct kvm_vcpu *vcpu) { unsigned long flags; struct vcpu_vmx *vmx = to_vmx(vcpu); - struct kvm_segment var; if (enable_unrestricted_guest) return; @@ -2862,7 +2888,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx->emulation_required = 1; vmx->rmode.vm86_active = 1; - /* * Very old userspace does not call KVM_SET_TSS_ADDR before entering * vcpu. Call it here with phys address pointing 16M below 4G. 
@@ -2890,28 +2915,13 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmcs_writel(GUEST_CR4, vmcs_readl(GUEST_CR4) | X86_CR4_VME); update_exception_bitmap(vcpu); - if (emulate_invalid_guest_state) - goto continue_rmode; - - vmx_get_segment(vcpu, &var, VCPU_SREG_SS); - vmx_set_segment(vcpu, &var, VCPU_SREG_SS); + fix_rmode_seg(VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); + fix_rmode_seg(VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); + fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); + fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); + fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); + fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - vmx_get_segment(vcpu, &var, VCPU_SREG_CS); - vmx_set_segment(vcpu, &var, VCPU_SREG_CS); - - vmx_get_segment(vcpu, &var, VCPU_SREG_ES); - vmx_set_segment(vcpu, &var, VCPU_SREG_ES); - - vmx_get_segment(vcpu, &var, VCPU_SREG_DS); - vmx_set_segment(vcpu, &var, VCPU_SREG_DS); - - vmx_get_segment(vcpu, &var, VCPU_SREG_GS); - vmx_set_segment(vcpu, &var, VCPU_SREG_GS); - - vmx_get_segment(vcpu, &var, VCPU_SREG_FS); - vmx_set_segment(vcpu, &var, VCPU_SREG_FS); - -continue_rmode: kvm_mmu_reset_context(vcpu); } @@ -3278,7 +3288,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_write16(sf->selector, var->selector); else if (var->s) fix_rmode_seg(seg, &vmx->rmode.segs[seg]); - return; + goto out; } vmcs_writel(sf->base, var->base); @@ -3300,6 +3310,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, var->type |= 0x1; /* Accessed */ vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); + +out: + if (!vmx->emulation_required) + vmx->emulation_required = !guest_state_valid(vcpu); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) -- cgit v1.1 From d54d07b2ca19a2908aa89e0c67715ca2e8e62a4c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:46 +0200 Subject: KVM: VMX: Do not fix segment register during vcpu initialization. Segment registers will be fixed according to current emulation policy during switching to real mode for the first time. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9dff310..a101dd4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3621,12 +3621,9 @@ static void seg_setup(int seg) vmcs_write16(sf->selector, 0); vmcs_writel(sf->base, 0); vmcs_write32(sf->limit, 0xffff); - if (enable_unrestricted_guest) { - ar = 0x93; - if (seg == VCPU_SREG_CS) - ar |= 0x08; /* code segment */ - } else - ar = 0xf3; + ar = 0x93; + if (seg == VCPU_SREG_CS) + ar |= 0x08; /* code segment */ vmcs_write32(sf->ar_bytes, ar); } @@ -3967,14 +3964,9 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx_segment_cache_clear(vmx); seg_setup(VCPU_SREG_CS); - /* - * GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode - * insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4. Sigh. 
- */ - if (kvm_vcpu_is_bsp(&vmx->vcpu)) { + if (kvm_vcpu_is_bsp(&vmx->vcpu)) vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_writel(GUEST_CS_BASE, 0x000f0000); - } else { + else { vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.arch.sipi_vector << 8); vmcs_writel(GUEST_CS_BASE, vmx->vcpu.arch.sipi_vector << 12); } -- cgit v1.1 From 0ca1b4f4ba3a9f75bb099ccaf6c4bd8bb6db7a74 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 20 Dec 2012 16:57:47 +0200 Subject: KVM: VMX: handle IO when emulation is due to #GP in real mode. With emulate_invalid_guest_state=0 if a vcpu is in real mode VMX can enter the vcpu with smaller segment limit than guest configured. If the guest tries to access pass this limit it will get #GP at which point instruction will be emulated with correct segment limit applied. If during the emulation IO is detected it is not handled correctly. Vcpu thread should exit to userspace to serve the IO, but it returns to the guest instead. Since emulation is not completed till userspace completes the IO the faulty instruction is re-executed ad infinitum. The patch fixes that by exiting to userspace if IO happens during instruction emulation. Reported-by: Alex Williamson Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 75 +++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a101dd4..55dfc37 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4230,28 +4230,9 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) return 0; } -static int handle_rmode_exception(struct kvm_vcpu *vcpu, - int vec, u32 err_code) +static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) { - /* - * Instruction with address size override prefix opcode 0x67 - * Cause the #SS fault with 0 error code in VM86 mode. - */ - if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) - if (emulate_instruction(vcpu, 0) == EMULATE_DONE) - return 1; - /* - * Forward all other exceptions that are valid in real mode. - * FIXME: Breaks guest debugging in real mode, needs to be fixed with - * the required debugging infrastructure rework. - */ switch (vec) { - case DB_VECTOR: - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return 0; - kvm_queue_exception(vcpu, vec); - return 1; case BP_VECTOR: /* * Update instruction length as we may reinject the exception @@ -4260,7 +4241,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, to_vmx(vcpu)->vcpu.arch.event_exit_inst_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN); if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - return 0; + return false; + /* fall through */ + case DB_VECTOR: + if (vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) + return false; /* fall through */ case DE_VECTOR: case OF_VECTOR: @@ -4270,10 +4256,37 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, case SS_VECTOR: case GP_VECTOR: case MF_VECTOR: - kvm_queue_exception(vcpu, vec); - return 1; + return true; + break; } - return 0; + return false; +} + +static int handle_rmode_exception(struct kvm_vcpu *vcpu, + int vec, u32 err_code) +{ + /* + * Instruction with address size override prefix opcode 0x67 + * Cause the #SS fault with 0 error code in VM86 mode. 
+ */ + if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) { + if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { + if (vcpu->arch.halt_request) { + vcpu->arch.halt_request = 0; + return kvm_emulate_halt(vcpu); + } + return 1; + } + return 0; + } + + /* + * Forward all other exceptions that are valid in real mode. + * FIXME: Breaks guest debugging in real mode, needs to be fixed with + * the required debugging infrastructure rework. + */ + kvm_queue_exception(vcpu, vec); + return 1; } /* @@ -4361,17 +4374,11 @@ static int handle_exception(struct kvm_vcpu *vcpu) return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0); } - if (vmx->rmode.vm86_active && - handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, - error_code)) { - if (vcpu->arch.halt_request) { - vcpu->arch.halt_request = 0; - return kvm_emulate_halt(vcpu); - } - return 1; - } - ex_no = intr_info & INTR_INFO_VECTOR_MASK; + + if (vmx->rmode.vm86_active && rmode_exception(vcpu, ex_no)) + return handle_rmode_exception(vcpu, ex_no, error_code); + switch (ex_no) { case DB_VECTOR: dr6 = vmcs_readl(EXIT_QUALIFICATION); -- cgit v1.1 From d82603c6da7579c50ebe3fe7da6e3e267d9f6427 Mon Sep 17 00:00:00 2001 From: Jorrit Schippers Date: Thu, 27 Dec 2012 17:33:02 +0100 Subject: treewide: Replace incomming with incoming in all comments and strings Signed-off-by: Jorrit Schippers Signed-off-by: Jiri Kosina --- arch/x86/kernel/cpu/perf_event_amd_ibs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 6336bcb..5f0581e 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -528,7 +528,7 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs) if (!test_bit(IBS_STARTED, pcpu->state)) { /* * Catch spurious interrupts after stopping IBS: After - * disabling IBS there could be still incomming NMIs + * disabling IBS there could be still incoming NMIs * with samples that even have the valid bit cleared. * Mark all this NMIs as handled. */ -- cgit v1.1 From b7869ba17cfb5553a33e11f18c7c45d988e4c455 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 3 Jan 2013 15:28:34 -0700 Subject: x86/PCI: Remove unused pci_root_bus pci_root_bus is unused, so remove all references to it. 
Signed-off-by: Bjorn Helgaas --- arch/x86/include/asm/pci_x86.h | 1 - arch/x86/pci/common.c | 1 - arch/x86/pci/legacy.c | 2 +- arch/x86/pci/numaq_32.c | 2 +- 4 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index 73e8eef..0126f10 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -54,7 +54,6 @@ void pcibios_set_cache_line_size(void); /* pci-pc.c */ extern int pcibios_last_bus; -extern struct pci_bus *pci_root_bus; extern struct pci_ops pci_root_ops; void pcibios_scan_specific_bus(int busn); diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 412e128..505731b 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -34,7 +34,6 @@ int noioapicreroute = 1; #endif int pcibios_last_bus = -1; unsigned long pirq_table_addr; -struct pci_bus *pci_root_bus; const struct pci_raw_ops *__read_mostly raw_pci_ops; const struct pci_raw_ops *__read_mostly raw_pci_ext_ops; diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index a1df191..a9e8308 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -30,7 +30,7 @@ int __init pci_legacy_init(void) } printk("PCI: Probing PCI hardware\n"); - pci_root_bus = pcibios_scan_root(0); + pcibios_scan_root(0); return 0; } diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 83e125b..00edfe6 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c @@ -152,7 +152,7 @@ int __init pci_numaq_init(void) raw_pci_ops = &pci_direct_conf1_mq; - pci_root_bus = pcibios_scan_root(0); + pcibios_scan_root(0); if (num_online_nodes() > 1) for_each_online_node(quad) { if (quad == 0) -- cgit v1.1 From f7ac356dc3da1f69dc52cb6273e08e53b85b4884 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Nov 2012 21:39:24 -0700 Subject: x86/PCI: Factor out pcibios_allocate_bridge_resources() Thus pcibios_allocate_bus_resources() can be simpler and cleaner. Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/x86/pci/i386.c | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index dd8ca6f..9800362 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -193,34 +193,36 @@ EXPORT_SYMBOL(pcibios_align_resource); * as well. */ -static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) +static void __init pcibios_allocate_bridge_resources(struct pci_dev *dev) { - struct pci_bus *bus; - struct pci_dev *dev; int idx; struct resource *r; + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { + r = &dev->resource[idx]; + if (!r->flags) + continue; + if (!r->start || pci_claim_resource(dev, idx) < 0) { + /* + * Something is wrong with the region. + * Invalidate the resource to prevent + * child resource allocations in this + * range. + */ + r->start = r->end = 0; + r->flags = 0; + } + } +} + +static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) +{ + struct pci_bus *bus; + /* Depth-First Search on bus tree */ list_for_each_entry(bus, bus_list, node) { - if ((dev = bus->self)) { - for (idx = PCI_BRIDGE_RESOURCES; - idx < PCI_NUM_RESOURCES; idx++) { - r = &dev->resource[idx]; - if (!r->flags) - continue; - if (!r->start || - pci_claim_resource(dev, idx) < 0) { - /* - * Something is wrong with the region. - * Invalidate the resource to prevent - * child resource allocations in this - * range.
- */ - r->start = r->end = 0; - r->flags = 0; - } - } - } + if (bus->self) + pcibios_allocate_bridge_resources(bus->self); pcibios_allocate_bus_resources(&bus->children); } } -- cgit v1.1 From c7f4bbc92feee2986212ef3b42c806e2257197dc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Nov 2012 21:39:25 -0700 Subject: x86/PCI: Factor out pcibios_allocate_dev_resources() Factor pcibios_allocate_dev_resources() out of pcibios_allocate_resources(). Currently we only allocate these resources at boot-time with a for_each_pci_dev() loop. Eventually we'll use pcibios_allocate_dev_resources() for hot-added devices, too. [bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/x86/pci/i386.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 9800362..5817cf2 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -232,9 +232,8 @@ struct pci_check_idx_range { int end; }; -static void __init pcibios_allocate_resources(int pass) +static void __init pcibios_allocate_dev_resources(struct pci_dev *dev, int pass) { - struct pci_dev *dev = NULL; int idx, disabled, i; u16 command; struct resource *r; @@ -246,14 +245,13 @@ static void __init pcibios_allocate_resources(int pass) #endif }; - for_each_pci_dev(dev) { - pci_read_config_word(dev, PCI_COMMAND, &command); - for (i = 0; i < ARRAY_SIZE(idx_range); i++) + pci_read_config_word(dev, PCI_COMMAND, &command); + for (i = 0; i < ARRAY_SIZE(idx_range); i++) for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { r = &dev->resource[idx]; - if (r->parent) /* Already allocated */ + if (r->parent) /* Already allocated */ continue; - if (!r->start) /* Address not assigned at all */ + if (!r->start) /* Address not assigned at all */ continue; if (r->flags & IORESOURCE_IO) disabled = !(command & PCI_COMMAND_IO); @@ -272,23 +270,29 @@ static void __init pcibios_allocate_resources(int pass) } } } - if (!pass) { - r = &dev->resource[PCI_ROM_RESOURCE]; - if (r->flags & IORESOURCE_ROM_ENABLE) { - /* Turn the ROM off, leave the resource region, - * but keep it unregistered. */ - u32 reg; - dev_dbg(&dev->dev, "disabling ROM %pR\n", r); - r->flags &= ~IORESOURCE_ROM_ENABLE; - pci_read_config_dword(dev, - dev->rom_base_reg, ®); - pci_write_config_dword(dev, dev->rom_base_reg, + if (!pass) { + r = &dev->resource[PCI_ROM_RESOURCE]; + if (r->flags & IORESOURCE_ROM_ENABLE) { + /* Turn the ROM off, leave the resource region, + * but keep it unregistered. */ + u32 reg; + dev_dbg(&dev->dev, "disabling ROM %pR\n", r); + r->flags &= ~IORESOURCE_ROM_ENABLE; + pci_read_config_dword(dev, dev->rom_base_reg, ®); + pci_write_config_dword(dev, dev->rom_base_reg, reg & ~PCI_ROM_ADDRESS_ENABLE); - } } } } +static void __init pcibios_allocate_resources(int pass) +{ + struct pci_dev *dev = NULL; + + for_each_pci_dev(dev) + pcibios_allocate_dev_resources(dev, pass); +} + static int __init pcibios_assign_resources(void) { struct pci_dev *dev = NULL; -- cgit v1.1 From ee04e0cea8b170ae9c8542162091c716de36666b Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 25 Dec 2012 14:34:06 +0200 Subject: KVM: mmu: remove unused trace event trace_kvm_mmu_delay_free_pages() is no longer used. 
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmutrace.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index cd6e983..b8f6172 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -195,12 +195,6 @@ DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_prepare_zap_page, TP_ARGS(sp) ); -DEFINE_EVENT(kvm_mmu_page_class, kvm_mmu_delay_free_pages, - TP_PROTO(struct kvm_mmu_page *sp), - - TP_ARGS(sp) -); - TRACE_EVENT( mark_mmio_spte, TP_PROTO(u64 *sptep, gfn_t gfn, unsigned access), -- cgit v1.1 From 908e7d7999bcce70ac52e7f390a8f5cbc55948de Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 27 Dec 2012 14:44:58 +0200 Subject: KVM: MMU: simplify folding of dirty bit into accessed_dirty MMU code tries to avoid if()s that HW is not able to predict reliably by using bitwise operations to streamline code execution, but in the case of dirty bit folding this gives us nothing, since write_fault is checked right before the folding code. Let's just piggyback onto the if() to make the code clearer. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 16 ++++++----------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 891eb6d..a7b24cf 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -249,16 +249,12 @@ retry_walk: if (!write_fault) protect_clean_gpte(&pte_access, pte); - - /* - * On a write fault, fold the dirty bit into accessed_dirty by shifting it one - * place right. - * - * On a read fault, do nothing. - */ - shift = write_fault >> ilog2(PFERR_WRITE_MASK); - shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT; - accessed_dirty &= pte >> shift; + else + /* + * On a write fault, fold the dirty bit into accessed_dirty by + * shifting it one place right. + */ + accessed_dirty &= pte >> (PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT); if (unlikely(!accessed_dirty)) { ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault); -- cgit v1.1 From 83edc87ce8b284a3d60ab8072e55041c76a68277 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Nov 2012 21:39:26 -0700 Subject: x86/PCI: Allocate resources on a per-bus basis for hot-adding root buses Previously pcibios_allocate_resources() allocated resources at boot-time for all PCI devices using for_each_pci_dev(). This patch changes pcibios_allocate_resources() so we can specify a bus, so we can do similar allocation when hot-adding a root bus.
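[ ed. note: illustrative only — the reshaped pcibios_allocate_bus_resources() takes one bus and recurses into its children instead of iterating the global pci_root_buses list. A standalone sketch of that depth-first walk, with the kernel list machinery reduced to plain pointers:

	#include <stdio.h>

	struct bus {
		const char *name;
		struct bus *children;	/* first child bus */
		struct bus *sibling;	/* next bus at the same level */
	};

	static void allocate_bus_resources(struct bus *bus)
	{
		struct bus *child;

		printf("claiming bridge windows for %s\n", bus->name);
		for (child = bus->children; child; child = child->sibling)
			allocate_bus_resources(child);
	}

Making the bus an explicit argument is what later lets the same routine serve a single hot-added root bus as well as the boot-time loop over pci_root_buses. ]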
[bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/x86/pci/i386.c | 38 +++++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 5817cf2..84696ed 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -215,16 +215,15 @@ static void __init pcibios_allocate_bridge_resources(struct pci_dev *dev) } } -static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) +static void __init pcibios_allocate_bus_resources(struct pci_bus *bus) { - struct pci_bus *bus; + struct pci_bus *child; /* Depth-First Search on bus tree */ - list_for_each_entry(bus, bus_list, node) { - if (bus->self) - pcibios_allocate_bridge_resources(bus->self); - pcibios_allocate_bus_resources(&bus->children); - } + if (bus->self) + pcibios_allocate_bridge_resources(bus->self); + list_for_each_entry(child, &bus->children, node) + pcibios_allocate_bus_resources(child); } struct pci_check_idx_range { @@ -285,12 +284,18 @@ static void __init pcibios_allocate_dev_resources(struct pci_dev *dev, int pass) } } -static void __init pcibios_allocate_resources(int pass) +static void __init pcibios_allocate_resources(struct pci_bus *bus, int pass) { - struct pci_dev *dev = NULL; + struct pci_dev *dev; + struct pci_bus *child; - for_each_pci_dev(dev) + list_for_each_entry(dev, &bus->devices, bus_list) { pcibios_allocate_dev_resources(dev, pass); + + child = dev->subordinate; + if (child) + pcibios_allocate_resources(child, pass); + } } static int __init pcibios_assign_resources(void) @@ -323,10 +328,17 @@ static int __init pcibios_assign_resources(void) void __init pcibios_resource_survey(void) { + struct pci_bus *bus; + DBG("PCI: Allocating resources\n"); - pcibios_allocate_bus_resources(&pci_root_buses); - pcibios_allocate_resources(0); - pcibios_allocate_resources(1); + + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_allocate_bus_resources(bus); + + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_allocate_resources(bus, 0); + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_allocate_resources(bus, 1); e820_reserve_resources_late(); /* -- cgit v1.1 From dc2f56fa8400677ef4852d5128f03b795cf57e7b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Nov 2012 21:39:27 -0700 Subject: x86/PCI: Factor out pcibios_allocate_dev_rom_resource() Factor pcibios_allocate_rom_resources() and pcibios_allocate_dev_rom_resource() out of pcibios_assign_resources(). This will allow us to allocate ROM resources for hot-added root buses. [bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/x86/pci/i386.c | 52 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 84696ed..42dd755 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -298,27 +298,45 @@ static void __init pcibios_allocate_resources(struct pci_bus *bus, int pass) } } -static int __init pcibios_assign_resources(void) +static void __init pcibios_allocate_dev_rom_resource(struct pci_dev *dev) { - struct pci_dev *dev = NULL; struct resource *r; - if (!(pci_probe & PCI_ASSIGN_ROMS)) { - /* - * Try to use BIOS settings for ROMs, otherwise let - * pci_assign_unassigned_resources() allocate the new - * addresses. 
- */ - for_each_pci_dev(dev) { - r = &dev->resource[PCI_ROM_RESOURCE]; - if (!r->flags || !r->start) - continue; - if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { - r->end -= r->start; - r->start = 0; - } - } + /* + * Try to use BIOS settings for ROMs, otherwise let + * pci_assign_unassigned_resources() allocate the new + * addresses. + */ + r = &dev->resource[PCI_ROM_RESOURCE]; + if (!r->flags || !r->start) + return; + + if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { + r->end -= r->start; + r->start = 0; } +} +static void __init pcibios_allocate_rom_resources(struct pci_bus *bus) +{ + struct pci_dev *dev; + struct pci_bus *child; + + list_for_each_entry(dev, &bus->devices, bus_list) { + pcibios_allocate_dev_rom_resource(dev); + + child = dev->subordinate; + if (child) + pcibios_allocate_rom_resources(child); + } +} + +static int __init pcibios_assign_resources(void) +{ + struct pci_bus *bus; + + if (!(pci_probe & PCI_ASSIGN_ROMS)) + list_for_each_entry(bus, &pci_root_buses, node) + pcibios_allocate_rom_resources(bus); pci_assign_unassigned_resources(); pcibios_fw_addr_list_del(); -- cgit v1.1 From 745216025de0354eea23493d994e3fc0ab7369fc Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Nov 2012 21:39:28 -0700 Subject: x86/PCI: Don't track firmware-assigned BAR values for hot-added devices The BIOS doesn't assign BAR values for hot-added devices, so don't bother saving the original values when we enumerate these devices. [bhelgaas: changelog, return constant 0 in pcibios_retrieve_fw_addr] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/x86/pci/i386.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 42dd755..1bd672a 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -51,6 +51,7 @@ struct pcibios_fwaddrmap { static LIST_HEAD(pcibios_fwaddrmappings); static DEFINE_SPINLOCK(pcibios_fwaddrmap_lock); +static bool pcibios_fw_addr_done; /* Must be called with 'pcibios_fwaddrmap_lock' lock held. */ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) @@ -72,6 +73,9 @@ pcibios_save_fw_addr(struct pci_dev *dev, int idx, resource_size_t fw_addr) unsigned long flags; struct pcibios_fwaddrmap *map; + if (pcibios_fw_addr_done) + return; + spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); map = pcibios_fwaddrmap_lookup(dev); if (!map) { @@ -97,6 +101,9 @@ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) struct pcibios_fwaddrmap *map; resource_size_t fw_addr = 0; + if (pcibios_fw_addr_done) + return 0; + spin_lock_irqsave(&pcibios_fwaddrmap_lock, flags); map = pcibios_fwaddrmap_lookup(dev); if (map) @@ -106,7 +113,7 @@ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx) return fw_addr; } -static void pcibios_fw_addr_list_del(void) +static void __init pcibios_fw_addr_list_del(void) { unsigned long flags; struct pcibios_fwaddrmap *entry, *next; @@ -118,6 +125,7 @@ static void pcibios_fw_addr_list_del(void) kfree(entry); } spin_unlock_irqrestore(&pcibios_fwaddrmap_lock, flags); + pcibios_fw_addr_done = true; } static int -- cgit v1.1 From b95168e010a405add13aa010d7c45b55dc4026c7 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Nov 2012 21:39:29 -0700 Subject: x86/PCI: Keep resource allocation functions after boot The PCI resource allocation functions will be used for hot-added devices, so keep them around. 
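[ ed. note: the mechanical change in the patch below is dropping __init annotations. An illustrative kernel-style reminder, assuming the usual semantics of the annotation, of why that matters for hotplug:

	#include <linux/init.h>

	/* __init code and data are placed in .init sections and freed once
	 * boot completes (free_initmem()), so anything reachable from a
	 * hotplug path afterwards must not carry the annotation. */
	static int __init used_only_during_boot(void)
	{
		return 0;	/* discarded after boot */
	}

	static int used_for_hotplug_too(void)
	{
		return 0;	/* must stay resident */
	}
]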
[bhelgaas: changelog] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/x86/pci/i386.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 1bd672a..8656ea8 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL(pcibios_align_resource); * as well. */ -static void __init pcibios_allocate_bridge_resources(struct pci_dev *dev) +static void pcibios_allocate_bridge_resources(struct pci_dev *dev) { int idx; struct resource *r; @@ -223,7 +223,7 @@ static void __init pcibios_allocate_bridge_resources(struct pci_dev *dev) } } -static void __init pcibios_allocate_bus_resources(struct pci_bus *bus) +static void pcibios_allocate_bus_resources(struct pci_bus *bus) { struct pci_bus *child; @@ -239,7 +239,7 @@ struct pci_check_idx_range { int end; }; -static void __init pcibios_allocate_dev_resources(struct pci_dev *dev, int pass) +static void pcibios_allocate_dev_resources(struct pci_dev *dev, int pass) { int idx, disabled, i; u16 command; @@ -292,7 +292,7 @@ static void __init pcibios_allocate_dev_resources(struct pci_dev *dev, int pass) } } -static void __init pcibios_allocate_resources(struct pci_bus *bus, int pass) +static void pcibios_allocate_resources(struct pci_bus *bus, int pass) { struct pci_dev *dev; struct pci_bus *child; @@ -306,7 +306,7 @@ static void __init pcibios_allocate_resources(struct pci_bus *bus, int pass) } } -static void __init pcibios_allocate_dev_rom_resource(struct pci_dev *dev) +static void pcibios_allocate_dev_rom_resource(struct pci_dev *dev) { struct resource *r; @@ -324,7 +324,7 @@ static void __init pcibios_allocate_dev_rom_resource(struct pci_dev *dev) r->start = 0; } } -static void __init pcibios_allocate_rom_resources(struct pci_bus *bus) +static void pcibios_allocate_rom_resources(struct pci_bus *bus) { struct pci_dev *dev; struct pci_bus *child; -- cgit v1.1 From b3e65e1f9185a2eb034defe4270ba178ba70b9a9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 3 Nov 2012 21:39:30 -0700 Subject: x86/PCI: Implement pcibios_resource_survey_bus() During testing of remove/rescan of root bus 00, we found: [ 338.142574] bus: 'pci': really_probe: probing driver ata_piix with device 0000:00:01.1 [ 338.146788] ata_piix 0000:00:01.1: device not available (can't reserve [io 0x01f0-0x01f7]) [ 338.150565] ata_piix: probe of 0000:00:01.1 failed with error -22 because that fixed resource is not claimed. For the boot path it is claimed in arch/x86/pci/i386.c::pcibios_allocate_resources(). Claim those resources on remove/rescan too, so the rescanned bus will still use the old resources. This honors the FW settings in the registers during hot add, especially since root-bus hot add goes through ACPI and the BIOS has a chance to set some registers before handing over.
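[ ed. note: "claiming" here means inserting a BAR that firmware already programmed into the resource tree, so that a later reservation such as ata_piix's [io 0x01f0-0x01f7] request can succeed. A hedged per-device sketch using the existing pci_claim_resource() interface; the helper name is invented:

	#include <linux/pci.h>

	static void claim_fw_assigned_bars(struct pci_dev *dev)
	{
		int idx;

		for (idx = 0; idx < PCI_NUM_RESOURCES; idx++) {
			struct resource *r = &dev->resource[idx];

			if (!r->flags || r->parent)
				continue;	/* empty, or already claimed */
			if (pci_claim_resource(dev, idx) < 0)
				dev_warn(&dev->dev,
					 "can't claim resource %d %pR\n",
					 idx, r);
		}
	}

pcibios_resource_survey_bus(), added in the hunk below, achieves the same effect for a hot-added bus by rerunning the boot-time allocation passes. ]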
[bhelgaas: move weak definition to patch that uses it] Signed-off-by: Yinghai Lu Signed-off-by: Bjorn Helgaas --- arch/x86/pci/i386.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 8656ea8..94919e3 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -352,6 +352,19 @@ static int __init pcibios_assign_resources(void) return 0; } +void pcibios_resource_survey_bus(struct pci_bus *bus) +{ + dev_printk(KERN_DEBUG, &bus->dev, "Allocating resources\n"); + + pcibios_allocate_bus_resources(bus); + + pcibios_allocate_resources(bus, 0); + pcibios_allocate_resources(bus, 1); + + if (!(pci_probe & PCI_ASSIGN_ROMS)) + pcibios_allocate_rom_resources(bus); +} + void __init pcibios_resource_survey(void) { struct pci_bus *bus; -- cgit v1.1 From 0024dc5371b41abb30217bbcaa708d35f49fe273 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Fri, 28 Dec 2012 12:05:04 +0200 Subject: crypto: aesni-intel - remove rfc3686(ctr(aes)), utilize rfc3686 from ctr-module instead rfc3686 in the CTR module is now able to use asynchronous ctr(aes) from aesni-intel, so rfc3686(ctr(aes)) in aesni-intel is no longer needed. Signed-off-by: Jussi Kivilinna Acked-by: Herbert Xu Signed-off-by: Steffen Klassert --- arch/x86/crypto/aesni-intel_glue.c | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 1b9c22b..a0795da 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -40,10 +40,6 @@ #include #include -#if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) -#define HAS_CTR -#endif - #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) #define HAS_PCBC #endif @@ -395,12 +391,6 @@ static int ablk_ctr_init(struct crypto_tfm *tfm) return ablk_init_common(tfm, "__driver-ctr-aes-aesni"); } -#ifdef HAS_CTR -static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm) -{ - return ablk_init_common(tfm, "rfc3686(__driver-ctr-aes-aesni)"); -} -#endif #endif #ifdef HAS_PCBC @@ -1158,33 +1148,6 @@ static struct crypto_alg aesni_algs[] = { { .maxauthsize = 16, }, }, -#ifdef HAS_CTR -}, { - .cra_name = "rfc3686(ctr(aes))", - .cra_driver_name = "rfc3686-ctr-aes-aesni", - .cra_priority = 400, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = 1, - .cra_ctxsize = sizeof(struct async_helper_ctx), - .cra_alignmask = 0, - .cra_type = &crypto_ablkcipher_type, - .cra_module = THIS_MODULE, - .cra_init = ablk_rfc3686_ctr_init, - .cra_exit = ablk_exit, - .cra_u = { - .ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE + - CTR_RFC3686_NONCE_SIZE, - .max_keysize = AES_MAX_KEY_SIZE + - CTR_RFC3686_NONCE_SIZE, - .ivsize = CTR_RFC3686_IV_SIZE, - .setkey = ablk_set_key, - .encrypt = ablk_encrypt, - .decrypt = ablk_decrypt, - .geniv = "seqiv", - }, - }, -#endif #endif #ifdef HAS_PCBC }, { -- cgit v1.1 From b0cfeb5dedf9109b8a8bd594abe60791b135a643 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 8 Jan 2013 10:49:00 +0200 Subject: KVM: x86: remove unused variable from walk_addr_generic() Fix compilation warning.
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a7b24cf..2ad76b9 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, pt_element_t pte; pt_element_t __user *uninitialized_var(ptep_user); gfn_t table_gfn; - unsigned index, pt_access, pte_access, accessed_dirty, shift; + unsigned index, pt_access, pte_access, accessed_dirty; gpa_t pte_gpa; int offset; const int write_fault = access & PFERR_WRITE_MASK; -- cgit v1.1 From b09408d00fd82be80289a329dd94d1a0d6b77dc2 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 7 Jan 2013 19:27:06 -0200 Subject: KVM: VMX: fix incorrect cached cpl value with real/v8086 modes CPL is always 0 when in real mode, and always 3 when in virtual 8086 mode. Using values other than those can cause failures on operations that check CPL. Reviewed-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 55dfc37..dd2a85c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1696,7 +1696,6 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); - __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { to_vmx(vcpu)->rmode.save_rflags = rflags; @@ -3110,7 +3109,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; - __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); } static u64 construct_eptp(unsigned long root_hpa) @@ -3220,8 +3218,10 @@ static u64 vmx_get_segment_base(struct kvm_vcpu *vcpu, int seg) return vmx_read_guest_seg_base(to_vmx(vcpu), seg); } -static int __vmx_get_cpl(struct kvm_vcpu *vcpu) +static int vmx_get_cpl(struct kvm_vcpu *vcpu) { + struct vcpu_vmx *vmx = to_vmx(vcpu); + if (!is_protmode(vcpu)) return 0; @@ -3229,13 +3229,6 @@ static int __vmx_get_cpl(struct kvm_vcpu *vcpu) && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; - return vmx_read_guest_seg_selector(to_vmx(vcpu), VCPU_SREG_CS) & 3; -} - -static int vmx_get_cpl(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - /* * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations * fail; use the cache instead. @@ -3246,7 +3239,7 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); - vmx->cpl = __vmx_get_cpl(vcpu); + vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3; } return vmx->cpl; -- cgit v1.1 From f51bde6f0d66ab6a80eb1a05b6fe8d90c4960486 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 21 Dec 2012 17:03:58 +0100 Subject: x86, MCE: Retract most UAPI exports Retract most macro definitions which went into the user-visible mce.h header. Even though those bits are mostly hardware-defined/-architectural, their naming is not.
If we export them to userspace, any kernel unification/renaming/cleanup cannot be done anymore since those are effectively cast in stone. Besides, if userspace wants those definitions, they can write their own defines and go crazy. Signed-off-by: Borislav Petkov --- arch/x86/include/asm/mce.h | 84 +++++++++++++++++++++++++++++++++++++++ arch/x86/include/uapi/asm/mce.h | 87 ----------------------------------------- 2 files changed, 84 insertions(+), 87 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index ecdfee6..f4076af 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -3,6 +3,90 @@ #include +/* + * Machine Check support for x86 + */ + +/* MCG_CAP register defines */ +#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ +#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ +#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ +#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ +#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ +#define MCG_EXT_CNT_SHIFT 16 +#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) +#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ + +/* MCG_STATUS register defines */ +#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ +#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ +#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ + +/* MCi_STATUS register defines */ +#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ +#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ +#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ +#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ +#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ +#define MCI_STATUS_AR (1ULL<<55) /* Action required */ +#define MCACOD 0xffff /* MCA Error Code */ + +/* Architecturally defined codes from SDM Vol. 
3B Chapter 15 */ +#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ +#define MCACOD_SCRUBMSK 0xfff0 +#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ +#define MCACOD_DATA 0x0134 /* Data Load */ +#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ + +/* MCi_MISC register defines */ +#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) +#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) +#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ +#define MCI_MISC_ADDR_LINEAR 1 /* linear address */ +#define MCI_MISC_ADDR_PHYS 2 /* physical address */ +#define MCI_MISC_ADDR_MEM 3 /* memory address */ +#define MCI_MISC_ADDR_GENERIC 7 /* generic */ + +/* CTL2 register defines */ +#define MCI_CTL2_CMCI_EN (1ULL << 30) +#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL + +#define MCJ_CTX_MASK 3 +#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) +#define MCJ_CTX_RANDOM 0 /* inject context: random */ +#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ +#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ +#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ +#define MCJ_EXCEPTION 0x8 /* raise as exception */ +#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ + +#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ + +/* Software defined banks */ +#define MCE_EXTENDED_BANK 128 +#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) +#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) + +#define MCE_LOG_LEN 32 +#define MCE_LOG_SIGNATURE "MACHINECHECK" + +/* + * This structure contains all data related to the MCE log. Also + * carries a signature to make it easier to find from external + * debugging tools. Each entry is only valid when its finished flag + * is set. + */ +struct mce_log { + char signature[12]; /* "MACHINECHECK" */ + unsigned len; /* = MCE_LOG_LEN */ + unsigned next; + unsigned flags; + unsigned recordlen; /* length of struct mce */ + struct mce entry[MCE_LOG_LEN]; +}; struct mca_config { bool dont_log_ce; diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 58c8298..a0eab85 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -4,66 +4,6 @@ #include #include -/* - * Machine Check support for x86 - */ - -/* MCG_CAP register defines */ -#define MCG_BANKCNT_MASK 0xff /* Number of Banks */ -#define MCG_CTL_P (1ULL<<8) /* MCG_CTL register available */ -#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ -#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ -#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ -#define MCG_EXT_CNT_SHIFT 16 -#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) -#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ - -/* MCG_STATUS register defines */ -#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ -#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ -#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ - -/* MCi_STATUS register defines */ -#define MCI_STATUS_VAL (1ULL<<63) /* valid error */ -#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ -#define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ -#define MCI_STATUS_EN (1ULL<<60) /* error enabled */ -#define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ -#define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. 
valid */ -#define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ -#define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ -#define MCI_STATUS_AR (1ULL<<55) /* Action required */ -#define MCACOD 0xffff /* MCA Error Code */ - -/* Architecturally defined codes from SDM Vol. 3B Chapter 15 */ -#define MCACOD_SCRUB 0x00C0 /* 0xC0-0xCF Memory Scrubbing */ -#define MCACOD_SCRUBMSK 0xfff0 -#define MCACOD_L3WB 0x017A /* L3 Explicit Writeback */ -#define MCACOD_DATA 0x0134 /* Data Load */ -#define MCACOD_INSTR 0x0150 /* Instruction Fetch */ - -/* MCi_MISC register defines */ -#define MCI_MISC_ADDR_LSB(m) ((m) & 0x3f) -#define MCI_MISC_ADDR_MODE(m) (((m) >> 6) & 7) -#define MCI_MISC_ADDR_SEGOFF 0 /* segment offset */ -#define MCI_MISC_ADDR_LINEAR 1 /* linear address */ -#define MCI_MISC_ADDR_PHYS 2 /* physical address */ -#define MCI_MISC_ADDR_MEM 3 /* memory address */ -#define MCI_MISC_ADDR_GENERIC 7 /* generic */ - -/* CTL2 register defines */ -#define MCI_CTL2_CMCI_EN (1ULL << 30) -#define MCI_CTL2_CMCI_THRESHOLD_MASK 0x7fffULL - -#define MCJ_CTX_MASK 3 -#define MCJ_CTX(flags) ((flags) & MCJ_CTX_MASK) -#define MCJ_CTX_RANDOM 0 /* inject context: random */ -#define MCJ_CTX_PROCESS 0x1 /* inject context: process */ -#define MCJ_CTX_IRQ 0x2 /* inject context: IRQ */ -#define MCJ_NMI_BROADCAST 0x4 /* do NMI broadcasting */ -#define MCJ_EXCEPTION 0x8 /* raise as exception */ -#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */ - /* Fields are zero when not available */ struct mce { __u64 status; @@ -87,35 +27,8 @@ struct mce { __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ }; -/* - * This structure contains all data related to the MCE log. Also - * carries a signature to make it easier to find from external - * debugging tools. Each entry is only valid when its finished flag - * is set. - */ - -#define MCE_LOG_LEN 32 - -struct mce_log { - char signature[12]; /* "MACHINECHECK" */ - unsigned len; /* = MCE_LOG_LEN */ - unsigned next; - unsigned flags; - unsigned recordlen; /* length of struct mce */ - struct mce entry[MCE_LOG_LEN]; -}; - -#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ - -#define MCE_LOG_SIGNATURE "MACHINECHECK" - #define MCE_GET_RECORD_LEN _IOR('M', 1, int) #define MCE_GET_LOG_LEN _IOR('M', 2, int) #define MCE_GETCLEAR_FLAGS _IOR('M', 3, int) -/* Software defined banks */ -#define MCE_EXTENDED_BANK 128 -#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0 -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) - #endif /* _UAPI_ASM_X86_MCE_H */ -- cgit v1.1 From e28bbd44dad134046ef9463cbb8c1cf81f53de5e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:48 +0200 Subject: KVM: x86 emulator: framework for streamlining arithmetic opcodes We emulate arithmetic opcodes by executing a "similar" instruction (same operation, different operands) on the cpu. This ensures accurate emulation, esp. wrt. eflags. However, the prologue and epilogue around the opcode are fairly long, consisting of a switch (for the operand size) and code to load and save the operands. This is repeated for every opcode. This patch introduces an alternative way to emulate arithmetic opcodes. Instead of the above, we have four (three on i386) functions consisting of just the opcode and a ret; one for each operand size. For example: .align 8 em_notb: not %al ret .align 8 em_notw: not %ax ret .align 8 em_notl: not %eax ret .align 8 em_notq: not %rax ret The prologue and epilogue are shared across all opcodes.
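[ editor's note: since the stubs above are aligned to 8 bytes and laid out consecutively, a caller can compute a stub's address from the operand size instead of indexing a jump table. A hedged sketch of that arithmetic; op_bytes is an illustrative name (the patch's fastop() helper uses ctxt->dst.bytes), and the pointer arithmetic relies on the gcc extension the kernel builds with:

	/* op_bytes is 1, 2, 4 or 8; __ffs() gives 0, 1, 2, 3 */
	void (*fop)(struct fastop *) = em_not;		/* base address: the 1-byte stub em_notb */
	fop += __ffs(op_bytes) * FASTOP_SIZE;		/* em_notb/w/l/q at offsets +0/+8/+16/+24 */

]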
Note the functions use a special calling convention; notably eflags is an input/output parameter and is not clobbered. Rather than dispatching the four functions through a jump table, the functions are declared as a constant size (8) so their address can be calculated. Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 53c5ad6..dd71567 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -149,6 +149,7 @@ #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ +#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ #define X2(x...) x, x #define X3(x...) X2(x), x @@ -159,6 +160,27 @@ #define X8(x...) X4(x), X4(x) #define X16(x...) X8(x), X8(x) +#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) +#define FASTOP_SIZE 8 + +/* + * fastop functions have a special calling convention: + * + * dst: [rdx]:rax (in/out) + * src: rbx (in/out) + * src2: rcx (in) + * flags: rflags (in/out) + * + * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for + * different operand sizes can be reached by calculation, rather than a jump + * table (which would be bigger than the code). + * + * fastop functions are declared as taking a never-defined fastop parameter, + * so they can't be called from C directly. + */ + +struct fastop; + struct opcode { u64 flags : 56; u64 intercept : 8; @@ -168,6 +190,7 @@ struct opcode { const struct group_dual *gdual; const struct gprefix *gprefix; const struct escape *esc; + void (*fastop)(struct fastop *fake); } u; int (*check_perm)(struct x86_emulate_ctxt *ctxt); }; @@ -3646,6 +3669,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) } #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } +#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } #define II(_f, _e, _i) \ { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } #define IIP(_f, _e, _i, _p) \ @@ -4502,6 +4526,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); } +static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) +{ + ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; + fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; + asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" + : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) + : "c"(ctxt->src2.val), [fastop]"S"(fop)); + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); + return X86EMUL_CONTINUE; +} int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) { @@ -4631,6 +4665,13 @@ special_insn: } if (ctxt->execute) { + if (ctxt->d & Fastop) { + void (*fop)(struct fastop *) = (void *)ctxt->execute; + rc = fastop(ctxt, fop); + if (rc != X86EMUL_CONTINUE) + goto done; + goto writeback; + } rc = ctxt->execute(ctxt); if (rc != X86EMUL_CONTINUE) goto done; -- cgit v1.1 From b7d491e7f065b5b957a27a65c9e7cd3ef96b2736 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:49 +0200 Subject: KVM: x86 emulator: Support for declaring single operand fastops 
Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index dd71567..42c53c80 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -24,6 +24,7 @@ #include "kvm_cache_regs.h" #include #include +#include #include "x86.h" #include "tss.h" @@ -439,6 +440,30 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) } \ } while (0) +#define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" +#define FOP_RET "ret \n\t" + +#define FOP_START(op) \ + extern void em_##op(struct fastop *fake); \ + asm(".pushsection .text, \"ax\" \n\t" \ + ".global em_" #op " \n\t" \ + FOP_ALIGN \ + "em_" #op ": \n\t" + +#define FOP_END \ + ".popsection") + +#define FOP1E(op, dst) \ + FOP_ALIGN #op " %" #dst " \n\t" FOP_RET + +#define FASTOP1(op) \ + FOP_START(op) \ + FOP1E(op##b, al) \ + FOP1E(op##w, ax) \ + FOP1E(op##l, eax) \ + ON64(FOP1E(op##q, rax)) \ + FOP_END + #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ do { \ unsigned long _tmp; \ -- cgit v1.1 From b6744dc3fb55fda7cfcb53beecfa056f02415e6d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:50 +0200 Subject: KVM: x86 emulator: introduce NoWrite flag Instead of disabling writeback via OP_NONE, just specify NoWrite. Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 42c53c80..fe113fb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -151,6 +151,7 @@ #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ #define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ +#define NoWrite ((u64)1 << 45) /* No writeback */ #define X2(x...) x, x #define X3(x...) X2(x), x @@ -1633,6 +1634,9 @@ static int writeback(struct x86_emulate_ctxt *ctxt) { int rc; + if (ctxt->d & NoWrite) + return X86EMUL_CONTINUE; + switch (ctxt->dst.type) { case OP_REG: write_register_operand(&ctxt->dst); -- cgit v1.1 From 75f728456f368140e6b34b6a79f3408774076325 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:51 +0200 Subject: KVM: x86 emulator: mark CMP, CMPS, SCAS, TEST as NoWrite Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index fe113fb..2af0c44 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3069,16 +3069,12 @@ static int em_xor(struct x86_emulate_ctxt *ctxt) static int em_cmp(struct x86_emulate_ctxt *ctxt) { emulate_2op_SrcV(ctxt, "cmp"); - /* Disable writeback. */ - ctxt->dst.type = OP_NONE; return X86EMUL_CONTINUE; } static int em_test(struct x86_emulate_ctxt *ctxt) { emulate_2op_SrcV(ctxt, "test"); - /* Disable writeback. 
*/ - ctxt->dst.type = OP_NONE; return X86EMUL_CONTINUE; } @@ -3747,7 +3743,7 @@ static const struct opcode group1[] = { I(Lock | PageTable, em_and), I(Lock, em_sub), I(Lock, em_xor), - I(0, em_cmp), + I(NoWrite, em_cmp), }; static const struct opcode group1A[] = { @@ -3755,8 +3751,8 @@ static const struct opcode group1A[] = { }; static const struct opcode group3[] = { - I(DstMem | SrcImm, em_test), - I(DstMem | SrcImm, em_test), + I(DstMem | SrcImm | NoWrite, em_test), + I(DstMem | SrcImm | NoWrite, em_test), I(DstMem | SrcNone | Lock, em_not), I(DstMem | SrcNone | Lock, em_neg), I(SrcMem, em_mul_ex), @@ -3920,7 +3916,7 @@ static const struct opcode opcode_table[256] = { /* 0x30 - 0x37 */ I6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - I6ALU(0, em_cmp), N, N, + I6ALU(NoWrite, em_cmp), N, N, /* 0x40 - 0x4F */ X16(D(DstReg)), /* 0x50 - 0x57 */ @@ -3946,7 +3942,7 @@ static const struct opcode opcode_table[256] = { G(DstMem | SrcImm, group1), G(ByteOp | DstMem | SrcImm | No64, group1), G(DstMem | SrcImmByte, group1), - I2bv(DstMem | SrcReg | ModRM, em_test), + I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), /* 0x88 - 0x8F */ I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), @@ -3966,12 +3962,12 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - I2bv(SrcSI | DstDI | String, em_cmp), + I2bv(SrcSI | DstDI | String | NoWrite, em_cmp), /* 0xA8 - 0xAF */ - I2bv(DstAcc | SrcImm, em_test), + I2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - I2bv(SrcAcc | DstDI | String, em_cmp), + I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ -- cgit v1.1 From 45a1467d7edff741d97a8be28342440ee65aa03c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:52 +0200 Subject: KVM: x86 emulator: convert NOT, NEG to fastop Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2af0c44..09dbdc5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2050,17 +2050,8 @@ static int em_grp2(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_not(struct x86_emulate_ctxt *ctxt) -{ - ctxt->dst.val = ~ctxt->dst.val; - return X86EMUL_CONTINUE; -} - -static int em_neg(struct x86_emulate_ctxt *ctxt) -{ - emulate_1op(ctxt, "neg"); - return X86EMUL_CONTINUE; -} +FASTOP1(not); +FASTOP1(neg); static int em_mul_ex(struct x86_emulate_ctxt *ctxt) { @@ -3753,8 +3744,8 @@ static const struct opcode group1A[] = { static const struct opcode group3[] = { I(DstMem | SrcImm | NoWrite, em_test), I(DstMem | SrcImm | NoWrite, em_test), - I(DstMem | SrcNone | Lock, em_not), - I(DstMem | SrcNone | Lock, em_neg), + F(DstMem | SrcNone | Lock, em_not), + F(DstMem | SrcNone | Lock, em_neg), I(SrcMem, em_mul_ex), I(SrcMem, em_imul_ex), I(SrcMem, em_div_ex), -- cgit v1.1 From f7857f35dbf8e7ca36ebff3f43888fd3fb0f0e70 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:53 +0200 Subject: KVM: x86 emulator: add macros for defining 2-operand fastop emulation Acked-by: Gleb Natapov Signed-off-by: Avi Kivity 
Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 09dbdc5..3b5d4dd 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -465,6 +465,17 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) ON64(FOP1E(op##q, rax)) \ FOP_END +#define FOP2E(op, dst, src) \ + FOP_ALIGN #op " %" #src ", %" #dst " \n\t" FOP_RET + +#define FASTOP2(op) \ + FOP_START(op) \ + FOP2E(op##b, al, bl) \ + FOP2E(op##w, ax, bx) \ + FOP2E(op##l, eax, ebx) \ + ON64(FOP2E(op##q, rax, rbx)) \ + FOP_END + #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ do { \ unsigned long _tmp; \ @@ -3696,6 +3707,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define D2bv(_f) D((_f) | ByteOp), D(_f) #define D2bvIP(_f, _i, _p) DIP((_f) | ByteOp, _i, _p), DIP(_f, _i, _p) #define I2bv(_f, _e) I((_f) | ByteOp, _e), I(_f, _e) +#define F2bv(_f, _e) F((_f) | ByteOp, _e), F(_f, _e) #define I2bvIP(_f, _e, _i, _p) \ IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) -- cgit v1.1 From fb864fbc72fd4e2175fb64072fe9134d3a3ab89a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Fri, 4 Jan 2013 16:18:54 +0200 Subject: KVM: x86 emulator: convert basic ALU ops to fastop Opcodes: TEST CMP ADD ADC SUB SBB XOR OR AND Acked-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 112 +++++++++++++++---------------------------------- 1 file changed, 34 insertions(+), 78 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 3b5d4dd..619a33d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3026,59 +3026,15 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_add(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "add"); - return X86EMUL_CONTINUE; -} - -static int em_or(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "or"); - return X86EMUL_CONTINUE; -} - -static int em_adc(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "adc"); - return X86EMUL_CONTINUE; -} - -static int em_sbb(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "sbb"); - return X86EMUL_CONTINUE; -} - -static int em_and(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "and"); - return X86EMUL_CONTINUE; -} - -static int em_sub(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "sub"); - return X86EMUL_CONTINUE; -} - -static int em_xor(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "xor"); - return X86EMUL_CONTINUE; -} - -static int em_cmp(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "cmp"); - return X86EMUL_CONTINUE; -} - -static int em_test(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV(ctxt, "test"); - return X86EMUL_CONTINUE; -} +FASTOP2(add); +FASTOP2(or); +FASTOP2(adc); +FASTOP2(sbb); +FASTOP2(and); +FASTOP2(sub); +FASTOP2(xor); +FASTOP2(cmp); +FASTOP2(test); static int em_xchg(struct x86_emulate_ctxt *ctxt) { @@ -3711,9 +3667,9 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) #define I2bvIP(_f, _e, _i, _p) \ IIP((_f) | ByteOp, _e, _i, _p), IIP(_f, _e, _i, _p) -#define I6ALU(_f, _e) I2bv((_f) | DstMem | SrcReg | ModRM, _e), \ - I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e), \ - I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) +#define F6ALU(_f, _e) F2bv((_f) | DstMem | SrcReg | ModRM, _e), \ + F2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, 
_e), \ + F2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e) static const struct opcode group7_rm1[] = { DI(SrcNone | Priv, monitor), @@ -3739,14 +3695,14 @@ static const struct opcode group7_rm7[] = { }; static const struct opcode group1[] = { - I(Lock, em_add), - I(Lock | PageTable, em_or), - I(Lock, em_adc), - I(Lock, em_sbb), - I(Lock | PageTable, em_and), - I(Lock, em_sub), - I(Lock, em_xor), - I(NoWrite, em_cmp), + F(Lock, em_add), + F(Lock | PageTable, em_or), + F(Lock, em_adc), + F(Lock, em_sbb), + F(Lock | PageTable, em_and), + F(Lock, em_sub), + F(Lock, em_xor), + F(NoWrite, em_cmp), }; static const struct opcode group1A[] = { @@ -3754,8 +3710,8 @@ static const struct opcode group1A[] = { }; static const struct opcode group3[] = { - I(DstMem | SrcImm | NoWrite, em_test), - I(DstMem | SrcImm | NoWrite, em_test), + F(DstMem | SrcImm | NoWrite, em_test), + F(DstMem | SrcImm | NoWrite, em_test), F(DstMem | SrcNone | Lock, em_not), F(DstMem | SrcNone | Lock, em_neg), I(SrcMem, em_mul_ex), @@ -3897,29 +3853,29 @@ static const struct escape escape_dd = { { static const struct opcode opcode_table[256] = { /* 0x00 - 0x07 */ - I6ALU(Lock, em_add), + F6ALU(Lock, em_add), I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2ES, em_pop_sreg), /* 0x08 - 0x0F */ - I6ALU(Lock | PageTable, em_or), + F6ALU(Lock | PageTable, em_or), I(ImplicitOps | Stack | No64 | Src2CS, em_push_sreg), N, /* 0x10 - 0x17 */ - I6ALU(Lock, em_adc), + F6ALU(Lock, em_adc), I(ImplicitOps | Stack | No64 | Src2SS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2SS, em_pop_sreg), /* 0x18 - 0x1F */ - I6ALU(Lock, em_sbb), + F6ALU(Lock, em_sbb), I(ImplicitOps | Stack | No64 | Src2DS, em_push_sreg), I(ImplicitOps | Stack | No64 | Src2DS, em_pop_sreg), /* 0x20 - 0x27 */ - I6ALU(Lock | PageTable, em_and), N, N, + F6ALU(Lock | PageTable, em_and), N, N, /* 0x28 - 0x2F */ - I6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), + F6ALU(Lock, em_sub), N, I(ByteOp | DstAcc | No64, em_das), /* 0x30 - 0x37 */ - I6ALU(Lock, em_xor), N, N, + F6ALU(Lock, em_xor), N, N, /* 0x38 - 0x3F */ - I6ALU(NoWrite, em_cmp), N, N, + F6ALU(NoWrite, em_cmp), N, N, /* 0x40 - 0x4F */ X16(D(DstReg)), /* 0x50 - 0x57 */ @@ -3945,7 +3901,7 @@ static const struct opcode opcode_table[256] = { G(DstMem | SrcImm, group1), G(ByteOp | DstMem | SrcImm | No64, group1), G(DstMem | SrcImmByte, group1), - I2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), + F2bv(DstMem | SrcReg | ModRM | NoWrite, em_test), I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_xchg), /* 0x88 - 0x8F */ I2bv(DstMem | SrcReg | ModRM | Mov | PageTable, em_mov), @@ -3965,12 +3921,12 @@ static const struct opcode opcode_table[256] = { I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), I2bv(SrcSI | DstDI | Mov | String, em_mov), - I2bv(SrcSI | DstDI | String | NoWrite, em_cmp), + F2bv(SrcSI | DstDI | String | NoWrite, em_cmp), /* 0xA8 - 0xAF */ - I2bv(DstAcc | SrcImm | NoWrite, em_test), + F2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), I2bv(SrcSI | DstAcc | Mov | String, em_mov), - I2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), + F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp), /* 0xB0 - 0xB7 */ X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)), /* 0xB8 - 0xBF */ -- cgit v1.1 From 0947c6dee3f6f334fb3772175152853bd90c86ea Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Thu, 20 Dec 2012 01:07:15 +0000 Subject: ACPICA: Update compilation environment settings. 
This patch does not affect the generation of the Linux binary. This patch shrinks the 20121018 divergence.diff by about 300 lines. This patch updates the architecture-specific environment settings for compiling ACPICA, as this enhancement has already been made in ACPICA. Note that the appended compiler default settings in the will deprecate some of the macros defined in the architecture specific . Thus two of the headers have been cleaned up in this patch accordingly. Signed-off-by: Lv Zheng Signed-off-by: Rafael J. Wysocki --- arch/x86/include/asm/acpi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 0c44630..b31bf97 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -49,10 +49,6 @@ /* Asm macros */ -#define ACPI_ASM_MACROS -#define BREAKPOINT3 -#define ACPI_DISABLE_IRQS() local_irq_disable() -#define ACPI_ENABLE_IRQS() local_irq_enable() #define ACPI_FLUSH_CPU_CACHE() wbinvd() int __acpi_acquire_global_lock(unsigned int *lock); -- cgit v1.1 From 772c3ff385eda0d0b4744596f87b79a17f8c9282 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Tue, 27 Nov 2012 14:32:09 +0800 Subject: x86, AMD, NB: Add multi-domain support Fix get_node_id to match northbridge IDs from the array of detected ones, allowing multi-server support such as with Numascale's NumaConnect, renaming it to 'amd_get_node_id' for consistency. Signed-off-by: Daniel J Blueman Link: http://lkml.kernel.org/r/1353997932-8475-1-git-send-email-daniel@numascale-asia.com [Boris: shorten lines to fit 80 cols] Signed-off-by: Borislav Petkov --- arch/x86/include/asm/amd_nb.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index b3341e9..a54ee1d 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -81,6 +81,23 @@ static inline struct amd_northbridge *node_to_amd_nb(int node) return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL; } +static inline u16 amd_get_node_id(struct pci_dev *pdev) +{ + struct pci_dev *misc; + int i; + + for (i = 0; i != amd_nb_num(); i++) { + misc = node_to_amd_nb(i)->misc; + + if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) && + PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn)) + return i; + } + + WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev)); + return 0; +} + #else #define amd_nb_num(x) 0 -- cgit v1.1 From 8b84c8df38d5796da2e8cd051666d203ddabcb62 Mon Sep 17 00:00:00 2001 From: Daniel J Blueman Date: Tue, 27 Nov 2012 14:32:10 +0800 Subject: x86, AMD, NB: Use u16 for northbridge IDs in amd_get_nb_id Change amd_get_nb_id to return u16 to support >255 memory controllers, and make related consistency fixes.
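[ editor's note: a hedged sketch of the caller-side effect, with hypothetical surrounding code; any storage for the returned ID must now be at least 16 bits wide:

	u16 nb_id = amd_get_nb_id(cpu);			/* was int; u16 makes IDs above 255 explicit */
	struct amd_northbridge *nb = node_to_amd_nb(nb_id);

]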
Signed-off-by: Daniel J Blueman Link: http://lkml.kernel.org/r/1353997932-8475-2-git-send-email-daniel@numascale-asia.com Signed-off-by: Borislav Petkov --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/amd.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 888184b..cf50054 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -943,7 +943,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); -extern int amd_get_nb_id(int cpu); +extern u16 amd_get_nb_id(int cpu); struct aperfmperf { u64 aperf, mperf; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15239ff..782c456 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -364,9 +364,9 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) #endif } -int amd_get_nb_id(int cpu) +u16 amd_get_nb_id(int cpu) { - int id = 0; + u16 id = 0; #ifdef CONFIG_SMP id = per_cpu(cpu_llc_id, cpu); #endif -- cgit v1.1 From c22885050e651c2f5d2a1706cdc2eb38698db968 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 8 Jan 2013 14:36:04 +0800 Subject: KVM: MMU: fix Dirty bit missed if CR0.WP = 0 If the write-fault access is from supervisor and CR0.WP is not set on the vcpu, kvm will fix it by adjusting pte access - it sets the W bit on the pte and clears the U bit. This is the one chance kvm has to change pte access from readonly to writable. Unfortunately, the pte access is the access of the 'direct' shadow page table, meaning direct sp.role.access = pte_access, so we will create a writable spte entry on the readonly shadow page table. This causes the Dirty bit not to be tracked when two guest ptes point to the same large page. Note, it has no impact other than the Dirty bit, since cr0.wp is encoded into sp.role. It can be fixed by adjusting pte access before establishing the shadow page table.
Also, after that, no mmu-specific code remains in the common function, so two parameters can be dropped from set_spte. Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 47 +++++++++++++--------------------------------- arch/x86/kvm/paging_tmpl.h | 30 ++++++++++++++++++++++++----- 2 files changed, 38 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01d7c2ad..2a3c890 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2342,8 +2342,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, } static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pte_access, int user_fault, - int write_fault, int level, + unsigned pte_access, int level, gfn_t gfn, pfn_t pfn, bool speculative, bool can_unsync, bool host_writable) { @@ -2378,9 +2377,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= (u64)pfn << PAGE_SHIFT; - if ((pte_access & ACC_WRITE_MASK) - || (!vcpu->arch.mmu.direct_map && write_fault - && !is_write_protection(vcpu) && !user_fault)) { + if (pte_access & ACC_WRITE_MASK) { /* * There are two cases: @@ -2399,19 +2396,6 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE; - if (!vcpu->arch.mmu.direct_map - && !(pte_access & ACC_WRITE_MASK)) { - spte &= ~PT_USER_MASK; - /* - * If we converted a user page to a kernel page, - * so that the kernel can write to it when cr0.wp=0, - * then we should prevent the kernel from executing it - * if SMEP is enabled. - */ - if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) - spte |= PT64_NX_MASK; - } - /* * Optimization: for pte sync, if spte was writable the hash * lookup is unnecessary (and expensive). Write protection @@ -2442,18 +2426,15 @@ done: static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pt_access, unsigned pte_access, - int user_fault, int write_fault, - int *emulate, int level, gfn_t gfn, - pfn_t pfn, bool speculative, - bool host_writable) + int write_fault, int *emulate, int level, gfn_t gfn, + pfn_t pfn, bool speculative, bool host_writable) { int was_rmapped = 0; int rmap_count; - pgprintk("%s: spte %llx access %x write_fault %d" - " user_fault %d gfn %llx\n", + pgprintk("%s: spte %llx access %x write_fault %d gfn %llx\n", __func__, *sptep, pt_access, - write_fault, user_fault, gfn); + write_fault, gfn); if (is_rmap_spte(*sptep)) { /* @@ -2477,9 +2458,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, was_rmapped = 1; } - if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, - level, gfn, pfn, speculative, true, - host_writable)) { + if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, + true, host_writable)) { if (write_fault) *emulate = 1; kvm_mmu_flush_tlb(vcpu); @@ -2571,10 +2551,9 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, return -1; for (i = 0; i < ret; i++, gfn++, start++) - mmu_set_spte(vcpu, start, ACC_ALL, - access, 0, 0, NULL, - sp->role.level, gfn, - page_to_pfn(pages[i]), true, true); + mmu_set_spte(vcpu, start, ACC_ALL, access, 0, NULL, + sp->role.level, gfn, page_to_pfn(pages[i]), + true, true); return 0; } @@ -2636,8 +2615,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, unsigned pte_access = ACC_ALL; mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, - 0, write, &emulate, - level, gfn, pfn, prefault, map_writable); + write, &emulate, level, gfn, pfn, + prefault, map_writable); direct_pte_prefetch(vcpu, iterator.sptep); ++vcpu->stat.pf_fixed;
break; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2ad76b9..0d2e711 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -326,7 +326,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. */ - mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, + mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); return true; @@ -401,7 +401,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, */ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct guest_walker *gw, - int user_fault, int write_fault, int hlevel, + int write_fault, int hlevel, pfn_t pfn, bool map_writable, bool prefault) { struct kvm_mmu_page *sp = NULL; @@ -474,7 +474,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, clear_sp_write_flooding_count(it.sptep); mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, - user_fault, write_fault, &emulate, it.level, + write_fault, &emulate, it.level, gw->gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); @@ -560,6 +560,26 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, walker.gfn, pfn, walker.pte_access, &r)) return r; + /* + * Do not change pte_access if the pfn is a mmio page, otherwise + * we will cache the incorrect access into mmio spte. + */ + if (write_fault && !(walker.pte_access & ACC_WRITE_MASK) && + !is_write_protection(vcpu) && !user_fault && + !is_noslot_pfn(pfn)) { + walker.pte_access |= ACC_WRITE_MASK; + walker.pte_access &= ~ACC_USER_MASK; + + /* + * If we converted a user page to a kernel page, + * so that the kernel can write to it when cr0.wp=0, + * then we should prevent the kernel from executing it + * if SMEP is enabled. + */ + if (kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)) + walker.pte_access &= ~ACC_EXEC_MASK; + } + spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) goto out_unlock; @@ -568,7 +588,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, kvm_mmu_free_some_pages(vcpu); if (!force_pt_level) transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); - r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, level, pfn, map_writable, prefault); ++vcpu->stat.pf_fixed; kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); @@ -743,7 +763,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; - set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, + set_spte(vcpu, &sp->spt[i], pte_access, PT_PAGE_TABLE_LEVEL, gfn, spte_to_pfn(sp->spt[i]), true, false, host_writable); -- cgit v1.1 From 7751babd3c6d365316e7a405f516bdd0bc7cec60 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 8 Jan 2013 14:36:51 +0800 Subject: KVM: MMU: fix infinite fault access retry We have two issues in the current code: - if the target gfn is used as its own page table, the guest will refault, and then kvm will use a small page size to map it.
We need two #PFs to fix its shadow page table. - sometimes, say when an exception is triggered during a vm-exit caused by #PF (see handle_exception() in vmx.c), we remove all the shadow pages shadowed by the target gfn before going into the page fault path, which causes an infinite loop: delete shadow pages shadowed by the gfn -> try to use large page size to map the gfn -> retry the access ->... To fix these issues, we can adjust the page size early if the target gfn is used as a page table Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 13 ++++--------- arch/x86/kvm/paging_tmpl.h | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a3c890..54fc61e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2380,15 +2380,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (pte_access & ACC_WRITE_MASK) { /* - * There are two cases: - * - the one is other vcpu creates new sp in the window - * between mapping_level() and acquiring mmu-lock. - * - the another case is the new sp is created by itself - * (page-fault path) when guest uses the target gfn as - * its page table. - * Both of these cases can be fixed by allowing guest to - * retry the access, it will refault, then we can establish - * the mapping by using small page. + * Other vcpu creates new sp in the window between + * mapping_level() and acquiring mmu-lock. We can + * allow guest to retry the access, the mapping can + * be fixed if guest refault. */ if (level > PT_PAGE_TABLE_LEVEL && has_wrprotected_page(vcpu->kvm, gfn, level)) diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0d2e711..3d1a352 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -487,6 +487,38 @@ out_gpte_changed: return 0; } + /* + * To see whether the mapped gfn can write its page table in the current + * mapping. + * + * It is the helper function of FNAME(page_fault). When guest uses large page + * size to map the writable gfn which is used as current page table, we should + * force kvm to use small page size to map it because new shadow page will be + * created when kvm establishes shadow page table that stop kvm using large + * page size. Do it early can avoid unnecessary #PF and emulation. + * + * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok + * since the PDPT is always shadowed, that means, we can not use large page + * size to map the gfn which is used as PDPT. + */ +static bool +FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, + struct guest_walker *walker, int user_fault) +{ + int level; + gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); + + if (!(walker->pte_access & ACC_WRITE_MASK || + (!is_write_protection(vcpu) && !user_fault))) + return false; + + for (level = walker->level; level <= walker->max_level; level++) + if (!((walker->gfn ^ walker->table_gfn[level - 1]) & mask)) + return true; + + return false; +} + /* * Page fault handler.
There are several causes for a page fault: * - there is no shadow pte for the guest pte @@ -541,7 +573,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, } if (walker.level >= PT_DIRECTORY_LEVEL) - force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn); + force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) + || FNAME(is_self_change_mapping)(vcpu, &walker, user_fault); else force_pt_level = 1; if (!force_pt_level) { -- cgit v1.1 From 6ea3038648da400cd3412925ff453041a7bd38d3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 2 Oct 2012 11:16:47 -0700 Subject: arch/x86: remove depends on CONFIG_EXPERIMENTAL The CONFIG_EXPERIMENTAL config item has not carried much meaning for a while now and is almost always enabled by default. As agreed during the Linux kernel summit, remove it from any "depends on" lines in Kconfigs. CC: Thomas Gleixner CC: Ingo Molnar CC: "H. Peter Anvin" Signed-off-by: Kees Cook Acked-by: Ingo Molnar --- arch/x86/Kconfig | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..3ac0e64 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -222,7 +222,7 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC config HAVE_INTEL_TXT def_bool y - depends on EXPERIMENTAL && INTEL_IOMMU && ACPI + depends on INTEL_IOMMU && ACPI config X86_32_SMP def_bool y @@ -617,7 +617,7 @@ config PARAVIRT config PARAVIRT_SPINLOCKS bool "Paravirtualization layer for spinlocks" - depends on PARAVIRT && SMP && EXPERIMENTAL + depends on PARAVIRT && SMP ---help--- Paravirtualized spinlocks allow a pvops backend to replace the spinlock implementation with something virtualization-friendly @@ -729,7 +729,7 @@ config GART_IOMMU config CALGARY_IOMMU bool "IBM Calgary IOMMU support" select SWIOTLB - depends on X86_64 && PCI && EXPERIMENTAL + depends on X86_64 && PCI ---help--- Support for hardware IOMMUs in IBM's xSeries x366 and x460 systems. Needed to run systems with more than 3GB of memory @@ -771,7 +771,7 @@ config IOMMU_HELPER config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" - depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL + depends on X86_64 && SMP && DEBUG_KERNEL select CPUMASK_OFFSTACK ---help--- Enable maximum number of CPUS and NUMA Nodes for this architecture. @@ -1107,7 +1107,6 @@ config HIGHMEM64G endchoice choice - depends on EXPERIMENTAL prompt "Memory split" if EXPERT default VMSPLIT_3G depends on X86_32 @@ -1184,7 +1183,7 @@ config DIRECT_GBPAGES config NUMA bool "Numa Memory Allocation and Scheduler Support" depends on SMP - depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI)) default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) ---help--- Enable NUMA (Non Uniform Memory Access) support. 
@@ -1279,7 +1278,7 @@ config ARCH_DISCONTIGMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD + depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 @@ -1593,8 +1592,7 @@ config CRASH_DUMP For more details see Documentation/kdump/kdump.txt config KEXEC_JUMP - bool "kexec jump (EXPERIMENTAL)" - depends on EXPERIMENTAL + bool "kexec jump" depends on KEXEC && HIBERNATION ---help--- Jump between original kernel and kexeced kernel and invoke @@ -2037,7 +2035,7 @@ config PCI_MMCONFIG config PCI_CNB20LE_QUIRK bool "Read CNB20LE Host Bridge Windows" if EXPERT - depends on PCI && EXPERIMENTAL + depends on PCI help Read the PCI windows out of the CNB20LE host bridge. This allows PCI hotplug to work on systems with the CNB20LE chipset which do @@ -2231,8 +2229,8 @@ config IA32_AOUT Support old a.out binaries in the 32bit emulation. config X86_X32 - bool "x32 ABI for 64-bit mode (EXPERIMENTAL)" - depends on X86_64 && IA32_EMULATION && EXPERIMENTAL + bool "x32 ABI for 64-bit mode" + depends on X86_64 && IA32_EMULATION ---help--- Include code to run binaries for the x32 native 32-bit ABI for 64-bit processors. An x32 process gets access to the -- cgit v1.1 From 01b35ab7230cd6244318c9d5fb7daddb4b0d6d2e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 2 Oct 2012 11:16:48 -0700 Subject: arch/x86/um: remove depends on CONFIG_EXPERIMENTAL The CONFIG_EXPERIMENTAL config item has not carried much meaning for a while now and is almost always enabled by default. As agreed during the Linux kernel summit, remove it from any "depends on" lines in Kconfigs. CC: Jeff Dike CC: Richard Weinberger CC: Thomas Gleixner CC: Ingo Molnar CC: "H. Peter Anvin" Signed-off-by: Kees Cook Acked-by: Richard Weinberger --- arch/x86/um/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 53c90fd..21a13ce 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -37,9 +37,8 @@ config RWSEM_GENERIC_SPINLOCK def_bool !RWSEM_XCHGADD_ALGORITHM config 3_LEVEL_PGTABLES - bool "Three-level pagetables (EXPERIMENTAL)" if !64BIT + bool "Three-level pagetables" if !64BIT default 64BIT - depends on EXPERIMENTAL help Three-level pagetables will let UML have more than 4G of physical memory. All the memory that can't be mapped directly will be treated -- cgit v1.1 From 6c0cc950ae670403a362bdcbf3cde0df33744928 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 Jan 2013 22:33:37 +0100 Subject: ACPI / PCI: Set root bridge ACPI handle in advance The ACPI handles of PCI root bridges need to be known to acpi_bind_one(), so that it can create the appropriate "firmware_node" and "physical_node" files for them, but currently the way it gets to know those handles is not exactly straightforward (to put it lightly). This is how it works, roughly: 1. acpi_bus_scan() finds the handle of a PCI root bridge, creates a struct acpi_device object for it and passes that object to acpi_pci_root_add(). 2. acpi_pci_root_add() creates a struct acpi_pci_root object, populates its "device" field with its argument's address (device->handle is the ACPI handle found in step 1). 3. The struct acpi_pci_root object created in step 2 is passed to pci_acpi_scan_root() and used to get resources that are passed to pci_create_root_bus(). 4. 
pci_create_root_bus() creates a struct pci_host_bridge object and passes its "dev" member to device_register(). 5. platform_notify(), which for systems with ACPI is set to acpi_platform_notify(), is called. So far, so good. Now it starts to be "interesting". 6. acpi_find_bridge_device() is used to find the ACPI handle of the given device (which is the PCI root bridge) and executes acpi_pci_find_root_bridge(), among other things, for the given device object. 7. acpi_pci_find_root_bridge() uses the name (sic!) of the given device object to extract the segment and bus numbers of the PCI root bridge and passes them to acpi_get_pci_rootbridge_handle(). 8. acpi_get_pci_rootbridge_handle() browses the list of ACPI PCI root bridges and finds the one that matches the given segment and bus numbers. Its handle is then used to initialize the ACPI handle of the PCI root bridge's device object by acpi_bind_one(). However, this is *exactly* the ACPI handle we started with in step 1. Needless to say, this is quite embarrassing, but it may be avoided thanks to commit f3fd0c8 (ACPI: Allow ACPI handles of devices to be initialized in advance), which makes it possible to initialize the ACPI handle of a device before passing it to device_register(). Accordingly, add a new __weak routine, pcibios_root_bridge_prepare(), defaulting to an empty implementation that can be replaced by the interested architectures (x86 and ia64 at the moment) with functions that will set the root bridge's ACPI handle before its dev member is passed to device_register(). Make both x86 and ia64 provide such implementations of pcibios_root_bridge_prepare() and remove acpi_pci_find_root_bridge() and acpi_get_pci_rootbridge_handle() that aren't necessary any more. Included is a fix for breakage on systems with non-ACPI PCI host bridges from Bjorn Helgaas. Signed-off-by: Rafael J. Wysocki Signed-off-by: Bjorn Helgaas --- arch/x86/include/asm/pci.h | 3 +++ arch/x86/pci/acpi.c | 9 +++++++++ 2 files changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index dba7805..9f437e9 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -14,6 +14,9 @@ struct pci_sysdata { int domain; /* PCI domain */ int node; /* NUMA node */ +#ifdef CONFIG_ACPI + void *acpi; /* ACPI-specific data */ +#endif #ifdef CONFIG_X86_64 void *iommu; /* IOMMU private data */ #endif diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 0c01261f..3d49094 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -522,6 +522,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) sd = &info->sd; sd->domain = domain; sd->node = node; + sd->acpi = device->handle; /* * Maybe the desired pci bus has been already scanned. In such case * it is unnecessary to scan the pci bus with the given domain,busnum.
@@ -593,6 +594,14 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) return bus; } +int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + struct pci_sysdata *sd = bridge->bus->sysdata; + + ACPI_HANDLE_SET(&bridge->dev, sd->acpi); + return 0; +} + int __init pci_acpi_init(void) { struct pci_dev *dev = NULL; -- cgit v1.1 From c972f3b125d8818748429b94cd2e59f473943a33 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 8 Jan 2013 19:43:28 +0900 Subject: KVM: Write protect the updated slot only when dirty logging is enabled Calling kvm_mmu_slot_remove_write_access() for a deleted slot does nothing but search for non-existent mmu pages which have mappings to that deleted memory; this is safe but a waste of time. Since we want to make the function rmap-based in a later patch, in a manner which makes it unsafe to be called for a deleted slot, we make the caller check that the slot is non-zero and is being dirty logged. Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/kvm/x86.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1c9c834..add5e48 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6897,7 +6897,13 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, spin_lock(&kvm->mmu_lock); if (nr_mmu_pages) kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); - kvm_mmu_slot_remove_write_access(kvm, mem->slot); + /* + * Write protect all pages for dirty logging. + * Existing largepage mappings are destroyed here and new ones will + * not be created until the end of the logging. + */ + if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) + kvm_mmu_slot_remove_write_access(kvm, mem->slot); spin_unlock(&kvm->mmu_lock); /* * If memory slot is created, or moved, we need to clear all -- cgit v1.1 From 245c3912eae642a4b7a3ce0adfcde5cc7672d5fe Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 8 Jan 2013 19:44:09 +0900 Subject: KVM: MMU: Remove unused parameter level from __rmap_write_protect() We no longer need to care about the mapping level in this function.
Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/kvm/mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 54fc61e..2a1cde5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1142,7 +1142,7 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) } static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, - int level, bool pt_protect) + bool pt_protect) { u64 *sptep; struct rmap_iterator iter; @@ -1180,7 +1180,7 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, while (mask) { rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), PT_PAGE_TABLE_LEVEL, slot); - __rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false); + __rmap_write_protect(kvm, rmapp, false); /* clear the first set bit */ mask &= mask - 1; @@ -1199,7 +1199,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn) for (i = PT_PAGE_TABLE_LEVEL; i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { rmapp = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(kvm, rmapp, i, true); + write_protected |= __rmap_write_protect(kvm, rmapp, true); } return write_protected; -- cgit v1.1 From b99db1d35295cb26b61a1c665f542504110b0ac3 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 8 Jan 2013 19:44:48 +0900 Subject: KVM: MMU: Make kvm_mmu_slot_remove_write_access() rmap based This makes it possible to release mmu_lock and reschedule conditionally in a later patch. Although this may increase the time needed to protect the whole slot when we start dirty logging, the kernel should not allow the userspace to trigger something that will hold a spinlock for such a long time as tens of milliseconds: actually there is no limit since it is roughly proportional to the number of guest pages. Another point to note is that this patch removes the only user of slot_bitmap which will cause some problems when we increase the number of slots further. 
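[ editor's note: a rough worked example of that proportionality, assuming 4KB base pages: dirty logging a 64GB slot means scanning 64GB / 4KB = 16,777,216 last-level rmap entries (plus 32,768 at the 2MB level and 64 at the 1GB level), so the walk is linear in slot size; this is what the conditional-reschedule patch later in this series addresses. ]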
Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/kvm/mmu.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a1cde5..aeb7666 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4172,25 +4172,27 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu) void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) { - struct kvm_mmu_page *sp; - bool flush = false; + struct kvm_memory_slot *memslot; + gfn_t last_gfn; + int i; - list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { - int i; - u64 *pt; + memslot = id_to_memslot(kvm->memslots, slot); + last_gfn = memslot->base_gfn + memslot->npages - 1; - if (!test_bit(slot, sp->slot_bitmap)) - continue; + for (i = PT_PAGE_TABLE_LEVEL; + i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { + unsigned long *rmapp; + unsigned long last_index, index; - pt = sp->spt; - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { - if (!is_shadow_present_pte(pt[i]) || - !is_last_spte(pt[i], sp->role.level)) - continue; + rmapp = memslot->arch.rmap[i - PT_PAGE_TABLE_LEVEL]; + last_index = gfn_to_index(last_gfn, memslot->base_gfn, i); - spte_write_protect(kvm, &pt[i], &flush, false); + for (index = 0; index <= last_index; ++index, ++rmapp) { + if (*rmapp) + __rmap_write_protect(kvm, rmapp, false); } } + kvm_flush_remote_tlbs(kvm); } -- cgit v1.1 From e12091ce7bdd3c82fa392a868d1bdccecee655d5 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 8 Jan 2013 19:45:28 +0900 Subject: KVM: Remove unused slot_bitmap from kvm_mmu_page Not needed any more. Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 5 ----- arch/x86/kvm/mmu.c | 10 ---------- 2 files changed, 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c431b33..f75e1fe 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -219,11 +219,6 @@ struct kvm_mmu_page { u64 *spt; /* hold the gfn of each spte inside spt */ gfn_t *gfns; - /* - * One bit set per slot which has memory - * in this shadow page. 
- */ - DECLARE_BITMAP(slot_bitmap, KVM_MEM_SLOTS_NUM); bool unsync; int root_count; /* Currently serving as active root */ unsigned int unsync_children; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index aeb7666..9c1b2d6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1522,7 +1522,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache); set_page_private(virt_to_page(sp->spt), (unsigned long)sp); list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - bitmap_zero(sp->slot_bitmap, KVM_MEM_SLOTS_NUM); sp->parent_ptes = 0; mmu_page_add_parent_pte(vcpu, sp, parent_pte); kvm_mod_used_mmu_pages(vcpu->kvm, +1); @@ -2183,14 +2182,6 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) } EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page); -static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) -{ - int slot = memslot_id(kvm, gfn); - struct kvm_mmu_page *sp = page_header(__pa(pte)); - - __set_bit(slot, sp->slot_bitmap); -} - /* * The function is based on mtrr_type_lookup() in * arch/x86/kernel/cpu/mtrr/generic.c @@ -2472,7 +2463,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ++vcpu->kvm->stat.lpages; if (is_shadow_present_pte(*sptep)) { - page_header_update_slot(vcpu->kvm, sptep, gfn); if (!was_rmapped) { rmap_count = rmap_add(vcpu, sptep, gfn); if (rmap_count > RMAP_RECYCLE_THRESHOLD) -- cgit v1.1 From b34cb590fb099f7929dd78d9464b70319ee12a98 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 8 Jan 2013 19:46:07 +0900 Subject: KVM: Make kvm_mmu_change_mmu_pages() take mmu_lock by itself No reason to make callers take mmu_lock since we do not need to protect kvm_mmu_change_mmu_pages() and kvm_mmu_slot_remove_write_access() together by mmu_lock in kvm_arch_commit_memory_region(): the former calls kvm_mmu_commit_zap_page() and flushes TLBs by itself. Note: we do not need to protect kvm->arch.n_requested_mmu_pages by mmu_lock as can be seen from the fact that it is read locklessly. 
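[ editor's note: the resulting caller shape, sketched; this mirrors the x86.c hunk below and is not verbatim kernel code:

	mutex_lock(&kvm->slots_lock);
	kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);	/* now takes mmu_lock internally */
	kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages;	/* read locklessly elsewhere */
	mutex_unlock(&kvm->slots_lock);

]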
Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/kvm/mmu.c | 4 ++++ arch/x86/kvm/x86.c | 9 ++++----- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9c1b2d6..f557280 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2143,6 +2143,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) * change the value */ + spin_lock(&kvm->mmu_lock); + if (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages) { while (kvm->arch.n_used_mmu_pages > goal_nr_mmu_pages && !list_empty(&kvm->arch.active_mmu_pages)) { @@ -2157,6 +2159,8 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) } kvm->arch.n_max_mmu_pages = goal_nr_mmu_pages; + + spin_unlock(&kvm->mmu_lock); } int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index add5e48..080bbdc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3270,12 +3270,10 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, return -EINVAL; mutex_lock(&kvm->slots_lock); - spin_lock(&kvm->mmu_lock); kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; - spin_unlock(&kvm->mmu_lock); mutex_unlock(&kvm->slots_lock); return 0; } @@ -6894,7 +6892,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, if (!kvm->arch.n_requested_mmu_pages) nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); - spin_lock(&kvm->mmu_lock); if (nr_mmu_pages) kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); /* @@ -6902,9 +6899,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Existing largepage mappings are destroyed here and new ones will * not be created until the end of the logging. */ - if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) + if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + spin_lock(&kvm->mmu_lock); kvm_mmu_slot_remove_write_access(kvm, mem->slot); - spin_unlock(&kvm->mmu_lock); + spin_unlock(&kvm->mmu_lock); + } /* * If memory slot is created, or moved, we need to clear all * mmio sptes. -- cgit v1.1 From 9d1beefb71146bbf5f820ab17c450808b0d0b2df Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 8 Jan 2013 19:46:48 +0900 Subject: KVM: Make kvm_mmu_slot_remove_write_access() take mmu_lock by itself Better to place mmu_lock handling and TLB flushing code together since this is a self-contained function. 
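Placing them together also pins down the ordering: the remote TLB flush has to happen before mmu_lock is dropped, so the write protection and the flush stay atomic with respect to other mmu_lock holders. In sketch form (assuming the kernel's spinlock API; the rmap walk itself is elided, see the hunks that follow):

	spin_lock(&kvm->mmu_lock);
	/* walk the slot's rmap arrays and write-protect every spte */
	kvm_flush_remote_tlbs(kvm);
	spin_unlock(&kvm->mmu_lock);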
Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/kvm/mmu.c | 3 +++ arch/x86/kvm/x86.c | 5 +---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f557280..e5dcae3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4173,6 +4173,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) memslot = id_to_memslot(kvm->memslots, slot); last_gfn = memslot->base_gfn + memslot->npages - 1; + spin_lock(&kvm->mmu_lock); + for (i = PT_PAGE_TABLE_LEVEL; i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) { unsigned long *rmapp; @@ -4188,6 +4190,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) } kvm_flush_remote_tlbs(kvm); + spin_unlock(&kvm->mmu_lock); } void kvm_mmu_zap_all(struct kvm *kvm) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 080bbdc..5483228 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6899,11 +6899,8 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, * Existing largepage mappings are destroyed here and new ones will * not be created until the end of the logging. */ - if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) { - spin_lock(&kvm->mmu_lock); + if (npages && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES)) kvm_mmu_slot_remove_write_access(kvm, mem->slot); - spin_unlock(&kvm->mmu_lock); - } /* * If memory slot is created, or moved, we need to clear all * mmio sptes. -- cgit v1.1 From 6b81b05e449e15abb60eaa4f62cdc7954f4d74f0 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 8 Jan 2013 19:47:33 +0900 Subject: KVM: MMU: Conditionally reschedule when kvm_mmu_slot_remove_write_access() takes a long time If userspace starts dirty logging for a large slot, say 64GB of memory, kvm_mmu_slot_remove_write_access() needs to hold mmu_lock for a long time, on the order of tens of milliseconds. This patch controls the lock hold time by asking the scheduler whether we need to reschedule for others. One penalty for this is that we need to flush TLBs before releasing mmu_lock. But since holding mmu_lock for a long time affects not only the guest (that is, its vCPU threads) but also the host as a whole, the cost is worth paying. In practice, the cost will not be so high because we can protect a fair amount of memory before being rescheduled: on my test environment, cond_resched_lock() was called only once for protecting 12GB of memory even without THP. We can also revisit Avi's "unlocked TLB flush" work later for completely suppressing extra TLB flushes if needed. Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/kvm/mmu.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e5dcae3..9f628f7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4186,6 +4186,11 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) for (index = 0; index <= last_index; ++index, ++rmapp) { if (*rmapp) __rmap_write_protect(kvm, rmapp, false); + + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { + kvm_flush_remote_tlbs(kvm); + cond_resched_lock(&kvm->mmu_lock); + } } } -- cgit v1.1 From 2353b47bffe4e6ab39042f470c55d41bb3ff3846 Mon Sep 17 00:00:00 2001 From: Bernd Faust Date: Wed, 5 Dec 2012 15:16:49 +0100 Subject: Round the calculated scale factor in set_cyc2ns_scale() During some experiments with an external clock (in an FPGA), we saw that the TSC clock drifted approx.
2.5ms per second. This drift was caused by the current way of calculating the scale. In our case cpu_khz had a value of 3292725. This resulted in a scale value of 310. But doing the calculation by hand shows that the actual value is 310.9886188491, so a value of 311 would be more precise. With this change the value is rounded to the nearest integer: adding cpu_khz / 2 to the dividend before dividing by cpu_khz turns the truncating division into a round-to-nearest one. Signed-off-by: Bernd Faust Signed-off-by: John Stultz --- arch/x86/kernel/tsc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cfa5d4f..8ed0857 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -617,7 +617,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) ns_now = __cycles_2_ns(tsc_now); if (cpu_khz) { - *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; + *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + + cpu_khz / 2) / cpu_khz; *offset = ns_now - mult_frac(tsc_now, *scale, (1UL << CYC2NS_SCALE_FACTOR)); } -- cgit v1.1 From e90c83f757fffdacec8b3c5eee5617dcc038338f Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 15 Jan 2013 19:45:19 +0000 Subject: x86: Select HAS_PERSISTENT_CLOCK on x86 Select HAS_PERSISTENT_CLOCK on x86 to simplify RTC options and allow the compiler to remove unused code. Signed-off-by: John Stultz --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46c3bff..a4135b5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -108,6 +108,7 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_RCU_USER_QS if X86_64 select HAVE_IRQ_TIME_ACCOUNTING + select HAS_PERSISTENT_CLOCK select GENERIC_KERNEL_THREAD select GENERIC_KERNEL_EXECVE select MODULES_USE_ELF_REL if X86_32 -- cgit v1.1 From 29c6fb7be156ae3c0e202c3903087ab6e57d3ad3 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 4 Jan 2013 11:12:44 +0000 Subject: x86/nmi: export local_touch_nmi() symbol for modules Signed-off-by: Jacob Pan Signed-off-by: Zhang Rui --- arch/x86/kernel/nmi.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f84f5c5..6030805 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -509,3 +509,4 @@ void local_touch_nmi(void) { __this_cpu_write(last_nmi_rip, 0); } +EXPORT_SYMBOL_GPL(local_touch_nmi); -- cgit v1.1 From 78c37d191dd6899d8c219fee597a17d6e3c5d288 Mon Sep 17 00:00:00 2001 From: Alexander Boyko Date: Thu, 10 Jan 2013 18:54:59 +0400 Subject: crypto: crc32 - add crc32 pclmulqdq implementation and wrappers for table implementation This patch adds crc32 algorithms to the shash crypto API. One is a wrapper around the generic crc32_le function. The second is a crc32 pclmulqdq implementation, which uses the hardware-provided PCLMULQDQ instruction to accelerate the CRC32 computation. This instruction is available starting with Intel Westmere and AMD Bulldozer CPUs. On an Intel Core i5 I got 450MB/s for the table implementation and 2100MB/s for the pclmulqdq implementation.
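As a cross-check for any accelerated implementation, a bit-at-a-time reference over the same little-endian polynomial 0xEDB88320 fits in a few lines. This is only an illustration, not part of the patch, and the function name here is made up:

	#include <stdint.h>
	#include <stddef.h>

	/* Reference CRC32-LE: one bit per iteration, slow but obviously correct.
	 * Like crc32_le, it applies no final XOR with 0xFFFFFFFF. */
	static uint32_t crc32_le_ref(uint32_t crc, const unsigned char *p, size_t len)
	{
		while (len--) {
			crc ^= *p++;
			for (int i = 0; i < 8; i++)
				crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0);
		}
		return crc;
	}

A table- or PCLMULQDQ-based crc32 must return the same value for the same (crc, buffer, length) inputs, which makes this a convenient oracle in a unit test.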
Signed-off-by: Alexander Boyko Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/crc32-pclmul_asm.S | 247 ++++++++++++++++++++++++++++++++++++ arch/x86/crypto/crc32-pclmul_glue.c | 201 +++++++++++++++++++++++++++++ 3 files changed, 450 insertions(+) create mode 100644 arch/x86/crypto/crc32-pclmul_asm.S create mode 100644 arch/x86/crypto/crc32-pclmul_glue.c (limited to 'arch/x86') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e0ca7c9..63947a8 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o +obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o @@ -52,3 +53,4 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o crc32c-intel-y := crc32c-intel_glue.o crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o +crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S new file mode 100644 index 0000000..65ea6a6 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -0,0 +1,247 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32 + * calculation. 
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE) + * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found + * at: + * http://www.intel.com/products/processor/manuals/ + * Intel(R) 64 and IA-32 Architectures Software Developer's Manual + * Volume 2B: Instruction Set Reference, N-Z + * + * Authors: Gregory Prestas + * Alexander Boyko + */ + +#include +#include + + +.align 16 +/* + * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4 + * #define CONSTANT_R1 0x154442bd4LL + * + * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596 + * #define CONSTANT_R2 0x1c6e41596LL + */ +.Lconstant_R2R1: + .octa 0x00000001c6e415960000000154442bd4 +/* + * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0 + * #define CONSTANT_R3 0x1751997d0LL + * + * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e + * #define CONSTANT_R4 0x0ccaa009eLL + */ +.Lconstant_R4R3: + .octa 0x00000000ccaa009e00000001751997d0 +/* + * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124 + * #define CONSTANT_R5 0x163cd6124LL + */ +.Lconstant_R5: + .octa 0x00000000000000000000000163cd6124 +.Lconstant_mask32: + .octa 0x000000000000000000000000FFFFFFFF +/* + * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL + * + * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL + * #define CONSTANT_RU 0x1F7011641LL + */ +.Lconstant_RUpoly: + .octa 0x00000001F701164100000001DB710641 + +#define CONSTANT %xmm0 + +#ifdef __x86_64__ +#define BUF %rdi +#define LEN %rsi +#define CRC %edx +#else +#warning Using 32bit code support +#define BUF %eax +#define LEN %edx +#define CRC %ecx +#endif + + + +.text +/** + * Calculate crc32 + * BUF - buffer (16 bytes aligned) + * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than 63 + * CRC - initial crc32 + * return %eax crc32 + * uint crc32_pclmul_le_16(unsigned char const *buffer, + * size_t len, uint crc32) + */ +.globl crc32_pclmul_le_16 +.align 4, 0x90 +crc32_pclmul_le_16:/* buffer and buffer size are 16 bytes aligned */ + movdqa (BUF), %xmm1 + movdqa 0x10(BUF), %xmm2 + movdqa 0x20(BUF), %xmm3 + movdqa 0x30(BUF), %xmm4 + movd CRC, CONSTANT + pxor CONSTANT, %xmm1 + sub $0x40, LEN + add $0x40, BUF +#ifndef __x86_64__ + /* This is for position independent code(-fPIC) support for 32bit */ + call delta +delta: + pop %ecx +#endif + cmp $0x40, LEN + jb less_64 + +#ifdef __x86_64__ + movdqa .Lconstant_R2R1(%rip), CONSTANT +#else + movdqa .Lconstant_R2R1 - delta(%ecx), CONSTANT +#endif + +loop_64:/* 64 bytes Full cache line folding */ + prefetchnta 0x40(BUF) + movdqa %xmm1, %xmm5 + movdqa %xmm2, %xmm6 + movdqa %xmm3, %xmm7 +#ifdef __x86_64__ + movdqa %xmm4, %xmm8 +#endif + PCLMULQDQ 00, CONSTANT, %xmm1 + PCLMULQDQ 00, CONSTANT, %xmm2 + PCLMULQDQ 00, CONSTANT, %xmm3 +#ifdef __x86_64__ + PCLMULQDQ 00, CONSTANT, %xmm4 +#endif + PCLMULQDQ 0x11, CONSTANT, %xmm5 + PCLMULQDQ 0x11, CONSTANT, %xmm6 + PCLMULQDQ 0x11, CONSTANT, %xmm7 +#ifdef __x86_64__ + PCLMULQDQ 0x11, CONSTANT, %xmm8 +#endif + pxor %xmm5, %xmm1 + pxor %xmm6, %xmm2 + pxor %xmm7, %xmm3 +#ifdef __x86_64__ + pxor %xmm8, %xmm4 +#else + /* xmm8 unsupported for x32 */ + movdqa %xmm4, %xmm5 + PCLMULQDQ 00, CONSTANT, %xmm4 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm4 +#endif + + pxor (BUF), %xmm1 + pxor 0x10(BUF), %xmm2 + pxor 0x20(BUF), %xmm3 + pxor 0x30(BUF), %xmm4 + + sub $0x40, LEN + add $0x40, BUF + cmp $0x40, LEN + jge loop_64 +less_64:/* Folding cache line into 128bit */ +#ifdef __x86_64__ + movdqa .Lconstant_R4R3(%rip), CONSTANT +#else + movdqa .Lconstant_R4R3 - delta(%ecx), CONSTANT +#endif + prefetchnta (BUF) + + movdqa 
%xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm2, %xmm1 + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm3, %xmm1 + + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor %xmm4, %xmm1 + + cmp $0x10, LEN + jb fold_64 +loop_16:/* Folding rest buffer into 128bit */ + movdqa %xmm1, %xmm5 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + PCLMULQDQ 0x11, CONSTANT, %xmm5 + pxor %xmm5, %xmm1 + pxor (BUF), %xmm1 + sub $0x10, LEN + add $0x10, BUF + cmp $0x10, LEN + jge loop_16 + +fold_64: + /* perform the last 64 bit fold, also adds 32 zeroes + * to the input stream */ + PCLMULQDQ 0x01, %xmm1, CONSTANT /* R4 * xmm1.low */ + psrldq $0x08, %xmm1 + pxor CONSTANT, %xmm1 + + /* final 32-bit fold */ + movdqa %xmm1, %xmm2 +#ifdef __x86_64__ + movdqa .Lconstant_R5(%rip), CONSTANT + movdqa .Lconstant_mask32(%rip), %xmm3 +#else + movdqa .Lconstant_R5 - delta(%ecx), CONSTANT + movdqa .Lconstant_mask32 - delta(%ecx), %xmm3 +#endif + psrldq $0x04, %xmm2 + pand %xmm3, %xmm1 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + + /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ +#ifdef __x86_64__ + movdqa .Lconstant_RUpoly(%rip), CONSTANT +#else + movdqa .Lconstant_RUpoly - delta(%ecx), CONSTANT +#endif + movdqa %xmm1, %xmm2 + pand %xmm3, %xmm1 + PCLMULQDQ 0x10, CONSTANT, %xmm1 + pand %xmm3, %xmm1 + PCLMULQDQ 0x00, CONSTANT, %xmm1 + pxor %xmm2, %xmm1 + pextrd $0x01, %xmm1, %eax + + ret diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c new file mode 100644 index 0000000..9d014a7 --- /dev/null +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -0,0 +1,201 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see http://www.gnu.org/licenses + * + * Please visit http://www.xyratex.com/contact if you need additional + * information or have any questions. + * + * GPL HEADER END + */ + +/* + * Copyright 2012 Xyratex Technology Limited + * + * Wrappers for kernel crypto shash api to pclmulqdq crc32 imlementation. 
+ */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CHKSUM_BLOCK_SIZE 1 +#define CHKSUM_DIGEST_SIZE 4 + +#define PCLMUL_MIN_LEN 64L /* minimum size of buffer + * for crc32_pclmul_le_16 */ +#define SCALE_F 16L /* size of xmm register */ +#define SCALE_F_MASK (SCALE_F - 1) + +u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); + +static u32 __attribute__((pure)) + crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) +{ + unsigned int iquotient; + unsigned int iremainder; + unsigned int prealign; + + if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable()) + return crc32_le(crc, p, len); + + if ((long)p & SCALE_F_MASK) { + /* align p to 16 byte */ + prealign = SCALE_F - ((long)p & SCALE_F_MASK); + + crc = crc32_le(crc, p, prealign); + len -= prealign; + p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & + ~SCALE_F_MASK); + } + iquotient = len & (~SCALE_F_MASK); + iremainder = len & SCALE_F_MASK; + + kernel_fpu_begin(); + crc = crc32_pclmul_le_16(p, iquotient, crc); + kernel_fpu_end(); + + if (iremainder) + crc = crc32_le(crc, p + iquotient, iremainder); + + return crc; +} + +static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) +{ + u32 *key = crypto_tfm_ctx(tfm); + + *key = 0; + + return 0; +} + +static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, + unsigned int keylen) +{ + u32 *mctx = crypto_shash_ctx(hash); + + if (keylen != sizeof(u32)) { + crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + *mctx = le32_to_cpup((__le32 *)key); + return 0; +} + +static int crc32_pclmul_init(struct shash_desc *desc) +{ + u32 *mctx = crypto_shash_ctx(desc->tfm); + u32 *crcp = shash_desc_ctx(desc); + + *crcp = *mctx; + + return 0; +} + +static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + u32 *crcp = shash_desc_ctx(desc); + + *crcp = crc32_pclmul_le(*crcp, data, len); + return 0; +} + +/* No final XOR 0xFFFFFFFF, like crc32_le */ +static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len, + u8 *out) +{ + *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); + return 0; +} + +static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); +} + +static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) +{ + u32 *crcp = shash_desc_ctx(desc); + + *(__le32 *)out = cpu_to_le32p(crcp); + return 0; +} + +static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, + out); +} + +static struct shash_alg alg = { + .setkey = crc32_pclmul_setkey, + .init = crc32_pclmul_init, + .update = crc32_pclmul_update, + .final = crc32_pclmul_final, + .finup = crc32_pclmul_finup, + .digest = crc32_pclmul_digest, + .descsize = sizeof(u32), + .digestsize = CHKSUM_DIGEST_SIZE, + .base = { + .cra_name = "crc32", + .cra_driver_name = "crc32-pclmul", + .cra_priority = 200, + .cra_blocksize = CHKSUM_BLOCK_SIZE, + .cra_ctxsize = sizeof(u32), + .cra_module = THIS_MODULE, + .cra_init = crc32_pclmul_cra_init, + } +}; + +static const struct x86_cpu_id crc32pclmul_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); + + +static int __init crc32_pclmul_mod_init(void) +{ + + if (!x86_match_cpu(crc32pclmul_cpu_id)) { + pr_info("PCLMULQDQ-NI 
instructions are not detected.\n"); + return -ENODEV; + } + return crypto_register_shash(&alg); +} + +static void __exit crc32_pclmul_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(crc32_pclmul_mod_init); +module_exit(crc32_pclmul_mod_fini); + +MODULE_AUTHOR("Alexander Boyko "); +MODULE_LICENSE("GPL"); + +MODULE_ALIAS("crc32"); +MODULE_ALIAS("crc32-pclmul"); -- cgit v1.1 From 3f299743839ae0a4c183035c36aa1e2807e53fe4 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:38:50 +0200 Subject: crypto: x86/aes - assembler clean-ups: use ENTRY/ENDPROC, localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-i586-asm_32.S | 15 +++++---------- arch/x86/crypto/aes-x86_64-asm_64.S | 30 +++++++++++++++--------------- 2 files changed, 20 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index b949ec2..2849dbc 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -36,6 +36,7 @@ .file "aes-i586-asm.S" .text +#include #include #define tlen 1024 // length of each of 4 'xor' arrays (256 32-bit words) @@ -219,14 +220,10 @@ // AES (Rijndael) Encryption Subroutine /* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ -.global aes_enc_blk - .extern crypto_ft_tab .extern crypto_fl_tab -.align 4 - -aes_enc_blk: +ENTRY(aes_enc_blk) push %ebp mov ctx(%esp),%ebp @@ -290,18 +287,15 @@ aes_enc_blk: mov %r0,(%ebp) pop %ebp ret +ENDPROC(aes_enc_blk) // AES (Rijndael) Decryption Subroutine /* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */ -.global aes_dec_blk - .extern crypto_it_tab .extern crypto_il_tab -.align 4 - -aes_dec_blk: +ENTRY(aes_dec_blk) push %ebp mov ctx(%esp),%ebp @@ -365,3 +359,4 @@ aes_dec_blk: mov %r0,(%ebp) pop %ebp ret +ENDPROC(aes_dec_blk) diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 5b577d5..9105655 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -15,6 +15,7 @@ .text +#include #include #define R1 %rax @@ -49,10 +50,8 @@ #define R11 %r11 #define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ - .global FUNC; \ - .type FUNC,@function; \ - .align 8; \ -FUNC: movq r1,r2; \ + ENTRY(FUNC); \ + movq r1,r2; \ movq r3,r4; \ leaq KEY+48(r8),r9; \ movq r10,r11; \ @@ -71,14 +70,15 @@ FUNC: movq r1,r2; \ je B192; \ leaq 32(r9),r9; -#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \ +#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \ movq r1,r2; \ movq r3,r4; \ movl r5 ## E,(r9); \ movl r6 ## E,4(r9); \ movl r7 ## E,8(r9); \ movl r8 ## E,12(r9); \ - ret; + ret; \ + ENDPROC(FUNC); #define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \ movzbl r2 ## H,r5 ## E; \ @@ -133,7 +133,7 @@ FUNC: movq r1,r2; \ #define entry(FUNC,KEY,B128,B192) \ prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) -#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11) +#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11) #define encrypt_round(TAB,OFFSET) \ round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ @@ -151,12 +151,12 @@ FUNC: movq r1,r2; \ /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ - entry(aes_enc_blk,0,enc128,enc192) + entry(aes_enc_blk,0,.Le128,.Le192) encrypt_round(crypto_ft_tab,-96) encrypt_round(crypto_ft_tab,-80) -enc192: encrypt_round(crypto_ft_tab,-64) +.Le192: 
encrypt_round(crypto_ft_tab,-64) encrypt_round(crypto_ft_tab,-48) -enc128: encrypt_round(crypto_ft_tab,-32) +.Le128: encrypt_round(crypto_ft_tab,-32) encrypt_round(crypto_ft_tab,-16) encrypt_round(crypto_ft_tab, 0) encrypt_round(crypto_ft_tab, 16) @@ -166,16 +166,16 @@ enc128: encrypt_round(crypto_ft_tab,-32) encrypt_round(crypto_ft_tab, 80) encrypt_round(crypto_ft_tab, 96) encrypt_final(crypto_fl_tab,112) - return + return(aes_enc_blk) /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ - entry(aes_dec_blk,240,dec128,dec192) + entry(aes_dec_blk,240,.Ld128,.Ld192) decrypt_round(crypto_it_tab,-96) decrypt_round(crypto_it_tab,-80) -dec192: decrypt_round(crypto_it_tab,-64) +.Ld192: decrypt_round(crypto_it_tab,-64) decrypt_round(crypto_it_tab,-48) -dec128: decrypt_round(crypto_it_tab,-32) +.Ld128: decrypt_round(crypto_it_tab,-32) decrypt_round(crypto_it_tab,-16) decrypt_round(crypto_it_tab, 0) decrypt_round(crypto_it_tab, 16) @@ -185,4 +185,4 @@ dec128: decrypt_round(crypto_it_tab,-32) decrypt_round(crypto_it_tab, 80) decrypt_round(crypto_it_tab, 96) decrypt_final(crypto_il_tab,112) - return + return(aes_dec_blk) -- cgit v1.1 From 8309b745bbaf3fe3df7fc67de51d0049b51452c5 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:38:55 +0200 Subject: crypto: aesni-intel - add ENDPROC statements for assembler functions Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3470624..04b7977 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1262,7 +1262,6 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst * poly = x^128 + x^127 + x^126 + x^121 + 1 * *****************************************************************************/ - ENTRY(aesni_gcm_dec) push %r12 push %r13 @@ -1437,6 +1436,7 @@ _return_T_done_decrypt: pop %r13 pop %r12 ret +ENDPROC(aesni_gcm_dec) /***************************************************************************** @@ -1700,10 +1700,12 @@ _return_T_done_encrypt: pop %r13 pop %r12 ret +ENDPROC(aesni_gcm_enc) #endif +.align 4 _key_expansion_128: _key_expansion_256a: pshufd $0b11111111, %xmm1, %xmm1 @@ -1715,6 +1717,8 @@ _key_expansion_256a: movaps %xmm0, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_128) +ENDPROC(_key_expansion_256a) .align 4 _key_expansion_192a: @@ -1739,6 +1743,7 @@ _key_expansion_192a: movaps %xmm1, 0x10(TKEYP) add $0x20, TKEYP ret +ENDPROC(_key_expansion_192a) .align 4 _key_expansion_192b: @@ -1758,6 +1763,7 @@ _key_expansion_192b: movaps %xmm0, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_192b) .align 4 _key_expansion_256b: @@ -1770,6 +1776,7 @@ _key_expansion_256b: movaps %xmm2, (TKEYP) add $0x10, TKEYP ret +ENDPROC(_key_expansion_256b) /* * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, @@ -1882,6 +1889,7 @@ ENTRY(aesni_set_key) popl KEYP #endif ret +ENDPROC(aesni_set_key) /* * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) @@ -1903,6 +1911,7 @@ ENTRY(aesni_enc) popl KEYP #endif ret +ENDPROC(aesni_enc) /* * _aesni_enc1: internal ABI @@ -1960,6 +1969,7 @@ _aesni_enc1: movaps 0x70(TKEYP), KEY AESENCLAST KEY STATE ret +ENDPROC(_aesni_enc1) /* * _aesni_enc4: internal ABI @@ -2068,6 +2078,7 @@ _aesni_enc4: AESENCLAST KEY STATE3 AESENCLAST KEY STATE4 ret +ENDPROC(_aesni_enc4) /* * void aesni_dec 
(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) @@ -2090,6 +2101,7 @@ ENTRY(aesni_dec) popl KEYP #endif ret +ENDPROC(aesni_dec) /* * _aesni_dec1: internal ABI @@ -2147,6 +2159,7 @@ _aesni_dec1: movaps 0x70(TKEYP), KEY AESDECLAST KEY STATE ret +ENDPROC(_aesni_dec1) /* * _aesni_dec4: internal ABI @@ -2255,6 +2268,7 @@ _aesni_dec4: AESDECLAST KEY STATE3 AESDECLAST KEY STATE4 ret +ENDPROC(_aesni_dec4) /* * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2312,6 +2326,7 @@ ENTRY(aesni_ecb_enc) popl LEN #endif ret +ENDPROC(aesni_ecb_enc) /* * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2370,6 +2385,7 @@ ENTRY(aesni_ecb_dec) popl LEN #endif ret +ENDPROC(aesni_ecb_dec) /* * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2411,6 +2427,7 @@ ENTRY(aesni_cbc_enc) popl IVP #endif ret +ENDPROC(aesni_cbc_enc) /* * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2501,6 +2518,7 @@ ENTRY(aesni_cbc_dec) popl IVP #endif ret +ENDPROC(aesni_cbc_dec) #ifdef __x86_64__ .align 16 @@ -2527,6 +2545,7 @@ _aesni_inc_init: MOVQ_R64_XMM TCTR_LOW INC MOVQ_R64_XMM CTR TCTR_LOW ret +ENDPROC(_aesni_inc_init) /* * _aesni_inc: internal ABI @@ -2555,6 +2574,7 @@ _aesni_inc: movaps CTR, IV PSHUFB_XMM BSWAP_MASK IV ret +ENDPROC(_aesni_inc) /* * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src, @@ -2615,4 +2635,5 @@ ENTRY(aesni_ctr_enc) movups IV, (IVP) .Lctr_enc_just_ret: ret +ENDPROC(aesni_ctr_enc) #endif -- cgit v1.1 From 5186e395fee266bede96b6906773973ed6fa2278 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:00 +0200 Subject: crypto: blowfish-x86_64: use ENTRY()/ENDPROC() for assembler functions and localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/blowfish-x86_64-asm_64.S | 39 ++++++++++++-------------------- 1 file changed, 14 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 391d245..246c670 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -20,6 +20,8 @@ * */ +#include + .file "blowfish-x86_64-asm.S" .text @@ -116,11 +118,7 @@ bswapq RX0; \ xorq RX0, (RIO); -.align 8 -.global __blowfish_enc_blk -.type __blowfish_enc_blk,@function; - -__blowfish_enc_blk: +ENTRY(__blowfish_enc_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -148,19 +146,16 @@ __blowfish_enc_blk: movq %r10, RIO; test %cl, %cl; - jnz __enc_xor; + jnz .L__enc_xor; write_block(); ret; -__enc_xor: +.L__enc_xor: xor_block(); ret; +ENDPROC(__blowfish_enc_blk) -.align 8 -.global blowfish_dec_blk -.type blowfish_dec_blk,@function; - -blowfish_dec_blk: +ENTRY(blowfish_dec_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -189,6 +184,7 @@ blowfish_dec_blk: movq %r11, %rbp; ret; +ENDPROC(blowfish_dec_blk) /********************************************************************** 4-way blowfish, four blocks parallel @@ -300,11 +296,7 @@ blowfish_dec_blk: bswapq RX3; \ xorq RX3, 24(RIO); -.align 8 -.global __blowfish_enc_blk_4way -.type __blowfish_enc_blk_4way,@function; - -__blowfish_enc_blk_4way: +ENTRY(__blowfish_enc_blk_4way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -336,7 +328,7 @@ __blowfish_enc_blk_4way: movq %r11, RIO; test %bpl, %bpl; - jnz __enc_xor4; + jnz .L__enc_xor4; write_block4(); @@ -344,18 +336,15 @@ __blowfish_enc_blk_4way: popq %rbp; ret; -__enc_xor4: +.L__enc_xor4: xor_block4(); popq %rbx; popq %rbp; ret; +ENDPROC(__blowfish_enc_blk_4way) -.align 8 -.global blowfish_dec_blk_4way -.type blowfish_dec_blk_4way,@function; - -blowfish_dec_blk_4way: +ENTRY(blowfish_dec_blk_4way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -387,4 +376,4 @@ blowfish_dec_blk_4way: popq %rbp; ret; - +ENDPROC(blowfish_dec_blk_4way) -- cgit v1.1 From 59990684b0d2b5ab57e37141412bc41cb6c9a2e9 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:05 +0200 Subject: crypto: camellia-x86_64/aes-ni: use ENTRY()/ENDPROC() for assembler functions and localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 38 ++++++++-------------- arch/x86/crypto/camellia-x86_64-asm_64.S | 50 +++++++++++++---------------- 2 files changed, 36 insertions(+), 52 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 2306d2e..cfc1634 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -15,6 +15,8 @@ * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz */ +#include + #define CAMELLIA_TABLE_BYTE_LEN 272 /* struct camellia_ctx: */ @@ -190,6 +192,7 @@ roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd: %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %rcx, (%r9)); ret; +ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) .align 8 roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: @@ -197,6 +200,7 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11, %rax, (%r9)); ret; +ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) /* * IN/OUT: @@ -709,8 +713,6 @@ roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab: .text .align 8 -.type __camellia_enc_blk16,@function; - __camellia_enc_blk16: /* input: * %rdi: ctx, CTX @@ -793,10 +795,9 @@ __camellia_enc_blk16: %xmm15, %rax, %rcx, 24); jmp .Lenc_done; +ENDPROC(__camellia_enc_blk16) .align 8 -.type __camellia_dec_blk16,@function; - __camellia_dec_blk16: /* input: * %rdi: ctx, CTX @@ -877,12 +878,9 @@ __camellia_dec_blk16: ((key_table + (24) * 8) + 4)(CTX)); jmp .Ldec_max24; +ENDPROC(__camellia_dec_blk16) -.align 8 -.global camellia_ecb_enc_16way -.type camellia_ecb_enc_16way,@function; - -camellia_ecb_enc_16way: +ENTRY(camellia_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -903,12 +901,9 @@ camellia_ecb_enc_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ecb_enc_16way) -.align 8 -.global camellia_ecb_dec_16way -.type camellia_ecb_dec_16way,@function; - -camellia_ecb_dec_16way: +ENTRY(camellia_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -934,12 +929,9 @@ camellia_ecb_dec_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ecb_dec_16way) -.align 8 -.global camellia_cbc_dec_16way -.type camellia_cbc_dec_16way,@function; - -camellia_cbc_dec_16way: +ENTRY(camellia_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -986,6 +978,7 @@ camellia_cbc_dec_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_cbc_dec_16way) #define inc_le128(x, minus_one, tmp) \ vpcmpeqq minus_one, x, tmp; \ @@ -993,11 +986,7 @@ camellia_cbc_dec_16way: vpslldq $8, tmp, tmp; \ vpsubq tmp, x, x; -.align 8 -.global camellia_ctr_16way -.type camellia_ctr_16way,@function; - -camellia_ctr_16way: +ENTRY(camellia_ctr_16way) /* input: * %rdi: ctx, CTX * %rsi: dst (16 blocks) @@ -1100,3 +1089,4 @@ camellia_ctr_16way: %xmm8, %rsi); ret; +ENDPROC(camellia_ctr_16way) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 0b33743..310319c 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -20,6 +20,8 @@ * */ +#include + .file "camellia-x86_64-asm_64.S" .text @@ -188,10 +190,7 @@ bswapq RAB0; \ movq RAB0, 4*2(RIO); -.global __camellia_enc_blk; -.type __camellia_enc_blk,@function; - -__camellia_enc_blk: +ENTRY(__camellia_enc_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -214,33 +213,31 @@ 
__camellia_enc_blk: movl $24, RT1d; /* max */ cmpb $16, key_length(CTX); - je __enc_done; + je .L__enc_done; enc_fls(24); enc_rounds(24); movl $32, RT1d; /* max */ -__enc_done: +.L__enc_done: testb RXORbl, RXORbl; movq RDST, RIO; - jnz __enc_xor; + jnz .L__enc_xor; enc_outunpack(mov, RT1); movq RRBP, %rbp; ret; -__enc_xor: +.L__enc_xor: enc_outunpack(xor, RT1); movq RRBP, %rbp; ret; +ENDPROC(__camellia_enc_blk) -.global camellia_dec_blk; -.type camellia_dec_blk,@function; - -camellia_dec_blk: +ENTRY(camellia_dec_blk) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -258,12 +255,12 @@ camellia_dec_blk: dec_inpack(RT2); cmpb $24, RT2bl; - je __dec_rounds16; + je .L__dec_rounds16; dec_rounds(24); dec_fls(24); -__dec_rounds16: +.L__dec_rounds16: dec_rounds(16); dec_fls(16); dec_rounds(8); @@ -276,6 +273,7 @@ __dec_rounds16: movq RRBP, %rbp; ret; +ENDPROC(camellia_dec_blk) /********************************************************************** 2-way camellia @@ -426,10 +424,7 @@ __dec_rounds16: bswapq RAB1; \ movq RAB1, 12*2(RIO); -.global __camellia_enc_blk_2way; -.type __camellia_enc_blk_2way,@function; - -__camellia_enc_blk_2way: +ENTRY(__camellia_enc_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -453,16 +448,16 @@ __camellia_enc_blk_2way: movl $24, RT2d; /* max */ cmpb $16, key_length(CTX); - je __enc2_done; + je .L__enc2_done; enc_fls2(24); enc_rounds2(24); movl $32, RT2d; /* max */ -__enc2_done: +.L__enc2_done: test RXORbl, RXORbl; movq RDST, RIO; - jnz __enc2_xor; + jnz .L__enc2_xor; enc_outunpack2(mov, RT2); @@ -470,17 +465,15 @@ __enc2_done: popq %rbx; ret; -__enc2_xor: +.L__enc2_xor: enc_outunpack2(xor, RT2); movq RRBP, %rbp; popq %rbx; ret; +ENDPROC(__camellia_enc_blk_2way) -.global camellia_dec_blk_2way; -.type camellia_dec_blk_2way,@function; - -camellia_dec_blk_2way: +ENTRY(camellia_dec_blk_2way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -499,12 +492,12 @@ camellia_dec_blk_2way: dec_inpack2(RT2); cmpb $24, RT2bl; - je __dec2_rounds16; + je .L__dec2_rounds16; dec_rounds2(24); dec_fls2(24); -__dec2_rounds16: +.L__dec2_rounds16: dec_rounds2(16); dec_fls2(16); dec_rounds2(8); @@ -518,3 +511,4 @@ __dec2_rounds16: movq RRBP, %rbp; movq RXOR, %rbx; ret; +ENDPROC(camellia_dec_blk_2way) -- cgit v1.1 From e17e209ea44ae69bcfdcfacd6974cf48d04e6f71 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:11 +0200 Subject: crypto: cast5-avx: use ENTRY()/ENDPROC() for assembler functions and localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 48 ++++++++++++------------------- 1 file changed, 18 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 15b00ac..c35fd5d 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -23,6 +23,8 @@ * */ +#include + .file "cast5-avx-x86_64-asm_64.S" .extern cast_s1 @@ -211,8 +213,6 @@ .text .align 16 -.type __cast5_enc_blk16,@function; - __cast5_enc_blk16: /* input: * %rdi: ctx, CTX @@ -263,14 +263,14 @@ __cast5_enc_blk16: movzbl rr(CTX), %eax; testl %eax, %eax; - jnz __skip_enc; + jnz .L__skip_enc; round(RL, RR, 12, 1); round(RR, RL, 13, 2); round(RL, RR, 14, 3); round(RR, RL, 15, 1); -__skip_enc: +.L__skip_enc: popq %rbx; popq %rbp; @@ -282,10 +282,9 @@ __skip_enc: outunpack_blocks(RR4, RL4, RTMP, RX, RKM); ret; +ENDPROC(__cast5_enc_blk16) .align 16 -.type __cast5_dec_blk16,@function; - __cast5_dec_blk16: /* input: * %rdi: ctx, CTX @@ -323,14 +322,14 @@ __cast5_dec_blk16: movzbl rr(CTX), %eax; testl %eax, %eax; - jnz __skip_dec; + jnz .L__skip_dec; round(RL, RR, 15, 1); round(RR, RL, 14, 3); round(RL, RR, 13, 2); round(RR, RL, 12, 1); -__dec_tail: +.L__dec_tail: round(RL, RR, 11, 3); round(RR, RL, 10, 2); round(RL, RR, 9, 1); @@ -355,15 +354,12 @@ __dec_tail: ret; -__skip_dec: +.L__skip_dec: vpsrldq $4, RKR, RKR; - jmp __dec_tail; + jmp .L__dec_tail; +ENDPROC(__cast5_dec_blk16) -.align 16 -.global cast5_ecb_enc_16way -.type cast5_ecb_enc_16way,@function; - -cast5_ecb_enc_16way: +ENTRY(cast5_ecb_enc_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -393,12 +389,9 @@ cast5_ecb_enc_16way: vmovdqu RL4, (7*4*4)(%r11); ret; +ENDPROC(cast5_ecb_enc_16way) -.align 16 -.global cast5_ecb_dec_16way -.type cast5_ecb_dec_16way,@function; - -cast5_ecb_dec_16way: +ENTRY(cast5_ecb_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -428,12 +421,9 @@ cast5_ecb_dec_16way: vmovdqu RL4, (7*4*4)(%r11); ret; +ENDPROC(cast5_ecb_dec_16way) -.align 16 -.global cast5_cbc_dec_16way -.type cast5_cbc_dec_16way,@function; - -cast5_cbc_dec_16way: +ENTRY(cast5_cbc_dec_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -480,12 +470,9 @@ cast5_cbc_dec_16way: popq %r12; ret; +ENDPROC(cast5_cbc_dec_16way) -.align 16 -.global cast5_ctr_16way -.type cast5_ctr_16way,@function; - -cast5_ctr_16way: +ENTRY(cast5_ctr_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -556,3 +543,4 @@ cast5_ctr_16way: popq %r12; ret; +ENDPROC(cast5_ctr_16way) -- cgit v1.1 From 1985fecf019dae1db78c90ef9af435e1462e7766 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:16 +0200 Subject: crypto: cast6-avx: use ENTRY()/ENDPROC() for assembler functions Signed-off-by: Jussi Kivilinna Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 35 ++++++++++--------------- 1 file changed, 11 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 2569d0d..f93b610 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -23,6 +23,7 @@ * */ +#include #include "glue_helper-asm-avx.S" .file "cast6-avx-x86_64-asm_64.S" @@ -250,8 +251,6 @@ .text .align 8 -.type __cast6_enc_blk8,@function; - __cast6_enc_blk8: /* input: * %rdi: ctx, CTX @@ -295,10 +294,9 @@ __cast6_enc_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; +ENDPROC(__cast6_enc_blk8) .align 8 -.type __cast6_dec_blk8,@function; - __cast6_dec_blk8: /* input: * %rdi: ctx, CTX @@ -341,12 +339,9 @@ __cast6_dec_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM); ret; +ENDPROC(__cast6_dec_blk8) -.align 8 -.global cast6_ecb_enc_8way -.type cast6_ecb_enc_8way,@function; - -cast6_ecb_enc_8way: +ENTRY(cast6_ecb_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -362,12 +357,9 @@ cast6_ecb_enc_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(cast6_ecb_enc_8way) -.align 8 -.global cast6_ecb_dec_8way -.type cast6_ecb_dec_8way,@function; - -cast6_ecb_dec_8way: +ENTRY(cast6_ecb_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -383,12 +375,9 @@ cast6_ecb_dec_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(cast6_ecb_dec_8way) -.align 8 -.global cast6_cbc_dec_8way -.type cast6_cbc_dec_8way,@function; - -cast6_cbc_dec_8way: +ENTRY(cast6_cbc_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -409,12 +398,9 @@ cast6_cbc_dec_8way: popq %r12; ret; +ENDPROC(cast6_cbc_dec_8way) -.align 8 -.global cast6_ctr_8way -.type cast6_ctr_8way,@function; - -cast6_ctr_8way: +ENTRY(cast6_ctr_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -437,3 +423,4 @@ cast6_ctr_8way: popq %r12; ret; +ENDPROC(cast6_ctr_8way) -- cgit v1.1 From 698a5abbb0c15ac273bf02f55a1385725714353a Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:21 +0200 Subject: crypto: x86/crc32c - assembler clean-up: use ENTRY/ENDPROC Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 93c6d39..cf1a7ec 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -42,6 +42,8 @@ * SOFTWARE. */ +#include + ## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction .macro LABEL prefix n @@ -68,8 +70,7 @@ # unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init); -.global crc_pcl -crc_pcl: +ENTRY(crc_pcl) #define bufp %rdi #define bufp_dw %edi #define bufp_w %di @@ -323,6 +324,9 @@ JMPTBL_ENTRY %i .noaltmacro i=i+1 .endr + +ENDPROC(crc_pcl) + ################################################################ ## PCLMULQDQ tables ## Table is 128 entries x 2 quad words each -- cgit v1.1 From b05d3f375676e57672ac5a9090cb1068fab8b85f Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:26 +0200 Subject: crypto: x86/ghash - assembler clean-up: use ENDPROC at end of assembler functions Signed-off-by: Jussi Kivilinna Acked-by: David S.
Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 1eb7f90..586f41a 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -94,6 +94,7 @@ __clmul_gf128mul_ble: pxor T2, T1 pxor T1, DATA ret +ENDPROC(__clmul_gf128mul_ble) /* void clmul_ghash_mul(char *dst, const be128 *shash) */ ENTRY(clmul_ghash_mul) @@ -105,6 +106,7 @@ ENTRY(clmul_ghash_mul) PSHUFB_XMM BSWAP DATA movups DATA, (%rdi) ret +ENDPROC(clmul_ghash_mul) /* * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, @@ -131,6 +133,7 @@ ENTRY(clmul_ghash_update) movups DATA, (%rdi) .Lupdate_just_ret: ret +ENDPROC(clmul_ghash_update) /* * void clmul_ghash_setkey(be128 *shash, const u8 *key); @@ -155,3 +158,4 @@ ENTRY(clmul_ghash_setkey) pxor %xmm1, %xmm0 movups %xmm0, (%rdi) ret +ENDPROC(clmul_ghash_setkey) -- cgit v1.1 From 044438082cf1447e37534b24beff723835464954 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:31 +0200 Subject: crypto: x86/salsa20 - assembler cleanup, use ENTRY/ENDPROC for assembler functions and rename ECRYPT_* to salsa20_* Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/salsa20-i586-asm_32.S | 28 ++++++++++++++-------------- arch/x86/crypto/salsa20-x86_64-asm_64.S | 28 +++++++++++++--------------- arch/x86/crypto/salsa20_glue.c | 5 ----- 3 files changed, 27 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S index 72eb306..329452b8 100644 --- a/arch/x86/crypto/salsa20-i586-asm_32.S +++ b/arch/x86/crypto/salsa20-i586-asm_32.S @@ -2,11 +2,12 @@ # D. J. Bernstein # Public domain.
-# enter ECRYPT_encrypt_bytes +#include + .text -.p2align 5 -.globl ECRYPT_encrypt_bytes -ECRYPT_encrypt_bytes: + +# enter salsa20_encrypt_bytes +ENTRY(salsa20_encrypt_bytes) mov %esp,%eax and $31,%eax add $256,%eax @@ -933,11 +934,10 @@ ECRYPT_encrypt_bytes: add $64,%esi # goto bytesatleast1 jmp ._bytesatleast1 -# enter ECRYPT_keysetup -.text -.p2align 5 -.globl ECRYPT_keysetup -ECRYPT_keysetup: +ENDPROC(salsa20_encrypt_bytes) + +# enter salsa20_keysetup +ENTRY(salsa20_keysetup) mov %esp,%eax and $31,%eax add $256,%eax @@ -1060,11 +1060,10 @@ ECRYPT_keysetup: # leave add %eax,%esp ret -# enter ECRYPT_ivsetup -.text -.p2align 5 -.globl ECRYPT_ivsetup -ECRYPT_ivsetup: +ENDPROC(salsa20_keysetup) + +# enter salsa20_ivsetup +ENTRY(salsa20_ivsetup) mov %esp,%eax and $31,%eax add $256,%eax @@ -1112,3 +1111,4 @@ ECRYPT_ivsetup: # leave add %eax,%esp ret +ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S index 6214a9b..9279e0b 100644 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S @@ -1,8 +1,7 @@ -# enter ECRYPT_encrypt_bytes -.text -.p2align 5 -.globl ECRYPT_encrypt_bytes -ECRYPT_encrypt_bytes: +#include + +# enter salsa20_encrypt_bytes +ENTRY(salsa20_encrypt_bytes) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -802,11 +801,10 @@ ECRYPT_encrypt_bytes: # comment:fp stack unchanged by jump # goto bytesatleast1 jmp ._bytesatleast1 -# enter ECRYPT_keysetup -.text -.p2align 5 -.globl ECRYPT_keysetup -ECRYPT_keysetup: +ENDPROC(salsa20_encrypt_bytes) + +# enter salsa20_keysetup +ENTRY(salsa20_keysetup) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -892,11 +890,10 @@ ECRYPT_keysetup: mov %rdi,%rax mov %rsi,%rdx ret -# enter ECRYPT_ivsetup -.text -.p2align 5 -.globl ECRYPT_ivsetup -ECRYPT_ivsetup: +ENDPROC(salsa20_keysetup) + +# enter salsa20_ivsetup +ENTRY(salsa20_ivsetup) mov %rsp,%r11 and $31,%r11 add $256,%r11 @@ -918,3 +915,4 @@ ECRYPT_ivsetup: mov %rdi,%rax mov %rsi,%rdx ret +ENDPROC(salsa20_ivsetup) diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c index a3a3c02..5e8e677 100644 --- a/arch/x86/crypto/salsa20_glue.c +++ b/arch/x86/crypto/salsa20_glue.c @@ -26,11 +26,6 @@ #define SALSA20_MIN_KEY_SIZE 16U #define SALSA20_MAX_KEY_SIZE 32U -// use the ECRYPT_* function names -#define salsa20_keysetup ECRYPT_keysetup -#define salsa20_ivsetup ECRYPT_ivsetup -#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes - struct salsa20_ctx { u32 input[16]; -- cgit v1.1 From 2dcfd44dee3fd3a63e3e3d3f5cbfd2436d1f98a6 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:36 +0200 Subject: crypto: x86/serpent - use ENTRY/ENDPROC for assembler functions and localize jump targets Signed-off-by: Jussi Kivilinna Acked-by: David S.
Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 35 +++++++++------------------- arch/x86/crypto/serpent-sse2-i586-asm_32.S | 20 +++++++--------- arch/x86/crypto/serpent-sse2-x86_64-asm_64.S | 20 +++++++--------- 3 files changed, 27 insertions(+), 48 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 02b0e9f..43c9386 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -24,6 +24,7 @@ * */ +#include #include "glue_helper-asm-avx.S" .file "serpent-avx-x86_64-asm_64.S" @@ -566,8 +567,6 @@ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) .align 8 -.type __serpent_enc_blk8_avx,@function; - __serpent_enc_blk8_avx: /* input: * %rdi: ctx, CTX @@ -619,10 +618,9 @@ __serpent_enc_blk8_avx: write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_enc_blk8_avx) .align 8 -.type __serpent_dec_blk8_avx,@function; - __serpent_dec_blk8_avx: /* input: * %rdi: ctx, CTX @@ -674,12 +672,9 @@ __serpent_dec_blk8_avx: write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_dec_blk8_avx) -.align 8 -.global serpent_ecb_enc_8way_avx -.type serpent_ecb_enc_8way_avx,@function; - -serpent_ecb_enc_8way_avx: +ENTRY(serpent_ecb_enc_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -693,12 +688,9 @@ serpent_ecb_enc_8way_avx: store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(serpent_ecb_enc_8way_avx) -.align 8 -.global serpent_ecb_dec_8way_avx -.type serpent_ecb_dec_8way_avx,@function; - -serpent_ecb_dec_8way_avx: +ENTRY(serpent_ecb_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -712,12 +704,9 @@ serpent_ecb_dec_8way_avx: store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); ret; +ENDPROC(serpent_ecb_dec_8way_avx) -.align 8 -.global serpent_cbc_dec_8way_avx -.type serpent_cbc_dec_8way_avx,@function; - -serpent_cbc_dec_8way_avx: +ENTRY(serpent_cbc_dec_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -731,12 +720,9 @@ serpent_cbc_dec_8way_avx: store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2); ret; +ENDPROC(serpent_cbc_dec_8way_avx) -.align 8 -.global serpent_ctr_8way_avx -.type serpent_ctr_8way_avx,@function; - -serpent_ctr_8way_avx: +ENTRY(serpent_ctr_8way_avx) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -752,3 +738,4 @@ serpent_ctr_8way_avx: store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(serpent_ctr_8way_avx) diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S index c00053d..d348f15 100644 --- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S +++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S @@ -24,6 +24,8 @@ * */ +#include + .file "serpent-sse2-i586-asm_32.S" .text @@ -510,11 +512,7 @@ pxor t0, x3; \ movdqu x3, (3*4*4)(out); -.align 8 -.global __serpent_enc_blk_4way -.type __serpent_enc_blk_4way,@function; - -__serpent_enc_blk_4way: +ENTRY(__serpent_enc_blk_4way) /* input: * arg_ctx(%esp): ctx, CTX * arg_dst(%esp): dst @@ -566,22 +564,19 @@ __serpent_enc_blk_4way: movl arg_dst(%esp), %eax; cmpb $0, arg_xor(%esp); - jnz __enc_xor4; + jnz .L__enc_xor4; write_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); ret; -__enc_xor4: +.L__enc_xor4: xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE); ret; +ENDPROC(__serpent_enc_blk_4way) -.align 8 -.global serpent_dec_blk_4way -.type serpent_dec_blk_4way,@function; - -serpent_dec_blk_4way: +ENTRY(serpent_dec_blk_4way) /* input: * arg_ctx(%esp): ctx, CTX * 
arg_dst(%esp): dst @@ -633,3 +628,4 @@ serpent_dec_blk_4way: write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA); ret; +ENDPROC(serpent_dec_blk_4way) diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S index 3ee1ff0..acc066c 100644 --- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S @@ -24,6 +24,8 @@ * */ +#include + .file "serpent-sse2-x86_64-asm_64.S" .text @@ -632,11 +634,7 @@ pxor t0, x3; \ movdqu x3, (3*4*4)(out); -.align 8 -.global __serpent_enc_blk_8way -.type __serpent_enc_blk_8way,@function; - -__serpent_enc_blk_8way: +ENTRY(__serpent_enc_blk_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -687,24 +685,21 @@ __serpent_enc_blk_8way: leaq (4*4*4)(%rsi), %rax; testb %cl, %cl; - jnz __enc_xor8; + jnz .L__enc_xor8; write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; -__enc_xor8: +.L__enc_xor8: xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2); xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2); ret; +ENDPROC(__serpent_enc_blk_8way) -.align 8 -.global serpent_dec_blk_8way -.type serpent_dec_blk_8way,@function; - -serpent_dec_blk_8way: +ENTRY(serpent_dec_blk_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -756,3 +751,4 @@ serpent_dec_blk_8way: write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2); ret; +ENDPROC(serpent_dec_blk_8way) -- cgit v1.1 From ac9d55dd42858b127bedb84f4e59789958263d37 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:41 +0200 Subject: crypto: x86/sha1 - assembler clean-ups: use ENTRY/ENDPROC Signed-off-by: Jussi Kivilinna Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ssse3_asm.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 49d6987..a410950 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -28,6 +28,8 @@ * (at your option) any later version. */ +#include + #define CTX %rdi // arg1 #define BUF %rsi // arg2 #define CNT %rdx // arg3 @@ -69,10 +71,8 @@ * param: function's name */ .macro SHA1_VECTOR_ASM name - .global \name - .type \name, @function - .align 32 -\name: + ENTRY(\name) + push %rbx push %rbp push %r12 @@ -106,7 +106,7 @@ pop %rbx ret - .size \name, .-\name + ENDPROC(\name) .endm /* -- cgit v1.1 From d3f5188dfea70e7ea6570bd4bc9d6d7dbd431e39 Mon Sep 17 00:00:00 2001 From: Jussi Kivilinna Date: Sat, 19 Jan 2013 13:39:46 +0200 Subject: crypto: x86/twofish - assembler clean-ups: use ENTRY/ENDPROC, localize jump labels Signed-off-by: Jussi Kivilinna Acked-by: David S. 
Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 35 +++++++++------------------- arch/x86/crypto/twofish-i586-asm_32.S | 11 ++++----- arch/x86/crypto/twofish-x86_64-asm_64-3way.S | 20 +++++++--------- arch/x86/crypto/twofish-x86_64-asm_64.S | 11 ++++----- 4 files changed, 29 insertions(+), 48 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index ebac16b..8d3e113 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -23,6 +23,7 @@ * */ +#include #include "glue_helper-asm-avx.S" .file "twofish-avx-x86_64-asm_64.S" @@ -243,8 +244,6 @@ vpxor x3, wkey, x3; .align 8 -.type __twofish_enc_blk8,@function; - __twofish_enc_blk8: /* input: * %rdi: ctx, CTX @@ -284,10 +283,9 @@ __twofish_enc_blk8: outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); ret; +ENDPROC(__twofish_enc_blk8) .align 8 -.type __twofish_dec_blk8,@function; - __twofish_dec_blk8: /* input: * %rdi: ctx, CTX @@ -325,12 +323,9 @@ __twofish_dec_blk8: outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); ret; +ENDPROC(__twofish_dec_blk8) -.align 8 -.global twofish_ecb_enc_8way -.type twofish_ecb_enc_8way,@function; - -twofish_ecb_enc_8way: +ENTRY(twofish_ecb_enc_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -346,12 +341,9 @@ twofish_ecb_enc_8way: store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2); ret; +ENDPROC(twofish_ecb_enc_8way) -.align 8 -.global twofish_ecb_dec_8way -.type twofish_ecb_dec_8way,@function; - -twofish_ecb_dec_8way: +ENTRY(twofish_ecb_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -367,12 +359,9 @@ twofish_ecb_dec_8way: store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); ret; +ENDPROC(twofish_ecb_dec_8way) -.align 8 -.global twofish_cbc_dec_8way -.type twofish_cbc_dec_8way,@function; - -twofish_cbc_dec_8way: +ENTRY(twofish_cbc_dec_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -393,12 +382,9 @@ twofish_cbc_dec_8way: popq %r12; ret; +ENDPROC(twofish_cbc_dec_8way) -.align 8 -.global twofish_ctr_8way -.type twofish_ctr_8way,@function; - -twofish_ctr_8way: +ENTRY(twofish_ctr_8way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -421,3 +407,4 @@ twofish_ctr_8way: popq %r12; ret; +ENDPROC(twofish_ctr_8way) diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S index 658af4b..694ea45 100644 --- a/arch/x86/crypto/twofish-i586-asm_32.S +++ b/arch/x86/crypto/twofish-i586-asm_32.S @@ -20,6 +20,7 @@ .file "twofish-i586-asm.S" .text +#include #include /* return address at 0 */ @@ -219,11 +220,7 @@ xor %esi, d ## D;\ ror $1, d ## D; -.align 4 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: +ENTRY(twofish_enc_blk) push %ebp /* save registers according to calling convention*/ push %ebx push %esi @@ -277,8 +274,9 @@ twofish_enc_blk: pop %ebp mov $1, %eax ret +ENDPROC(twofish_enc_blk) -twofish_dec_blk: +ENTRY(twofish_dec_blk) push %ebp /* save registers according to calling convention*/ push %ebx push %esi @@ -333,3 +331,4 @@ twofish_dec_blk: pop %ebp mov $1, %eax ret +ENDPROC(twofish_dec_blk) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S index 5b012a2..1c3b7ce 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S @@ -20,6 +20,8 @@ * */ +#include + .file "twofish-x86_64-asm-3way.S" .text @@ -214,11 +216,7 @@ rorq $32, RAB2; \ outunpack3(mov, RIO, 2, RAB, 2); -.align 
8 -.global __twofish_enc_blk_3way -.type __twofish_enc_blk_3way,@function; - -__twofish_enc_blk_3way: +ENTRY(__twofish_enc_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -250,7 +248,7 @@ __twofish_enc_blk_3way: popq %rbp; /* bool xor */ testb %bpl, %bpl; - jnz __enc_xor3; + jnz .L__enc_xor3; outunpack_enc3(mov); @@ -262,7 +260,7 @@ __twofish_enc_blk_3way: popq %r15; ret; -__enc_xor3: +.L__enc_xor3: outunpack_enc3(xor); popq %rbx; @@ -272,11 +270,9 @@ __enc_xor3: popq %r14; popq %r15; ret; +ENDPROC(__twofish_enc_blk_3way) -.global twofish_dec_blk_3way -.type twofish_dec_blk_3way,@function; - -twofish_dec_blk_3way: +ENTRY(twofish_dec_blk_3way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -313,4 +309,4 @@ twofish_dec_blk_3way: popq %r14; popq %r15; ret; - +ENDPROC(twofish_dec_blk_3way) diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S index 7bcf3fc..a039d21 100644 --- a/arch/x86/crypto/twofish-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-x86_64-asm_64.S @@ -20,6 +20,7 @@ .file "twofish-x86_64-asm.S" .text +#include #include #define a_offset 0 @@ -214,11 +215,7 @@ xor %r8d, d ## D;\ ror $1, d ## D; -.align 8 -.global twofish_enc_blk -.global twofish_dec_blk - -twofish_enc_blk: +ENTRY(twofish_enc_blk) pushq R1 /* %rdi contains the ctx address */ @@ -269,8 +266,9 @@ twofish_enc_blk: popq R1 movq $1,%rax ret +ENDPROC(twofish_enc_blk) -twofish_dec_blk: +ENTRY(twofish_dec_blk) pushq R1 /* %rdi contains the ctx address */ @@ -320,3 +318,4 @@ twofish_dec_blk: popq R1 movq $1,%rax ret +ENDPROC(twofish_dec_blk) -- cgit v1.1 From 7983627657db5e37594af5c28cdb623855eb554f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Jan 2013 18:05:02 +1100 Subject: crypto: crc32-pclmul - Kill warning on x86-32 This patch removes a gratuitous warning on x86-32: arch/x86/crypto/crc32-pclmul_asm.S:87:2: warning: #warning Using 32bit code support [-Wcpp] Signed-off-by: Herbert Xu --- arch/x86/crypto/crc32-pclmul_asm.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S index 65ea6a6..c8335014 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -84,7 +84,6 @@ #define LEN %rsi #define CRC %edx #else -#warning Using 32bit code support #define BUF %eax #define LEN %edx #define CRC %ecx -- cgit v1.1 From 373d4d099761cb1f637bed488ab3871945882273 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 21 Jan 2013 17:17:39 +1030 Subject: taint: add explicit flag to show whether lock dep is still OK. Fix up all callers as they were before, with make one change: an unsigned module taints the kernel, but doesn't turn off lockdep. 
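In code terms, the change adds an explicit lockdep disposition next to the taint flag. A minimal sketch of the two cases, using calls taken from the hunks below (the wrapper function is illustrative only, not part of the patch):

#include <linux/kernel.h>

/* Illustrative only: the new two-argument add_taint() convention. */
static void taint_examples(void)
{
	/* Firmware quirk: kernel state is still sound, lockdep keeps running. */
	add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);

	/* Machine check: state may be corrupted, lockdep results are now void. */
	add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
}
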
Signed-off-by: Rusty Russell --- arch/x86/kernel/cpu/amd.c | 3 +-- arch/x86/kernel/cpu/mcheck/mce.c | 2 +- arch/x86/kernel/cpu/mcheck/p5.c | 2 +- arch/x86/kernel/cpu/mcheck/winchip.c | 2 +- arch/x86/kernel/cpu/mtrr/generic.c | 2 +- arch/x86/kernel/dumpstack.c | 2 +- 6 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15239ff..5853e57 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -220,8 +220,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) */ WARN_ONCE(1, "WARNING: This combination of AMD" " processors is not suitable for SMP.\n"); - if (!test_taint(TAINT_UNSAFE_SMP)) - add_taint(TAINT_UNSAFE_SMP); + add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE); valid_k7: ; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 80dbda8..6bc15ed 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1085,7 +1085,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) /* * Set taint even when machine check was not enabled. */ - add_taint(TAINT_MACHINE_CHECK); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); severity = mce_severity(&m, cfg->tolerant, NULL); diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 2d5454c..1c044b1 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -33,7 +33,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) smp_processor_id()); } - add_taint(TAINT_MACHINE_CHECK); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 2d7998f..e9a701a 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -15,7 +15,7 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) { printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); - add_taint(TAINT_MACHINE_CHECK); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index e9fe907..fa72a39 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -542,7 +542,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, if (tmp != mask_lo) { printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); - add_taint(TAINT_FIRMWARE_WORKAROUND); + add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); mask_lo = tmp; } } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index ae42418b..c8797d5 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -232,7 +232,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) bust_spinlocks(0); die_owner = -1; - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); die_nest_count--; if (!die_nest_count) /* Nest count reaches zero, release the lock. */ -- cgit v1.1 From 06aeaaeabf69da4a3e86df532425640f51b01cef Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Sep 2012 17:15:17 +0900 Subject: ftrace: Move ARCH_SUPPORTS_FTRACE_SAVE_REGS in Kconfig Move SAVE_REGS support flag into Kconfig and rename it to CONFIG_DYNAMIC_FTRACE_WITH_REGS. 
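For orientation, the generic side of the rename follows the kernel's usual HAVE_* capability pattern, roughly as below (a sketch; the exact kernel/trace/Kconfig wording may differ):

config DYNAMIC_FTRACE_WITH_REGS
	def_bool y
	depends on DYNAMIC_FTRACE
	depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
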
This also introduces CONFIG_HAVE_DYNAMIC_FTRACE_WITH_REGS which indicates the architecture depending part of ftrace has a code that saves full registers. On the other hand, CONFIG_DYNAMIC_FTRACE_WITH_REGS indicates the code is enabled. Link: http://lkml.kernel.org/r/20120928081516.3560.72534.stgit@ltc138.sdl.hitachi.co.jp Cc: Ingo Molnar Cc: Ananth N Mavinakayanahalli Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/Kconfig | 1 + arch/x86/include/asm/ftrace.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..996ccec 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -44,6 +44,7 @@ config X86 select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_FP_TEST diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9a25b52..86cb51e 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -44,7 +44,6 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define ARCH_SUPPORTS_FTRACE_OPS 1 -#define ARCH_SUPPORTS_FTRACE_SAVE_REGS #endif #ifndef __ASSEMBLY__ -- cgit v1.1 From e7dbfe349d12eabb7783b117e0c115f6f3d9ef9e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Sep 2012 17:15:20 +0900 Subject: kprobes/x86: Move ftrace-based kprobe code into kprobes-ftrace.c Split ftrace-based kprobes code from kprobes, and introduce CONFIG_(HAVE_)KPROBES_ON_FTRACE Kconfig flags. For the cleanup reason, this also moves kprobe_ftrace check into skip_singlestep. Link: http://lkml.kernel.org/r/20120928081520.3560.25624.stgit@ltc138.sdl.hitachi.co.jp Cc: Ingo Molnar Cc: Ananth N Mavinakayanahalli Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Frederic Weisbecker Signed-off-by: Masami Hiramatsu Signed-off-by: Steven Rostedt --- arch/x86/Kconfig | 1 + arch/x86/kernel/Makefile | 1 + arch/x86/kernel/kprobes-common.h | 11 +++++ arch/x86/kernel/kprobes-ftrace.c | 93 ++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/kprobes.c | 70 +----------------------------- 5 files changed, 108 insertions(+), 68 deletions(-) create mode 100644 arch/x86/kernel/kprobes-ftrace.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 996ccec..be8b2b3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -40,6 +40,7 @@ config X86 select HAVE_DMA_CONTIGUOUS if !SWIOTLB select HAVE_KRETPROBES select HAVE_OPTPROBES + select HAVE_KPROBES_ON_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FENTRY if X86_64 select HAVE_C_RECORDMCOUNT diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 34e923a..cc5d31f 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -67,6 +67,7 @@ obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_OPTPROBES) += kprobes-opt.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h index 3230b68..2e9d4b5 100644 --- a/arch/x86/kernel/kprobes-common.h +++ b/arch/x86/kernel/kprobes-common.h @@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig return addr; } #endif + +#ifdef CONFIG_KPROBES_ON_FTRACE +extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb); +#else +static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + return 0; +} +#endif #endif diff --git a/arch/x86/kernel/kprobes-ftrace.c b/arch/x86/kernel/kprobes-ftrace.c new file mode 100644 index 0000000..70a81c7 --- /dev/null +++ b/arch/x86/kernel/kprobes-ftrace.c @@ -0,0 +1,93 @@ +/* + * Dynamic Ftrace based Kprobes Optimization + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) Hitachi Ltd., 2012 + */ +#include +#include +#include +#include +#include + +#include "kprobes-common.h" + +static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); + return 1; +} + +int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + if (kprobe_ftrace(p)) + return __skip_singlestep(p, regs, kcb); + else + return 0; +} + +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + __skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 57916c0..18114bf 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb return 1; } -#ifdef KPROBES_CAN_USE_FTRACE -static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - /* - * Emulate singlestep (and also recover regs->ip) - * as if there is a 5byte nop - */ - regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - __this_cpu_write(current_kprobe, NULL); -} -#endif - /* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function. 
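/*
 * Illustrative sketch, not part of this diff: what the new
 * CONFIG_KPROBES_ON_FTRACE path means for a kprobes user.  A probe
 * placed on a function's fentry/mcount site is dispatched through
 * kprobe_ftrace_handler() shown above, and the "single step" is
 * emulated by skip_singlestep() instead of an int3 trap.  The probed
 * symbol is an arbitrary example.
 */
#include <linux/module.h>
#include <linux/kprobes.h>

static int example_pre(struct kprobe *p, struct pt_regs *regs)
{
	pr_info("hit %s, ip=%lx\n", p->symbol_name, regs->ip);
	return 0;	/* 0 lets the emulated single-step complete */
}

static struct kprobe example_kp = {
	.symbol_name	= "do_fork",	/* hypothetical target */
	.pre_handler	= example_pre,
};

static int __init example_init(void)
{
	return register_kprobe(&example_kp);
}

static void __exit example_exit(void)
{
	unregister_kprobe(&example_kp);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");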
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } else if (kprobe_running()) { p = __this_cpu_read(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { -#ifdef KPROBES_CAN_USE_FTRACE - if (kprobe_ftrace(p)) { - skip_singlestep(p, regs, kcb); - return 1; - } -#endif - setup_singlestep(p, regs, kcb, 0); + if (!skip_singlestep(p, regs, kcb)) + setup_singlestep(p, regs, kcb, 0); return 1; } } /* else: not a kprobe fault; let the kernel handle it */ @@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -#ifdef KPROBES_CAN_USE_FTRACE -/* Ftrace callback handler for kprobes */ -void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) -{ - struct kprobe *p; - struct kprobe_ctlblk *kcb; - unsigned long flags; - - /* Disable irq for emulating a breakpoint and avoiding preempt */ - local_irq_save(flags); - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto end; - - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ - regs->ip = ip + sizeof(kprobe_opcode_t); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) - skip_singlestep(p, regs, kcb); - /* - * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. - */ - } -end: - local_irq_restore(flags); -} - -int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.insn = NULL; - p->ainsn.boostable = -1; - return 0; -} -#endif - int __init arch_init_kprobes(void) { return arch_init_optprobes(); -- cgit v1.1 From f684199f5de805ac50ea5bdec2b082882586a777 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 28 Sep 2012 17:15:22 +0900 Subject: kprobes/x86: Move kprobes stuff under arch/x86/kernel/kprobes/ Move arch-dep kprobes stuff under arch/x86/kernel/kprobes. Link: http://lkml.kernel.org/r/20120928081522.3560.75469.stgit@ltc138.sdl.hitachi.co.jp Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Masami Hiramatsu [ fixed whitespace and s/__attribute__((packed))/__packed/ ] Signed-off-by: Steven Rostedt --- arch/x86/kernel/Makefile | 4 +- arch/x86/kernel/kprobes-common.h | 113 ---- arch/x86/kernel/kprobes-ftrace.c | 93 ---- arch/x86/kernel/kprobes-opt.c | 512 ------------------ arch/x86/kernel/kprobes.c | 1064 -------------------------------------- arch/x86/kernel/kprobes/Makefile | 7 + arch/x86/kernel/kprobes/common.h | 113 ++++ arch/x86/kernel/kprobes/core.c | 1064 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/kprobes/ftrace.c | 93 ++++ arch/x86/kernel/kprobes/opt.c | 512 ++++++++++++++++++ 10 files changed, 1790 insertions(+), 1785 deletions(-) delete mode 100644 arch/x86/kernel/kprobes-common.h delete mode 100644 arch/x86/kernel/kprobes-ftrace.c delete mode 100644 arch/x86/kernel/kprobes-opt.c delete mode 100644 arch/x86/kernel/kprobes.c create mode 100644 arch/x86/kernel/kprobes/Makefile create mode 100644 arch/x86/kernel/kprobes/common.h create mode 100644 arch/x86/kernel/kprobes/core.c create mode 100644 arch/x86/kernel/kprobes/ftrace.c create mode 100644 arch/x86/kernel/kprobes/opt.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index cc5d31f..ac3b3d0 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -65,9 +65,7 @@ obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o -obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_OPTPROBES) += kprobes-opt.o -obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o +obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes-common.h deleted file mode 100644 index 2e9d4b5..0000000 --- a/arch/x86/kernel/kprobes-common.h +++ /dev/null @@ -1,113 +0,0 @@ -#ifndef __X86_KERNEL_KPROBES_COMMON_H -#define __X86_KERNEL_KPROBES_COMMON_H - -/* Kprobes and Optprobes common header */ - -#ifdef CONFIG_X86_64 -#define SAVE_REGS_STRING \ - /* Skip cs, ip, orig_ax. */ \ - " subq $24, %rsp\n" \ - " pushq %rdi\n" \ - " pushq %rsi\n" \ - " pushq %rdx\n" \ - " pushq %rcx\n" \ - " pushq %rax\n" \ - " pushq %r8\n" \ - " pushq %r9\n" \ - " pushq %r10\n" \ - " pushq %r11\n" \ - " pushq %rbx\n" \ - " pushq %rbp\n" \ - " pushq %r12\n" \ - " pushq %r13\n" \ - " pushq %r14\n" \ - " pushq %r15\n" -#define RESTORE_REGS_STRING \ - " popq %r15\n" \ - " popq %r14\n" \ - " popq %r13\n" \ - " popq %r12\n" \ - " popq %rbp\n" \ - " popq %rbx\n" \ - " popq %r11\n" \ - " popq %r10\n" \ - " popq %r9\n" \ - " popq %r8\n" \ - " popq %rax\n" \ - " popq %rcx\n" \ - " popq %rdx\n" \ - " popq %rsi\n" \ - " popq %rdi\n" \ - /* Skip orig_ax, ip, cs */ \ - " addq $24, %rsp\n" -#else -#define SAVE_REGS_STRING \ - /* Skip cs, ip, orig_ax and gs. */ \ - " subl $16, %esp\n" \ - " pushl %fs\n" \ - " pushl %es\n" \ - " pushl %ds\n" \ - " pushl %eax\n" \ - " pushl %ebp\n" \ - " pushl %edi\n" \ - " pushl %esi\n" \ - " pushl %edx\n" \ - " pushl %ecx\n" \ - " pushl %ebx\n" -#define RESTORE_REGS_STRING \ - " popl %ebx\n" \ - " popl %ecx\n" \ - " popl %edx\n" \ - " popl %esi\n" \ - " popl %edi\n" \ - " popl %ebp\n" \ - " popl %eax\n" \ - /* Skip ds, es, fs, gs, orig_ax, and ip. 
Note: don't pop cs here*/\ - " addl $24, %esp\n" -#endif - -/* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); -/* Recover instruction if given address is probed */ -extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, - unsigned long addr); -/* - * Copy an instruction and adjust the displacement if the instruction - * uses the %rip-relative addressing mode. - */ -extern int __copy_instruction(u8 *dest, u8 *src); - -/* Generate a relative-jump/call instruction */ -extern void synthesize_reljump(void *from, void *to); -extern void synthesize_relcall(void *from, void *to); - -#ifdef CONFIG_OPTPROBES -extern int arch_init_optprobes(void); -extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); -extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); -#else /* !CONFIG_OPTPROBES */ -static inline int arch_init_optprobes(void) -{ - return 0; -} -static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) -{ - return 0; -} -static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) -{ - return addr; -} -#endif - -#ifdef CONFIG_KPROBES_ON_FTRACE -extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb); -#else -static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - return 0; -} -#endif -#endif diff --git a/arch/x86/kernel/kprobes-ftrace.c b/arch/x86/kernel/kprobes-ftrace.c deleted file mode 100644 index 70a81c7..0000000 --- a/arch/x86/kernel/kprobes-ftrace.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Dynamic Ftrace based Kprobes Optimization - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) Hitachi Ltd., 2012 - */ -#include -#include -#include -#include -#include - -#include "kprobes-common.h" - -static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - /* - * Emulate singlestep (and also recover regs->ip) - * as if there is a 5byte nop - */ - regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; - if (unlikely(p->post_handler)) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, regs, 0); - } - __this_cpu_write(current_kprobe, NULL); - return 1; -} - -int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb); - else - return 0; -} - -/* Ftrace callback handler for kprobes */ -void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs) -{ - struct kprobe *p; - struct kprobe_ctlblk *kcb; - unsigned long flags; - - /* Disable irq for emulating a breakpoint and avoiding preempt */ - local_irq_save(flags); - - p = get_kprobe((kprobe_opcode_t *)ip); - if (unlikely(!p) || kprobe_disabled(p)) - goto end; - - kcb = get_kprobe_ctlblk(); - if (kprobe_running()) { - kprobes_inc_nmissed_count(p); - } else { - /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ - regs->ip = ip + sizeof(kprobe_opcode_t); - - __this_cpu_write(current_kprobe, p); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) - __skip_singlestep(p, regs, kcb); - /* - * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. - */ - } -end: - local_irq_restore(flags); -} - -int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) -{ - p->ainsn.insn = NULL; - p->ainsn.boostable = -1; - return 0; -} diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes-opt.c deleted file mode 100644 index c5e410e..0000000 --- a/arch/x86/kernel/kprobes-opt.c +++ /dev/null @@ -1,512 +0,0 @@ -/* - * Kernel Probes Jump Optimization (Optprobes) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
- * - * Copyright (C) IBM Corporation, 2002, 2004 - * Copyright (C) Hitachi Ltd., 2012 - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "kprobes-common.h" - -unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) -{ - struct optimized_kprobe *op; - struct kprobe *kp; - long offs; - int i; - - for (i = 0; i < RELATIVEJUMP_SIZE; i++) { - kp = get_kprobe((void *)addr - i); - /* This function only handles jump-optimized kprobe */ - if (kp && kprobe_optimized(kp)) { - op = container_of(kp, struct optimized_kprobe, kp); - /* If op->list is not empty, op is under optimizing */ - if (list_empty(&op->list)) - goto found; - } - } - - return addr; -found: - /* - * If the kprobe can be optimized, original bytes which can be - * overwritten by jump destination address. In this case, original - * bytes must be recovered from op->optinsn.copied_insn buffer. - */ - memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - if (addr == (unsigned long)kp->addr) { - buf[0] = kp->opcode; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - } else { - offs = addr - (unsigned long)kp->addr - 1; - memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); - } - - return (unsigned long)buf; -} - -/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ -static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) -{ -#ifdef CONFIG_X86_64 - *addr++ = 0x48; - *addr++ = 0xbf; -#else - *addr++ = 0xb8; -#endif - *(unsigned long *)addr = val; -} - -static void __used __kprobes kprobes_optinsn_template_holder(void) -{ - asm volatile ( - ".global optprobe_template_entry\n" - "optprobe_template_entry:\n" -#ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rsi\n" - ".global optprobe_template_val\n" - "optprobe_template_val:\n" - ASM_NOP5 - ASM_NOP5 - ".global optprobe_template_call\n" - "optprobe_template_call:\n" - ASM_NOP5 - /* Move flags to rsp */ - " movq 144(%rsp), %rdx\n" - " movq %rdx, 152(%rsp)\n" - RESTORE_REGS_STRING - /* Skip flags entry */ - " addq $8, %rsp\n" - " popfq\n" -#else /* CONFIG_X86_32 */ - " pushf\n" - SAVE_REGS_STRING - " movl %esp, %edx\n" - ".global optprobe_template_val\n" - "optprobe_template_val:\n" - ASM_NOP5 - ".global optprobe_template_call\n" - "optprobe_template_call:\n" - ASM_NOP5 - RESTORE_REGS_STRING - " addl $4, %esp\n" /* skip cs */ - " popf\n" -#endif - ".global optprobe_template_end\n" - "optprobe_template_end:\n"); -} - -#define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) -#define TMPL_CALL_IDX \ - ((long)&optprobe_template_call - (long)&optprobe_template_entry) -#define TMPL_END_IDX \ - ((long)&optprobe_template_end - (long)&optprobe_template_entry) - -#define INT3_SIZE sizeof(kprobe_opcode_t) - -/* Optimized kprobe call back function: called from optinsn */ -static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long flags; - - /* This is possible if op is under delayed unoptimizing */ - if (kprobe_disabled(&op->kp)) - return; - - local_irq_save(flags); - if (kprobe_running()) { - kprobes_inc_nmissed_count(&op->kp); - } else { - /* Save skipped registers */ -#ifdef CONFIG_X86_64 - regs->cs = __KERNEL_CS; -#else - 
regs->cs = __KERNEL_CS | get_kernel_rpl(); - regs->gs = 0; -#endif - regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; - regs->orig_ax = ~0UL; - - __this_cpu_write(current_kprobe, &op->kp); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - opt_pre_handler(&op->kp, regs); - __this_cpu_write(current_kprobe, NULL); - } - local_irq_restore(flags); -} - -static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) -{ - int len = 0, ret; - - while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) - return -EINVAL; - len += ret; - } - /* Check whether the address range is reserved */ - if (ftrace_text_reserved(src, src + len - 1) || - alternatives_text_reserved(src, src + len - 1) || - jump_label_text_reserved(src, src + len - 1)) - return -EBUSY; - - return len; -} - -/* Check whether insn is indirect jump */ -static int __kprobes insn_is_indirect_jump(struct insn *insn) -{ - return ((insn->opcode.bytes[0] == 0xff && - (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ - insn->opcode.bytes[0] == 0xea); /* Segment based jump */ -} - -/* Check whether insn jumps into specified address range */ -static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) -{ - unsigned long target = 0; - - switch (insn->opcode.bytes[0]) { - case 0xe0: /* loopne */ - case 0xe1: /* loope */ - case 0xe2: /* loop */ - case 0xe3: /* jcxz */ - case 0xe9: /* near relative jump */ - case 0xeb: /* short relative jump */ - break; - case 0x0f: - if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ - break; - return 0; - default: - if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ - break; - return 0; - } - target = (unsigned long)insn->next_byte + insn->immediate.value; - - return (start <= target && target <= start + len); -} - -/* Decode whole function to ensure any instructions don't jump into target */ -static int __kprobes can_optimize(unsigned long paddr) -{ - unsigned long addr, size = 0, offset = 0; - struct insn insn; - kprobe_opcode_t buf[MAX_INSN_SIZE]; - - /* Lookup symbol including addr */ - if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) - return 0; - - /* - * Do not optimize in the entry code due to the unstable - * stack handling. - */ - if ((paddr >= (unsigned long)__entry_text_start) && - (paddr < (unsigned long)__entry_text_end)) - return 0; - - /* Check there is enough space for a relative jump. */ - if (size - offset < RELATIVEJUMP_SIZE) - return 0; - - /* Decode instructions */ - addr = paddr - offset; - while (addr < paddr - offset + size) { /* Decode until function end */ - if (search_exception_tables(addr)) - /* - * Since some fixup code will jumps into this function, - * we can't optimize kprobe in this function. - */ - return 0; - kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); - insn_get_length(&insn); - /* Another subsystem puts a breakpoint */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) - return 0; - /* Recover address */ - insn.kaddr = (void *)addr; - insn.next_byte = (void *)(addr + insn.length); - /* Check any instructions don't jump into target */ - if (insn_is_indirect_jump(&insn) || - insn_jump_into_range(&insn, paddr + INT3_SIZE, - RELATIVE_ADDR_SIZE)) - return 0; - addr += insn.length; - } - - return 1; -} - -/* Check optimized_kprobe can actually be optimized. 
*/ -int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) -{ - int i; - struct kprobe *p; - - for (i = 1; i < op->optinsn.size; i++) { - p = get_kprobe(op->kp.addr + i); - if (p && !kprobe_disabled(p)) - return -EEXIST; - } - - return 0; -} - -/* Check the addr is within the optimized instructions. */ -int __kprobes -arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) -{ - return ((unsigned long)op->kp.addr <= addr && - (unsigned long)op->kp.addr + op->optinsn.size > addr); -} - -/* Free optimized instruction slot */ -static __kprobes -void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) -{ - if (op->optinsn.insn) { - free_optinsn_slot(op->optinsn.insn, dirty); - op->optinsn.insn = NULL; - op->optinsn.size = 0; - } -} - -void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) -{ - __arch_remove_optimized_kprobe(op, 1); -} - -/* - * Copy replacing target instructions - * Target instructions MUST be relocatable (checked inside) - * This is called when new aggr(opt)probe is allocated or reused. - */ -int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) -{ - u8 *buf; - int ret; - long rel; - - if (!can_optimize((unsigned long)op->kp.addr)) - return -EILSEQ; - - op->optinsn.insn = get_optinsn_slot(); - if (!op->optinsn.insn) - return -ENOMEM; - - /* - * Verify if the address gap is in 2GB range, because this uses - * a relative jump. - */ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; - if (abs(rel) > 0x7fffffff) - return -ERANGE; - - buf = (u8 *)op->optinsn.insn; - - /* Copy instructions into the out-of-line buffer */ - ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); - if (ret < 0) { - __arch_remove_optimized_kprobe(op, 0); - return ret; - } - op->optinsn.size = ret; - - /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); - - /* Set probe information */ - synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); - - /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); - - /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, - (u8 *)op->kp.addr + op->optinsn.size); - - flush_icache_range((unsigned long) buf, - (unsigned long) buf + TMPL_END_IDX + - op->optinsn.size + RELATIVEJUMP_SIZE); - return 0; -} - -#define MAX_OPTIMIZE_PROBES 256 -static struct text_poke_param *jump_poke_params; -static struct jump_poke_buffer { - u8 buf[RELATIVEJUMP_SIZE]; -} *jump_poke_bufs; - -static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - s32 rel = (s32)((long)op->optinsn.insn - - ((long)op->kp.addr + RELATIVEJUMP_SIZE)); - - /* Backup instructions which will be replaced by jump address */ - memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, - RELATIVE_ADDR_SIZE); - - insn_buf[0] = RELATIVEJUMP_OPCODE; - *(s32 *)(&insn_buf[1]) = rel; - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Replace breakpoints (int3) with relative jumps. - * Caller must call with locking kprobe_mutex and text_mutex. 
- */ -void __kprobes arch_optimize_kprobes(struct list_head *oplist) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - WARN_ON(kprobe_disabled(&op->kp)); - /* Setup param */ - setup_optimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_del_init(&op->list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp_batch(jump_poke_params, c); -} - -static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, - u8 *insn_buf, - struct optimized_kprobe *op) -{ - /* Set int3 to first byte for kprobes */ - insn_buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - - tprm->addr = op->kp.addr; - tprm->opcode = insn_buf; - tprm->len = RELATIVEJUMP_SIZE; -} - -/* - * Recover original instructions and breakpoints from relative jumps. - * Caller must call with locking kprobe_mutex. - */ -extern void arch_unoptimize_kprobes(struct list_head *oplist, - struct list_head *done_list) -{ - struct optimized_kprobe *op, *tmp; - int c = 0; - - list_for_each_entry_safe(op, tmp, oplist, list) { - /* Setup param */ - setup_unoptimize_kprobe(&jump_poke_params[c], - jump_poke_bufs[c].buf, op); - list_move(&op->list, done_list); - if (++c >= MAX_OPTIMIZE_PROBES) - break; - } - - /* - * text_poke_smp doesn't support NMI/MCE code modifying. - * However, since kprobes itself also doesn't support NMI/MCE - * code probing, it's not a problem. - */ - text_poke_smp_batch(jump_poke_params, c); -} - -/* Replace a relative jump with a breakpoint (int3). */ -void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) -{ - u8 buf[RELATIVEJUMP_SIZE]; - - /* Set int3 to first byte for kprobes */ - buf[0] = BREAKPOINT_INSTRUCTION; - memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); - text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); -} - -int __kprobes -setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) -{ - struct optimized_kprobe *op; - - if (p->flags & KPROBE_FLAG_OPTIMIZED) { - /* This kprobe is really able to run optimized path. */ - op = container_of(p, struct optimized_kprobe, kp); - /* Detour through copied instructions */ - regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; - if (!reenter) - reset_current_kprobe(); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - -int __kprobes arch_init_optprobes(void) -{ - /* Allocate code buffer and parameter array */ - jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_bufs) - return -ENOMEM; - - jump_poke_params = kmalloc(sizeof(struct text_poke_param) * - MAX_OPTIMIZE_PROBES, GFP_KERNEL); - if (!jump_poke_params) { - kfree(jump_poke_bufs); - jump_poke_bufs = NULL; - return -ENOMEM; - } - - return 0; -} diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c deleted file mode 100644 index 18114bf..0000000 --- a/arch/x86/kernel/kprobes.c +++ /dev/null @@ -1,1064 +0,0 @@ -/* - * Kernel Probes (KProbes) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2002, 2004 - * - * 2002-Oct Created by Vamsi Krishna S Kernel - * Probes initial implementation ( includes contributions from - * Rusty Russell). - * 2004-July Suparna Bhattacharya added jumper probes - * interface to access function arguments. - * 2004-Oct Jim Keniston and Prasanna S Panchamukhi - * adapted for x86_64 from i386. - * 2005-Mar Roland McGrath - * Fixed to handle %rip-relative addressing mode correctly. - * 2005-May Hien Nguyen , Jim Keniston - * and Prasanna S Panchamukhi - * added function-return probes. - * 2005-May Rusty Lynch - * Added function return probes functionality - * 2006-Feb Masami Hiramatsu added - * kprobe-booster and kretprobe-booster for i386. - * 2007-Dec Masami Hiramatsu added kprobe-booster - * and kretprobe-booster for x86-64 - * 2007-Dec Masami Hiramatsu , Arjan van de Ven - * and Jim Keniston - * unified x86 kprobes code. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "kprobes-common.h" - -void jprobe_return_end(void); - -DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; -DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); - -#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) - -#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ - (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ - (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ - (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ - (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ - << (row % 32)) - /* - * Undefined/reserved opcodes, conditional jump, Opcode Extension - * Groups, and some special opcodes can not boost. - * This is non-const and volatile to keep gcc from statically - * optimizing it out, as variable_test_bit makes gcc think only - * *(unsigned long*) is used. 
- */ -static volatile u32 twobyte_is_boostable[256 / 32] = { - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ - /* ---------------------------------------------- */ - W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ - W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */ - W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ - W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ - W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ - W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ - W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */ - W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ - W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */ - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ - W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */ - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */ - W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ - W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */ - W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */ - W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */ - /* ----------------------------------------------- */ - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ -}; -#undef W - -struct kretprobe_blackpoint kretprobe_blacklist[] = { - {"__switch_to", }, /* This function switches only current task, but - doesn't switch kernel stack.*/ - {NULL, NULL} /* Terminator */ -}; - -const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); - -static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) -{ - struct __arch_relative_insn { - u8 op; - s32 raddr; - } __attribute__((packed)) *insn; - - insn = (struct __arch_relative_insn *)from; - insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); - insn->op = op; -} - -/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void __kprobes synthesize_reljump(void *from, void *to) -{ - __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); -} - -/* Insert a call instruction at address 'from', which calls address 'to'.*/ -void __kprobes synthesize_relcall(void *from, void *to) -{ - __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); -} - -/* - * Skip the prefixes of the instruction. - */ -static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) -{ - insn_attr_t attr; - - attr = inat_get_opcode_attribute((insn_byte_t)*insn); - while (inat_is_legacy_prefix(attr)) { - insn++; - attr = inat_get_opcode_attribute((insn_byte_t)*insn); - } -#ifdef CONFIG_X86_64 - if (inat_is_rex_prefix(attr)) - insn++; -#endif - return insn; -} - -/* - * Returns non-zero if opcode is boostable. - * RIP relative instructions are adjusted at copying time in 64 bits mode - */ -int __kprobes can_boost(kprobe_opcode_t *opcodes) -{ - kprobe_opcode_t opcode; - kprobe_opcode_t *orig_opcodes = opcodes; - - if (search_exception_tables((unsigned long)opcodes)) - return 0; /* Page fault may occur on this address. 
*/ - -retry: - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - opcode = *(opcodes++); - - /* 2nd-byte opcode */ - if (opcode == 0x0f) { - if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) - return 0; - return test_bit(*opcodes, - (unsigned long *)twobyte_is_boostable); - } - - switch (opcode & 0xf0) { -#ifdef CONFIG_X86_64 - case 0x40: - goto retry; /* REX prefix is boostable */ -#endif - case 0x60: - if (0x63 < opcode && opcode < 0x67) - goto retry; /* prefixes */ - /* can't boost Address-size override and bound */ - return (opcode != 0x62 && opcode != 0x67); - case 0x70: - return 0; /* can't boost conditional jump */ - case 0xc0: - /* can't boost software-interruptions */ - return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; - case 0xd0: - /* can boost AA* and XLAT */ - return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); - case 0xe0: - /* can boost in/out and absolute jmps */ - return ((opcode & 0x04) || opcode == 0xea); - case 0xf0: - if ((opcode & 0x0c) == 0 && opcode != 0xf1) - goto retry; /* lock/rep(ne) prefix */ - /* clear and set flags are boostable */ - return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); - default: - /* segment override prefixes are boostable */ - if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) - goto retry; /* prefixes */ - /* CS override prefix and call are not boostable */ - return (opcode != 0x2e && opcode != 0x9a); - } -} - -static unsigned long -__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) -{ - struct kprobe *kp; - - kp = get_kprobe((void *)addr); - /* There is no probe, return original address */ - if (!kp) - return addr; - - /* - * Basically, kp->ainsn.insn has an original instruction. - * However, RIP-relative instruction can not do single-stepping - * at different place, __copy_instruction() tweaks the displacement of - * that instruction. In that case, we can't recover the instruction - * from the kp->ainsn.insn. - * - * On the other hand, kp->opcode has a copy of the first byte of - * the probed instruction, which is overwritten by int3. And - * the instruction at kp->addr is not modified by kprobes except - * for the first byte, we can recover the original instruction - * from it and kp->opcode. - */ - memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); - buf[0] = kp->opcode; - return (unsigned long)buf; -} - -/* - * Recover the probed instruction at addr for further analysis. - * Caller must lock kprobes by kprobe_mutex, or disable preemption - * for preventing to release referencing kprobes. - */ -unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) -{ - unsigned long __addr; - - __addr = __recover_optprobed_insn(buf, addr); - if (__addr != addr) - return __addr; - - return __recover_probed_insn(buf, addr); -} - -/* Check if paddr is at an instruction boundary */ -static int __kprobes can_probe(unsigned long paddr) -{ - unsigned long addr, __addr, offset = 0; - struct insn insn; - kprobe_opcode_t buf[MAX_INSN_SIZE]; - - if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) - return 0; - - /* Decode instructions */ - addr = paddr - offset; - while (addr < paddr) { - /* - * Check if the instruction has been modified by another - * kprobe, in which case we replace the breakpoint by the - * original instruction in our buffer. - * Also, jump optimization will change the breakpoint to - * relative-jump. Since the relative-jump itself is - * normally used, we just go through if there is no kprobe. 
- */ - __addr = recover_probed_instruction(buf, addr); - kernel_insn_init(&insn, (void *)__addr); - insn_get_length(&insn); - - /* - * Another debugging subsystem might insert this breakpoint. - * In that case, we can't recover it. - */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) - return 0; - addr += insn.length; - } - - return (addr == paddr); -} - -/* - * Returns non-zero if opcode modifies the interrupt flag. - */ -static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) -{ - /* Skip prefixes */ - insn = skip_prefixes(insn); - - switch (*insn) { - case 0xfa: /* cli */ - case 0xfb: /* sti */ - case 0xcf: /* iret/iretd */ - case 0x9d: /* popf/popfd */ - return 1; - } - - return 0; -} - -/* - * Copy an instruction and adjust the displacement if the instruction - * uses the %rip-relative addressing mode. - * If it does, Return the address of the 32-bit displacement word. - * If not, return null. - * Only applicable to 64-bit x86. - */ -int __kprobes __copy_instruction(u8 *dest, u8 *src) -{ - struct insn insn; - kprobe_opcode_t buf[MAX_INSN_SIZE]; - - kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); - insn_get_length(&insn); - /* Another subsystem puts a breakpoint, failed to recover */ - if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) - return 0; - memcpy(dest, insn.kaddr, insn.length); - -#ifdef CONFIG_X86_64 - if (insn_rip_relative(&insn)) { - s64 newdisp; - u8 *disp; - kernel_insn_init(&insn, dest); - insn_get_displacement(&insn); - /* - * The copied instruction uses the %rip-relative addressing - * mode. Adjust the displacement for the difference between - * the original location of this instruction and the location - * of the copy that will actually be run. The tricky bit here - * is making sure that the sign extension happens correctly in - * this calculation, since we need a signed 32-bit result to - * be sign-extended to 64 bits when it's added to the %rip - * value and yield the same 64-bit result that the sign- - * extension of the original signed 32-bit displacement would - * have given. - */ - newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; - BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ - disp = (u8 *) dest + insn_offset_displacement(&insn); - *(s32 *) disp = (s32) newdisp; - } -#endif - return insn.length; -} - -static void __kprobes arch_copy_kprobe(struct kprobe *p) -{ - /* Copy an instruction with recovering if other optprobe modifies it.*/ - __copy_instruction(p->ainsn.insn, p->addr); - - /* - * __copy_instruction can modify the displacement of the instruction, - * but it doesn't affect boostable check. - */ - if (can_boost(p->ainsn.insn)) - p->ainsn.boostable = 0; - else - p->ainsn.boostable = -1; - - /* Also, displacement change doesn't affect the first byte */ - p->opcode = p->ainsn.insn[0]; -} - -int __kprobes arch_prepare_kprobe(struct kprobe *p) -{ - if (alternatives_text_reserved(p->addr, p->addr)) - return -EINVAL; - - if (!can_probe((unsigned long)p->addr)) - return -EILSEQ; - /* insn: must be on special executable page on x86. 
*/ - p->ainsn.insn = get_insn_slot(); - if (!p->ainsn.insn) - return -ENOMEM; - arch_copy_kprobe(p); - return 0; -} - -void __kprobes arch_arm_kprobe(struct kprobe *p) -{ - text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); -} - -void __kprobes arch_disarm_kprobe(struct kprobe *p) -{ - text_poke(p->addr, &p->opcode, 1); -} - -void __kprobes arch_remove_kprobe(struct kprobe *p) -{ - if (p->ainsn.insn) { - free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); - p->ainsn.insn = NULL; - } -} - -static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - kcb->prev_kprobe.kp = kprobe_running(); - kcb->prev_kprobe.status = kcb->kprobe_status; - kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags; - kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; -} - -static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) -{ - __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); - kcb->kprobe_status = kcb->prev_kprobe.status; - kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; - kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; -} - -static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) -{ - __this_cpu_write(current_kprobe, p); - kcb->kprobe_saved_flags = kcb->kprobe_old_flags - = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - if (is_IF_modifier(p->ainsn.insn)) - kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; -} - -static void __kprobes clear_btf(void) -{ - if (test_thread_flag(TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl &= ~DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - } -} - -static void __kprobes restore_btf(void) -{ - if (test_thread_flag(TIF_BLOCKSTEP)) { - unsigned long debugctl = get_debugctlmsr(); - - debugctl |= DEBUGCTLMSR_BTF; - update_debugctlmsr(debugctl); - } -} - -void __kprobes -arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - unsigned long *sara = stack_addr(regs); - - ri->ret_addr = (kprobe_opcode_t *) *sara; - - /* Replace the return addr with trampoline addr */ - *sara = (unsigned long) &kretprobe_trampoline; -} - -static void __kprobes -setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) -{ - if (setup_detour_execution(p, regs, reenter)) - return; - -#if !defined(CONFIG_PREEMPT) - if (p->ainsn.boostable == 1 && !p->post_handler) { - /* Boost up -- we can execute copied instructions directly */ - if (!reenter) - reset_current_kprobe(); - /* - * Reentering boosted probe doesn't reset current_kprobe, - * nor set current_kprobe, because it doesn't use single - * stepping. - */ - regs->ip = (unsigned long)p->ainsn.insn; - preempt_enable_no_resched(); - return; - } -#endif - if (reenter) { - save_previous_kprobe(kcb); - set_current_kprobe(p, regs, kcb); - kcb->kprobe_status = KPROBE_REENTER; - } else - kcb->kprobe_status = KPROBE_HIT_SS; - /* Prepare real single stepping */ - clear_btf(); - regs->flags |= X86_EFLAGS_TF; - regs->flags &= ~X86_EFLAGS_IF; - /* single step inline if the instruction is an int3 */ - if (p->opcode == BREAKPOINT_INSTRUCTION) - regs->ip = (unsigned long)p->addr; - else - regs->ip = (unsigned long)p->ainsn.insn; -} - -/* - * We have reentered the kprobe_handler(), since another probe was hit while - * within the handler. We save the original kprobes variables and just single - * step on the instruction of the new probe without calling any user handlers. 
- */ -static int __kprobes -reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) -{ - switch (kcb->kprobe_status) { - case KPROBE_HIT_SSDONE: - case KPROBE_HIT_ACTIVE: - kprobes_inc_nmissed_count(p); - setup_singlestep(p, regs, kcb, 1); - break; - case KPROBE_HIT_SS: - /* A probe has been hit in the codepath leading up to, or just - * after, single-stepping of a probed instruction. This entire - * codepath should strictly reside in .kprobes.text section. - * Raise a BUG or we'll continue in an endless reentering loop - * and eventually a stack overflow. - */ - printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", - p->addr); - dump_kprobe(p); - BUG(); - default: - /* impossible cases */ - WARN_ON(1); - return 0; - } - - return 1; -} - -/* - * Interrupts are disabled on entry as trap3 is an interrupt gate and they - * remain disabled throughout this function. - */ -static int __kprobes kprobe_handler(struct pt_regs *regs) -{ - kprobe_opcode_t *addr; - struct kprobe *p; - struct kprobe_ctlblk *kcb; - - addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); - /* - * We don't want to be preempted for the entire - * duration of kprobe processing. We conditionally - * re-enable preemption at the end of this function, - * and also in reenter_kprobe() and setup_singlestep(). - */ - preempt_disable(); - - kcb = get_kprobe_ctlblk(); - p = get_kprobe(addr); - - if (p) { - if (kprobe_running()) { - if (reenter_kprobe(p, regs, kcb)) - return 1; - } else { - set_current_kprobe(p, regs, kcb); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; - - /* - * If we have no pre-handler or it returned 0, we - * continue with normal processing. If we have a - * pre-handler and it returned non-zero, it prepped - * for calling the break_handler below on re-entry - * for jprobe processing, so get out doing nothing - * more here. - */ - if (!p->pre_handler || !p->pre_handler(p, regs)) - setup_singlestep(p, regs, kcb, 0); - return 1; - } - } else if (*addr != BREAKPOINT_INSTRUCTION) { - /* - * The breakpoint instruction was removed right - * after we hit it. Another cpu has removed - * either a probepoint or a debugger breakpoint - * at this address. In either case, no further - * handling of this interrupt is appropriate. - * Back up over the (now missing) int3 and run - * the original instruction. - */ - regs->ip = (unsigned long)addr; - preempt_enable_no_resched(); - return 1; - } else if (kprobe_running()) { - p = __this_cpu_read(current_kprobe); - if (p->break_handler && p->break_handler(p, regs)) { - if (!skip_singlestep(p, regs, kcb)) - setup_singlestep(p, regs, kcb, 0); - return 1; - } - } /* else: not a kprobe fault; let the kernel handle it */ - - preempt_enable_no_resched(); - return 0; -} - -/* - * When a retprobed function returns, this code saves registers and - * calls trampoline_handler() runs, which calls the kretprobe's handler. - */ -static void __used __kprobes kretprobe_trampoline_holder(void) -{ - asm volatile ( - ".global kretprobe_trampoline\n" - "kretprobe_trampoline: \n" -#ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rdi\n" - " call trampoline_handler\n" - /* Replace saved sp with true return address. 
*/ - " movq %rax, 152(%rsp)\n" - RESTORE_REGS_STRING - " popfq\n" -#else - " pushf\n" - SAVE_REGS_STRING - " movl %esp, %eax\n" - " call trampoline_handler\n" - /* Move flags to cs */ - " movl 56(%esp), %edx\n" - " movl %edx, 52(%esp)\n" - /* Replace saved flags with true return address. */ - " movl %eax, 56(%esp)\n" - RESTORE_REGS_STRING - " popf\n" -#endif - " ret\n"); -} - -/* - * Called from kretprobe_trampoline - */ -static __used __kprobes void *trampoline_handler(struct pt_regs *regs) -{ - struct kretprobe_instance *ri = NULL; - struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; - unsigned long flags, orig_ret_address = 0; - unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; - kprobe_opcode_t *correct_ret_addr = NULL; - - INIT_HLIST_HEAD(&empty_rp); - kretprobe_hash_lock(current, &head, &flags); - /* fixup registers */ -#ifdef CONFIG_X86_64 - regs->cs = __KERNEL_CS; -#else - regs->cs = __KERNEL_CS | get_kernel_rpl(); - regs->gs = 0; -#endif - regs->ip = trampoline_address; - regs->orig_ax = ~0UL; - - /* - * It is possible to have multiple instances associated with a given - * task either because multiple functions in the call path have - * return probes installed on them, and/or more than one - * return probe was registered for a target function. - * - * We can handle this because: - * - instances are always pushed into the head of the list - * - when multiple return probes are registered for the same - * function, the (chronologically) first instance's ret_addr - * will be the real return address, and all the rest will - * point to kretprobe_trampoline. - */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long)ri->ret_addr; - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - kretprobe_assert(ri, orig_ret_address, trampoline_address); - - correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) - /* another task is sharing our hash bucket */ - continue; - - orig_ret_address = (unsigned long)ri->ret_addr; - if (ri->rp && ri->rp->handler) { - __this_cpu_write(current_kprobe, &ri->rp->kp); - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; - ri->ret_addr = correct_ret_addr; - ri->rp->handler(ri, regs); - __this_cpu_write(current_kprobe, NULL); - } - - recycle_rp_inst(ri, &empty_rp); - - if (orig_ret_address != trampoline_address) - /* - * This is the real return address. Any other - * instances associated with this task are for - * other calls deeper on the call stack - */ - break; - } - - kretprobe_hash_unlock(current, &flags); - - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { - hlist_del(&ri->hlist); - kfree(ri); - } - return (void *)orig_ret_address; -} - -/* - * Called after single-stepping. p->addr is the address of the - * instruction whose first byte has been replaced by the "int 3" - * instruction. To avoid the SMP problems that can occur when we - * temporarily put back the original opcode to single-step, we - * single-stepped a copy of the instruction. The address of this - * copy is p->ainsn.insn. - * - * This function prepares to return from the post-single-step - * interrupt. 
We have to fix up the stack as follows: - * - * 0) Except in the case of absolute or indirect jump or call instructions, - * the new ip is relative to the copied instruction. We need to make - * it relative to the original instruction. - * - * 1) If the single-stepped instruction was pushfl, then the TF and IF - * flags are set in the just-pushed flags, and may need to be cleared. - * - * 2) If the single-stepped instruction was a call, the return address - * that is atop the stack is the address following the copied instruction. - * We need to make it the address following the original instruction. - * - * If this is the first time we've single-stepped the instruction at - * this probepoint, and the instruction is boostable, boost it: add a - * jump instruction after the copied instruction, that jumps to the next - * instruction after the probepoint. - */ -static void __kprobes -resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) -{ - unsigned long *tos = stack_addr(regs); - unsigned long copy_ip = (unsigned long)p->ainsn.insn; - unsigned long orig_ip = (unsigned long)p->addr; - kprobe_opcode_t *insn = p->ainsn.insn; - - /* Skip prefixes */ - insn = skip_prefixes(insn); - - regs->flags &= ~X86_EFLAGS_TF; - switch (*insn) { - case 0x9c: /* pushfl */ - *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF); - *tos |= kcb->kprobe_old_flags; - break; - case 0xc2: /* iret/ret/lret */ - case 0xc3: - case 0xca: - case 0xcb: - case 0xcf: - case 0xea: /* jmp absolute -- ip is correct */ - /* ip is already adjusted, no more changes required */ - p->ainsn.boostable = 1; - goto no_change; - case 0xe8: /* call relative - Fix return addr */ - *tos = orig_ip + (*tos - copy_ip); - break; -#ifdef CONFIG_X86_32 - case 0x9a: /* call absolute -- same as call absolute, indirect */ - *tos = orig_ip + (*tos - copy_ip); - goto no_change; -#endif - case 0xff: - if ((insn[1] & 0x30) == 0x10) { - /* - * call absolute, indirect - * Fix return addr; ip is correct. - * But this is not boostable - */ - *tos = orig_ip + (*tos - copy_ip); - goto no_change; - } else if (((insn[1] & 0x31) == 0x20) || - ((insn[1] & 0x31) == 0x21)) { - /* - * jmp near and far, absolute indirect - * ip is correct. And this is boostable - */ - p->ainsn.boostable = 1; - goto no_change; - } - default: - break; - } - - if (p->ainsn.boostable == 0) { - if ((regs->ip > copy_ip) && - (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) { - /* - * These instructions can be executed directly if it - * jumps back to correct address. - */ - synthesize_reljump((void *)regs->ip, - (void *)orig_ip + (regs->ip - copy_ip)); - p->ainsn.boostable = 1; - } else { - p->ainsn.boostable = -1; - } - } - - regs->ip += orig_ip - copy_ip; - -no_change: - restore_btf(); -} - -/* - * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled throughout this function. - */ -static int __kprobes post_kprobe_handler(struct pt_regs *regs) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - if (!cur) - return 0; - - resume_execution(cur, regs, kcb); - regs->flags |= kcb->kprobe_saved_flags; - - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); - } - - /* Restore back the original saved kprobes variables and continue. 
*/ - if (kcb->kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(kcb); - goto out; - } - reset_current_kprobe(); -out: - preempt_enable_no_resched(); - - /* - * if somebody else is singlestepping across a probe point, flags - * will have TF set, in which case, continue the remaining processing - * of do_debug, as if this is not a probe hit. - */ - if (regs->flags & X86_EFLAGS_TF) - return 0; - - return 1; -} - -int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) -{ - struct kprobe *cur = kprobe_running(); - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - switch (kcb->kprobe_status) { - case KPROBE_HIT_SS: - case KPROBE_REENTER: - /* - * We are here because the instruction being single - * stepped caused a page fault. We reset the current - * kprobe and the ip points back to the probe address - * and allow the page fault handler to continue as a - * normal page fault. - */ - regs->ip = (unsigned long)cur->addr; - regs->flags |= kcb->kprobe_old_flags; - if (kcb->kprobe_status == KPROBE_REENTER) - restore_previous_kprobe(kcb); - else - reset_current_kprobe(); - preempt_enable_no_resched(); - break; - case KPROBE_HIT_ACTIVE: - case KPROBE_HIT_SSDONE: - /* - * We increment the nmissed count for accounting, - * we can also use npre/npostfault count for accounting - * these specific fault cases. - */ - kprobes_inc_nmissed_count(cur); - - /* - * We come here because instructions in the pre/post - * handler caused the page_fault, this could happen - * if handler tries to access user space by - * copy_from_user(), get_user() etc. Let the - * user-specified handler try to fix it first. - */ - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - /* - * In case the user-specified fault handler returned - * zero, try to fix up. - */ - if (fixup_exception(regs)) - return 1; - - /* - * fixup routine could not handle it, - * Let do_page_fault() fix it. - */ - break; - default: - break; - } - return 0; -} - -/* - * Wrapper routine for handling exceptions. - */ -int __kprobes -kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) -{ - struct die_args *args = data; - int ret = NOTIFY_DONE; - - if (args->regs && user_mode_vm(args->regs)) - return ret; - - switch (val) { - case DIE_INT3: - if (kprobe_handler(args->regs)) - ret = NOTIFY_STOP; - break; - case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; - ret = NOTIFY_STOP; - } - break; - case DIE_GPF: - /* - * To be potentially processing a kprobe fault and to - * trust the result from kprobe_running(), we have - * be non-preemptible. - */ - if (!preemptible() && kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) - ret = NOTIFY_STOP; - break; - default: - break; - } - return ret; -} - -int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct jprobe *jp = container_of(p, struct jprobe, kp); - unsigned long addr; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - kcb->jprobe_saved_regs = *regs; - kcb->jprobe_saved_sp = stack_addr(regs); - addr = (unsigned long)(kcb->jprobe_saved_sp); - - /* - * As Linus pointed out, gcc assumes that the callee - * owns the argument space and could overwrite it, e.g. - * tailcall optimization. So, to be absolutely safe - * we also save and restore enough stack bytes to cover - * the argument area. 
- */ - memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, - MIN_STACK_SIZE(addr)); - regs->flags &= ~X86_EFLAGS_IF; - trace_hardirqs_off(); - regs->ip = (unsigned long)(jp->entry); - return 1; -} - -void __kprobes jprobe_return(void) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - - asm volatile ( -#ifdef CONFIG_X86_64 - " xchg %%rbx,%%rsp \n" -#else - " xchgl %%ebx,%%esp \n" -#endif - " int3 \n" - " .globl jprobe_return_end\n" - " jprobe_return_end: \n" - " nop \n"::"b" - (kcb->jprobe_saved_sp):"memory"); -} - -int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) -{ - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - u8 *addr = (u8 *) (regs->ip - 1); - struct jprobe *jp = container_of(p, struct jprobe, kp); - - if ((addr > (u8 *) jprobe_return) && - (addr < (u8 *) jprobe_return_end)) { - if (stack_addr(regs) != kcb->jprobe_saved_sp) { - struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; - printk(KERN_ERR - "current sp %p does not match saved sp %p\n", - stack_addr(regs), kcb->jprobe_saved_sp); - printk(KERN_ERR "Saved registers for jprobe %p\n", jp); - show_regs(saved_regs); - printk(KERN_ERR "Current registers\n"); - show_regs(regs); - BUG(); - } - *regs = kcb->jprobe_saved_regs; - memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), - kcb->jprobes_stack, - MIN_STACK_SIZE(kcb->jprobe_saved_sp)); - preempt_enable_no_resched(); - return 1; - } - return 0; -} - -int __init arch_init_kprobes(void) -{ - return arch_init_optprobes(); -} - -int __kprobes arch_trampoline_kprobe(struct kprobe *p) -{ - return 0; -} diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile new file mode 100644 index 0000000..0d33169 --- /dev/null +++ b/arch/x86/kernel/kprobes/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for kernel probes +# + +obj-$(CONFIG_KPROBES) += core.o +obj-$(CONFIG_OPTPROBES) += opt.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h new file mode 100644 index 0000000..2e9d4b5 --- /dev/null +++ b/arch/x86/kernel/kprobes/common.h @@ -0,0 +1,113 @@ +#ifndef __X86_KERNEL_KPROBES_COMMON_H +#define __X86_KERNEL_KPROBES_COMMON_H + +/* Kprobes and Optprobes common header */ + +#ifdef CONFIG_X86_64 +#define SAVE_REGS_STRING \ + /* Skip cs, ip, orig_ax. */ \ + " subq $24, %rsp\n" \ + " pushq %rdi\n" \ + " pushq %rsi\n" \ + " pushq %rdx\n" \ + " pushq %rcx\n" \ + " pushq %rax\n" \ + " pushq %r8\n" \ + " pushq %r9\n" \ + " pushq %r10\n" \ + " pushq %r11\n" \ + " pushq %rbx\n" \ + " pushq %rbp\n" \ + " pushq %r12\n" \ + " pushq %r13\n" \ + " pushq %r14\n" \ + " pushq %r15\n" +#define RESTORE_REGS_STRING \ + " popq %r15\n" \ + " popq %r14\n" \ + " popq %r13\n" \ + " popq %r12\n" \ + " popq %rbp\n" \ + " popq %rbx\n" \ + " popq %r11\n" \ + " popq %r10\n" \ + " popq %r9\n" \ + " popq %r8\n" \ + " popq %rax\n" \ + " popq %rcx\n" \ + " popq %rdx\n" \ + " popq %rsi\n" \ + " popq %rdi\n" \ + /* Skip orig_ax, ip, cs */ \ + " addq $24, %rsp\n" +#else +#define SAVE_REGS_STRING \ + /* Skip cs, ip, orig_ax and gs. */ \ + " subl $16, %esp\n" \ + " pushl %fs\n" \ + " pushl %es\n" \ + " pushl %ds\n" \ + " pushl %eax\n" \ + " pushl %ebp\n" \ + " pushl %edi\n" \ + " pushl %esi\n" \ + " pushl %edx\n" \ + " pushl %ecx\n" \ + " pushl %ebx\n" +#define RESTORE_REGS_STRING \ + " popl %ebx\n" \ + " popl %ecx\n" \ + " popl %edx\n" \ + " popl %esi\n" \ + " popl %edi\n" \ + " popl %ebp\n" \ + " popl %eax\n" \ + /* Skip ds, es, fs, gs, orig_ax, and ip. 
Note: don't pop cs here*/\ + " addl $24, %esp\n" +#endif + +/* Ensure if the instruction can be boostable */ +extern int can_boost(kprobe_opcode_t *instruction); +/* Recover instruction if given address is probed */ +extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, + unsigned long addr); +/* + * Copy an instruction and adjust the displacement if the instruction + * uses the %rip-relative addressing mode. + */ +extern int __copy_instruction(u8 *dest, u8 *src); + +/* Generate a relative-jump/call instruction */ +extern void synthesize_reljump(void *from, void *to); +extern void synthesize_relcall(void *from, void *to); + +#ifdef CONFIG_OPTPROBES +extern int arch_init_optprobes(void); +extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); +extern unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr); +#else /* !CONFIG_OPTPROBES */ +static inline int arch_init_optprobes(void) +{ + return 0; +} +static inline int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +{ + return 0; +} +static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) +{ + return addr; +} +#endif + +#ifdef CONFIG_KPROBES_ON_FTRACE +extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb); +#else +static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + return 0; +} +#endif +#endif diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c new file mode 100644 index 0000000..e124554 --- /dev/null +++ b/arch/x86/kernel/kprobes/core.c @@ -0,0 +1,1064 @@ +/* + * Kernel Probes (KProbes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * 2002-Oct Created by Vamsi Krishna S Kernel + * Probes initial implementation ( includes contributions from + * Rusty Russell). + * 2004-July Suparna Bhattacharya added jumper probes + * interface to access function arguments. + * 2004-Oct Jim Keniston and Prasanna S Panchamukhi + * adapted for x86_64 from i386. + * 2005-Mar Roland McGrath + * Fixed to handle %rip-relative addressing mode correctly. + * 2005-May Hien Nguyen , Jim Keniston + * and Prasanna S Panchamukhi + * added function-return probes. + * 2005-May Rusty Lynch + * Added function return probes functionality + * 2006-Feb Masami Hiramatsu added + * kprobe-booster and kretprobe-booster for i386. + * 2007-Dec Masami Hiramatsu added kprobe-booster + * and kretprobe-booster for x86-64 + * 2007-Dec Masami Hiramatsu , Arjan van de Ven + * and Jim Keniston + * unified x86 kprobes code. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" + +void jprobe_return_end(void); + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +#define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) + +#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ + (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ + (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \ + (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \ + (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \ + << (row % 32)) + /* + * Undefined/reserved opcodes, conditional jump, Opcode Extension + * Groups, and some special opcodes can not boost. + * This is non-const and volatile to keep gcc from statically + * optimizing it out, as variable_test_bit makes gcc think only + * *(unsigned long*) is used. + */ +static volatile u32 twobyte_is_boostable[256 / 32] = { + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ + /* ---------------------------------------------- */ + W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */ + W(0x10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 10 */ + W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 20 */ + W(0x30, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */ + W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */ + W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ + W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1) | /* 60 */ + W(0x70, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */ + W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 80 */ + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ + W(0xa0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* a0 */ + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) , /* b0 */ + W(0xc0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */ + W(0xd0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) , /* d0 */ + W(0xe0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1) | /* e0 */ + W(0xf0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0) /* f0 */ + /* ----------------------------------------------- */ + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ +}; +#undef W + +struct kretprobe_blackpoint kretprobe_blacklist[] = { + {"__switch_to", }, /* This function switches only current task, but + doesn't switch kernel stack.*/ + {NULL, NULL} /* Terminator */ +}; + +const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); + +static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) +{ + struct __arch_relative_insn { + u8 op; + s32 raddr; + } __packed *insn; + + insn = (struct __arch_relative_insn *)from; + insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); + insn->op = op; +} + +/* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ +void __kprobes synthesize_reljump(void *from, void *to) +{ + __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); +} + +/* Insert a call instruction at address 'from', which calls address 'to'.*/ +void __kprobes synthesize_relcall(void *from, void *to) +{ + __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); +} + +/* + * Skip the prefixes of the instruction. 
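+ *
+ * For example (illustrative encoding, not from this patch): a
+ * probed "lock cmpxchg %rcx,(%rdx)" is encoded as f0 48 0f b1 0a;
+ * skip_prefixes() steps past the 0xf0 legacy prefix and, on
+ * x86-64, past the 0x48 REX prefix, returning a pointer to the
+ * 0x0f opcode byte that callers such as is_IF_modifier() and
+ * resume_execution() actually want to inspect.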
+ */ +static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) +{ + insn_attr_t attr; + + attr = inat_get_opcode_attribute((insn_byte_t)*insn); + while (inat_is_legacy_prefix(attr)) { + insn++; + attr = inat_get_opcode_attribute((insn_byte_t)*insn); + } +#ifdef CONFIG_X86_64 + if (inat_is_rex_prefix(attr)) + insn++; +#endif + return insn; +} + +/* + * Returns non-zero if opcode is boostable. + * RIP relative instructions are adjusted at copying time in 64 bits mode + */ +int __kprobes can_boost(kprobe_opcode_t *opcodes) +{ + kprobe_opcode_t opcode; + kprobe_opcode_t *orig_opcodes = opcodes; + + if (search_exception_tables((unsigned long)opcodes)) + return 0; /* Page fault may occur on this address. */ + +retry: + if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) + return 0; + opcode = *(opcodes++); + + /* 2nd-byte opcode */ + if (opcode == 0x0f) { + if (opcodes - orig_opcodes > MAX_INSN_SIZE - 1) + return 0; + return test_bit(*opcodes, + (unsigned long *)twobyte_is_boostable); + } + + switch (opcode & 0xf0) { +#ifdef CONFIG_X86_64 + case 0x40: + goto retry; /* REX prefix is boostable */ +#endif + case 0x60: + if (0x63 < opcode && opcode < 0x67) + goto retry; /* prefixes */ + /* can't boost Address-size override and bound */ + return (opcode != 0x62 && opcode != 0x67); + case 0x70: + return 0; /* can't boost conditional jump */ + case 0xc0: + /* can't boost software-interruptions */ + return (0xc1 < opcode && opcode < 0xcc) || opcode == 0xcf; + case 0xd0: + /* can boost AA* and XLAT */ + return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); + case 0xe0: + /* can boost in/out and absolute jmps */ + return ((opcode & 0x04) || opcode == 0xea); + case 0xf0: + if ((opcode & 0x0c) == 0 && opcode != 0xf1) + goto retry; /* lock/rep(ne) prefix */ + /* clear and set flags are boostable */ + return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); + default: + /* segment override prefixes are boostable */ + if (opcode == 0x26 || opcode == 0x36 || opcode == 0x3e) + goto retry; /* prefixes */ + /* CS override prefix and call are not boostable */ + return (opcode != 0x2e && opcode != 0x9a); + } +} + +static unsigned long +__recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) +{ + struct kprobe *kp; + + kp = get_kprobe((void *)addr); + /* There is no probe, return original address */ + if (!kp) + return addr; + + /* + * Basically, kp->ainsn.insn has an original instruction. + * However, RIP-relative instruction can not do single-stepping + * at different place, __copy_instruction() tweaks the displacement of + * that instruction. In that case, we can't recover the instruction + * from the kp->ainsn.insn. + * + * On the other hand, kp->opcode has a copy of the first byte of + * the probed instruction, which is overwritten by int3. And + * the instruction at kp->addr is not modified by kprobes except + * for the first byte, we can recover the original instruction + * from it and kp->opcode. + */ + memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + buf[0] = kp->opcode; + return (unsigned long)buf; +} + +/* + * Recover the probed instruction at addr for further analysis. + * Caller must lock kprobes by kprobe_mutex, or disable preemption + * for preventing to release referencing kprobes. 
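+ *
+ * Typical usage (a sketch mirroring can_probe() below):
+ *
+ *	kprobe_opcode_t buf[MAX_INSN_SIZE];
+ *	unsigned long recovered = recover_probed_instruction(buf, addr);
+ *	kernel_insn_init(&insn, (void *)recovered);
+ *	insn_get_length(&insn);
+ *
+ * i.e. always decode from the recovered copy, never from the live
+ * kernel text, which may still contain an int3 or a relative jump
+ * planted by kprobes.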
+ */ +unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) +{ + unsigned long __addr; + + __addr = __recover_optprobed_insn(buf, addr); + if (__addr != addr) + return __addr; + + return __recover_probed_insn(buf, addr); +} + +/* Check if paddr is at an instruction boundary */ +static int __kprobes can_probe(unsigned long paddr) +{ + unsigned long addr, __addr, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { + /* + * Check if the instruction has been modified by another + * kprobe, in which case we replace the breakpoint by the + * original instruction in our buffer. + * Also, jump optimization will change the breakpoint to + * relative-jump. Since the relative-jump itself is + * normally used, we just go through if there is no kprobe. + */ + __addr = recover_probed_instruction(buf, addr); + kernel_insn_init(&insn, (void *)__addr); + insn_get_length(&insn); + + /* + * Another debugging subsystem might insert this breakpoint. + * In that case, we can't recover it. + */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; + addr += insn.length; + } + + return (addr == paddr); +} + +/* + * Returns non-zero if opcode modifies the interrupt flag. + */ +static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) +{ + /* Skip prefixes */ + insn = skip_prefixes(insn); + + switch (*insn) { + case 0xfa: /* cli */ + case 0xfb: /* sti */ + case 0xcf: /* iret/iretd */ + case 0x9d: /* popf/popfd */ + return 1; + } + + return 0; +} + +/* + * Copy an instruction and adjust the displacement if the instruction + * uses the %rip-relative addressing mode. + * If it does, Return the address of the 32-bit displacement word. + * If not, return null. + * Only applicable to 64-bit x86. + */ +int __kprobes __copy_instruction(u8 *dest, u8 *src) +{ + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, (unsigned long)src)); + insn_get_length(&insn); + /* Another subsystem puts a breakpoint, failed to recover */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; + memcpy(dest, insn.kaddr, insn.length); + +#ifdef CONFIG_X86_64 + if (insn_rip_relative(&insn)) { + s64 newdisp; + u8 *disp; + kernel_insn_init(&insn, dest); + insn_get_displacement(&insn); + /* + * The copied instruction uses the %rip-relative addressing + * mode. Adjust the displacement for the difference between + * the original location of this instruction and the location + * of the copy that will actually be run. The tricky bit here + * is making sure that the sign extension happens correctly in + * this calculation, since we need a signed 32-bit result to + * be sign-extended to 64 bits when it's added to the %rip + * value and yield the same 64-bit result that the sign- + * extension of the original signed 32-bit displacement would + * have given. + */ + newdisp = (u8 *) src + (s64) insn.displacement.value - (u8 *) dest; + BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. 
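+ * A worked example with made-up addresses: if src at
+ * 0xffffffff81000000 encodes "mov 0x1000(%rip),%rax" and the
+ * copy at dest lives at 0xffffffffa0000000, then
+ * newdisp = 0x1000 + (src - dest) = -0x1efff000, which still
+ * fits in an s32, so the relocated copy references exactly the
+ * same absolute address as the original instruction did.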
*/ + disp = (u8 *) dest + insn_offset_displacement(&insn); + *(s32 *) disp = (s32) newdisp; + } +#endif + return insn.length; +} + +static void __kprobes arch_copy_kprobe(struct kprobe *p) +{ + /* Copy an instruction with recovering if other optprobe modifies it.*/ + __copy_instruction(p->ainsn.insn, p->addr); + + /* + * __copy_instruction can modify the displacement of the instruction, + * but it doesn't affect boostable check. + */ + if (can_boost(p->ainsn.insn)) + p->ainsn.boostable = 0; + else + p->ainsn.boostable = -1; + + /* Also, displacement change doesn't affect the first byte */ + p->opcode = p->ainsn.insn[0]; +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + if (alternatives_text_reserved(p->addr, p->addr)) + return -EINVAL; + + if (!can_probe((unsigned long)p->addr)) + return -EILSEQ; + /* insn: must be on special executable page on x86. */ + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + arch_copy_kprobe(p); + return 0; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + text_poke(p->addr, &p->opcode, 1); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); + p->ainsn.insn = NULL; + } +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.old_flags = kcb->kprobe_old_flags; + kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags; + kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; +} + +static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, p); + kcb->kprobe_saved_flags = kcb->kprobe_old_flags + = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); + if (is_IF_modifier(p->ainsn.insn)) + kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; +} + +static void __kprobes clear_btf(void) +{ + if (test_thread_flag(TIF_BLOCKSTEP)) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + } +} + +static void __kprobes restore_btf(void) +{ + if (test_thread_flag(TIF_BLOCKSTEP)) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl |= DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + } +} + +void __kprobes +arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + unsigned long *sara = stack_addr(regs); + + ri->ret_addr = (kprobe_opcode_t *) *sara; + + /* Replace the return addr with trampoline addr */ + *sara = (unsigned long) &kretprobe_trampoline; +} + +static void __kprobes +setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) +{ + if (setup_detour_execution(p, regs, reenter)) + return; + +#if !defined(CONFIG_PREEMPT) + if (p->ainsn.boostable == 1 && !p->post_handler) { + /* Boost up -- we can execute copied instructions directly */ + if (!reenter) + reset_current_kprobe(); + /* + * Reentering boosted probe doesn't reset current_kprobe, + * nor set current_kprobe, because it doesn't use single + * stepping. 
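+ *
+ * (For reference: "boosted" means the out-of-line copy at
+ * p->ainsn.insn already has a jump back to the original
+ * instruction stream appended by resume_execution(), so pointing
+ * regs->ip at the copy below runs it at full speed, with no
+ * second int3 trap and no TF-based single-step exception.)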
+ */ + regs->ip = (unsigned long)p->ainsn.insn; + preempt_enable_no_resched(); + return; + } +#endif + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_REENTER; + } else + kcb->kprobe_status = KPROBE_HIT_SS; + /* Prepare real single stepping */ + clear_btf(); + regs->flags |= X86_EFLAGS_TF; + regs->flags &= ~X86_EFLAGS_IF; + /* single step inline if the instruction is an int3 */ + if (p->opcode == BREAKPOINT_INSTRUCTION) + regs->ip = (unsigned long)p->addr; + else + regs->ip = (unsigned long)p->ainsn.insn; +} + +/* + * We have reentered the kprobe_handler(), since another probe was hit while + * within the handler. We save the original kprobes variables and just single + * step on the instruction of the new probe without calling any user handlers. + */ +static int __kprobes +reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + /* A probe has been hit in the codepath leading up to, or just + * after, single-stepping of a probed instruction. This entire + * codepath should strictly reside in .kprobes.text section. + * Raise a BUG or we'll continue in an endless reentering loop + * and eventually a stack overflow. + */ + printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", + p->addr); + dump_kprobe(p); + BUG(); + default: + /* impossible cases */ + WARN_ON(1); + return 0; + } + + return 1; +} + +/* + * Interrupts are disabled on entry as trap3 is an interrupt gate and they + * remain disabled throughout this function. + */ +static int __kprobes kprobe_handler(struct pt_regs *regs) +{ + kprobe_opcode_t *addr; + struct kprobe *p; + struct kprobe_ctlblk *kcb; + + addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); + /* + * We don't want to be preempted for the entire + * duration of kprobe processing. We conditionally + * re-enable preemption at the end of this function, + * and also in reenter_kprobe() and setup_singlestep(). + */ + preempt_disable(); + + kcb = get_kprobe_ctlblk(); + p = get_kprobe(addr); + + if (p) { + if (kprobe_running()) { + if (reenter_kprobe(p, regs, kcb)) + return 1; + } else { + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it prepped + * for calling the break_handler below on re-entry + * for jprobe processing, so get out doing nothing + * more here. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) + setup_singlestep(p, regs, kcb, 0); + return 1; + } + } else if (*addr != BREAKPOINT_INSTRUCTION) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Back up over the (now missing) int3 and run + * the original instruction. 
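+ *
+ * ("Backing up" simply undoes the int3 advance applied when addr
+ * was computed at the top of this function:
+ * addr = regs->ip - sizeof(kprobe_opcode_t), so restoring
+ * regs->ip to addr re-executes from the original first byte.)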
+ */ + regs->ip = (unsigned long)addr; + preempt_enable_no_resched(); + return 1; + } else if (kprobe_running()) { + p = __this_cpu_read(current_kprobe); + if (p->break_handler && p->break_handler(p, regs)) { + if (!skip_singlestep(p, regs, kcb)) + setup_singlestep(p, regs, kcb, 0); + return 1; + } + } /* else: not a kprobe fault; let the kernel handle it */ + + preempt_enable_no_resched(); + return 0; +} + +/* + * When a retprobed function returns, this code saves registers and + * calls trampoline_handler() runs, which calls the kretprobe's handler. + */ +static void __used __kprobes kretprobe_trampoline_holder(void) +{ + asm volatile ( + ".global kretprobe_trampoline\n" + "kretprobe_trampoline: \n" +#ifdef CONFIG_X86_64 + /* We don't bother saving the ss register */ + " pushq %rsp\n" + " pushfq\n" + SAVE_REGS_STRING + " movq %rsp, %rdi\n" + " call trampoline_handler\n" + /* Replace saved sp with true return address. */ + " movq %rax, 152(%rsp)\n" + RESTORE_REGS_STRING + " popfq\n" +#else + " pushf\n" + SAVE_REGS_STRING + " movl %esp, %eax\n" + " call trampoline_handler\n" + /* Move flags to cs */ + " movl 56(%esp), %edx\n" + " movl %edx, 52(%esp)\n" + /* Replace saved flags with true return address. */ + " movl %eax, 56(%esp)\n" + RESTORE_REGS_STRING + " popf\n" +#endif + " ret\n"); +} + +/* + * Called from kretprobe_trampoline + */ +static __used __kprobes void *trampoline_handler(struct pt_regs *regs) +{ + struct kretprobe_instance *ri = NULL; + struct hlist_head *head, empty_rp; + struct hlist_node *node, *tmp; + unsigned long flags, orig_ret_address = 0; + unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; + + INIT_HLIST_HEAD(&empty_rp); + kretprobe_hash_lock(current, &head, &flags); + /* fixup registers */ +#ifdef CONFIG_X86_64 + regs->cs = __KERNEL_CS; +#else + regs->cs = __KERNEL_CS | get_kernel_rpl(); + regs->gs = 0; +#endif + regs->ip = trampoline_address; + regs->orig_ax = ~0UL; + + /* + * It is possible to have multiple instances associated with a given + * task either because multiple functions in the call path have + * return probes installed on them, and/or more than one + * return probe was registered for a target function. + * + * We can handle this because: + * - instances are always pushed into the head of the list + * - when multiple return probes are registered for the same + * function, the (chronologically) first instance's ret_addr + * will be the real return address, and all the rest will + * point to kretprobe_trampoline. + */ + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. 
Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; + if (ri->rp && ri->rp->handler) { + __this_cpu_write(current_kprobe, &ri->rp->kp); + get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; + ri->rp->handler(ri, regs); + __this_cpu_write(current_kprobe, NULL); + } + + recycle_rp_inst(ri, &empty_rp); + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_hash_unlock(current, &flags); + + hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_del(&ri->hlist); + kfree(ri); + } + return (void *)orig_ret_address; +} + +/* + * Called after single-stepping. p->addr is the address of the + * instruction whose first byte has been replaced by the "int 3" + * instruction. To avoid the SMP problems that can occur when we + * temporarily put back the original opcode to single-step, we + * single-stepped a copy of the instruction. The address of this + * copy is p->ainsn.insn. + * + * This function prepares to return from the post-single-step + * interrupt. We have to fix up the stack as follows: + * + * 0) Except in the case of absolute or indirect jump or call instructions, + * the new ip is relative to the copied instruction. We need to make + * it relative to the original instruction. + * + * 1) If the single-stepped instruction was pushfl, then the TF and IF + * flags are set in the just-pushed flags, and may need to be cleared. + * + * 2) If the single-stepped instruction was a call, the return address + * that is atop the stack is the address following the copied instruction. + * We need to make it the address following the original instruction. + * + * If this is the first time we've single-stepped the instruction at + * this probepoint, and the instruction is boostable, boost it: add a + * jump instruction after the copied instruction, that jumps to the next + * instruction after the probepoint. + */ +static void __kprobes +resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +{ + unsigned long *tos = stack_addr(regs); + unsigned long copy_ip = (unsigned long)p->ainsn.insn; + unsigned long orig_ip = (unsigned long)p->addr; + kprobe_opcode_t *insn = p->ainsn.insn; + + /* Skip prefixes */ + insn = skip_prefixes(insn); + + regs->flags &= ~X86_EFLAGS_TF; + switch (*insn) { + case 0x9c: /* pushfl */ + *tos &= ~(X86_EFLAGS_TF | X86_EFLAGS_IF); + *tos |= kcb->kprobe_old_flags; + break; + case 0xc2: /* iret/ret/lret */ + case 0xc3: + case 0xca: + case 0xcb: + case 0xcf: + case 0xea: /* jmp absolute -- ip is correct */ + /* ip is already adjusted, no more changes required */ + p->ainsn.boostable = 1; + goto no_change; + case 0xe8: /* call relative - Fix return addr */ + *tos = orig_ip + (*tos - copy_ip); + break; +#ifdef CONFIG_X86_32 + case 0x9a: /* call absolute -- same as call absolute, indirect */ + *tos = orig_ip + (*tos - copy_ip); + goto no_change; +#endif + case 0xff: + if ((insn[1] & 0x30) == 0x10) { + /* + * call absolute, indirect + * Fix return addr; ip is correct. 
+ * But this is not boostable + */ + *tos = orig_ip + (*tos - copy_ip); + goto no_change; + } else if (((insn[1] & 0x31) == 0x20) || + ((insn[1] & 0x31) == 0x21)) { + /* + * jmp near and far, absolute indirect + * ip is correct. And this is boostable + */ + p->ainsn.boostable = 1; + goto no_change; + } + default: + break; + } + + if (p->ainsn.boostable == 0) { + if ((regs->ip > copy_ip) && + (regs->ip - copy_ip) + 5 < MAX_INSN_SIZE) { + /* + * These instructions can be executed directly if it + * jumps back to correct address. + */ + synthesize_reljump((void *)regs->ip, + (void *)orig_ip + (regs->ip - copy_ip)); + p->ainsn.boostable = 1; + } else { + p->ainsn.boostable = -1; + } + } + + regs->ip += orig_ip - copy_ip; + +no_change: + restore_btf(); +} + +/* + * Interrupts are disabled on entry as trap1 is an interrupt gate and they + * remain disabled throughout this function. + */ +static int __kprobes post_kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + resume_execution(cur, regs, kcb); + regs->flags |= kcb->kprobe_saved_flags; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + /* Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); +out: + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, flags + * will have TF set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (regs->flags & X86_EFLAGS_TF) + return 0; + + return 1; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->ip = (unsigned long)cur->addr; + regs->flags |= kcb->kprobe_old_flags; + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accounting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + + /* + * fixup routine could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; + } + return 0; +} + +/* + * Wrapper routine for handling exceptions. 
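+ *
+ * Routing is keyed on the die code: DIE_INT3 (the breakpoint
+ * trap) goes to kprobe_handler(), DIE_DEBUG (the post-single-step
+ * trap) goes to post_kprobe_handler(), and DIE_GPF gives a
+ * registered fault handler a chance to fix things up; any event
+ * handled here returns NOTIFY_STOP so the rest of the die chain
+ * is skipped.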
+ */ +int __kprobes +kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) +{ + struct die_args *args = data; + int ret = NOTIFY_DONE; + + if (args->regs && user_mode_vm(args->regs)) + return ret; + + switch (val) { + case DIE_INT3: + if (kprobe_handler(args->regs)) + ret = NOTIFY_STOP; + break; + case DIE_DEBUG: + if (post_kprobe_handler(args->regs)) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; + ret = NOTIFY_STOP; + } + break; + case DIE_GPF: + /* + * To be potentially processing a kprobe fault and to + * trust the result from kprobe_running(), we have + * be non-preemptible. + */ + if (!preemptible() && kprobe_running() && + kprobe_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; +} + +int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct jprobe *jp = container_of(p, struct jprobe, kp); + unsigned long addr; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + kcb->jprobe_saved_regs = *regs; + kcb->jprobe_saved_sp = stack_addr(regs); + addr = (unsigned long)(kcb->jprobe_saved_sp); + + /* + * As Linus pointed out, gcc assumes that the callee + * owns the argument space and could overwrite it, e.g. + * tailcall optimization. So, to be absolutely safe + * we also save and restore enough stack bytes to cover + * the argument area. + */ + memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, + MIN_STACK_SIZE(addr)); + regs->flags &= ~X86_EFLAGS_IF; + trace_hardirqs_off(); + regs->ip = (unsigned long)(jp->entry); + return 1; +} + +void __kprobes jprobe_return(void) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + asm volatile ( +#ifdef CONFIG_X86_64 + " xchg %%rbx,%%rsp \n" +#else + " xchgl %%ebx,%%esp \n" +#endif + " int3 \n" + " .globl jprobe_return_end\n" + " jprobe_return_end: \n" + " nop \n"::"b" + (kcb->jprobe_saved_sp):"memory"); +} + +int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + u8 *addr = (u8 *) (regs->ip - 1); + struct jprobe *jp = container_of(p, struct jprobe, kp); + + if ((addr > (u8 *) jprobe_return) && + (addr < (u8 *) jprobe_return_end)) { + if (stack_addr(regs) != kcb->jprobe_saved_sp) { + struct pt_regs *saved_regs = &kcb->jprobe_saved_regs; + printk(KERN_ERR + "current sp %p does not match saved sp %p\n", + stack_addr(regs), kcb->jprobe_saved_sp); + printk(KERN_ERR "Saved registers for jprobe %p\n", jp); + show_regs(saved_regs); + printk(KERN_ERR "Current registers\n"); + show_regs(regs); + BUG(); + } + *regs = kcb->jprobe_saved_regs; + memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), + kcb->jprobes_stack, + MIN_STACK_SIZE(kcb->jprobe_saved_sp)); + preempt_enable_no_resched(); + return 1; + } + return 0; +} + +int __init arch_init_kprobes(void) +{ + return arch_init_optprobes(); +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c new file mode 100644 index 0000000..23ef5c5 --- /dev/null +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -0,0 +1,93 @@ +/* + * Dynamic Ftrace based Kprobes Optimization + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later 
version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) Hitachi Ltd., 2012 + */ +#include +#include +#include +#include +#include + +#include "common.h" + +static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + /* + * Emulate singlestep (and also recover regs->ip) + * as if there is a 5byte nop + */ + regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE; + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + __this_cpu_write(current_kprobe, NULL); + return 1; +} + +int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + if (kprobe_ftrace(p)) + return __skip_singlestep(p, regs, kcb); + else + return 0; +} + +/* Ftrace callback handler for kprobes */ +void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct pt_regs *regs) +{ + struct kprobe *p; + struct kprobe_ctlblk *kcb; + unsigned long flags; + + /* Disable irq for emulating a breakpoint and avoiding preempt */ + local_irq_save(flags); + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto end; + + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ + regs->ip = ip + sizeof(kprobe_opcode_t); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) + __skip_singlestep(p, regs, kcb); + /* + * If pre_handler returns !0, it sets regs->ip and + * resets current kprobe. + */ + } +end: + local_irq_restore(flags); +} + +int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.insn = NULL; + p->ainsn.boostable = -1; + return 0; +} diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c new file mode 100644 index 0000000..76dc6f0 --- /dev/null +++ b/arch/x86/kernel/kprobes/opt.c @@ -0,0 +1,512 @@ +/* + * Kernel Probes Jump Optimization (Optprobes) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Hitachi Ltd., 2012 + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" + +unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr) +{ + struct optimized_kprobe *op; + struct kprobe *kp; + long offs; + int i; + + for (i = 0; i < RELATIVEJUMP_SIZE; i++) { + kp = get_kprobe((void *)addr - i); + /* This function only handles jump-optimized kprobe */ + if (kp && kprobe_optimized(kp)) { + op = container_of(kp, struct optimized_kprobe, kp); + /* If op->list is not empty, op is under optimizing */ + if (list_empty(&op->list)) + goto found; + } + } + + return addr; +found: + /* + * If the kprobe can be optimized, original bytes which can be + * overwritten by jump destination address. In this case, original + * bytes must be recovered from op->optinsn.copied_insn buffer. + */ + memcpy(buf, (void *)addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + if (addr == (unsigned long)kp->addr) { + buf[0] = kp->opcode; + memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + } else { + offs = addr - (unsigned long)kp->addr - 1; + memcpy(buf, op->optinsn.copied_insn + offs, RELATIVE_ADDR_SIZE - offs); + } + + return (unsigned long)buf; +} + +/* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ +static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) +{ +#ifdef CONFIG_X86_64 + *addr++ = 0x48; + *addr++ = 0xbf; +#else + *addr++ = 0xb8; +#endif + *(unsigned long *)addr = val; +} + +static void __used __kprobes kprobes_optinsn_template_holder(void) +{ + asm volatile ( + ".global optprobe_template_entry\n" + "optprobe_template_entry:\n" +#ifdef CONFIG_X86_64 + /* We don't bother saving the ss register */ + " pushq %rsp\n" + " pushfq\n" + SAVE_REGS_STRING + " movq %rsp, %rsi\n" + ".global optprobe_template_val\n" + "optprobe_template_val:\n" + ASM_NOP5 + ASM_NOP5 + ".global optprobe_template_call\n" + "optprobe_template_call:\n" + ASM_NOP5 + /* Move flags to rsp */ + " movq 144(%rsp), %rdx\n" + " movq %rdx, 152(%rsp)\n" + RESTORE_REGS_STRING + /* Skip flags entry */ + " addq $8, %rsp\n" + " popfq\n" +#else /* CONFIG_X86_32 */ + " pushf\n" + SAVE_REGS_STRING + " movl %esp, %edx\n" + ".global optprobe_template_val\n" + "optprobe_template_val:\n" + ASM_NOP5 + ".global optprobe_template_call\n" + "optprobe_template_call:\n" + ASM_NOP5 + RESTORE_REGS_STRING + " addl $4, %esp\n" /* skip cs */ + " popf\n" +#endif + ".global optprobe_template_end\n" + "optprobe_template_end:\n"); +} + +#define TMPL_MOVE_IDX \ + ((long)&optprobe_template_val - (long)&optprobe_template_entry) +#define TMPL_CALL_IDX \ + ((long)&optprobe_template_call - (long)&optprobe_template_entry) +#define TMPL_END_IDX \ + ((long)&optprobe_template_end - (long)&optprobe_template_entry) + +#define INT3_SIZE sizeof(kprobe_opcode_t) + +/* Optimized kprobe call back function: called from optinsn */ +static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long flags; + + /* This is possible if op is under delayed unoptimizing */ + if (kprobe_disabled(&op->kp)) + return; + + local_irq_save(flags); + if (kprobe_running()) { + kprobes_inc_nmissed_count(&op->kp); + } else { + /* Save skipped registers */ +#ifdef CONFIG_X86_64 + regs->cs = __KERNEL_CS; +#else + regs->cs = 
__KERNEL_CS | get_kernel_rpl(); + regs->gs = 0; +#endif + regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; + regs->orig_ax = ~0UL; + + __this_cpu_write(current_kprobe, &op->kp); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + opt_pre_handler(&op->kp, regs); + __this_cpu_write(current_kprobe, NULL); + } + local_irq_restore(flags); +} + +static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) +{ + int len = 0, ret; + + while (len < RELATIVEJUMP_SIZE) { + ret = __copy_instruction(dest + len, src + len); + if (!ret || !can_boost(dest + len)) + return -EINVAL; + len += ret; + } + /* Check whether the address range is reserved */ + if (ftrace_text_reserved(src, src + len - 1) || + alternatives_text_reserved(src, src + len - 1) || + jump_label_text_reserved(src, src + len - 1)) + return -EBUSY; + + return len; +} + +/* Check whether insn is indirect jump */ +static int __kprobes insn_is_indirect_jump(struct insn *insn) +{ + return ((insn->opcode.bytes[0] == 0xff && + (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ + insn->opcode.bytes[0] == 0xea); /* Segment based jump */ +} + +/* Check whether insn jumps into specified address range */ +static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) +{ + unsigned long target = 0; + + switch (insn->opcode.bytes[0]) { + case 0xe0: /* loopne */ + case 0xe1: /* loope */ + case 0xe2: /* loop */ + case 0xe3: /* jcxz */ + case 0xe9: /* near relative jump */ + case 0xeb: /* short relative jump */ + break; + case 0x0f: + if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ + break; + return 0; + default: + if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ + break; + return 0; + } + target = (unsigned long)insn->next_byte + insn->immediate.value; + + return (start <= target && target <= start + len); +} + +/* Decode whole function to ensure any instructions don't jump into target */ +static int __kprobes can_optimize(unsigned long paddr) +{ + unsigned long addr, size = 0, offset = 0; + struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; + + /* Lookup symbol including addr */ + if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) + return 0; + + /* + * Do not optimize in the entry code due to the unstable + * stack handling. + */ + if ((paddr >= (unsigned long)__entry_text_start) && + (paddr < (unsigned long)__entry_text_end)) + return 0; + + /* Check there is enough space for a relative jump. */ + if (size - offset < RELATIVEJUMP_SIZE) + return 0; + + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr - offset + size) { /* Decode until function end */ + if (search_exception_tables(addr)) + /* + * Since some fixup code will jumps into this function, + * we can't optimize kprobe in this function. + */ + return 0; + kernel_insn_init(&insn, (void *)recover_probed_instruction(buf, addr)); + insn_get_length(&insn); + /* Another subsystem puts a breakpoint */ + if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) + return 0; + /* Recover address */ + insn.kaddr = (void *)addr; + insn.next_byte = (void *)(addr + insn.length); + /* Check any instructions don't jump into target */ + if (insn_is_indirect_jump(&insn) || + insn_jump_into_range(&insn, paddr + INT3_SIZE, + RELATIVE_ADDR_SIZE)) + return 0; + addr += insn.length; + } + + return 1; +} + +/* Check optimized_kprobe can actually be optimized. 
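+ * That is: if some other kprobe has meanwhile been registered on
+ * any byte inside the region this probe would relocate
+ * (op->kp.addr + 1 .. op->kp.addr + op->optinsn.size - 1),
+ * turning the int3 into a relative jump would hide that probe,
+ * so the scan below refuses with -EEXIST.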
*/ +int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) +{ + int i; + struct kprobe *p; + + for (i = 1; i < op->optinsn.size; i++) { + p = get_kprobe(op->kp.addr + i); + if (p && !kprobe_disabled(p)) + return -EEXIST; + } + + return 0; +} + +/* Check the addr is within the optimized instructions. */ +int __kprobes +arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) +{ + return ((unsigned long)op->kp.addr <= addr && + (unsigned long)op->kp.addr + op->optinsn.size > addr); +} + +/* Free optimized instruction slot */ +static __kprobes +void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) +{ + if (op->optinsn.insn) { + free_optinsn_slot(op->optinsn.insn, dirty); + op->optinsn.insn = NULL; + op->optinsn.size = 0; + } +} + +void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) +{ + __arch_remove_optimized_kprobe(op, 1); +} + +/* + * Copy replacing target instructions + * Target instructions MUST be relocatable (checked inside) + * This is called when new aggr(opt)probe is allocated or reused. + */ +int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) +{ + u8 *buf; + int ret; + long rel; + + if (!can_optimize((unsigned long)op->kp.addr)) + return -EILSEQ; + + op->optinsn.insn = get_optinsn_slot(); + if (!op->optinsn.insn) + return -ENOMEM; + + /* + * Verify if the address gap is in 2GB range, because this uses + * a relative jump. + */ + rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; + if (abs(rel) > 0x7fffffff) + return -ERANGE; + + buf = (u8 *)op->optinsn.insn; + + /* Copy instructions into the out-of-line buffer */ + ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); + if (ret < 0) { + __arch_remove_optimized_kprobe(op, 0); + return ret; + } + op->optinsn.size = ret; + + /* Copy arch-dep-instance from template */ + memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + + /* Set probe information */ + synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); + + /* Set probe function call */ + synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); + + /* Set returning jmp instruction at the tail of out-of-line buffer */ + synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, + (u8 *)op->kp.addr + op->optinsn.size); + + flush_icache_range((unsigned long) buf, + (unsigned long) buf + TMPL_END_IDX + + op->optinsn.size + RELATIVEJUMP_SIZE); + return 0; +} + +#define MAX_OPTIMIZE_PROBES 256 +static struct text_poke_param *jump_poke_params; +static struct jump_poke_buffer { + u8 buf[RELATIVEJUMP_SIZE]; +} *jump_poke_bufs; + +static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm, + u8 *insn_buf, + struct optimized_kprobe *op) +{ + s32 rel = (s32)((long)op->optinsn.insn - + ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + + /* Backup instructions which will be replaced by jump address */ + memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, + RELATIVE_ADDR_SIZE); + + insn_buf[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&insn_buf[1]) = rel; + + tprm->addr = op->kp.addr; + tprm->opcode = insn_buf; + tprm->len = RELATIVEJUMP_SIZE; +} + +/* + * Replace breakpoints (int3) with relative jumps. + * Caller must call with locking kprobe_mutex and text_mutex. 
+ */ +void __kprobes arch_optimize_kprobes(struct list_head *oplist) +{ + struct optimized_kprobe *op, *tmp; + int c = 0; + + list_for_each_entry_safe(op, tmp, oplist, list) { + WARN_ON(kprobe_disabled(&op->kp)); + /* Setup param */ + setup_optimize_kprobe(&jump_poke_params[c], + jump_poke_bufs[c].buf, op); + list_del_init(&op->list); + if (++c >= MAX_OPTIMIZE_PROBES) + break; + } + + /* + * text_poke_smp doesn't support NMI/MCE code modifying. + * However, since kprobes itself also doesn't support NMI/MCE + * code probing, it's not a problem. + */ + text_poke_smp_batch(jump_poke_params, c); +} + +static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm, + u8 *insn_buf, + struct optimized_kprobe *op) +{ + /* Set int3 to first byte for kprobes */ + insn_buf[0] = BREAKPOINT_INSTRUCTION; + memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + + tprm->addr = op->kp.addr; + tprm->opcode = insn_buf; + tprm->len = RELATIVEJUMP_SIZE; +} + +/* + * Recover original instructions and breakpoints from relative jumps. + * Caller must call with locking kprobe_mutex. + */ +extern void arch_unoptimize_kprobes(struct list_head *oplist, + struct list_head *done_list) +{ + struct optimized_kprobe *op, *tmp; + int c = 0; + + list_for_each_entry_safe(op, tmp, oplist, list) { + /* Setup param */ + setup_unoptimize_kprobe(&jump_poke_params[c], + jump_poke_bufs[c].buf, op); + list_move(&op->list, done_list); + if (++c >= MAX_OPTIMIZE_PROBES) + break; + } + + /* + * text_poke_smp doesn't support NMI/MCE code modifying. + * However, since kprobes itself also doesn't support NMI/MCE + * code probing, it's not a problem. + */ + text_poke_smp_batch(jump_poke_params, c); +} + +/* Replace a relative jump with a breakpoint (int3). */ +void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) +{ + u8 buf[RELATIVEJUMP_SIZE]; + + /* Set int3 to first byte for kprobes */ + buf[0] = BREAKPOINT_INSTRUCTION; + memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); +} + +int __kprobes +setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) +{ + struct optimized_kprobe *op; + + if (p->flags & KPROBE_FLAG_OPTIMIZED) { + /* This kprobe is really able to run optimized path. */ + op = container_of(p, struct optimized_kprobe, kp); + /* Detour through copied instructions */ + regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; + if (!reenter) + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } + return 0; +} + +int __kprobes arch_init_optprobes(void) +{ + /* Allocate code buffer and parameter array */ + jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) * + MAX_OPTIMIZE_PROBES, GFP_KERNEL); + if (!jump_poke_bufs) + return -ENOMEM; + + jump_poke_params = kmalloc(sizeof(struct text_poke_param) * + MAX_OPTIMIZE_PROBES, GFP_KERNEL); + if (!jump_poke_params) { + kfree(jump_poke_bufs); + jump_poke_bufs = NULL; + return -ENOMEM; + } + + return 0; +} -- cgit v1.1 From ddd70cf93d784af3698c86315aa0ded87ca0a0c5 Mon Sep 17 00:00:00 2001 From: Jun Nakajima Date: Mon, 21 Jan 2013 17:23:09 +0000 Subject: goldfish: platform device for x86 Based on code by Jun Nakajima but stripped of all the old x86 mach-foo stuff and turned into a single file for the Goldfish virtual bus layer. The actual created platform device and bus enumeration is portable between the ARM and x86 Goldfish emulations. 
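For context: the patch below only registers the "goldfish_pdev_bus" platform device; a separate bus driver then claims the MMIO window and IRQ it carries. A minimal, hypothetical sketch of such a consumer follows — only the device name and the resource layout come from this patch, the probe logic is purely illustrative:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/io.h>

static int goldfish_pdev_bus_probe(struct platform_device *pdev)
{
	/* MMIO window and IRQ registered by goldfish_init() below */
	struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	int irq = platform_get_irq(pdev, 0);
	void __iomem *base;

	if (!mem || irq < 0)
		return -ENODEV;

	base = ioremap(mem->start, resource_size(mem));
	if (!base)
		return -ENOMEM;

	/* ... enumerate the emulated devices through registers at base ... */
	return 0;
}

static struct platform_driver goldfish_pdev_bus_driver = {
	.probe	= goldfish_pdev_bus_probe,
	.driver	= { .name = "goldfish_pdev_bus" },
};
module_platform_driver(goldfish_pdev_bus_driver);
MODULE_LICENSE("GPL");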
Signed-off-by: Sheng Yang Link: http://lkml.kernel.org/r/20130121172205.19517.22535.stgit@bob.linux.org.uk Signed-off-by: Yunhong Jiang Signed-off-by: Xiaohui Xin Signed-off-by: Jun Nakajima Signed-off-by: Bruce Beare [Ported to 3.7 and reorganised so that we can keep most of the code shared properly] Signed-off-by: Alan Cox Signed-off-by: H. Peter Anvin Cc: Matthew Garrett --- arch/x86/Kconfig | 12 +++++++++ arch/x86/platform/Makefile | 1 + arch/x86/platform/goldfish/Makefile | 1 + arch/x86/platform/goldfish/goldfish.c | 51 +++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 arch/x86/platform/goldfish/Makefile create mode 100644 arch/x86/platform/goldfish/goldfish.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..46fb28c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -320,6 +320,10 @@ config X86_BIGSMP ---help--- This option is needed for the systems that have more than 8 CPUs +config GOLDFISH + def_bool y + depends on X86_GOLDFISH + if X86_32 config X86_EXTENDED_PLATFORM bool "Support for extended (non-PC) x86 platforms" @@ -402,6 +406,14 @@ config X86_UV # Following is an alphabetically sorted list of 32 bit extended platforms # Please maintain the alphabetic order if and when there are additions +config X86_GOLDFISH + bool "Goldfish (Virtual Platform)" + depends on X86_32 + ---help--- + Enable support for the Goldfish virtual platform used primarily + for Android development. Unless you are building for the Android + Goldfish emulator say N here. + config X86_INTEL_CE bool "CE4100 TV platform" depends on PCI diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 8d87439..bfe917f 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -2,6 +2,7 @@ obj-y += ce4100/ obj-y += efi/ obj-y += geode/ +obj-y += goldfish/ obj-y += iris/ obj-y += mrst/ obj-y += olpc/ diff --git a/arch/x86/platform/goldfish/Makefile b/arch/x86/platform/goldfish/Makefile new file mode 100644 index 0000000..f030b53 --- /dev/null +++ b/arch/x86/platform/goldfish/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_GOLDFISH) += goldfish.o diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c new file mode 100644 index 0000000..1693107 --- /dev/null +++ b/arch/x86/platform/goldfish/goldfish.c @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2007 Google, Inc. + * Copyright (C) 2011 Intel, Inc. + * Copyright (C) 2013 Intel, Inc. + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + */ + +#include +#include +#include + +/* + * Where in virtual device memory the IO devices (timers, system controllers + * and so on) + */ + +#define GOLDFISH_PDEV_BUS_BASE (0xff001000) +#define GOLDFISH_PDEV_BUS_END (0xff7fffff) +#define GOLDFISH_PDEV_BUS_IRQ (4) + +#define GOLDFISH_TTY_BASE (0x2000) + +static struct resource goldfish_pdev_bus_resources[] = { + { + .start = GOLDFISH_PDEV_BUS_BASE, + .end = GOLDFISH_PDEV_BUS_END, + .flags = IORESOURCE_MEM, + }, + { + .start = GOLDFISH_PDEV_BUS_IRQ, + .end = GOLDFISH_PDEV_BUS_IRQ, + .flags = IORESOURCE_IRQ, + } +}; + +static int __init goldfish_init(void) +{ + platform_device_register_simple("goldfish_pdev_bus", -1, + goldfish_pdev_bus_resources, 2); + return 0; +} +device_initcall(goldfish_init); -- cgit v1.1 From 22368028febf81f5c6d1e9fdd737d50543219b00 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 13 Jan 2013 23:44:12 +0800 Subject: KVM: x86: clean up reexecute_instruction Little cleanup for reexecute_instruction, also use gpa_to_gfn in retry_instruction Reviewed-by: Gleb Natapov Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5483228..f0288c5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4759,19 +4759,18 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) if (tdp_enabled) return false; + gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); + if (gpa == UNMAPPED_GVA) + return true; /* let cpu generate fault */ + /* * if emulation was due to access to shadowed page table * and it failed try to unshadow page and re-enter the * guest to let CPU execute the instruction. */ - if (kvm_mmu_unprotect_page_virt(vcpu, gva)) + if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa))) return true; - gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); - - if (gpa == UNMAPPED_GVA) - return true; /* let cpu generate fault */ - /* * Do not retry the unhandleable instruction if it faults on the * readonly host memory, otherwise it will goto a infinite loop: @@ -4826,7 +4825,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, if (!vcpu->arch.mmu.direct_map) gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); - kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT); + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); return true; } -- cgit v1.1 From 95b3cf69bdf8b27a02d878e24ca353cebb4e009e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 13 Jan 2013 23:46:52 +0800 Subject: KVM: x86: let reexecute_instruction work for tdp Currently, reexecute_instruction refused to retry all instructions if tdp is enabled. If nested npt is used, the emulation may be caused by shadow page, it can be fixed by dropping the shadow page. 
And the only condition that tdp can not retry the instruction is the access fault on error pfn Reviewed-by: Gleb Natapov Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 61 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f0288c5..6f9cab0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4751,25 +4751,25 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) return r; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) +static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2) { - gpa_t gpa; + gpa_t gpa = cr2; pfn_t pfn; - if (tdp_enabled) - return false; - - gpa = kvm_mmu_gva_to_gpa_system(vcpu, gva, NULL); - if (gpa == UNMAPPED_GVA) - return true; /* let cpu generate fault */ + if (!vcpu->arch.mmu.direct_map) { + /* + * Write permission should be allowed since only + * write access need to be emulated. + */ + gpa = kvm_mmu_gva_to_gpa_write(vcpu, cr2, NULL); - /* - * if emulation was due to access to shadowed page table - * and it failed try to unshadow page and re-enter the - * guest to let CPU execute the instruction. - */ - if (kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa))) - return true; + /* + * If the mapping is invalid in guest, let cpu retry + * it to generate fault. + */ + if (gpa == UNMAPPED_GVA) + return true; + } /* * Do not retry the unhandleable instruction if it faults on the @@ -4778,12 +4778,37 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva) * instruction -> ... */ pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa)); - if (!is_error_noslot_pfn(pfn)) { - kvm_release_pfn_clean(pfn); + + /* + * If the instruction failed on the error pfn, it can not be fixed, + * report the error to userspace. + */ + if (is_error_noslot_pfn(pfn)) + return false; + + kvm_release_pfn_clean(pfn); + + /* The instructions are well-emulated on direct mmu. */ + if (vcpu->arch.mmu.direct_map) { + unsigned int indirect_shadow_pages; + + spin_lock(&vcpu->kvm->mmu_lock); + indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages; + spin_unlock(&vcpu->kvm->mmu_lock); + + if (indirect_shadow_pages) + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + return true; } - return false; + /* + * if emulation was due to access to shadowed page table + * and it failed try to unshadow page and re-enter the + * guest to let CPU execute the instruction. + */ + kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); + return true; } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, -- cgit v1.1 From 93c05d3ef25275829d421a255271595ac219a518 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 13 Jan 2013 23:49:07 +0800 Subject: KVM: x86: improve reexecute_instruction The current reexecute_instruction can not well detect the failed instruction emulation. It allows guest to retry all the instructions except it accesses on error pfn For example, some cases are nested-write-protect - if the page we want to write is used as PDE but it chains to itself. 
Under this case, we should stop the emulation and report the case to userspace Reviewed-by: Gleb Natapov Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/kvm/paging_tmpl.h | 27 ++++++++++++++++++++------- arch/x86/kvm/x86.c | 22 ++++++++++++++++++---- 3 files changed, 45 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f75e1fe..77d56a4 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -497,6 +497,13 @@ struct kvm_vcpu_arch { u64 msr_val; struct gfn_to_hva_cache data; } pv_eoi; + + /* + * Indicate whether the access faults on its page table in guest + * which is set when fix page fault and used to detect unhandeable + * instruction. + */ + bool write_fault_to_shadow_pgtable; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3d1a352..ca69dcc 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -497,26 +497,34 @@ out_gpte_changed: * created when kvm establishes shadow page table that stop kvm using large * page size. Do it early can avoid unnecessary #PF and emulation. * + * @write_fault_to_shadow_pgtable will return true if the fault gfn is + * currently used as its page table. + * * Note: the PDPT page table is not checked for PAE-32 bit guest. It is ok * since the PDPT is always shadowed, that means, we can not use large page * size to map the gfn which is used as PDPT. */ static bool FNAME(is_self_change_mapping)(struct kvm_vcpu *vcpu, - struct guest_walker *walker, int user_fault) + struct guest_walker *walker, int user_fault, + bool *write_fault_to_shadow_pgtable) { int level; gfn_t mask = ~(KVM_PAGES_PER_HPAGE(walker->level) - 1); + bool self_changed = false; if (!(walker->pte_access & ACC_WRITE_MASK || (!is_write_protection(vcpu) && !user_fault))) return false; - for (level = walker->level; level <= walker->max_level; level++) - if (!((walker->gfn ^ walker->table_gfn[level - 1]) & mask)) - return true; + for (level = walker->level; level <= walker->max_level; level++) { + gfn_t gfn = walker->gfn ^ walker->table_gfn[level - 1]; + + self_changed |= !(gfn & mask); + *write_fault_to_shadow_pgtable |= !gfn; + } - return false; + return self_changed; } /* @@ -544,7 +552,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, int level = PT_PAGE_TABLE_LEVEL; int force_pt_level; unsigned long mmu_seq; - bool map_writable; + bool map_writable, is_self_change_mapping; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); @@ -572,9 +580,14 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, return 0; } + vcpu->arch.write_fault_to_shadow_pgtable = false; + + is_self_change_mapping = FNAME(is_self_change_mapping)(vcpu, + &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); + if (walker.level >= PT_DIRECTORY_LEVEL) force_pt_level = mapping_level_dirty_bitmap(vcpu, walker.gfn) - || FNAME(is_self_change_mapping)(vcpu, &walker, user_fault); + || is_self_change_mapping; else force_pt_level = 1; if (!force_pt_level) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6f9cab0..e00dd05 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4751,7 +4751,8 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu) return r; } -static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2) +static bool reexecute_instruction(struct kvm_vcpu 
*vcpu, gva_t cr2, + bool write_fault_to_shadow_pgtable) { gpa_t gpa = cr2; pfn_t pfn; @@ -4808,7 +4809,13 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2) * guest to let CPU execute the instruction. */ kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa)); - return true; + + /* + * If the access faults on its page table, it can not + * be fixed by unprotecting shadow page and it should + * be reported to userspace. + */ + return !write_fault_to_shadow_pgtable; } static bool retry_instruction(struct x86_emulate_ctxt *ctxt, @@ -4867,7 +4874,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, int r; struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; bool writeback = true; + bool write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable; + /* + * Clear write_fault_to_shadow_pgtable here to ensure it is + * never reused. + */ + vcpu->arch.write_fault_to_shadow_pgtable = false; kvm_clear_exception_queue(vcpu); if (!(emulation_type & EMULTYPE_NO_DECODE)) { @@ -4886,7 +4899,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, if (r != EMULATION_OK) { if (emulation_type & EMULTYPE_TRAP_UD) return EMULATE_FAIL; - if (reexecute_instruction(vcpu, cr2)) + if (reexecute_instruction(vcpu, cr2, + write_fault_to_spt)) return EMULATE_DONE; if (emulation_type & EMULTYPE_SKIP) return EMULATE_FAIL; @@ -4916,7 +4930,7 @@ restart: return EMULATE_DONE; if (r == EMULATION_FAILED) { - if (reexecute_instruction(vcpu, cr2)) + if (reexecute_instruction(vcpu, cr2, write_fault_to_spt)) return EMULATE_DONE; return handle_emulation_failure(vcpu); -- cgit v1.1 From 3d48aab1d5035fac04fe2fbce63eedc345c0e92e Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Fri, 18 Jan 2013 13:45:59 +0000 Subject: x86: add support for Intel Low Power Subsystem We are starting to see traditional SoC peripherals also in the x86 world in chips like Intel Lynxpoint. Typically we already have a Linux driver for the peripheral but it takes advantage of the common clk framework to control and retrieve information about the peripheral clock. So far there hasn't been a standard way on x86 to pass information such as clock rate from whatever the configuration system is used to the driver, but instead different variations have emerged, like adding this information to the platform data. Solve this by adding a new config option X86_INTEL_LPSS. If this is selected we enable common clk framework (and everything else) that is needed to support the Intel LPSS drivers. Enabling common clk framework on x86 was originally proposed by Mark Brown. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- arch/x86/Kconfig | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..c8c9b14 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -454,6 +454,16 @@ config X86_MDFLD endif +config X86_INTEL_LPSS + bool "Intel Low Power Subsystem Support" + depends on ACPI + select COMMON_CLK + ---help--- + Select to build support for Intel Low Power Subsystem such as + found on Intel Lynxpoint PCH. Selecting this option enables + things like clock tree (common clock framework) which are needed + by the LPSS peripheral drivers. 
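(For context, this is the kind of code the option unlocks: a minimal sketch of an LPSS peripheral driver querying its functional clock through the common clk framework instead of carrying the rate in platform data. The driver and clock names are illustrative, not taken from this patch.)

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/platform_device.h>

static int lpss_uart_probe(struct platform_device *pdev)
{
	struct clk *clk = clk_get(&pdev->dev, NULL);	/* functional clock */
	int ret;

	if (IS_ERR(clk))
		return PTR_ERR(clk);

	ret = clk_prepare_enable(clk);
	if (ret)
		return ret;

	/* the rate comes from the clk tree, not from platform data */
	dev_info(&pdev->dev, "functional clock at %lu Hz\n", clk_get_rate(clk));
	return 0;
}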
+ config X86_RDC321X bool "RDC R-321x SoC" depends on X86_32 -- cgit v1.1 From 0bdea06892e33afddbdc5da6df305e9fe9c41365 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jan 2013 19:51:50 +0200 Subject: KVM: x86 emulator: Convert SHLD, SHRD to fastop Reviewed-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 619a33d..a21773f 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -454,6 +454,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) #define FOP_END \ ".popsection") +#define FOPNOP() FOP_ALIGN FOP_RET + #define FOP1E(op, dst) \ FOP_ALIGN #op " %" #dst " \n\t" FOP_RET @@ -476,6 +478,18 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) ON64(FOP2E(op##q, rax, rbx)) \ FOP_END +#define FOP3E(op, dst, src, src2) \ + FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET + +/* 3-operand, word-only, src2=cl */ +#define FASTOP3WCL(op) \ + FOP_START(op) \ + FOPNOP() \ + FOP3E(op##w, ax, bx, cl) \ + FOP3E(op##l, eax, ebx, cl) \ + ON64(FOP3E(op##q, rax, rbx, cl)) \ + FOP_END + #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ do { \ unsigned long _tmp; \ @@ -3036,6 +3050,9 @@ FASTOP2(xor); FASTOP2(cmp); FASTOP2(test); +FASTOP3WCL(shld); +FASTOP3WCL(shrd); + static int em_xchg(struct x86_emulate_ctxt *ctxt) { /* Write back the register source. */ @@ -4015,14 +4032,14 @@ static const struct opcode twobyte_table[256] = { /* 0xA0 - 0xA7 */ I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), - D(DstMem | SrcReg | Src2ImmByte | ModRM), - D(DstMem | SrcReg | Src2CL | ModRM), N, N, + F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), + F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, /* 0xA8 - 0xAF */ I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), DI(ImplicitOps, rsm), I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), - D(DstMem | SrcReg | Src2ImmByte | ModRM), - D(DstMem | SrcReg | Src2CL | ModRM), + F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), + F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), /* 0xB0 - 0xB7 */ I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), @@ -4834,14 +4851,6 @@ twobyte_insn: case 0x90 ... 0x9f: /* setcc r/m8 */ ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); break; - case 0xa4: /* shld imm8, r, r/m */ - case 0xa5: /* shld cl, r, r/m */ - emulate_2op_cl(ctxt, "shld"); - break; - case 0xac: /* shrd imm8, r, r/m */ - case 0xad: /* shrd cl, r, r/m */ - emulate_2op_cl(ctxt, "shrd"); - break; case 0xae: /* clflush */ break; case 0xb6 ... 
0xb7: /* movzx */ -- cgit v1.1 From 007a3b547512d69f67ceb9641796d64552bd337e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jan 2013 19:51:51 +0200 Subject: KVM: x86 emulator: convert shift/rotate instructions to fastop SHL, SHR, ROL, ROR, RCL, RCR, SAR, SAL Reviewed-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 72 ++++++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a21773f..a94b1d7 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -478,6 +478,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) ON64(FOP2E(op##q, rax, rbx)) \ FOP_END +/* 2 operand, src is CL */ +#define FASTOP2CL(op) \ + FOP_START(op) \ + FOP2E(op##b, al, cl) \ + FOP2E(op##w, ax, cl) \ + FOP2E(op##l, eax, cl) \ + ON64(FOP2E(op##q, rax, cl)) \ + FOP_END + #define FOP3E(op, dst, src, src2) \ FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET @@ -2046,38 +2055,17 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_grp2(struct x86_emulate_ctxt *ctxt) -{ - switch (ctxt->modrm_reg) { - case 0: /* rol */ - emulate_2op_SrcB(ctxt, "rol"); - break; - case 1: /* ror */ - emulate_2op_SrcB(ctxt, "ror"); - break; - case 2: /* rcl */ - emulate_2op_SrcB(ctxt, "rcl"); - break; - case 3: /* rcr */ - emulate_2op_SrcB(ctxt, "rcr"); - break; - case 4: /* sal/shl */ - case 6: /* sal/shl */ - emulate_2op_SrcB(ctxt, "sal"); - break; - case 5: /* shr */ - emulate_2op_SrcB(ctxt, "shr"); - break; - case 7: /* sar */ - emulate_2op_SrcB(ctxt, "sar"); - break; - } - return X86EMUL_CONTINUE; -} - FASTOP1(not); FASTOP1(neg); +FASTOP2CL(rol); +FASTOP2CL(ror); +FASTOP2CL(rcl); +FASTOP2CL(rcr); +FASTOP2CL(shl); +FASTOP2CL(shr); +FASTOP2CL(sar); + static int em_mul_ex(struct x86_emulate_ctxt *ctxt) { u8 ex = 0; @@ -3726,6 +3714,17 @@ static const struct opcode group1A[] = { I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N, }; +static const struct opcode group2[] = { + F(DstMem | ModRM, em_rol), + F(DstMem | ModRM, em_ror), + F(DstMem | ModRM, em_rcl), + F(DstMem | ModRM, em_rcr), + F(DstMem | ModRM, em_shl), + F(DstMem | ModRM, em_shr), + F(DstMem | ModRM, em_shl), + F(DstMem | ModRM, em_sar), +}; + static const struct opcode group3[] = { F(DstMem | SrcImm | NoWrite, em_test), F(DstMem | SrcImm | NoWrite, em_test), @@ -3949,7 +3948,7 @@ static const struct opcode opcode_table[256] = { /* 0xB8 - 0xBF */ X8(I(DstReg | SrcImm64 | Mov, em_mov)), /* 0xC0 - 0xC7 */ - D2bv(DstMem | SrcImmByte | ModRM), + G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2), I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm), I(ImplicitOps | Stack, em_ret), I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg), @@ -3961,7 +3960,8 @@ static const struct opcode opcode_table[256] = { D(ImplicitOps), DI(SrcImmByte, intn), D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret), /* 0xD0 - 0xD7 */ - D2bv(DstMem | SrcOne | ModRM), D2bv(DstMem | ModRM), + G(Src2One | ByteOp, group2), G(Src2One, group2), + G(Src2CL | ByteOp, group2), G(Src2CL, group2), N, I(DstAcc | SrcImmByte | No64, em_aad), N, N, /* 0xD8 - 0xDF */ N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N, @@ -4713,9 +4713,6 @@ special_insn: case 8: ctxt->dst.val = (s32)ctxt->dst.val; break; } break; - case 0xc0 ... 
0xc1: - rc = em_grp2(ctxt); - break; case 0xcc: /* int3 */ rc = emulate_int(ctxt, 3); break; @@ -4726,13 +4723,6 @@ special_insn: if (ctxt->eflags & EFLG_OF) rc = emulate_int(ctxt, 4); break; - case 0xd0 ... 0xd1: /* Grp2 */ - rc = em_grp2(ctxt); - break; - case 0xd2 ... 0xd3: /* Grp2 */ - ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX); - rc = em_grp2(ctxt); - break; case 0xe9: /* jmp rel */ case 0xeb: /* jmp rel short */ jmp_rel(ctxt, ctxt->src.val); -- cgit v1.1 From 9ae9febae9500a0a6f5ce29ee4b8d942b5332529 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jan 2013 19:51:52 +0200 Subject: KVM: x86 emulator: covert SETCC to fastop This is a bit of a special case since we don't have the usual byte/word/long/quad switch; instead we switch on the condition code embedded in the instruction. Reviewed-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 60 ++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index a94b1d7..e13138d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -499,6 +499,28 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) ON64(FOP3E(op##q, rax, rbx, cl)) \ FOP_END +/* Special case for SETcc - 1 instruction per cc */ +#define FOP_SETCC(op) ".align 4; " #op " %al; ret \n\t" + +FOP_START(setcc) +FOP_SETCC(seto) +FOP_SETCC(setno) +FOP_SETCC(setc) +FOP_SETCC(setnc) +FOP_SETCC(setz) +FOP_SETCC(setnz) +FOP_SETCC(setbe) +FOP_SETCC(setnbe) +FOP_SETCC(sets) +FOP_SETCC(setns) +FOP_SETCC(setp) +FOP_SETCC(setnp) +FOP_SETCC(setl) +FOP_SETCC(setnl) +FOP_SETCC(setle) +FOP_SETCC(setnle) +FOP_END; + #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ do { \ unsigned long _tmp; \ @@ -939,39 +961,15 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, return rc; } -static int test_cc(unsigned int condition, unsigned int flags) +static u8 test_cc(unsigned int condition, unsigned long flags) { - int rc = 0; + u8 rc; + void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); - switch ((condition & 15) >> 1) { - case 0: /* o */ - rc |= (flags & EFLG_OF); - break; - case 1: /* b/c/nae */ - rc |= (flags & EFLG_CF); - break; - case 2: /* z/e */ - rc |= (flags & EFLG_ZF); - break; - case 3: /* be/na */ - rc |= (flags & (EFLG_CF|EFLG_ZF)); - break; - case 4: /* s */ - rc |= (flags & EFLG_SF); - break; - case 5: /* p/pe */ - rc |= (flags & EFLG_PF); - break; - case 7: /* le/ng */ - rc |= (flags & EFLG_ZF); - /* fall through */ - case 6: /* l/nge */ - rc |= (!(flags & EFLG_SF) != !(flags & EFLG_OF)); - break; - } - - /* Odd condition identifiers (lsb == 1) have inverted sense. 
*/ - return (!!rc ^ (condition & 1)); + flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; + asm("pushq %[flags]; popf; call *%[fastop]" + : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); + return rc; } static void fetch_register_operand(struct operand *op) -- cgit v1.1 From 95413dc41398fec2518abf4e0449503b1306dcbc Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jan 2013 19:51:53 +0200 Subject: KVM: x86 emulator: convert INC/DEC to fastop Reviewed-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e13138d..edb09e9 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2055,6 +2055,8 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) FASTOP1(not); FASTOP1(neg); +FASTOP1(inc); +FASTOP1(dec); FASTOP2CL(rol); FASTOP2CL(ror); @@ -2105,12 +2107,6 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) int rc = X86EMUL_CONTINUE; switch (ctxt->modrm_reg) { - case 0: /* inc */ - emulate_1op(ctxt, "inc"); - break; - case 1: /* dec */ - emulate_1op(ctxt, "dec"); - break; case 2: /* call near abs */ { long int old_eip; old_eip = ctxt->_eip; @@ -3735,14 +3731,14 @@ static const struct opcode group3[] = { }; static const struct opcode group4[] = { - I(ByteOp | DstMem | SrcNone | Lock, em_grp45), - I(ByteOp | DstMem | SrcNone | Lock, em_grp45), + F(ByteOp | DstMem | SrcNone | Lock, em_inc), + F(ByteOp | DstMem | SrcNone | Lock, em_dec), N, N, N, N, N, N, }; static const struct opcode group5[] = { - I(DstMem | SrcNone | Lock, em_grp45), - I(DstMem | SrcNone | Lock, em_grp45), + F(DstMem | SrcNone | Lock, em_inc), + F(DstMem | SrcNone | Lock, em_dec), I(SrcMem | Stack, em_grp45), I(SrcMemFAddr | ImplicitOps | Stack, em_call_far), I(SrcMem | Stack, em_grp45), @@ -3891,7 +3887,7 @@ static const struct opcode opcode_table[256] = { /* 0x38 - 0x3F */ F6ALU(NoWrite, em_cmp), N, N, /* 0x40 - 0x4F */ - X16(D(DstReg)), + X8(F(DstReg, em_inc)), X8(F(DstReg, em_dec)), /* 0x50 - 0x57 */ X8(I(SrcReg | Stack, em_push)), /* 0x58 - 0x5F */ @@ -4681,12 +4677,6 @@ special_insn: goto twobyte_insn; switch (ctxt->b) { - case 0x40 ... 0x47: /* inc r16/r32 */ - emulate_1op(ctxt, "inc"); - break; - case 0x48 ... 
0x4f: /* dec r16/r32 */ - emulate_1op(ctxt, "dec"); - break; case 0x63: /* movsxd */ if (ctxt->mode != X86EMUL_MODE_PROT64) goto cannot_emulate; -- cgit v1.1 From 11c363ba8f8eb163c275920b4a27697eb43da6e9 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jan 2013 19:51:54 +0200 Subject: KVM: x86 emulator: convert BT/BTS/BTR/BTC/BSF/BSR to fastop Reviewed-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 76 +++++++++++++++++--------------------------------- 1 file changed, 26 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index edb09e9..62014dc 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -478,6 +478,15 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) ON64(FOP2E(op##q, rax, rbx)) \ FOP_END +/* 2 operand, word only */ +#define FASTOP2W(op) \ + FOP_START(op) \ + FOPNOP() \ + FOP2E(op##w, ax, bx) \ + FOP2E(op##l, eax, ebx) \ + ON64(FOP2E(op##q, rax, rbx)) \ + FOP_END + /* 2 operand, src is CL */ #define FASTOP2CL(op) \ FOP_START(op) \ @@ -2066,6 +2075,13 @@ FASTOP2CL(shl); FASTOP2CL(shr); FASTOP2CL(sar); +FASTOP2W(bsf); +FASTOP2W(bsr); +FASTOP2W(bt); +FASTOP2W(bts); +FASTOP2W(btr); +FASTOP2W(btc); + static int em_mul_ex(struct x86_emulate_ctxt *ctxt) { u8 ex = 0; @@ -3377,47 +3393,6 @@ static int em_sti(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_bt(struct x86_emulate_ctxt *ctxt) -{ - /* Disable writeback. */ - ctxt->dst.type = OP_NONE; - /* only subword offset */ - ctxt->src.val &= (ctxt->dst.bytes << 3) - 1; - - emulate_2op_SrcV_nobyte(ctxt, "bt"); - return X86EMUL_CONTINUE; -} - -static int em_bts(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV_nobyte(ctxt, "bts"); - return X86EMUL_CONTINUE; -} - -static int em_btr(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV_nobyte(ctxt, "btr"); - return X86EMUL_CONTINUE; -} - -static int em_btc(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV_nobyte(ctxt, "btc"); - return X86EMUL_CONTINUE; -} - -static int em_bsf(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV_nobyte(ctxt, "bsf"); - return X86EMUL_CONTINUE; -} - -static int em_bsr(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV_nobyte(ctxt, "bsr"); - return X86EMUL_CONTINUE; -} - static int em_cpuid(struct x86_emulate_ctxt *ctxt) { u32 eax, ebx, ecx, edx; @@ -3773,10 +3748,10 @@ static const struct group_dual group7 = { { static const struct opcode group8[] = { N, N, N, N, - I(DstMem | SrcImmByte, em_bt), - I(DstMem | SrcImmByte | Lock | PageTable, em_bts), - I(DstMem | SrcImmByte | Lock, em_btr), - I(DstMem | SrcImmByte | Lock | PageTable, em_btc), + F(DstMem | SrcImmByte | NoWrite, em_bt), + F(DstMem | SrcImmByte | Lock | PageTable, em_bts), + F(DstMem | SrcImmByte | Lock, em_btr), + F(DstMem | SrcImmByte | Lock | PageTable, em_btc), }; static const struct group_dual group9 = { { @@ -4025,28 +4000,29 @@ static const struct opcode twobyte_table[256] = { X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)), /* 0xA0 - 0xA7 */ I(Stack | Src2FS, em_push_sreg), I(Stack | Src2FS, em_pop_sreg), - II(ImplicitOps, em_cpuid, cpuid), I(DstMem | SrcReg | ModRM | BitOp, em_bt), + II(ImplicitOps, em_cpuid, cpuid), + F(DstMem | SrcReg | ModRM | BitOp | NoWrite, em_bt), F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shld), F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, /* 0xA8 - 0xAF */ I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), DI(ImplicitOps, rsm), - I(DstMem | SrcReg | ModRM | 
BitOp | Lock | PageTable, em_bts), + F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), /* 0xB0 - 0xB7 */ I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), - I(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), + F(DstMem | SrcReg | ModRM | BitOp | Lock, em_btr), I(DstReg | SrcMemFAddr | ModRM | Src2FS, em_lseg), I(DstReg | SrcMemFAddr | ModRM | Src2GS, em_lseg), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xB8 - 0xBF */ N, N, G(BitOp, group8), - I(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), - I(DstReg | SrcMem | ModRM, em_bsf), I(DstReg | SrcMem | ModRM, em_bsr), + F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), + F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ D2bv(DstMem | SrcReg | ModRM | Lock), -- cgit v1.1 From 4d7583493e1777f42cc0fda9573d312e4753aa3c Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jan 2013 19:51:55 +0200 Subject: KVM: x86 emulator: convert 2-operand IMUL to fastop Reviewed-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 62014dc..45ddec8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -441,6 +441,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) } \ } while (0) +static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)); + #define FOP_ALIGN ".align " __stringify(FASTOP_SIZE) " \n\t" #define FOP_RET "ret \n\t" @@ -3051,6 +3053,8 @@ FASTOP2(test); FASTOP3WCL(shld); FASTOP3WCL(shrd); +FASTOP2W(imul); + static int em_xchg(struct x86_emulate_ctxt *ctxt) { /* Write back the register source. */ @@ -3063,16 +3067,10 @@ static int em_xchg(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_imul(struct x86_emulate_ctxt *ctxt) -{ - emulate_2op_SrcV_nobyte(ctxt, "imul"); - return X86EMUL_CONTINUE; -} - static int em_imul_3op(struct x86_emulate_ctxt *ctxt) { ctxt->dst.val = ctxt->src2.val; - return em_imul(ctxt); + return fastop(ctxt, em_imul); } static int em_cwd(struct x86_emulate_ctxt *ctxt) @@ -4010,7 +4008,7 @@ static const struct opcode twobyte_table[256] = { F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), - D(ModRM), I(DstReg | SrcMem | ModRM, em_imul), + D(ModRM), F(DstReg | SrcMem | ModRM, em_imul), /* 0xB0 - 0xB7 */ I2bv(DstMem | SrcReg | ModRM | Lock | PageTable, em_cmpxchg), I(DstReg | SrcMemFAddr | ModRM | Src2SS, em_lseg), -- cgit v1.1 From 34b77652b9e98b5796b3a69df600e1717572e51d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jan 2013 19:51:56 +0200 Subject: KVM: x86 emulator: rearrange fastop definitions Make fastop opcodes usable in other emulations. 
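(For readers following the series, a user-space sketch of the dispatch idea behind fastop: one native implementation per operand width, selected by a size index instead of a per-opcode switch. The real kernel versions additionally load and save guest RFLAGS around a bare native instruction executed via "call *fop", which this simplified model leaves out.)

#include <stdint.h>

typedef uint64_t (*fastop_fn)(uint64_t dst, uint64_t src);

static uint64_t add_b(uint64_t d, uint64_t s) { return (uint8_t)(d + s); }
static uint64_t add_w(uint64_t d, uint64_t s) { return (uint16_t)(d + s); }
static uint64_t add_l(uint64_t d, uint64_t s) { return (uint32_t)(d + s); }
static uint64_t add_q(uint64_t d, uint64_t s) { return d + s; }

/* index 0..3 = byte/word/long/quad, mirroring the FASTOP* macro layout */
static const fastop_fn em_add_table[4] = { add_b, add_w, add_l, add_q };

static uint64_t emulate_add(unsigned int size_log2, uint64_t dst, uint64_t src)
{
	return em_add_table[size_log2](dst, src);
}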
Reviewed-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 70 +++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 45ddec8..d06354d 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -972,6 +972,41 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt, return rc; } +FASTOP2(add); +FASTOP2(or); +FASTOP2(adc); +FASTOP2(sbb); +FASTOP2(and); +FASTOP2(sub); +FASTOP2(xor); +FASTOP2(cmp); +FASTOP2(test); + +FASTOP3WCL(shld); +FASTOP3WCL(shrd); + +FASTOP2W(imul); + +FASTOP1(not); +FASTOP1(neg); +FASTOP1(inc); +FASTOP1(dec); + +FASTOP2CL(rol); +FASTOP2CL(ror); +FASTOP2CL(rcl); +FASTOP2CL(rcr); +FASTOP2CL(shl); +FASTOP2CL(shr); +FASTOP2CL(sar); + +FASTOP2W(bsf); +FASTOP2W(bsr); +FASTOP2W(bt); +FASTOP2W(bts); +FASTOP2W(btr); +FASTOP2W(btc); + static u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -2064,26 +2099,6 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -FASTOP1(not); -FASTOP1(neg); -FASTOP1(inc); -FASTOP1(dec); - -FASTOP2CL(rol); -FASTOP2CL(ror); -FASTOP2CL(rcl); -FASTOP2CL(rcr); -FASTOP2CL(shl); -FASTOP2CL(shr); -FASTOP2CL(sar); - -FASTOP2W(bsf); -FASTOP2W(bsr); -FASTOP2W(bt); -FASTOP2W(bts); -FASTOP2W(btr); -FASTOP2W(btc); - static int em_mul_ex(struct x86_emulate_ctxt *ctxt) { u8 ex = 0; @@ -3040,21 +3055,6 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -FASTOP2(add); -FASTOP2(or); -FASTOP2(adc); -FASTOP2(sbb); -FASTOP2(and); -FASTOP2(sub); -FASTOP2(xor); -FASTOP2(cmp); -FASTOP2(test); - -FASTOP3WCL(shld); -FASTOP3WCL(shrd); - -FASTOP2W(imul); - static int em_xchg(struct x86_emulate_ctxt *ctxt) { /* Write back the register source. */ -- cgit v1.1 From 158de57f905ed97ea0f993feac1c40a40f5c7a04 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jan 2013 19:51:57 +0200 Subject: KVM: x86 emulator: convert a few freestanding emulations to fastop Reviewed-by: Gleb Natapov Signed-off-by: Avi Kivity Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/emulate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index d06354d..e99fb72 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2209,7 +2209,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) /* Save real source value, then compare EAX against destination. */ ctxt->src.orig_val = ctxt->src.val; ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); - emulate_2op_SrcV(ctxt, "cmp"); + fastop(ctxt, em_cmp); if (ctxt->eflags & EFLG_ZF) { /* Success: write back to memory. */ @@ -2977,7 +2977,7 @@ static int em_das(struct x86_emulate_ctxt *ctxt) ctxt->src.type = OP_IMM; ctxt->src.val = 0; ctxt->src.bytes = 1; - emulate_2op_SrcV(ctxt, "or"); + fastop(ctxt, em_or); ctxt->eflags &= ~(X86_EFLAGS_AF | X86_EFLAGS_CF); if (cf) ctxt->eflags |= X86_EFLAGS_CF; @@ -4816,7 +4816,7 @@ twobyte_insn: (s16) ctxt->src.val; break; case 0xc0 ... 0xc1: /* xadd */ - emulate_2op_SrcV(ctxt, "add"); + fastop(ctxt, em_add); /* Write back the register source. */ ctxt->src.val = ctxt->dst.orig_val; write_register_operand(&ctxt->src); -- cgit v1.1 From 1f3141e80b149e7215313dff29e9a0c47811b1d1 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:41 +0200 Subject: KVM: VMX: remove special CPL cache access during transition to real mode. 
Since vmx_get_cpl() always returns 0 when the VCPU is in real mode, it is no longer needed. Also reset the CPL cache to zero during the transition to protected mode, since the transition may happen while CS.selector & 3 != 0, but in reality CPL is 0. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index dd2a85c..9d2ec88 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2817,6 +2817,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu) fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); + + /* CPL is always 0 when CPU enters protected mode */ + __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + vmx->cpl = 0; } static gva_t rmode_tss_base(struct kvm *kvm) @@ -3229,14 +3233,6 @@ static int vmx_get_cpl(struct kvm_vcpu *vcpu) && (kvm_get_rflags(vcpu) & X86_EFLAGS_VM)) /* if virtual 8086 */ return 3; - /* - * If we enter real mode with cs.sel & 3 != 0, the normal CPL calculations - * fail; use the cache instead. - */ - if (unlikely(vmx->emulation_required && emulate_invalid_guest_state)) { - return vmx->cpl; - } - if (!test_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail)) { __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); vmx->cpl = vmx_read_guest_seg_selector(vmx, VCPU_SREG_CS) & 3; -- cgit v1.1 From 2f143240cb822c0d23ad591b89fe10e7c1f842f5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:42 +0200 Subject: KVM: VMX: reset CPL only on CS register write. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9d2ec88..edfbe94 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3269,7 +3269,8 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; vmx_segment_cache_clear(vmx); - __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); + if (seg == VCPU_SREG_CS) + __clear_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); if (vmx->rmode.vm86_active && seg != VCPU_SREG_LDTR) { vmx->rmode.segs[seg] = *var; -- cgit v1.1 From c5e97c80b5ddd6139bdadcbd44e263c2a3e7fae6 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:43 +0200 Subject: KVM: VMX: if unrestricted guest is enabled vcpu state is always valid. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index edfbe94..f942b20 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3488,6 +3488,9 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) */ static bool guest_state_valid(struct kvm_vcpu *vcpu) { + if (enable_unrestricted_guest) + return true; + /* real mode guest state checks */ if (!is_protmode(vcpu)) { if (!rmode_segment_valid(vcpu, VCPU_SREG_CS)) -- cgit v1.1 From 286da4156dc65c8a054580fdd96b7709132dce8d Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:44 +0200 Subject: KVM: VMX: remove hack that disables emulation on vcpu reset/init There is no reason for it. 
If the state is suitable for vmentry, it will be detected during guest entry and no emulation will happen. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f942b20..20409bd 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4035,9 +4035,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) ret = 0; - /* HACK: Don't enable emulation on guest boot/reset */ - vmx->emulation_required = 0; - return ret; } -- cgit v1.1 From 218e763f458c44f30041c1b48b4371e130fd4317 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:45 +0200 Subject: KVM: VMX: skip vmx->rmode.vm86_active check on cr0 write if unrestricted guest is enabled vmx->rmode.vm86_active is never true if unrestricted guest is enabled. Make it more explicit that neither enter_pmode() nor enter_rmode() is called in this case. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 20409bd..319e840 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2877,9 +2877,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) unsigned long flags; struct vcpu_vmx *vmx = to_vmx(vcpu); - if (enable_unrestricted_guest) - return; - vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS); @@ -3086,14 +3083,15 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (enable_unrestricted_guest) hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; - else + else { hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; - if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) - enter_pmode(vcpu); + if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) + enter_pmode(vcpu); - if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) - enter_rmode(vcpu); + if (!vmx->rmode.vm86_active && !(cr0 & X86_CR0_PE)) + enter_rmode(vcpu); + } #ifdef CONFIG_X86_64 if (vcpu->arch.efer & EFER_LME) { -- cgit v1.1 From 25391454e73e3156202264eb3c473825afe4bc94 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:46 +0200 Subject: KVM: VMX: don't clobber segment AR of unusable segments. Usability is returned in the unusable field, so there is no need to clobber the entire AR. Callers already have to know how to deal with unusable segments, since if emulate_invalid_guest_state=true the AR is not zeroed. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 319e840..77212d1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3196,8 +3196,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, var->limit = vmx_read_guest_seg_limit(vmx, seg); var->selector = vmx_read_guest_seg_selector(vmx, seg); ar = vmx_read_guest_seg_ar(vmx, seg); - if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state) - ar = 0; var->type = ar & 15; var->s = (ar >> 4) & 1; var->dpl = (ar >> 5) & 3; -- cgit v1.1 From 91b0aa2ca63e60ac69c5dd96bff580d3947073d4 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:47 +0200 Subject: KVM: VMX: rename fix_pmode_dataseg to fix_pmode_seg. 
The function deals with the code segment too. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 77212d1..9bc68c7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2759,7 +2759,7 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } -static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, +static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { if (!emulate_invalid_guest_state) { @@ -2811,12 +2811,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu) update_exception_bitmap(vcpu); - fix_pmode_dataseg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); - fix_pmode_dataseg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); - fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); - fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); - fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); - fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); + fix_pmode_seg(vcpu, VCPU_SREG_CS, &vmx->rmode.segs[VCPU_SREG_CS]); + fix_pmode_seg(vcpu, VCPU_SREG_SS, &vmx->rmode.segs[VCPU_SREG_SS]); + fix_pmode_seg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]); + fix_pmode_seg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]); + fix_pmode_seg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]); + fix_pmode_seg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]); /* CPL is always 0 when CPU enters protected mode */ __set_bit(VCPU_EXREG_CPL, (ulong *)&vcpu->arch.regs_avail); -- cgit v1.1 From 378a8b099fc207ddcb91b19a8c1457667e0af398 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:48 +0200 Subject: KVM: x86: fix use of uninitialized memory as segment descriptor in emulator. If VMX reports a segment as unusable, zero the descriptor passed by the emulator before returning. Such a descriptor will be considered not present by the emulator. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e00dd05..b9f5529 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4489,8 +4489,10 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector, kvm_get_segment(emul_to_vcpu(ctxt), &var, seg); *selector = var.selector; - if (var.unusable) + if (var.unusable) { + memset(desc, 0, sizeof(*desc)); return false; + } if (var.g) var.limit >>= 12; -- cgit v1.1 From 141687869fb904e912568c6b94a6b1fa2114f6ed Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 21 Jan 2013 15:36:49 +0200 Subject: KVM: VMX: set vmx->emulation_required only when needed. If emulate_invalid_guest_state=false, vmx->emulation_required is never actually used, but it always ends up set to true, since handle_invalid_guest_state(), the only place it is reset back to false, is never called. This, besides being not very clean, makes the vmexit and vmentry paths check emulate_invalid_guest_state needlessly. The patch fixes that by keeping emulation_required coherent with the emulate_invalid_guest_state setting. 
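(The caching pattern the patch enforces, reduced to a hedged stand-alone sketch with illustrative names — recompute the predicate at every point guest state can change, so the hot path only tests one cached bool:)

#include <stdbool.h>

static bool emulate_invalid_guest_state_knob;	/* VMX module parameter   */
static bool guest_state_valid_now(void)		/* expensive check, stub  */
{
	return true;
}

static bool emulation_req_cache;		/* vmx->emulation_required */

static void on_guest_state_write(void)		/* vmx_set_cr0 / vmx_set_segment */
{
	emulation_req_cache = emulate_invalid_guest_state_knob &&
			      !guest_state_valid_now();
}

static void vcpu_run_hot_path(void)		/* vmx_vcpu_run */
{
	if (emulation_req_cache)
		return;				/* defer to the emulator */
	/* ... vmentry ... */
}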
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9bc68c7..02eeba8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2759,6 +2759,11 @@ static __exit void hardware_unsetup(void) free_kvm_area(); } +static bool emulation_required(struct kvm_vcpu *vcpu) +{ + return emulate_invalid_guest_state && !guest_state_valid(vcpu); +} + static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { @@ -2794,7 +2799,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); - vmx->emulation_required = 1; vmx->rmode.vm86_active = 0; vmx_segment_cache_clear(vmx); @@ -2885,7 +2889,6 @@ static void enter_rmode(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_SS], VCPU_SREG_SS); vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_CS], VCPU_SREG_CS); - vmx->emulation_required = 1; vmx->rmode.vm86_active = 1; /* @@ -3111,6 +3114,9 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; + + /* depends on vcpu->arch.cr0 to be set to a new value */ + vmx->emulation_required = emulation_required(vcpu); } static u64 construct_eptp(unsigned long root_hpa) @@ -3298,8 +3304,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(var)); out: - if (!vmx->emulation_required) - vmx->emulation_required = !guest_state_valid(vcpu); + vmx->emulation_required |= emulation_required(vcpu); } static void vmx_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) @@ -5027,7 +5032,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) schedule(); } - vmx->emulation_required = !guest_state_valid(vcpu); + vmx->emulation_required = emulation_required(vcpu); out: return ret; } @@ -5970,7 +5975,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 vectoring_info = vmx->idt_vectoring_info; /* If guest state is invalid, start emulating */ - if (vmx->emulation_required && emulate_invalid_guest_state) + if (vmx->emulation_required) return handle_invalid_guest_state(vcpu); /* @@ -6253,7 +6258,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Don't enter VMX if guest state is invalid, let the exit handler start emulation until we arrive back to a valid state */ - if (vmx->emulation_required && emulate_invalid_guest_state) + if (vmx->emulation_required) return; if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty)) -- cgit v1.1 From 2c922cd07a0ada4d68e22453c972a5d77be8f20d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 22 Jan 2013 13:01:19 -0800 Subject: x86/cpu/hotplug: Remove CONFIG_EXPERIMENTAL dependency The CONFIG_EXPERIMENTAL config item has not carried much meaning for a while now and is almost always enabled by default. As agreed during the Linux kernel summit, remove it from any "depends on" lines in Kconfigs. 
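(The shape of the change, shown on a hypothetical option — EXPERIMENTAL simply drops out of the dependency expression, everything else stays:)

 config FOO_FEATURE
 	bool "Example feature"
-	depends on BAR && EXPERIMENTAL
+	depends on BAR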
Signed-off-by: Kees Cook Cc: Fenghua Yu Cc: Greg Kroah-Hartman Link: http://lkml.kernel.org/r/20130122210119.GA311@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..2d62103 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1699,7 +1699,7 @@ config HOTPLUG_CPU config BOOTPARAM_HOTPLUG_CPU0 bool "Set default setting of cpu0_hotpluggable" default n - depends on HOTPLUG_CPU && EXPERIMENTAL + depends on HOTPLUG_CPU ---help--- Set whether default state of cpu0_hotpluggable is on or off. @@ -1728,7 +1728,7 @@ config BOOTPARAM_HOTPLUG_CPU0 config DEBUG_HOTPLUG_CPU0 def_bool n prompt "Debug CPU0 hotplug" - depends on HOTPLUG_CPU && EXPERIMENTAL + depends on HOTPLUG_CPU ---help--- Enabling this option offlines CPU0 (if CPU0 can be offlined) as soon as possible and boots up userspace with CPU0 offlined. User -- cgit v1.1 From d59fe3f13d070489e63d04e1c9bfd819d5f71542 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 22 Jan 2013 11:24:12 +0000 Subject: ix86: Tighten asmlinkage_protect() constraints While the description of the commit that originally introduced asmlinkage_protect() validly says that this doesn't guarantee clobbering of the function arguments, using "m" constraints rather than "g" ones reduces the risk (by making it less attractive to the compiler to move those variables into registers) and generally results in better code (because we know the arguments are in memory anyway, and are frequently - if not always - used just once, with the second [compiler visible] use in asmlinkage_protect() itself being a fake one). Signed-off-by: Jan Beulich Cc: Cc: Cc: Linus Torvalds Cc: Andrew Morton Link: http://lkml.kernel.org/r/50FE84EC02000078000B83B7@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/linkage.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 4814297..79327e9 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -27,20 +27,20 @@ #define __asmlinkage_protect0(ret) \ __asmlinkage_protect_n(ret) #define __asmlinkage_protect1(ret, arg1) \ - __asmlinkage_protect_n(ret, "g" (arg1)) + __asmlinkage_protect_n(ret, "m" (arg1)) #define __asmlinkage_protect2(ret, arg1, arg2) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) #define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) #define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ - "g" (arg4)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) #define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ - "g" (arg4), "g" (arg5)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) #define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ - __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ - "g" (arg4), "g" (arg5), "g" (arg6)) + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) #endif /* CONFIG_X86_32 */ -- 
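To see why the constraint choice matters, consider a hedged toy example (hypothetical code, not from the patch): with "g" the compiler may satisfy the operand from a register copy, losing the anchor to the argument's stack slot, while "m" forces a memory reference.

    /* Hypothetical illustration of "m" vs "g" asm constraints. */
    static inline void keep_in_memory(int *slot)
    {
        /*
         * The empty asm consumes *slot as a memory operand, so the
         * compiler must keep it addressable in memory; with "g" a
         * register would also satisfy the constraint and the memory
         * anchor would be lost.
         */
        asm volatile("" : : "m" (*slot));
    }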
cgit v1.1 From e3f0f36ddf1b2743a0d4ea312996536a9c37e1c7 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 18 Jan 2013 12:58:47 -0500 Subject: x86/apic: Remove noisy zero-mask warning from default_send_IPI_mask_logical() Since circa 3.5, we've had dozens of reports of people hitting this warning. Forwarded reports have been met with silence, so just remove the warning if no-one cares. Example reports: https://bugzilla.redhat.com/show_bug.cgi?id=797687 https://bugzilla.redhat.com/show_bug.cgi?id=867174 https://bugzilla.redhat.com/show_bug.cgi?id=894865 Signed-off-by: Dave Jones Cc: Andrew Morton Link: http://lkml.kernel.org/r/20130118175847.GA27662@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/ipi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index cce91bf..7434d85 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) unsigned long mask = cpumask_bits(cpumask)[0]; unsigned long flags; - if (WARN_ONCE(!mask, "empty IPI mask")) + if (!mask) return; local_irq_save(flags); -- cgit v1.1 From b9975dabe3f0a6e4d1af52c47f66b5558df207a3 Mon Sep 17 00:00:00 2001 From: Cong Ding Date: Mon, 14 Jan 2013 22:39:18 +0100 Subject: x86/apb/timer: Remove unnecessary "if" adev cannot be NULL, so we don't need to check it. It is also dereferenced just before the check. Signed-off-by: Cong Ding Cc: Sasha Levin Link: http://lkml.kernel.org/r/1358199561-15518-1-git-send-email-dinggnu@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apb_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index afdc3f75..cf92735 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -240,7 +240,7 @@ static int apbt_cpuhp_notify(struct notifier_block *n, dw_apb_clockevent_pause(adev->timer); if (system_state == SYSTEM_RUNNING) { pr_debug("skipping APBT CPU %lu offline\n", cpu); - } else if (adev) { + } else { pr_debug("APBT clockevent for cpu %lu offline\n", cpu); dw_apb_clockevent_stop(adev->timer); } -- cgit v1.1 From 4cca6ea04d31c22a7d0436949c072b27bde41f86 Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Thu, 17 Jan 2013 15:44:42 -0800 Subject: x86/apic: Allow x2apic without IR on VMware platform This patch updates the x2apic initialization code to allow x2apic on the VMware platform even without interrupt remapping support. The hypervisor_x2apic_available hook was added to the x2apic initialization code and used by KVM and Xen before this. I have also cleaned up that code to export this hook through the hypervisor_x86 structure. Compile tested for KVM and Xen configs; this patch doesn't have any functional effect on those two platforms. On the VMware platform, verified that x2apic is used in physical mode on products that support this. Signed-off-by: Alok N Kataria Reviewed-by: Doug Covelli Reviewed-by: Dan Hecht Acked-by: H.
Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Avi Kivity Link: http://lkml.kernel.org/r/1358466282.423.60.camel@akataria-dtop.eng.vmware.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hypervisor.h | 13 ++++--------- arch/x86/kernel/cpu/hypervisor.c | 7 +++++++ arch/x86/kernel/cpu/vmware.c | 13 +++++++++++++ arch/x86/kernel/kvm.c | 1 + arch/x86/xen/enlighten.c | 1 + 5 files changed, 26 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h index b518c75..86095ed 100644 --- a/arch/x86/include/asm/hypervisor.h +++ b/arch/x86/include/asm/hypervisor.h @@ -25,6 +25,7 @@ extern void init_hypervisor(struct cpuinfo_x86 *c); extern void init_hypervisor_platform(void); +extern bool hypervisor_x2apic_available(void); /* * x86 hypervisor information @@ -41,6 +42,9 @@ struct hypervisor_x86 { /* Platform setup (run once per boot) */ void (*init_platform)(void); + + /* X2APIC detection (run once per boot) */ + bool (*x2apic_available)(void); }; extern const struct hypervisor_x86 *x86_hyper; @@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv; extern const struct hypervisor_x86 x86_hyper_xen_hvm; extern const struct hypervisor_x86 x86_hyper_kvm; -static inline bool hypervisor_x2apic_available(void) -{ - if (kvm_para_available()) - return true; - if (xen_x2apic_para_available()) - return true; - return false; -} - #endif diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index a8f8fa9..1e7e84a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void) if (x86_hyper->init_platform) x86_hyper->init_platform(); } + +bool __init hypervisor_x2apic_available(void) +{ + return x86_hyper && + x86_hyper->x2apic_available && + x86_hyper->x2apic_available(); +} diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index d22d0c4..03a3632 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -33,6 +33,9 @@ #define VMWARE_PORT_CMD_GETVERSION 10 #define VMWARE_PORT_CMD_GETHZ 45 +#define VMWARE_PORT_CMD_GETVCPU_INFO 68 +#define VMWARE_PORT_CMD_LEGACY_X2APIC 3 +#define VMWARE_PORT_CMD_VCPU_RESERVED 31 #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ __asm__("inl (%%dx)" : \ @@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); } +/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. 
*/ +static bool __init vmware_legacy_x2apic_available(void) +{ + uint32_t eax, ebx, ecx, edx; + VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx); + return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 && + (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; +} + const __refconst struct hypervisor_x86 x86_hyper_vmware = { .name = "VMware", .detect = vmware_platform, .set_cpu_features = vmware_set_cpu_features, .init_platform = vmware_platform_setup, + .x2apic_available = vmware_legacy_x2apic_available, }; EXPORT_SYMBOL(x86_hyper_vmware); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9c2bd8b..2b44ea5 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -505,6 +505,7 @@ static bool __init kvm_detect(void) const struct hypervisor_x86 x86_hyper_kvm __refconst = { .name = "KVM", .detect = kvm_detect, + .x2apic_available = kvm_para_available, }; EXPORT_SYMBOL_GPL(x86_hyper_kvm); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 138e566..8b4c56d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1668,6 +1668,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = { .name = "Xen HVM", .detect = xen_hvm_platform, .init_platform = xen_hvm_guest_init, + .x2apic_available = xen_x2apic_para_available, }; EXPORT_SYMBOL(x86_hyper_xen_hvm); #endif -- cgit v1.1 From 479a99a8e510c8839e0d3d3de8391f8bc61b9760 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 8 Jan 2013 16:18:41 -0800 Subject: x86/srat: Simplify memory affinity init error handling The acpi_numa_memory_affinity_init() function can fail in several scenarios; use a single point of error return. Signed-off-by: Davidlohr Bueso Link: http://lkml.kernel.org/r/1357690721.1890.15.camel@buesod1.americas.hpqcorp.net [ Cleaned up the label naming a bit.
] Signed-off-by: Ingo Molnar --- arch/x86/mm/srat.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 4ddf497..cdd0da9 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -149,39 +149,40 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) int node, pxm; if (srat_disabled()) - return -1; - if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) { - bad_srat(); - return -1; - } + goto out_err; + if (ma->header.length != sizeof(struct acpi_srat_mem_affinity)) + goto out_err_bad_srat; if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) - return -1; - + goto out_err; if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) - return -1; + goto out_err; + start = ma->base_address; end = start + ma->length; pxm = ma->proximity_domain; if (acpi_srat_revision <= 1) pxm &= 0xff; + node = setup_node(pxm); if (node < 0) { printk(KERN_ERR "SRAT: Too many proximity domains.\n"); - bad_srat(); - return -1; + goto out_err_bad_srat; } - if (numa_add_memblk(node, start, end) < 0) { - bad_srat(); - return -1; - } + if (numa_add_memblk(node, start, end) < 0) + goto out_err_bad_srat; node_set(node, numa_nodes_parsed); printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1); + return 0; +out_err_bad_srat: + bad_srat(); +out_err: + return -1; } void __init acpi_numa_arch_fixup(void) {} -- cgit v1.1 From 6125bc8b86d9da75ddac77e38f41afbf9f5de3e3 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 4 Jan 2013 15:41:47 -0700 Subject: x86/time/rtc: Don't print extended CMOS year when reading RTC We shouldn't print the current century every time we read the RTC. Signed-off-by: Bjorn Helgaas Acked-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20130104224146.15189.14874.stgit@bhelgaas.mtv.corp.google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/rtc.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 801602b..2e8f3d3 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void) if (century) { century = bcd2bin(century); year += century * 100; - printk(KERN_INFO "Extended CMOS year: %d\n", century * 100); } else year += CMOS_YEARS_OFFS; -- cgit v1.1 From 0927b482ae69acb7605f6be1ad3860b657512fbd Mon Sep 17 00:00:00 2001 From: ShuoX Liu Date: Sat, 29 Dec 2012 00:48:44 +0800 Subject: perf/x86: Enable Intel Lincroft/Penwell/Cloverview Atom support These three chips are based on Atom but have different model IDs, so add these three IDs for perf HW event support.
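The model numbers keyed in the switch below come from CPUID leaf 1, where for family 6 parts the effective model is (extended_model << 4) | model; a hedged userspace sketch of the decoding (illustrative only, not kernel code):

    #include <stdio.h>
    #include <cpuid.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;
        unsigned int family, model;

        __get_cpuid(1, &eax, &ebx, &ecx, &edx);
        family = (eax >> 8) & 0xf;
        model = (eax >> 4) & 0xf;
        if (family == 6 || family == 0xf)
            model |= ((eax >> 16) & 0xf) << 4; /* extended model bits */
        printf("family %u model %u\n", family, model);
        return 0;
    }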
Signed-off-by: ShuoX Liu Cc: yanmin_zhang@intel.linux.com Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1356713324-12442-1-git-send-email-shuox.liu@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 93b9e11..cb313a5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2019,7 +2019,10 @@ __init int intel_pmu_init(void) break; case 28: /* Atom */ - case 54: /* Cedariew */ + case 38: /* Lincroft */ + case 39: /* Penwell */ + case 53: /* Cloverview */ + case 54: /* Cedarview */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); -- cgit v1.1 From 55a6e622e66a27ab106fae00cac15ba630e7fbd4 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 18 Dec 2012 12:22:12 -0800 Subject: arch/x86/tools/insn_sanity.c: Identify source of messages The kernel build prints: Building modules, stage 2. TEST posttest MODPOST 3821 modules TEST posttest Success: decoded and checked 1000000 random instructions with 0 errors (seed:0xaac4bc47) CC arch/x86/boot/a20.o CC arch/x86/boot/cmdline.o AS arch/x86/boot/copy.o HOSTCC arch/x86/boot/mkcpustr CC arch/x86/boot/cpucheck.o CC arch/x86/boot/early_serial_console.o which is irritating because you don't know what program is proudly pronouncing its success. So, as described in "console mode programming user interface guidelines version 101" which doesn't exist, change this program to identify the source of its messages. Signed-off-by: Andrew Morton Cc: "H. Peter Anvin" Cc: Masami Hiramatsu Signed-off-by: Ingo Molnar --- arch/x86/tools/insn_sanity.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c index cc2f8c1..872eb60 100644 --- a/arch/x86/tools/insn_sanity.c +++ b/arch/x86/tools/insn_sanity.c @@ -55,7 +55,7 @@ static FILE *input_file; /* Input file name */ static void usage(const char *err) { if (err) - fprintf(stderr, "Error: %s\n\n", err); + fprintf(stderr, "%s: Error: %s\n\n", prog, err); fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog); fprintf(stderr, "\t-y 64bit mode\n"); fprintf(stderr, "\t-n 32bit mode\n"); @@ -269,7 +269,13 @@ int main(int argc, char **argv) insns++; } - fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed); + fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", + prog, + (errors) ? "Failure" : "Success", + insns, + (input_file) ? "given" : "random", + errors, + seed); return errors ? 1 : 0; } -- cgit v1.1 From 9faec5be3a27f95ee359b42c6c81b3173eb13958 Mon Sep 17 00:00:00 2001 From: yangyongqiang Date: Tue, 18 Dec 2012 12:22:25 -0800 Subject: perf/x86: Fix P6 driver section warning Fix a compile warning - 'a section type conflict' by removing __initconst. 
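For context, __initconst places data in the discarded .init.rodata section and is only valid on const objects; annotating a non-const array with it, as here, is one way to provoke exactly this section type conflict. A hedged sketch of the valid pairing (hypothetical symbols):

    /* Valid: const boot-time table, discarded along with .init.rodata. */
    static const u64 example_event_map[] __initconst = {
        0x003c, /* hypothetical event encodings */
        0x00c0,
    };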
Signed-off-by: yangyongqiang Cc: Cyrill Gorcunov Cc: Vince Weaver Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_p6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index f2af39f..4820c23 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -19,7 +19,7 @@ static const u64 p6_perfmon_event_map[] = }; -static __initconst u64 p6_hw_cache_event_ids +static u64 p6_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = -- cgit v1.1 From 602e018607ba5c92922c0ffae40e346e1b95fa84 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 18 Dec 2012 12:22:18 -0800 Subject: x86/mm: Convert update_mmu_cache() and update_mmu_cache_pmd() to functions Converting these macros to functions exposes type problems before the changes are integrated and trigger problems on other architectures. Signed-off-by: Kirill A. Shutemov Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 12 ++++++++++++ arch/x86/include/asm/pgtable_32.h | 7 ------- arch/x86/include/asm/pgtable_64.h | 3 --- 3 files changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5199db2..512ec6b 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -781,6 +781,18 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) memcpy(dst, src, count * sizeof(pgd_t)); } +/* + * The x86 doesn't have any external MMU info: the kernel page + * tables contain all the necessary information. + */ +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ +} +static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmd) +{ +} #include #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 8faa215..9ee3221 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -66,13 +66,6 @@ do { \ __flush_tlb_one((vaddr)); \ } while (0) -/* - * The i386 doesn't have any external MMU info: the kernel page - * tables contain all the necessary information.
- */ -#define update_mmu_cache(vma, address, ptep) do { } while (0) -#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) - #endif /* !__ASSEMBLY__ */ /* diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 47356f9..615b0c7 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) #define pte_unmap(pte) ((void)(pte))/* NOP */ -#define update_mmu_cache(vma, address, ptep) do { } while (0) -#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) - /* Encode and de-code a swap entry */ #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) -- cgit v1.1 From f73568a059c3afd6323a9ee3860938df91252ee4 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Tue, 18 Dec 2012 12:22:21 -0800 Subject: x86/mm: Fix the argument passed to sync_global_pgds() The address range of sync_global_pgds() should be [start, end], but we pass [start, end) to this function. Signed-off-by: Wen Congyang Cc: Yasuaki Ishimatsu Cc: David Rientjes Cc: Jiang Liu Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/mm/init_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2ead3c8..e779e0b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -605,7 +605,7 @@ kernel_physical_mapping_init(unsigned long start, } if (pgd_changed) - sync_global_pgds(addr, end); + sync_global_pgds(addr, end - 1); __flush_tlb_all(); @@ -981,7 +981,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) } } - sync_global_pgds((unsigned long)start_page, end); + sync_global_pgds((unsigned long)start_page, end - 1); return 0; } -- cgit v1.1 From 923d8697e24847000490c187de1aeaca622611a3 Mon Sep 17 00:00:00 2001 From: Youquan Song Date: Tue, 18 Dec 2012 12:20:23 -0500 Subject: x86/perf: Add IvyBridge EP support Running the perf utility on an Ivybridge EP server, we encounter "not supported" events: L1-dcache-loads L1-dcache-load-misses L1-dcache-stores L1-dcache-store-misses L1-dcache-prefetches L1-dcache-prefetch-misses This patch adds support for this processor.
Signed-off-by: Youquan Song Cc: Andi Kleen Cc: Youquan Song Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1355851223-27705-1-git-send-email-youquan.song@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index cb313a5..4914e94 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -2087,6 +2087,7 @@ __init int intel_pmu_init(void) pr_cont("SandyBridge events, "); break; case 58: /* IvyBridge */ + case 62: /* IvyBridge EP */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs, -- cgit v1.1 From e3e81aca8d51a50e19d6c67fafc4c9c4f0404bf1 Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Mon, 17 Dec 2012 17:42:56 +0800 Subject: x86: Fix a typo legact -> legacy Signed-off-by: Yuanhan Liu Signed-off-by: Ingo Molnar --- arch/x86/kernel/sys_x86_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 97ef74b..dbded5a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -157,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (flags & MAP_FIXED) return addr; - /* for MAP_32BIT mappings we force the legact mmap base */ + /* for MAP_32BIT mappings we force the legacy mmap base */ if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) goto bottomup; -- cgit v1.1 From 13f0e4d2b9e2209f13d5a4122478eb79e6136870 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 23 Nov 2012 16:30:07 +0000 Subject: x86/EFI: Properly init-annotate BGRT code These items are only ever referenced from initialization code. Signed-off-by: Jan Beulich Cc: Link: http://lkml.kernel.org/r/50AFB29F02000078000AAE8E@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi-bgrt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index d9c1b95..7145ec6 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -11,20 +11,21 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include struct acpi_table_bgrt *bgrt_tab; -void *bgrt_image; -size_t bgrt_image_size; +void *__initdata bgrt_image; +size_t __initdata bgrt_image_size; struct bmp_header { u16 id; u32 size; } __packed; -void efi_bgrt_init(void) +void __init efi_bgrt_init(void) { acpi_status status; void __iomem *image; -- cgit v1.1 From 9611dc7a8de8a5c6244886dad020995b1a896236 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 23 Nov 2012 16:33:05 +0000 Subject: x86: Convert a few mistaken __cpuinit annotations to __init The first two are functions serving as initcalls; the SFI one is only being called from __init code. 
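For reference, an initcall runs exactly once during boot and its text is freed together with the rest of .init.text, which is why __init rather than __cpuinit is the correct annotation here; a minimal hedged sketch of the pattern (hypothetical example, not from the patch):

    static int __init example_sysfs_init(void)
    {
        /* Runs once at boot; this function's text is discarded afterwards. */
        return 0;
    }
    device_initcall(example_sysfs_init);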
Signed-off-by: Jan Beulich Link: http://lkml.kernel.org/r/50AFB35102000078000AAECA@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 2 +- arch/x86/mm/tlb.c | 2 +- arch/x86/platform/sfi/sfi.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index fe9edec..0e46240 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1227,7 +1227,7 @@ static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { .notifier_call = cacheinfo_cpu_callback, }; -static int __cpuinit cache_sysfs_init(void) +static int __init cache_sysfs_init(void) { int i; diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 13a6b29..282375f 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -335,7 +335,7 @@ static const struct file_operations fops_tlbflush = { .llseek = default_llseek, }; -static int __cpuinit create_tlb_flushall_shift(void) +static int __init create_tlb_flushall_shift(void) { debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR, arch_debugfs_dir, NULL, &fops_tlbflush); diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c index 7785b72..bcd1a70 100644 --- a/arch/x86/platform/sfi/sfi.c +++ b/arch/x86/platform/sfi/sfi.c @@ -35,7 +35,7 @@ static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; /* All CPUs enumerated by SFI must be present and enabled */ -static void __cpuinit mp_sfi_register_lapic(u8 id) +static void __init mp_sfi_register_lapic(u8 id) { if (MAX_LOCAL_APIC - id <= 0) { pr_warning("Processor #%d invalid (max %d)\n", -- cgit v1.1 From 51906e779f2b13b38f8153774c4c7163d412ffd9 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 19 Nov 2012 16:01:29 +0100 Subject: x86/MSI: Support multiple MSIs in presence of IRQ remapping The MSI specification has several constraints in comparison with MSI-X, most notable of which is the inability to configure MSIs independently. As a result, it is impossible to dispatch interrupts from different queues to different CPUs. This largely devalues the support of multiple MSIs in SMP systems. Also, the necessity to allocate a contiguous block of vector numbers for devices capable of multiple MSIs might put considerable pressure on the x86 interrupt vector allocator and could lead to fragmentation of the interrupt vector space. This patch overcomes both drawbacks in the presence of IRQ remapping and lets devices take advantage of multiple queues and per-IRQ affinity assignments.
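On the consumer side, a driver of this era would request a block of MSIs with pci_enable_msi_block() and attach one handler per vector, relying on the vectors being consecutive from pdev->irq; a hedged sketch (hypothetical driver names, error handling abbreviated):

    static int mydev_setup_msi(struct pci_dev *pdev, struct mydev_queue *queues)
    {
        int i, ret;

        ret = pci_enable_msi_block(pdev, 4); /* 0 on success; >0 = fewer vectors supported */
        if (ret)
            return ret < 0 ? ret : -ENOSPC;

        /* Multiple-MSI vectors are consecutive, starting at pdev->irq. */
        for (i = 0; i < 4; i++) {
            ret = request_irq(pdev->irq + i, mydev_queue_isr, 0,
                              "mydev-queue", &queues[i]);
            if (ret)
                goto err;
        }
        return 0;
    err:
        while (--i >= 0)
            free_irq(pdev->irq + i, &queues[i]);
        pci_disable_msi(pdev);
        return ret;
    }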
Signed-off-by: Alexander Gordeev Cc: Bjorn Helgaas Cc: Suresh Siddha Cc: Yinghai Lu Cc: Matthew Wilcox Cc: Jeff Garzik Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c8bd86ff56b5fc118257436768aaa04489ac0a4c.1353324359.git.agordeev@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 165 +++++++++++++++++++++++++++++++++-------- 1 file changed, 133 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b739d39..2016f9d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -300,9 +300,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) return cfg; } -static int alloc_irq_from(unsigned int from, int node) +static int alloc_irqs_from(unsigned int from, unsigned int count, int node) { - return irq_alloc_desc_from(from, node); + return irq_alloc_descs_from(from, count, node); } static void free_irq_at(unsigned int at, struct irq_cfg *cfg) @@ -2982,37 +2982,58 @@ device_initcall(ioapic_init_ops); /* * Dynamic irq allocate and deallocation */ -unsigned int create_irq_nr(unsigned int from, int node) +unsigned int __create_irqs(unsigned int from, unsigned int count, int node) { - struct irq_cfg *cfg; + struct irq_cfg **cfg; unsigned long flags; - unsigned int ret = 0; - int irq; + int irq, i; if (from < nr_irqs_gsi) from = nr_irqs_gsi; - irq = alloc_irq_from(from, node); - if (irq < 0) - return 0; - cfg = alloc_irq_cfg(irq, node); - if (!cfg) { - free_irq_at(irq, NULL); + cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node); + if (!cfg) return 0; + + irq = alloc_irqs_from(from, count, node); + if (irq < 0) + goto out_cfgs; + + for (i = 0; i < count; i++) { + cfg[i] = alloc_irq_cfg(irq + i, node); + if (!cfg[i]) + goto out_irqs; } raw_spin_lock_irqsave(&vector_lock, flags); - if (!__assign_irq_vector(irq, cfg, apic->target_cpus())) - ret = irq; + for (i = 0; i < count; i++) + if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus())) + goto out_vecs; raw_spin_unlock_irqrestore(&vector_lock, flags); - if (ret) { - irq_set_chip_data(irq, cfg); - irq_clear_status_flags(irq, IRQ_NOREQUEST); - } else { - free_irq_at(irq, cfg); + for (i = 0; i < count; i++) { + irq_set_chip_data(irq + i, cfg[i]); + irq_clear_status_flags(irq + i, IRQ_NOREQUEST); } - return ret; + + kfree(cfg); + return irq; + +out_vecs: + for (i--; i >= 0; i--) + __clear_irq_vector(irq + i, cfg[i]); + raw_spin_unlock_irqrestore(&vector_lock, flags); +out_irqs: + for (i = 0; i < count; i++) + free_irq_at(irq + i, cfg[i]); +out_cfgs: + kfree(cfg); + return 0; +} + +unsigned int create_irq_nr(unsigned int from, int node) +{ + return __create_irqs(from, 1, node); } int create_irq(void) @@ -3045,6 +3066,14 @@ void destroy_irq(unsigned int irq) free_irq_at(irq, cfg); } +static inline void destroy_irqs(unsigned int irq, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; i++) + destroy_irq(irq + i); +} + /* * MSI message composition */ @@ -3071,7 +3100,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, if (irq_remapped(cfg)) { compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); - return err; + return 0; } if (x2apic_enabled()) @@ -3098,7 +3127,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); - return err; + return 0; } static int @@ -3136,18 +3165,26 @@ static struct irq_chip msi_chip = { 
.irq_retrigger = ioapic_retrigger_irq, }; -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) +static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) { struct irq_chip *chip = &msi_chip; struct msi_msg msg; + unsigned int irq = irq_base + irq_offset; int ret; ret = msi_compose_msg(dev, irq, &msg, -1); if (ret < 0) return ret; - irq_set_msi_desc(irq, msidesc); - write_msi_msg(irq, &msg); + irq_set_msi_desc_off(irq_base, irq_offset, msidesc); + + /* + * MSI-X message is written per-IRQ, the offset is always 0. + * MSI message denotes a contiguous group of IRQs, written for 0th IRQ. + */ + if (!irq_offset) + write_msi_msg(irq, &msg); if (irq_remapped(irq_get_chip_data(irq))) { irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); @@ -3161,23 +3198,19 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) return 0; } -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +int setup_msix_irqs(struct pci_dev *dev, int nvec) { int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; - /* x86 doesn't support multiple MSI yet */ - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want, node); if (irq == 0) - return -1; + return -ENOSPC; irq_want = irq + 1; if (!irq_remapping_enabled) goto no_ir; @@ -3199,7 +3232,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) goto error; } no_ir: - ret = setup_msi_irq(dev, msidesc, irq); + ret = setup_msi_irq(dev, msidesc, irq, 0); if (ret < 0) goto error; sub_handle++; @@ -3211,6 +3244,74 @@ error: return ret; } +int setup_msi_irqs(struct pci_dev *dev, int nvec) +{ + int node, ret, sub_handle, index = 0; + unsigned int irq; + struct msi_desc *msidesc; + + if (nvec > 1 && !irq_remapping_enabled) + return 1; + + nvec = __roundup_pow_of_two(nvec); + + WARN_ON(!list_is_singular(&dev->msi_list)); + msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); + WARN_ON(msidesc->irq); + WARN_ON(msidesc->msi_attrib.multiple); + + node = dev_to_node(&dev->dev); + irq = __create_irqs(nr_irqs_gsi, nvec, node); + if (irq == 0) + return -ENOSPC; + + if (!irq_remapping_enabled) { + ret = setup_msi_irq(dev, msidesc, irq, 0); + if (ret < 0) + goto error; + return 0; + } + + msidesc->msi_attrib.multiple = ilog2(nvec); + for (sub_handle = 0; sub_handle < nvec; sub_handle++) { + if (!sub_handle) { + index = msi_alloc_remapped_irq(dev, irq, nvec); + if (index < 0) { + ret = index; + goto error; + } + } else { + ret = msi_setup_remapped_irq(dev, irq + sub_handle, + index, sub_handle); + if (ret < 0) + goto error; + } + ret = setup_msi_irq(dev, msidesc, irq, sub_handle); + if (ret < 0) + goto error; + } + return 0; + +error: + destroy_irqs(irq, nvec); + + /* + * Restore altered MSI descriptor fields and prevent just destroyed + * IRQs from tearing down again in default_teardown_msi_irqs() + */ + msidesc->irq = 0; + msidesc->msi_attrib.multiple = 0; + + return ret; +} + +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +{ + if (type == PCI_CAP_ID_MSI) + return setup_msi_irqs(dev, nvec); + return setup_msix_irqs(dev, nvec); +} + void native_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); -- cgit v1.1 From 349eab6eb07794c59e37703ccbfeb5920721885c Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Tue, 6 Nov 2012 14:45:46 +0800 
Subject: x86/process: Change %8s to %s for pr_warn() in release_thread() The length of dead_task->comm[] is 16 (TASK_COMM_LEN), so it is not meaningful to use %8s for task->comm[] in pr_warn(). So change it to %s, since the line is not solid anyway. Additional information: %8s limits the field width, not the output length of the original string: if the name is longer than 8 characters, it is still displayed in full; if it is shorter than 8, it is padded with leading spaces. %.8s truly limits the output length of the original string (precision). Signed-off-by: Chen Gang Link: http://lkml.kernel.org/n/tip-nridm1zvreai1tgfLjuexDmd@git.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6e68a61..0f49677 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -117,7 +117,7 @@ void release_thread(struct task_struct *dead_task) { if (dead_task->mm) { if (dead_task->mm->context.size) { - pr_warn("WARNING: dead process %8s still has LDT? <%p/%d>\n", + pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", dead_task->comm, dead_task->mm->context.ldt, dead_task->mm->context.size); -- cgit v1.1 From e8f6e3f8a14bae98197c6d9f280cd23d22eb1a33 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 2 Nov 2012 14:19:18 +0000 Subject: x86/xor: Unify SSE-based xor-block routines Besides folding duplicate code, this has the advantage of fixing x86-64's failure to use proper (para-virtualizable) accessors for dealing with CR0.TS. Signed-off-by: Jan Beulich Acked-by: H. Peter Anvin Cc: Linus Torvalds Link: http://lkml.kernel.org/r/5093E47602000078000A615B@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xor.h | 319 +++++++++++++++++++++++++++++++++++++++++- arch/x86/include/asm/xor_32.h | 286 +------------------------------------ arch/x86/include/asm/xor_64.h | 295 -------------------------------------- 3 files changed, 319 insertions(+), 581 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index f8fde90..c661571 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -1,10 +1,327 @@ #ifdef CONFIG_KMEMCHECK /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ # include +#elif !defined(_ASM_X86_XOR_H) +#define _ASM_X86_XOR_H + +/* + * Optimized RAID-5 checksumming functions for SSE. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * You should have received a copy of the GNU General Public License + * (for example /usr/src/linux/COPYING); if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* + * Cache avoiding checksumming functions utilizing KNI instructions + * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) + */ + +/* + * Based on + * High-speed RAID5 checksumming functions utilizing SSE instructions. + * Copyright (C) 1998 Ingo Molnar. + */ + +/* + * x86-64 changes / gcc fixes from Andi Kleen. + * Copyright 2002 Andi Kleen, SuSE Labs. + * + * This hasn't been optimized for the hammer yet, but there are likely + * no advantages to be gotten from x86-64 here anyways.
+ */ + +#include + +#ifdef CONFIG_X86_32 +/* reduce register pressure */ +# define XOR_CONSTANT_CONSTRAINT "i" #else +# define XOR_CONSTANT_CONSTRAINT "re" +#endif + +#define OFFS(x) "16*("#x")" +#define PF_OFFS(x) "256+16*("#x")" +#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" +#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" +#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" +#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" +#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" +#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" +#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" +#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" +#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" +#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" +#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" + +static void +xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + LD(i, 0) \ + LD(i + 1, 1) \ + PF1(i) \ + PF1(i + 2) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] 
"+r" (p1), + [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void +xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + PF1(i) \ + PF1(i + 2) \ + LD(i, 0) \ + LD(i + 1, 1) \ + LD(i + 2, 2) \ + LD(i + 3, 3) \ + PF2(i) \ + PF2(i + 2) \ + XO1(i, 0) \ + XO1(i + 1, 1) \ + XO1(i + 2, 2) \ + XO1(i + 3, 3) \ + PF3(i) \ + PF3(i + 2) \ + XO2(i, 0) \ + XO2(i + 1, 1) \ + XO2(i + 2, 2) \ + XO2(i + 3, 3) \ + PF4(i) \ + PF4(i + 2) \ + PF0(i + 4) \ + PF0(i + 6) \ + XO3(i, 0) \ + XO3(i + 1, 1) \ + XO3(i + 2, 2) \ + XO3(i + 3, 3) \ + XO4(i, 0) \ + XO4(i + 1, 1) \ + XO4(i + 2, 2) \ + XO4(i + 3, 3) \ + ST(i, 0) \ + ST(i + 1, 1) \ + ST(i + 2, 2) \ + ST(i + 3, 3) \ + + + PF0(0) + PF0(2) + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " add %[inc], %[p5] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), + [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +#undef LD +#undef XO1 +#undef XO2 +#undef XO3 +#undef XO4 +#undef ST +#undef BLOCK + +#undef XOR_CONSTANT_CONSTRAINT + #ifdef CONFIG_X86_32 # include #else # include #endif -#endif + +#endif /* _ASM_X86_XOR_H */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index f79cb7e..b85dc87 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -2,7 +2,7 @@ #define _ASM_X86_XOR_32_H /* - * Optimized RAID-5 checksumming functions for MMX and SSE. + * Optimized RAID-5 checksumming functions for MMX. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = { .do_5 = xor_p5_mmx_5, }; -/* - * Cache avoiding checksumming functions utilizing KNI instructions - * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) - */ - -#define OFFS(x) "16*("#x")" -#define PF_OFFS(x) "256+16*("#x")" -#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n" -#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n" -#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n" -#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n" -#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n" -#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n" -#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n" -#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n" -#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n" -#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n" -#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n" -#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n" -#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n" - - -static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - LD(i, 0) \ - LD(i + 1, 1) \ - PF1(i) \ - PF1(i + 2) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2) - : - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i,0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i,0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - XO2(i,0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - ST(i,0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r"(p2), "+r"(p3) - : - : "memory" ); - - kernel_fpu_end(); -} - -static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i,0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i,0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO2(i,0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - XO3(i,0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - ST(i,0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" 
- " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " addl $256, %4 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) - : - : "memory" ); - - kernel_fpu_end(); -} - -static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned long lines = bytes >> 8; - - kernel_fpu_begin(); - - /* Make sure GCC forgets anything it knows about p4 or p5, - such that it won't pass to the asm volatile below a - register that is shared with any other variable. That's - because we modify p4 and p5 there, but we can't mark them - as read/write, otherwise we'd overflow the 10-asm-operands - limit of GCC < 3.1. */ - asm("" : "+r" (p4), "+r" (p5)); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i,0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i,0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - XO2(i,0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - PF4(i) \ - PF4(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO3(i,0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - XO4(i,0) \ - XO4(i + 1, 1) \ - XO4(i + 2, 2) \ - XO4(i + 3, 3) \ - ST(i,0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addl $256, %1 ;\n" - " addl $256, %2 ;\n" - " addl $256, %3 ;\n" - " addl $256, %4 ;\n" - " addl $256, %5 ;\n" - " decl %0 ;\n" - " jnz 1b ;\n" - : "+r" (lines), - "+r" (p1), "+r" (p2), "+r" (p3) - : "r" (p4), "r" (p5) - : "memory"); - - /* p4 and p5 were modified, and now the variables are dead. - Clobber them just to be sure nobody does something stupid - like assuming they have some legal value. */ - asm("" : "=r" (p4), "=r" (p5)); - - kernel_fpu_end(); -} - static struct xor_block_template xor_block_pIII_sse = { .name = "pIII_sse", .do_2 = xor_sse_2, diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h index 87ac522..1baf89d 100644 --- a/arch/x86/include/asm/xor_64.h +++ b/arch/x86/include/asm/xor_64.h @@ -1,301 +1,6 @@ #ifndef _ASM_X86_XOR_64_H #define _ASM_X86_XOR_64_H -/* - * Optimized RAID-5 checksumming functions for MMX and SSE. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * You should have received a copy of the GNU General Public License - * (for example /usr/src/linux/COPYING); if not, write to the Free - * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - - -/* - * Cache avoiding checksumming functions utilizing KNI instructions - * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo) - */ - -/* - * Based on - * High-speed RAID5 checksumming functions utilizing SSE instructions. - * Copyright (C) 1998 Ingo Molnar. - */ - -/* - * x86-64 changes / gcc fixes from Andi Kleen. - * Copyright 2002 Andi Kleen, SuSE Labs. - * - * This hasn't been optimized for the hammer yet, but there are likely - * no advantages to be gotten from x86-64 here anyways. 
- */ - -#include - -#define OFFS(x) "16*("#x")" -#define PF_OFFS(x) "256+16*("#x")" -#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n" -#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n" -#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n" -#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n" -#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n" -#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n" -#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n" -#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n" -#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n" -#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" -#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" -#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" -#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n" - - -static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) -{ - unsigned int lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - LD(i, 0) \ - LD(i + 1, 1) \ - PF1(i) \ - PF1(i + 2) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addq %[inc], %[p1] ;\n" - " addq %[inc], %[p2] ;\n" - " decl %[cnt] ; jnz 1b" - : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines) - : [inc] "r" (256UL) - : "memory"); - - kernel_fpu_end(); -} - -static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) -{ - unsigned int lines = bytes >> 8; - - kernel_fpu_begin(); - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addq %[inc], %[p1] ;\n" - " addq %[inc], %[p2] ;\n" - " addq %[inc], %[p3] ;\n" - " decl %[cnt] ; jnz 1b" - : [cnt] "+r" (lines), - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) - : [inc] "r" (256UL) - : "memory"); - kernel_fpu_end(); -} - -static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) -{ - unsigned int lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - XO3(i, 0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addq %[inc], %[p1] ;\n" - " addq %[inc], %[p2] ;\n" - " addq %[inc], %[p3] ;\n" - " addq %[inc], %[p4] ;\n" - " decl %[cnt] ; jnz 1b" - : [cnt] "+c" (lines), - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) - : [inc] "r" (256UL) - : "memory" ); - - 
kernel_fpu_end(); -} - -static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) -{ - unsigned int lines = bytes >> 8; - - kernel_fpu_begin(); - - asm volatile( -#undef BLOCK -#define BLOCK(i) \ - PF1(i) \ - PF1(i + 2) \ - LD(i, 0) \ - LD(i + 1, 1) \ - LD(i + 2, 2) \ - LD(i + 3, 3) \ - PF2(i) \ - PF2(i + 2) \ - XO1(i, 0) \ - XO1(i + 1, 1) \ - XO1(i + 2, 2) \ - XO1(i + 3, 3) \ - PF3(i) \ - PF3(i + 2) \ - XO2(i, 0) \ - XO2(i + 1, 1) \ - XO2(i + 2, 2) \ - XO2(i + 3, 3) \ - PF4(i) \ - PF4(i + 2) \ - PF0(i + 4) \ - PF0(i + 6) \ - XO3(i, 0) \ - XO3(i + 1, 1) \ - XO3(i + 2, 2) \ - XO3(i + 3, 3) \ - XO4(i, 0) \ - XO4(i + 1, 1) \ - XO4(i + 2, 2) \ - XO4(i + 3, 3) \ - ST(i, 0) \ - ST(i + 1, 1) \ - ST(i + 2, 2) \ - ST(i + 3, 3) \ - - - PF0(0) - PF0(2) - - " .align 32 ;\n" - " 1: ;\n" - - BLOCK(0) - BLOCK(4) - BLOCK(8) - BLOCK(12) - - " addq %[inc], %[p1] ;\n" - " addq %[inc], %[p2] ;\n" - " addq %[inc], %[p3] ;\n" - " addq %[inc], %[p4] ;\n" - " addq %[inc], %[p5] ;\n" - " decl %[cnt] ; jnz 1b" - : [cnt] "+c" (lines), - [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4), - [p5] "+r" (p5) - : [inc] "r" (256UL) - : "memory"); - - kernel_fpu_end(); -} - static struct xor_block_template xor_block_sse = { .name = "generic_sse", .do_2 = xor_sse_2, -- cgit v1.1 From f317820cb6ee3fb173319bf76e0e62437be78ad2 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Fri, 2 Nov 2012 14:20:24 +0000 Subject: x86/xor: Add alternative SSE implementation only prefetching once per 64-byte line On CPUs with 64-byte last level cache lines, this yields roughly 10% better performance, independent of CPU vendor or specific model (as far as I was able to test). Signed-off-by: Jan Beulich Acked-by: H. 
Peter Anvin Cc: Linus Torvalds Link: http://lkml.kernel.org/r/5093E4B802000078000A615E@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/xor.h | 172 ++++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/xor_32.h | 23 +++--- arch/x86/include/asm/xor_64.h | 10 +-- 3 files changed, 187 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index c661571..d882975 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -58,6 +58,14 @@ #define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n" #define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n" #define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n" +#define NOP(x) + +#define BLK64(pf, op, i) \ + pf(i) \ + op(i, 0) \ + op(i + 1, 1) \ + op(i + 2, 2) \ + op(i + 3, 3) static void xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) @@ -111,6 +119,40 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void +xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3) { @@ -170,6 +212,43 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void +xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), + [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long *p4) { @@ -236,6 +315,45 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void +xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(PF3, XO3, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), + [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static void xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, unsigned long *p3, unsigned long 
*p4, unsigned long *p5) { @@ -308,12 +426,63 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, kernel_fpu_end(); } +static void +xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + unsigned long lines = bytes >> 8; + + kernel_fpu_begin(); + + asm volatile( +#undef BLOCK +#define BLOCK(i) \ + BLK64(PF0, LD, i) \ + BLK64(PF1, XO1, i) \ + BLK64(PF2, XO2, i) \ + BLK64(PF3, XO3, i) \ + BLK64(PF4, XO4, i) \ + BLK64(NOP, ST, i) \ + + " .align 32 ;\n" + " 1: ;\n" + + BLOCK(0) + BLOCK(4) + BLOCK(8) + BLOCK(12) + + " add %[inc], %[p1] ;\n" + " add %[inc], %[p2] ;\n" + " add %[inc], %[p3] ;\n" + " add %[inc], %[p4] ;\n" + " add %[inc], %[p5] ;\n" + " dec %[cnt] ;\n" + " jnz 1b ;\n" + : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2), + [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5) + : [inc] XOR_CONSTANT_CONSTRAINT (256UL) + : "memory"); + + kernel_fpu_end(); +} + +static struct xor_block_template xor_block_sse_pf64 = { + .name = "prefetch64-sse", + .do_2 = xor_sse_2_pf64, + .do_3 = xor_sse_3_pf64, + .do_4 = xor_sse_4_pf64, + .do_5 = xor_sse_5_pf64, +}; + #undef LD #undef XO1 #undef XO2 #undef XO3 #undef XO4 #undef ST +#undef NOP +#undef BLK64 #undef BLOCK #undef XOR_CONSTANT_CONSTRAINT @@ -324,4 +493,7 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, # include #endif +#define XOR_SELECT_TEMPLATE(FASTEST) \ + AVX_SELECT(FASTEST) + #endif /* _ASM_X86_XOR_H */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index b85dc87..ce05722 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -543,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = { /* Also try the generic routines. */ #include +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ #undef XOR_TRY_TEMPLATES #define XOR_TRY_TEMPLATES \ do { \ - xor_speed(&xor_block_8regs); \ - xor_speed(&xor_block_8regs_p); \ - xor_speed(&xor_block_32regs); \ - xor_speed(&xor_block_32regs_p); \ AVX_XOR_SPEED; \ - if (cpu_has_xmm) \ + if (cpu_has_xmm) { \ xor_speed(&xor_block_pIII_sse); \ - if (cpu_has_mmx) { \ + xor_speed(&xor_block_sse_pf64); \ + } else if (cpu_has_mmx) { \ xor_speed(&xor_block_pII_mmx); \ xor_speed(&xor_block_p5_mmx); \ + } else { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ + xor_speed(&xor_block_32regs); \ + xor_speed(&xor_block_32regs_p); \ } \ } while (0) -/* We force the use of the SSE xor block because it can write around L2. - We may also be able to load into the L1 only depending on how the cpu - deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ - AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) - #endif /* _ASM_X86_XOR_32_H */ diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h index 1baf89d..546f1e3 100644 --- a/arch/x86/include/asm/xor_64.h +++ b/arch/x86/include/asm/xor_64.h @@ -13,17 +13,15 @@ static struct xor_block_template xor_block_sse = { /* Also try the AVX routines */ #include +/* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. 
*/ #undef XOR_TRY_TEMPLATES #define XOR_TRY_TEMPLATES \ do { \ AVX_XOR_SPEED; \ + xor_speed(&xor_block_sse_pf64); \ xor_speed(&xor_block_sse); \ } while (0) -/* We force the use of the SSE xor block because it can write around L2. - We may also be able to load into the L1 only depending on how the cpu - deals with a load to a line that is being prefetched. */ -#define XOR_SELECT_TEMPLATE(FASTEST) \ - AVX_SELECT(&xor_block_sse) - #endif /* _ASM_X86_XOR_64_H */ -- cgit v1.1 From 7d0291256ca99cbb6124f63228003329e7a64b21 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 4 Jan 2013 16:18:14 -0500 Subject: x86: Add TS-5500 platform support The Technologic Systems TS-5500 is an x86-based (AMD Elan SC520) single board computer. This driver registers most of its devices and exposes sysfs attributes for information such as jumpers' state or presence of some of its options. This driver currently registers the TS-5500 platform, its on-board LED, 2 pin blocks (GPIO) and its analog/digital converter. It can be extended to support other Technologic Systems products, such as the TS-5600. Signed-off-by: Vivien Didelot Acked-by: Thomas Gleixner Cc: Savoir-faire Linux Inc. Link: http://lkml.kernel.org/r/1357334294-12760-1-git-send-email-vivien.didelot@savoirfairelinux.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 9 + arch/x86/platform/Makefile | 1 + arch/x86/platform/ts5500/Makefile | 1 + arch/x86/platform/ts5500/ts5500.c | 339 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 350 insertions(+) create mode 100644 arch/x86/platform/ts5500/Makefile create mode 100644 arch/x86/platform/ts5500/ts5500.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 46fb28c..0709e34 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2199,6 +2199,15 @@ config GEOS ---help--- This option enables system support for the Traverse Technologies GEOS. +config TS5500 + bool "Technologic Systems TS-5500 platform support" + depends on MELAN + select CHECK_SIGNATURE + select NEW_LEDS + select LEDS_CLASS + ---help--- + This option enables system support for the Technologic Systems TS-5500. + endif # X86_32 config AMD_NB diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index bfe917f..01e0231 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile @@ -8,5 +8,6 @@ obj-y += mrst/ obj-y += olpc/ obj-y += scx200/ obj-y += sfi/ +obj-y += ts5500/ obj-y += visws/ obj-y += uv/ diff --git a/arch/x86/platform/ts5500/Makefile b/arch/x86/platform/ts5500/Makefile new file mode 100644 index 0000000..c54e348 --- /dev/null +++ b/arch/x86/platform/ts5500/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_TS5500) += ts5500.o diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c new file mode 100644 index 0000000..39febb2 --- /dev/null +++ b/arch/x86/platform/ts5500/ts5500.c @@ -0,0 +1,339 @@ +/* + * Technologic Systems TS-5500 Single Board Computer support + * + * Copyright (C) 2013 Savoir-faire Linux Inc. + * Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any later + * version. + * + * + * This driver registers the Technologic Systems TS-5500 Single Board Computer + * (SBC) and its devices, and exposes information to userspace such as jumpers' + * state or available options. 
For further information about sysfs entries, see + * Documentation/ABI/testing/sysfs-platform-ts5500. + * + * This code actually supports the TS-5500 platform, but it may be extended to + * support similar Technologic Systems x86-based platforms, such as the TS-5600. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Product code register */ +#define TS5500_PRODUCT_CODE_ADDR 0x74 +#define TS5500_PRODUCT_CODE 0x60 /* TS-5500 product code */ + +/* SRAM/RS-485/ADC options, and RS-485 RTS/Automatic RS-485 flags register */ +#define TS5500_SRAM_RS485_ADC_ADDR 0x75 +#define TS5500_SRAM BIT(0) /* SRAM option */ +#define TS5500_RS485 BIT(1) /* RS-485 option */ +#define TS5500_ADC BIT(2) /* A/D converter option */ +#define TS5500_RS485_RTS BIT(6) /* RTS for RS-485 */ +#define TS5500_RS485_AUTO BIT(7) /* Automatic RS-485 */ + +/* External Reset/Industrial Temperature Range options register */ +#define TS5500_ERESET_ITR_ADDR 0x76 +#define TS5500_ERESET BIT(0) /* External Reset option */ +#define TS5500_ITR BIT(1) /* Indust. Temp. Range option */ + +/* LED/Jumpers register */ +#define TS5500_LED_JP_ADDR 0x77 +#define TS5500_LED BIT(0) /* LED flag */ +#define TS5500_JP1 BIT(1) /* Automatic CMOS */ +#define TS5500_JP2 BIT(2) /* Enable Serial Console */ +#define TS5500_JP3 BIT(3) /* Write Enable Drive A */ +#define TS5500_JP4 BIT(4) /* Fast Console (115K baud) */ +#define TS5500_JP5 BIT(5) /* User Jumper */ +#define TS5500_JP6 BIT(6) /* Console on COM1 (req. JP2) */ +#define TS5500_JP7 BIT(7) /* Undocumented (Unused) */ + +/* A/D Converter registers */ +#define TS5500_ADC_CONV_BUSY_ADDR 0x195 /* Conversion state register */ +#define TS5500_ADC_CONV_BUSY BIT(0) +#define TS5500_ADC_CONV_INIT_LSB_ADDR 0x196 /* Start conv. / LSB register */ +#define TS5500_ADC_CONV_MSB_ADDR 0x197 /* MSB register */ +#define TS5500_ADC_CONV_DELAY 12 /* usec */ + +/** + * struct ts5500_sbc - TS-5500 board description + * @id: Board product ID. + * @sram: Flag for SRAM option. + * @rs485: Flag for RS-485 option. + * @adc: Flag for Analog/Digital converter option. + * @ereset: Flag for External Reset option. + * @itr: Flag for Industrial Temperature Range option. + * @jumpers: Bitfield for jumpers' state. 
+ */ +struct ts5500_sbc { + int id; + bool sram; + bool rs485; + bool adc; + bool ereset; + bool itr; + u8 jumpers; +}; + +/* Board signatures in BIOS shadow RAM */ +static const struct { + const char * const string; + const ssize_t offset; +} ts5500_signatures[] __initdata = { + { "TS-5x00 AMD Elan", 0xb14 }, +}; + +static int __init ts5500_check_signature(void) +{ + void __iomem *bios; + int i, ret = -ENODEV; + + bios = ioremap(0xf0000, 0x10000); + if (!bios) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(ts5500_signatures); i++) { + if (check_signature(bios + ts5500_signatures[i].offset, + ts5500_signatures[i].string, + strlen(ts5500_signatures[i].string))) { + ret = 0; + break; + } + } + + iounmap(bios); + return ret; +} + +static int __init ts5500_detect_config(struct ts5500_sbc *sbc) +{ + u8 tmp; + int ret = 0; + + if (!request_region(TS5500_PRODUCT_CODE_ADDR, 4, "ts5500")) + return -EBUSY; + + tmp = inb(TS5500_PRODUCT_CODE_ADDR); + if (tmp != TS5500_PRODUCT_CODE) { + pr_err("This platform is not a TS-5500 (found ID 0x%x)\n", tmp); + ret = -ENODEV; + goto cleanup; + } + sbc->id = tmp; + + tmp = inb(TS5500_SRAM_RS485_ADC_ADDR); + sbc->sram = tmp & TS5500_SRAM; + sbc->rs485 = tmp & TS5500_RS485; + sbc->adc = tmp & TS5500_ADC; + + tmp = inb(TS5500_ERESET_ITR_ADDR); + sbc->ereset = tmp & TS5500_ERESET; + sbc->itr = tmp & TS5500_ITR; + + tmp = inb(TS5500_LED_JP_ADDR); + sbc->jumpers = tmp & ~TS5500_LED; + +cleanup: + release_region(TS5500_PRODUCT_CODE_ADDR, 4); + return ret; +} + +static ssize_t ts5500_show_id(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ts5500_sbc *sbc = dev_get_drvdata(dev); + + return sprintf(buf, "0x%.2x\n", sbc->id); +} + +static ssize_t ts5500_show_jumpers(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ts5500_sbc *sbc = dev_get_drvdata(dev); + + return sprintf(buf, "0x%.2x\n", sbc->jumpers >> 1); +} + +#define TS5500_SHOW(field) \ + static ssize_t ts5500_show_##field(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ + { \ + struct ts5500_sbc *sbc = dev_get_drvdata(dev); \ + return sprintf(buf, "%d\n", sbc->field); \ + } + +TS5500_SHOW(sram) +TS5500_SHOW(rs485) +TS5500_SHOW(adc) +TS5500_SHOW(ereset) +TS5500_SHOW(itr) + +static DEVICE_ATTR(id, S_IRUGO, ts5500_show_id, NULL); +static DEVICE_ATTR(jumpers, S_IRUGO, ts5500_show_jumpers, NULL); +static DEVICE_ATTR(sram, S_IRUGO, ts5500_show_sram, NULL); +static DEVICE_ATTR(rs485, S_IRUGO, ts5500_show_rs485, NULL); +static DEVICE_ATTR(adc, S_IRUGO, ts5500_show_adc, NULL); +static DEVICE_ATTR(ereset, S_IRUGO, ts5500_show_ereset, NULL); +static DEVICE_ATTR(itr, S_IRUGO, ts5500_show_itr, NULL); + +static struct attribute *ts5500_attributes[] = { + &dev_attr_id.attr, + &dev_attr_jumpers.attr, + &dev_attr_sram.attr, + &dev_attr_rs485.attr, + &dev_attr_adc.attr, + &dev_attr_ereset.attr, + &dev_attr_itr.attr, + NULL +}; + +static const struct attribute_group ts5500_attr_group = { + .attrs = ts5500_attributes, +}; + +static struct resource ts5500_dio1_resource[] = { + DEFINE_RES_IRQ_NAMED(7, "DIO1 interrupt"), +}; + +static struct platform_device ts5500_dio1_pdev = { + .name = "ts5500-dio1", + .id = -1, + .resource = ts5500_dio1_resource, + .num_resources = 1, +}; + +static struct resource ts5500_dio2_resource[] = { + DEFINE_RES_IRQ_NAMED(6, "DIO2 interrupt"), +}; + +static struct platform_device ts5500_dio2_pdev = { + .name = "ts5500-dio2", + .id = -1, + .resource = ts5500_dio2_resource, + .num_resources = 1, +}; + +static void 
ts5500_led_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + outb(!!brightness, TS5500_LED_JP_ADDR); +} + +static enum led_brightness ts5500_led_get(struct led_classdev *led_cdev) +{ + return (inb(TS5500_LED_JP_ADDR) & TS5500_LED) ? LED_FULL : LED_OFF; +} + +static struct led_classdev ts5500_led_cdev = { + .name = "ts5500:green:", + .brightness_set = ts5500_led_set, + .brightness_get = ts5500_led_get, +}; + +static int ts5500_adc_convert(u8 ctrl) +{ + u8 lsb, msb; + + /* Start conversion (ensure the 3 MSB are set to 0) */ + outb(ctrl & 0x1f, TS5500_ADC_CONV_INIT_LSB_ADDR); + + /* + * The platform has CPLD logic driving the A/D converter. + * The conversion must complete within 11 microseconds, + * otherwise we have to re-initiate a conversion. + */ + udelay(TS5500_ADC_CONV_DELAY); + if (inb(TS5500_ADC_CONV_BUSY_ADDR) & TS5500_ADC_CONV_BUSY) + return -EBUSY; + + /* Read the raw data */ + lsb = inb(TS5500_ADC_CONV_INIT_LSB_ADDR); + msb = inb(TS5500_ADC_CONV_MSB_ADDR); + + return (msb << 8) | lsb; +} + +static struct max197_platform_data ts5500_adc_pdata = { + .convert = ts5500_adc_convert, +}; + +static struct platform_device ts5500_adc_pdev = { + .name = "max197", + .id = -1, + .dev = { + .platform_data = &ts5500_adc_pdata, + }, +}; + +static int __init ts5500_init(void) +{ + struct platform_device *pdev; + struct ts5500_sbc *sbc; + int err; + + /* + * There is no DMI available or PCI bridge subvendor info, + * only the BIOS provides a 16-bit identification call. + * It is safer to find a signature in the BIOS shadow RAM. + */ + err = ts5500_check_signature(); + if (err) + return err; + + pdev = platform_device_register_simple("ts5500", -1, NULL, 0); + if (IS_ERR(pdev)) + return PTR_ERR(pdev); + + sbc = devm_kzalloc(&pdev->dev, sizeof(struct ts5500_sbc), GFP_KERNEL); + if (!sbc) { + err = -ENOMEM; + goto error; + } + + err = ts5500_detect_config(sbc); + if (err) + goto error; + + platform_set_drvdata(pdev, sbc); + + err = sysfs_create_group(&pdev->dev.kobj, &ts5500_attr_group); + if (err) + goto error; + + ts5500_dio1_pdev.dev.parent = &pdev->dev; + if (platform_device_register(&ts5500_dio1_pdev)) + dev_warn(&pdev->dev, "DIO1 block registration failed\n"); + ts5500_dio2_pdev.dev.parent = &pdev->dev; + if (platform_device_register(&ts5500_dio2_pdev)) + dev_warn(&pdev->dev, "DIO2 block registration failed\n"); + + if (led_classdev_register(&pdev->dev, &ts5500_led_cdev)) + dev_warn(&pdev->dev, "LED registration failed\n"); + + if (sbc->adc) { + ts5500_adc_pdev.dev.parent = &pdev->dev; + if (platform_device_register(&ts5500_adc_pdev)) + dev_warn(&pdev->dev, "ADC registration failed\n"); + } + + return 0; +error: + platform_device_unregister(pdev); + return err; +} +device_initcall(ts5500_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Savoir-faire Linux Inc. "); +MODULE_DESCRIPTION("Technologic Systems TS-5500 platform driver"); -- cgit v1.1 From 51fac8388a0325a43f0ae67453ece2c373e2ec28 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 24 Jan 2013 00:24:48 +0100 Subject: ACPI: Remove useless type argument of driver .remove() operation The second argument of ACPI driver .remove() operation is only used by the ACPI processor driver and the value passed to that driver through it is always available from the given struct acpi_device object's removal_type field. For this reason, the second ACPI driver .remove() argument is in fact useless, so drop it. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Jiang Liu Acked-by: Toshi Kani Acked-by: Yinghai Lu --- arch/x86/platform/olpc/olpc-xo15-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 2fdca25..fef7d0b 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c @@ -195,7 +195,7 @@ err_sysfs: return r; } -static int xo15_sci_remove(struct acpi_device *device, int type) +static int xo15_sci_remove(struct acpi_device *device) { acpi_disable_gpe(NULL, xo15_sci_gpe); acpi_remove_gpe_handler(NULL, xo15_sci_gpe, xo15_sci_gpe_handler); -- cgit v1.1 From 43720bd6014327ac454434496cb953edcdb9f8d6 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Fri, 11 Jan 2013 13:43:45 +0100 Subject: PM / tracing: remove deprecated power trace API The text in Documentation said it would be removed in 2.6.41; the text in the Kconfig said removal in the 3.1 release. Either way you look at it, we are well past both, so push it off a cliff. Note that the POWER_CSTATE and the POWER_PSTATE are part of the legacy tracing API. Remove all tracepoints which use these flags. As can be seen from context, most already have a trace entry via trace_cpu_idle anyways. Also, the cpufreq/cpufreq.c PSTATE one is actually unpaired, as compared to the CSTATE ones which all have a clear start/stop. As part of this, the trace_power_frequency also becomes orphaned, so it too is deleted. Signed-off-by: Paul Gortmaker Acked-by: Steven Rostedt Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/process.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2ed787f..dcfc1f4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -375,7 +375,6 @@ void cpu_idle(void) */ void default_idle(void) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle_rcuidle(1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* @@ -389,7 +388,6 @@ void default_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_APM_MODULE @@ -423,7 +421,6 @@ void stop_this_cpu(void *dummy) static void mwait_idle(void) { if (!need_resched()) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); trace_cpu_idle_rcuidle(1, smp_processor_id()); if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -434,7 +431,6 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); @@ -447,12 +443,10 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start_rcuidle(POWER_CSTATE, 0, smp_processor_id()); trace_cpu_idle_rcuidle(0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end_rcuidle(smp_processor_id()); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } -- cgit v1.1 From a25b9316841c5afa226f8f70a457861b35276a92 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Jan 2013 13:24:30 -0800 Subject: x86, mm: Make DEBUG_VIRTUAL work earlier in boot The KVM code has some repeated bugs in it around use of __pa() on per-cpu data. 
Those data are not in an area on which using __pa() is valid. However, they are also called early enough in boot that __vmalloc_start_set is not set, and thus the CONFIG_DEBUG_VIRTUAL debugging does not catch them. This adds a check to also verify __pa() calls against max_low_pfn, which we can use earlier in boot than is_vmalloc_addr(). However, if we are super-early in boot, max_low_pfn=0 and this will trip on every call, so also make sure that max_low_pfn is set before we try to use it. With this patch applied, CONFIG_DEBUG_VIRTUAL will actually catch the bug I was chasing (and fix later in this series). I'd love to find a generic way so that any __pa() call on percpu areas could do a BUG_ON(), but there don't appear to be any nice and easy ways to check if an address is a percpu one. Anybody have ideas on a way to do this? Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130122212430.F46F8159@kernel.stglabs.ibm.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/numa.c | 2 +- arch/x86/mm/pat.c | 4 ++-- arch/x86/mm/physaddr.c | 9 ++++++++- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 2d125be..76604eb 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -219,7 +219,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end) */ nd = alloc_remap(nid, nd_size); if (nd) { - nd_pa = __pa(nd); + nd_pa = __phys_addr_nodebug(nd); remapped = true; } else { nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 0eb572e..2610bd9 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -560,10 +560,10 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) { unsigned long id_sz; - if (base >= __pa(high_memory)) + if (base > __pa(high_memory-1)) return 0; - id_sz = (__pa(high_memory) < base + size) ? + id_sz = (__pa(high_memory-1) <= base + size) ? __pa(high_memory) - base : size; diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c index c73fedd..e666cbb 100644 --- a/arch/x86/mm/physaddr.c +++ b/arch/x86/mm/physaddr.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -68,10 +69,16 @@ EXPORT_SYMBOL(__virt_addr_valid); #ifdef CONFIG_DEBUG_VIRTUAL unsigned long __phys_addr(unsigned long x) { + unsigned long phys_addr = x - PAGE_OFFSET; /* VMALLOC_* aren't constants */ VIRTUAL_BUG_ON(x < PAGE_OFFSET); VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); - return x - PAGE_OFFSET; + /* max_low_pfn is set early, but not _that_ early */ + if (max_low_pfn) { + VIRTUAL_BUG_ON((phys_addr >> PAGE_SHIFT) > max_low_pfn); + BUG_ON(slow_virt_to_phys((void *)x) != phys_addr); + } + return phys_addr; } EXPORT_SYMBOL(__phys_addr); #endif -- cgit v1.1 From 4cbeb51b860c57ba8b2ae50c4016ee7a41f5fbd5 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Jan 2013 13:24:31 -0800 Subject: x86, mm: Pagetable level size/shift/mask helpers I plan to use lookup_address() to walk the kernel pagetables in a later patch. It returns a "pte" and the level in the pagetables where the "pte" was found. The level is just an enum and needs to be converted to a useful value in order to do address calculations with it. These helpers will be used in at least two places. This also gives the anonymous enum a real name so that no one gets confused about what they should be passing in to these helpers. "PTE_SHIFT" was chosen for naming consistency with the other pagetable levels (PGD/PUD/PMD_SHIFT). Cc: H.
Peter Anvin Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130122212431.405D3A8C@kernel.stglabs.ibm.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 14 ++++++++++++++ arch/x86/include/asm/pgtable_types.h | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5199db2..bc28e6f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -390,6 +390,7 @@ pte_t *populate_extra_pte(unsigned long vaddr); #ifndef __ASSEMBLY__ #include +#include static inline int pte_none(pte_t pte) { @@ -781,6 +782,19 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) memcpy(dst, src, count * sizeof(pgd_t)); } +#define PTE_SHIFT ilog2(PTRS_PER_PTE) +static inline int page_level_shift(enum pg_level level) +{ + return (PAGE_SHIFT - PTE_SHIFT) + level * PTE_SHIFT; +} +static inline unsigned long page_level_size(enum pg_level level) +{ + return 1UL << page_level_shift(level); +} +static inline unsigned long page_level_mask(enum pg_level level) +{ + return ~(page_level_size(level) - 1); +} #include #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 3c32db8..6c297e7 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -331,7 +331,7 @@ extern void native_pagetable_init(void); struct seq_file; extern void arch_report_meminfo(struct seq_file *m); -enum { +enum pg_level { PG_LEVEL_NONE, PG_LEVEL_4K, PG_LEVEL_2M, -- cgit v1.1 From f3c4fbb68e93b10c781c0cc462a9d80770244da6 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Jan 2013 13:24:32 -0800 Subject: x86, mm: Use new pagetable helpers in try_preserve_large_page() try_preserve_large_page() can be slightly simplified by using the new page_level_*() helpers. This also moves the 'level' over to the new pg_level enum type. Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130122212432.14F3D993@kernel.stglabs.ibm.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/pageattr.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 40f92f3..2a5c9ab 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -396,7 +396,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pte_t new_pte, old_pte, *tmp; pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; - unsigned int level; + enum pg_level level; if (cpa->force_split) return 1; @@ -412,15 +412,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, switch (level) { case PG_LEVEL_2M: - psize = PMD_PAGE_SIZE; - pmask = PMD_PAGE_MASK; - break; #ifdef CONFIG_X86_64 case PG_LEVEL_1G: - psize = PUD_PAGE_SIZE; - pmask = PUD_PAGE_MASK; - break; #endif + psize = page_level_size(level); + pmask = page_level_mask(level); + break; default: do_split = -EINVAL; goto out_unlock; -- cgit v1.1 From d765653445129b7c476758040e3079480775f80a Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Jan 2013 13:24:33 -0800 Subject: x86, mm: Create slow_virt_to_phys() This is necessary because __pa() does not work on some kinds of memory, like vmalloc() or the alloc_remap() areas on 32-bit NUMA systems. We have some functions to do conversions _like_ this in the vmalloc() code (like vmalloc_to_page()), but they do not work on sizes other than 4k pages. 
We would potentially need to be able to handle all the page sizes that we use for the kernel linear mapping (4k, 2M, 1G). In practice, on 32-bit NUMA systems, the percpu areas get stuck in the alloc_remap() area. Any __pa() call on them will break and basically return garbage. This patch introduces a new function slow_virt_to_phys(), which walks the kernel page tables on x86 and should do precisely the same logical thing as __pa(), but actually work on a wider range of memory. It should work on the normal linear mapping, vmalloc(), kmap(), etc... Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130122212433.4D1FCA62@kernel.stglabs.ibm.com Acked-by: Rik van Riel Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable_types.h | 1 + arch/x86/mm/pageattr.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 6c297e7..9f82690 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -352,6 +352,7 @@ static inline void update_page_count(int level, unsigned long pages) { } * as a pte too. */ extern pte_t *lookup_address(unsigned long address, unsigned int *level); +extern phys_addr_t slow_virt_to_phys(void *__address); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 2a5c9ab..6d13d2a 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -364,6 +364,37 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) EXPORT_SYMBOL_GPL(lookup_address); /* + * This is necessary because __pa() does not work on some + * kinds of memory, like vmalloc() or the alloc_remap() + * areas on 32-bit NUMA systems. The percpu areas can + * end up in this kind of memory, for instance. + * + * This could be optimized, but it is only intended to be + * used at initialization time, and keeping it + * unoptimized should increase the testing coverage for + * the more obscure platforms. + */ +phys_addr_t slow_virt_to_phys(void *__virt_addr) +{ + unsigned long virt_addr = (unsigned long)__virt_addr; + phys_addr_t phys_addr; + unsigned long offset; + enum pg_level level; + unsigned long psize; + unsigned long pmask; + pte_t *pte; + + pte = lookup_address(virt_addr, &level); + BUG_ON(!pte); + psize = page_level_size(level); + pmask = page_level_mask(level); + offset = virt_addr & ~pmask; + phys_addr = pte_pfn(*pte) << PAGE_SHIFT; + return (phys_addr | offset); +} +EXPORT_SYMBOL_GPL(slow_virt_to_phys); + +/* * Set the new pmd in all the pgds we know about: */ static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) -- cgit v1.1 From 5dfd486c4750c9278c63fa96e6e85bdd2fb58e9d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 22 Jan 2013 13:24:35 -0800 Subject: x86, kvm: Fix kvm's use of __pa() on percpu areas In short, it is illegal to call __pa() on an address holding a percpu variable. This replaces those __pa() calls with slow_virt_to_phys(). All of the cases in this patch are in boot time (or CPU hotplug time at worst) code, so the slow pagetable walking in slow_virt_to_phys() is not expected to have a performance impact. The times when this actually matters are pretty obscure (certain 32-bit NUMA systems), but it _does_ happen. It is important to keep KVM guests working on these systems because the real hardware is getting harder and harder to find.
This bug manifested first by me seeing a plain hang at boot after this message: CPU 0 irqstacks, hard=f3018000 soft=f301a000 or, sometimes, it would actually make it out to the console: [ 0.000000] BUG: unable to handle kernel paging request at ffffffff I eventually traced it down to the KVM async pagefault code. This can be worked around by disabling that code either at compile-time, or on the kernel command-line. The kvm async pagefault code was injecting page faults into the guest which the guest misinterpreted because its "reason" was not being properly sent from the host. The guest passes a physical address of a per-cpu async page fault structure via an MSR to the host. Since __pa() is broken on percpu data, the physical address it sent was basically bogus and the host went scribbling on random data. The guest never saw the real reason for the page fault (it was injected by the host), assumed that the kernel had taken a _real_ page fault, and panic()'d. The behavior varied, though, depending on what got corrupted by the bad write. Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130122212435.4905663F@kernel.stglabs.ibm.com Acked-by: Rik van Riel Reviewed-by: Marcelo Tosatti Signed-off-by: H. Peter Anvin --- arch/x86/kernel/kvm.c | 9 +++++---- arch/x86/kernel/kvmclock.c | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9c2bd8b..aa7e58b 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -297,9 +297,9 @@ static void kvm_register_steal_time(void) memset(st, 0, sizeof(*st)); - wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED)); + wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", - cpu, __pa(st)); + cpu, slow_virt_to_phys(st)); } static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; @@ -324,7 +324,7 @@ void __cpuinit kvm_guest_cpu_init(void) return; if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { - u64 pa = __pa(&__get_cpu_var(apf_reason)); + u64 pa = slow_virt_to_phys(&__get_cpu_var(apf_reason)); #ifdef CONFIG_PREEMPT pa |= KVM_ASYNC_PF_SEND_ALWAYS; @@ -340,7 +340,8 @@ void __cpuinit kvm_guest_cpu_init(void) /* Size alignment is implied but just to make it explicit. */ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); __get_cpu_var(kvm_apic_eoi) = 0; - pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED; + pa = slow_virt_to_phys(&__get_cpu_var(kvm_apic_eoi)) + | KVM_MSR_ENABLED; wrmsrl(MSR_KVM_PV_EOI_EN, pa); } diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 220a360..9f966dc 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -162,8 +162,8 @@ int kvm_register_clock(char *txt) int low, high, ret; struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti; - low = (int)__pa(src) | 1; - high = ((u64)__pa(src) >> 32); + low = (int)slow_virt_to_phys(src) | 1; + high = ((u64)slow_virt_to_phys(src) >> 32); ret = native_write_msr_safe(msr_kvm_system_time, low, high); printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n", cpu, high, low, txt); -- cgit v1.1 From 3f0c3d0bb2bcc4b88b22452a7cf0073ee9a0f1e6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 26 Jan 2013 23:56:04 +0200 Subject: KVM: x86 emulator: fix test_cc() build failure on i386 'pushq' doesn't exist on i386. Replace with 'push', which should work since the operand is a register.
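For illustration only (not part of the patch): with a register operand, a plain "push" assembles to the natural word size (32-bit on i386, 64-bit on x86-64), so the one spelling builds on both targets, whereas the explicit "pushq" suffix is only valid in 64-bit code. A minimal stand-alone sketch of the same idiom; the helper name here is made up:

	/*
	 * Round-trip a value through the hardware EFLAGS register.
	 * "push" and "pop" with register operands take the native
	 * word size, so this compiles on both 32- and 64-bit x86.
	 */
	static inline unsigned long flags_roundtrip(unsigned long flags)
	{
		unsigned long out;

		asm("push %[flags]; popf; pushf; pop %[out]"
		    : [out] "=r" (out)
		    : [flags] "r" (flags)
		    : "cc");
		return out;
	}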
Signed-off-by: Avi Kivity Signed-off-by: Gleb Natapov --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e99fb72..2b11318 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1013,7 +1013,7 @@ static u8 test_cc(unsigned int condition, unsigned long flags) void (*fop)(void) = (void *)em_setcc + 4 * (condition & 0xf); flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF; - asm("pushq %[flags]; popf; call *%[fastop]" + asm("push %[flags]; popf; call *%[fastop]" : "=a"(rc) : [fastop]"r"(fop), [flags]"r"(flags)); return rc; } -- cgit v1.1 From 6fac4829ce0ef9b7f24369086ce5f0e9f38d37bc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 13 Nov 2012 14:20:55 +0100 Subject: cputime: Use accessors to read task cputime stats This is in preparation for the full dynticks feature. While remotely reading the cputime of a task running in a full dynticks CPU, we'll need to do some extra-computation. This way we can account the time it spent tickless in userspace since its last cputime snapshot. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- arch/x86/kernel/apm_32.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d65464e..8d7012b 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -899,6 +899,7 @@ static void apm_cpu_idle(void) static int use_apm_idle; /* = 0 */ static unsigned int last_jiffies; /* = 0 */ static unsigned int last_stime; /* = 0 */ + cputime_t stime; int apm_idle_done = 0; unsigned int jiffies_since_last_check = jiffies - last_jiffies; @@ -906,23 +907,23 @@ static void apm_cpu_idle(void) WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: + task_cputime(current, NULL, &stime); if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; } else if (jiffies_since_last_check > idle_period) { unsigned int idle_percentage; - idle_percentage = current->stime - last_stime; + idle_percentage = stime - last_stime; idle_percentage *= 100; idle_percentage /= jiffies_since_last_check; use_apm_idle = (idle_percentage > idle_threshold); if (apm_info.forbid_idle) use_apm_idle = 0; - last_jiffies = jiffies; - last_stime = current->stime; } + last_jiffies = jiffies; + last_stime = stime; + bucket = IDLE_LEAKY_MAX; while (!need_resched()) { -- cgit v1.1 From 70733e0c7ed22177e2cfe660fa2a0e90f1f39126 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:33 +0200 Subject: x86, apic: Move irq_remapping_enabled checks into IRQ-remapping code Move the three easy to move checks in the x86' apic.c file into the IRQ-remapping code. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/kernel/apic/apic.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index b994cc8..8d741e6 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1477,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void) * Now that local APIC setup is completed for BP, configure the fault * handling for interrupt remapping. 
*/ - if (irq_remapping_enabled) - irq_remap_enable_fault_handling(); + irq_remap_enable_fault_handling(); } @@ -2251,8 +2250,7 @@ static int lapic_suspend(void) local_irq_save(flags); disable_local_APIC(); - if (irq_remapping_enabled) - irq_remapping_disable(); + irq_remapping_disable(); local_irq_restore(flags); return 0; @@ -2320,8 +2318,7 @@ static void lapic_resume(void) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); - if (irq_remapping_enabled) - irq_remapping_reenable(x2apic_mode); + irq_remapping_reenable(x2apic_mode); local_irq_restore(flags); } -- cgit v1.1 From 336224ba5e4fb42a95d02ab0aa0fdff21649bb38 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:34 +0200 Subject: x86, apic: Mask IO-APIC and PIC unconditionally on LAPIC resume IO-APIC and PIC use the same resume routines when IRQ remapping is enabled or disabled. So it should be safe to mask the other APICs for the IRQ-remapping-disabled case too. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/kernel/apic/apic.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 8d741e6..a5b4dce 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2266,16 +2266,15 @@ static void lapic_resume(void) return; local_irq_save(flags); - if (irq_remapping_enabled) { - /* - * IO-APIC and PIC have their own resume routines. - * We just mask them here to make sure the interrupt - * subsystem is completely quiet while we enable x2apic - * and interrupt-remapping. - */ - mask_ioapic_entries(); - legacy_pic->mask_all(); - } + + /* + * IO-APIC and PIC have their own resume routines. + * We just mask them here to make sure the interrupt + * subsystem is completely quiet while we enable x2apic + * and interrupt-remapping. + */ + mask_ioapic_entries(); + legacy_pic->mask_all(); if (x2apic_mode) enable_x2apic(); -- cgit v1.1 From 1c4248ca4e783e47cc34e313d9f82b4ea52774cc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:35 +0200 Subject: x86, io_apic: Introduce x86_io_apic_ops.disable() This function pointer is used to call a system-specific function for disabling the IO-APIC. Currently this is used for IRQ remapping which has its own disable routine. Also introduce the necessary infrastructure in the interrupt remapping code to overwrite this and other function pointers as necessary by interrupt remapping. 
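For illustration only (not part of the patch), the override infrastructure amounts to the IRQ-remapping init code repointing members of the shared ops structure once, after which every caller keeps going through the same indirection. The remapping-side names below are assumptions, not taken from this series:

	/* Remapping-aware replacement (illustrative): */
	static void ir_disable_io_apic(void)
	{
		/* put the IO-APIC into virtual wire A mode instead */
	}

	/* Run once remapping is known to be enabled: */
	static void __init ir_override_io_apic_ops(void)
	{
		x86_io_apic_ops.disable = ir_disable_io_apic;
	}

With this in place, the irq_remapping_enabled checks can disappear from the call sites.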
Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/io_apic.h | 2 ++ arch/x86/include/asm/x86_init.h | 9 +++++---- arch/x86/kernel/apic/io_apic.c | 41 +++++++++++++++++++---------------------- arch/x86/kernel/x86_init.c | 9 +++++---- 4 files changed, 31 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 73d8c53..d59e172 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -179,6 +179,7 @@ extern void __init native_io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); +extern void native_disable_io_apic(void); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { @@ -223,6 +224,7 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_read NULL #define native_io_apic_write NULL #define native_io_apic_modify NULL +#define native_disable_io_apic NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 5769349..b1d2d6a 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -190,10 +190,11 @@ struct x86_msi_ops { }; struct x86_io_apic_ops { - void (*init) (void); - unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*write) (unsigned int apic, unsigned int reg, unsigned int value); - void (*modify)(unsigned int apic, unsigned int reg, unsigned int value); + void (*init) (void); + unsigned int (*read) (unsigned int apic, unsigned int reg); + void (*write) (unsigned int apic, unsigned int reg, unsigned int value); + void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); + void (*disable)(void); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2016f9d..cd5f4d7 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1921,30 +1921,14 @@ void __init enable_IO_APIC(void) clear_IO_APIC(); } -/* - * Not an __init, needed by the reboot code - */ -void disable_IO_APIC(void) +void native_disable_io_apic(void) { /* - * Clear the IO-APIC before rebooting: - */ - clear_IO_APIC(); - - if (!legacy_pic->nr_legacy_irqs) - return; - - /* * If the i8259 is routed through an IOAPIC * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. - * - * With interrupt-remapping, for now we will use virtual wire A mode, - * as virtual wire B is little complex (need to configure both - * IOAPIC RTE as well as interrupt-remapping table entry). - * As this gets called during crash dump, keep this simple for now. */ - if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) { + if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; memset(&entry, 0, sizeof(entry)); @@ -1964,12 +1948,25 @@ void disable_IO_APIC(void) ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); } + if (cpu_has_apic || apic_from_smp_config()) + disconnect_bsp_APIC(ioapic_i8259.pin != -1); + +} + +/* + * Not an __init, needed by the reboot code + */ +void disable_IO_APIC(void) +{ /* - * Use virtual wire A mode when interrupt remapping is enabled. 
+ * Clear the IO-APIC before rebooting: */ - if (cpu_has_apic || apic_from_smp_config()) - disconnect_bsp_APIC(!irq_remapping_enabled && - ioapic_i8259.pin != -1); + clear_IO_APIC(); + + if (!legacy_pic->nr_legacy_irqs) + return; + + x86_io_apic_ops.disable(); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7a3d075..754524a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -118,8 +118,9 @@ struct x86_msi_ops x86_msi = { }; struct x86_io_apic_ops x86_io_apic_ops = { - .init = native_io_apic_init_mappings, - .read = native_io_apic_read, - .write = native_io_apic_write, - .modify = native_io_apic_modify, + .init = native_io_apic_init_mappings, + .read = native_io_apic_read, + .write = native_io_apic_write, + .modify = native_io_apic_modify, + .disable = native_disable_io_apic, }; -- cgit v1.1 From afcc8a40a090f7a65d3b72bac1a26fc6dbb63b10 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:36 +0200 Subject: x86, io_apic: Introduce x86_io_apic_ops.print_entries for debugging This call-back is used to dump IO-APIC entries for debugging purposes into the kernel log. VT-d needs a special routine for this and will overwrite the default. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/io_apic.h | 3 ++ arch/x86/include/asm/x86_init.h | 1 + arch/x86/kernel/apic/io_apic.c | 109 +++++++++++++++++++++------------------- arch/x86/kernel/x86_init.c | 1 + 4 files changed, 61 insertions(+), 53 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index d59e172..21aa81e 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -180,6 +180,8 @@ extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val); extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val); extern void native_disable_io_apic(void); +extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); +extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { @@ -225,6 +227,7 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_write NULL #define native_io_apic_modify NULL #define native_disable_io_apic NULL +#define native_io_apic_print_entries NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b1d2d6a..8ff79f7 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -195,6 +195,7 @@ struct x86_io_apic_ops { void (*write) (unsigned int apic, unsigned int reg, unsigned int value); void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); + void (*print_entries)(unsigned int apic, unsigned int nr_entries); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index cd5f4d7..a18e27a 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1513,9 +1513,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, ioapic_write_entry(ioapic_idx, pin, entry); } -__apicdebuginit(void) print_IO_APIC(int ioapic_idx) +void native_io_apic_print_entries(unsigned int apic, 
unsigned int nr_entries) +{ + int i; + + pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n"); + + for (i = 0; i <= nr_entries; i++) { + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + pr_debug(" %02x %02X ", i, entry.dest); + pr_cont("%1d %1d %1d %1d %1d " + "%1d %1d %02X\n", + entry.mask, + entry.trigger, + entry.irr, + entry.polarity, + entry.delivery_status, + entry.dest_mode, + entry.delivery_mode, + entry.vector); + } +} + +void intel_ir_io_apic_print_entries(unsigned int apic, + unsigned int nr_entries) { int i; + + pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n"); + + for (i = 0; i <= nr_entries; i++) { + struct IR_IO_APIC_route_entry *ir_entry; + struct IO_APIC_route_entry entry; + + entry = ioapic_read_entry(apic, i); + + ir_entry = (struct IR_IO_APIC_route_entry *)&entry; + + pr_debug(" %02x %04X ", i, ir_entry->index); + pr_cont("%1d %1d %1d %1d %1d " + "%1d %1d %X %02X\n", + ir_entry->format, + ir_entry->mask, + ir_entry->trigger, + ir_entry->irr, + ir_entry->polarity, + ir_entry->delivery_status, + ir_entry->index2, + ir_entry->zero, + ir_entry->vector); + } +} + +__apicdebuginit(void) print_IO_APIC(int ioapic_idx) +{ union IO_APIC_reg_00 reg_00; union IO_APIC_reg_01 reg_01; union IO_APIC_reg_02 reg_02; @@ -1568,58 +1622,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx) printk(KERN_DEBUG ".... IRQ redirection table:\n"); - if (irq_remapping_enabled) { - printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR" - " Pol Stat Indx2 Zero Vect:\n"); - } else { - printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect:\n"); - } - - for (i = 0; i <= reg_01.bits.entries; i++) { - if (irq_remapping_enabled) { - struct IO_APIC_route_entry entry; - struct IR_IO_APIC_route_entry *ir_entry; - - entry = ioapic_read_entry(ioapic_idx, i); - ir_entry = (struct IR_IO_APIC_route_entry *) &entry; - printk(KERN_DEBUG " %02x %04X ", - i, - ir_entry->index - ); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %X %02X\n", - ir_entry->format, - ir_entry->mask, - ir_entry->trigger, - ir_entry->irr, - ir_entry->polarity, - ir_entry->delivery_status, - ir_entry->index2, - ir_entry->zero, - ir_entry->vector - ); - } else { - struct IO_APIC_route_entry entry; - - entry = ioapic_read_entry(ioapic_idx, i); - printk(KERN_DEBUG " %02x %02X ", - i, - entry.dest - ); - pr_cont("%1d %1d %1d %1d %1d " - "%1d %1d %02X\n", - entry.mask, - entry.trigger, - entry.irr, - entry.polarity, - entry.delivery_status, - entry.dest_mode, - entry.delivery_mode, - entry.vector - ); - } - } + x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries); } __apicdebuginit(void) print_IO_APICs(void) diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 754524a..ee4af8b 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -123,4 +123,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .write = native_io_apic_write, .modify = native_io_apic_modify, .disable = native_disable_io_apic, + .print_entries = native_io_apic_print_entries, }; -- cgit v1.1 From 71054d8841b442bb3d8be60bde2bfac0483c19da Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:37 +0200 Subject: x86, hpet: Introduce x86_msi_ops.setup_hpet_msi This function pointer can be overwritten by the IRQ remapping code. The irq_remapping_enabled check can be removed from default_setup_hpet_msi. 
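For illustration only: the HPET code now always calls through x86_msi.setup_hpet_msi(), so the choice of implementation is made once at init time instead of being tested on every call. A sketch of that one-time selection; setup_hpet_msi_remapped() is declared in irq_remapping.h, but the wiring shown is an assumption about a later patch in this series:

	/* Pick the HPET MSI setup routine once, instead of checking
	 * irq_remapping_enabled inside default_setup_hpet_msi(). */
	static void __init pick_hpet_msi_setup(bool remapping_enabled)
	{
		if (remapping_enabled)
			x86_msi.setup_hpet_msi = setup_hpet_msi_remapped;
		/* otherwise keep default_setup_hpet_msi from x86_init.c */
	}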
Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/hpet.h | 5 +++-- arch/x86/include/asm/x86_init.h | 1 + arch/x86/kernel/apic/io_apic.c | 8 +------- arch/x86/kernel/hpet.c | 2 +- arch/x86/kernel/x86_init.c | 10 ++++++---- 5 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 434e210..b18df57 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg); extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg); #ifdef CONFIG_PCI_MSI -extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id); +extern int default_setup_hpet_msi(unsigned int irq, unsigned int id); #else -static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id) +static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id) { return -EINVAL; } @@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler); static inline int hpet_enable(void) { return 0; } static inline int is_hpet_enabled(void) { return 0; } #define hpet_readl(a) 0 +#define default_setup_hpet_msi NULL #endif #endif /* _ASM_X86_HPET_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 8ff79f7..1ee10ca 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -187,6 +187,7 @@ struct x86_msi_ops { void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev, int irq); + int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; struct x86_io_apic_ops { diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a18e27a..e7b8763 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3399,18 +3399,12 @@ static struct irq_chip hpet_msi_type = { .irq_retrigger = ioapic_retrigger_irq, }; -int arch_setup_hpet_msi(unsigned int irq, unsigned int id) +int default_setup_hpet_msi(unsigned int irq, unsigned int id) { struct irq_chip *chip = &hpet_msi_type; struct msi_msg msg; int ret; - if (irq_remapping_enabled) { - ret = setup_hpet_msi_remapped(irq, id); - if (ret) - return ret; - } - ret = msi_compose_msg(NULL, irq, &msg, id); if (ret < 0) return ret; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index e28670f..da85a8e 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta, static int hpet_setup_msi_irq(unsigned int irq) { - if (arch_setup_hpet_msi(irq, hpet_blockid)) { + if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) { destroy_irq(irq); return -EINVAL; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ee4af8b..0357eee 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -111,10 +112,11 @@ struct x86_platform_ops x86_platform = { EXPORT_SYMBOL_GPL(x86_platform); struct x86_msi_ops x86_msi = { - .setup_msi_irqs = native_setup_msi_irqs, - .teardown_msi_irq = native_teardown_msi_irq, - .teardown_msi_irqs = default_teardown_msi_irqs, - .restore_msi_irqs = default_restore_msi_irqs, + .setup_msi_irqs = native_setup_msi_irqs, + .teardown_msi_irq = native_teardown_msi_irq, + .teardown_msi_irqs = default_teardown_msi_irqs, + 
.restore_msi_irqs = default_restore_msi_irqs, + .setup_hpet_msi = default_setup_hpet_msi, }; struct x86_io_apic_ops x86_io_apic_ops = { -- cgit v1.1 From 5afba62cc8a16716508605e02c1b02ee5f969184 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:38 +0200 Subject: x86, msi: Use IRQ remapping specific setup_msi_irqs routine Use separate routines to set up MSI IRQs for both irq_remapping_enabled cases. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/irq_remapping.h | 12 ---- arch/x86/include/asm/pci.h | 3 + arch/x86/kernel/apic/io_apic.c | 104 ++++------------------------------- 3 files changed, 13 insertions(+), 106 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 5fb9bbb..0ee1e88 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -47,9 +47,6 @@ extern void free_remapped_irq(int irq); extern void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); -extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec); -extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle); extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); #else /* CONFIG_IRQ_REMAP */ @@ -83,15 +80,6 @@ static inline void compose_remapped_msi_msg(struct pci_dev *pdev, struct msi_msg *msg, u8 hpet_id) { } -static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec) -{ - return -ENODEV; -} -static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, - int index, int sub_handle) -{ - return -ENODEV; -} static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { return -ENODEV; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index dba7805..c28fd02 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq) #define arch_teardown_msi_irq x86_teardown_msi_irq #define arch_restore_msi_irqs x86_restore_msi_irqs /* implemented in arch/x86/kernel/apic/io_apic.
*/ +struct msi_desc; int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void native_teardown_msi_irq(unsigned int irq); void native_restore_msi_irqs(struct pci_dev *dev, int irq); +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset); /* default to the implementation in drivers/lib/msi.c */ #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS #define HAVE_DEFAULT_MSI_RESTORE_IRQS diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e7b8763..d4b045e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3066,7 +3066,7 @@ void destroy_irq(unsigned int irq) free_irq_at(irq, cfg); } -static inline void destroy_irqs(unsigned int irq, unsigned int count) +void destroy_irqs(unsigned int irq, unsigned int count) { unsigned int i; @@ -3165,8 +3165,8 @@ static struct irq_chip msi_chip = { .irq_retrigger = ioapic_retrigger_irq, }; -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, - unsigned int irq_base, unsigned int irq_offset) +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, + unsigned int irq_base, unsigned int irq_offset) { struct irq_chip *chip = &msi_chip; struct msi_msg msg; @@ -3198,44 +3198,28 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, return 0; } -int setup_msix_irqs(struct pci_dev *dev, int nvec) +int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int node, ret, sub_handle, index = 0; unsigned int irq, irq_want; struct msi_desc *msidesc; + int node, ret; + + /* Multiple MSI vectors only supported with interrupt remapping */ + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; node = dev_to_node(&dev->dev); irq_want = nr_irqs_gsi; - sub_handle = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { irq = create_irq_nr(irq_want, node); if (irq == 0) return -ENOSPC; + irq_want = irq + 1; - if (!irq_remapping_enabled) - goto no_ir; - if (!sub_handle) { - /* - * allocate the consecutive block of IRTE's - * for 'nvec' - */ - index = msi_alloc_remapped_irq(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq, index, - sub_handle); - if (ret < 0) - goto error; - } -no_ir: ret = setup_msi_irq(dev, msidesc, irq, 0); if (ret < 0) goto error; - sub_handle++; } return 0; @@ -3244,74 +3228,6 @@ error: return ret; } -int setup_msi_irqs(struct pci_dev *dev, int nvec) -{ - int node, ret, sub_handle, index = 0; - unsigned int irq; - struct msi_desc *msidesc; - - if (nvec > 1 && !irq_remapping_enabled) - return 1; - - nvec = __roundup_pow_of_two(nvec); - - WARN_ON(!list_is_singular(&dev->msi_list)); - msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); - WARN_ON(msidesc->irq); - WARN_ON(msidesc->msi_attrib.multiple); - - node = dev_to_node(&dev->dev); - irq = __create_irqs(nr_irqs_gsi, nvec, node); - if (irq == 0) - return -ENOSPC; - - if (!irq_remapping_enabled) { - ret = setup_msi_irq(dev, msidesc, irq, 0); - if (ret < 0) - goto error; - return 0; - } - - msidesc->msi_attrib.multiple = ilog2(nvec); - for (sub_handle = 0; sub_handle < nvec; sub_handle++) { - if (!sub_handle) { - index = msi_alloc_remapped_irq(dev, irq, nvec); - if (index < 0) { - ret = index; - goto error; - } - } else { - ret = msi_setup_remapped_irq(dev, irq + sub_handle, - index, sub_handle); - if (ret < 0) - goto error; - } - ret = setup_msi_irq(dev, msidesc, irq, sub_handle); - if (ret < 0) - goto error; - } - return 0; - -error: - 
destroy_irqs(irq, nvec); - - /* - * Restore altered MSI descriptor fields and prevent just destroyed - * IRQs from tearing down again in default_teardown_msi_irqs() - */ - msidesc->irq = 0; - msidesc->msi_attrib.multiple = 0; - - return ret; -} - -int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) -{ - if (type == PCI_CAP_ID_MSI) - return setup_msi_irqs(dev, nvec); - return setup_msix_irqs(dev, nvec); -} - void native_teardown_msi_irq(unsigned int irq) { destroy_irq(irq); -- cgit v1.1 From 373dd7a27f2469020e7b56744cf47b82986b9749 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:39 +0200 Subject: x86, io_apic: Introduce set_affinity function pointer With interrupt remapping a special function is used to change the affinity of an IO-APIC interrupt. Abstract this with a function pointer. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/io_apic.h | 4 ++++ arch/x86/include/asm/irq_remapping.h | 9 --------- arch/x86/include/asm/x86_init.h | 6 ++++++ arch/x86/kernel/apic/io_apic.c | 17 +++++++---------- arch/x86/kernel/x86_init.c | 1 + 5 files changed, 18 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 21aa81e..a744cbb 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -182,6 +182,9 @@ extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned extern void native_disable_io_apic(void); extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries); +extern int native_ioapic_set_affinity(struct irq_data *, + const struct cpumask *, + bool); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { @@ -228,6 +231,7 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_modify NULL #define native_disable_io_apic NULL #define native_io_apic_print_entries NULL +#define native_ioapic_set_affinity NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0ee1e88..f1afa04 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -40,9 +40,6 @@ extern int setup_ioapic_remapped_entry(int irq, unsigned int destination, int vector, struct io_apic_irq_attr *attr); -extern int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force); extern void free_remapped_irq(int irq); extern void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, @@ -68,12 +65,6 @@ static inline int setup_ioapic_remapped_entry(int irq, { return -ENODEV; } -static inline int set_remapped_irq_affinity(struct irq_data *data, - const struct cpumask *mask, - bool force) -{ - return 0; -} static inline void free_remapped_irq(int irq) { } static inline void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 1ee10ca..20d9f97 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -190,6 +190,9 @@ struct x86_msi_ops { int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; +struct irq_data; +struct cpumask; + struct x86_io_apic_ops { void (*init) (void); unsigned int (*read) (unsigned int apic, unsigned int 
reg); @@ -197,6 +200,9 @@ struct x86_io_apic_ops { void (*modify) (unsigned int apic, unsigned int reg, unsigned int value); void (*disable)(void); void (*print_entries)(unsigned int apic, unsigned int nr_entries); + int (*set_affinity)(struct irq_data *data, + const struct cpumask *mask, + bool force); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d4b045e..d9ca3be 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2369,9 +2369,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, return 0; } -static int -ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask, - bool force) + +int native_ioapic_set_affinity(struct irq_data *data, + const struct cpumask *mask, + bool force) { unsigned int dest, irq = data->irq; unsigned long flags; @@ -2570,8 +2571,7 @@ static void irq_remap_modify_chip_defaults(struct irq_chip *chip) chip->irq_print_chip = ir_print_prefix; chip->irq_ack = ir_ack_apic_edge; chip->irq_eoi = ir_ack_apic_level; - - chip->irq_set_affinity = set_remapped_irq_affinity; + chip->irq_set_affinity = x86_io_apic_ops.set_affinity; } #endif /* CONFIG_IRQ_REMAP */ @@ -2582,7 +2582,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_unmask = unmask_ioapic_irq, .irq_ack = ack_apic_edge, .irq_eoi = ack_apic_level, - .irq_set_affinity = ioapic_set_affinity, + .irq_set_affinity = native_ioapic_set_affinity, .irq_retrigger = ioapic_retrigger_irq, }; @@ -3694,10 +3694,7 @@ void __init setup_ioapic_dest(void) else mask = apic->target_cpus(); - if (irq_remapping_enabled) - set_remapped_irq_affinity(idata, mask, false); - else - ioapic_set_affinity(idata, mask, false); + x86_io_apic_ops.set_affinity(idata, mask, false); } } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 0357eee..2ca3475 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -126,4 +126,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .modify = native_io_apic_modify, .disable = native_disable_io_apic, .print_entries = native_io_apic_print_entries, + .set_affinity = native_ioapic_set_affinity, }; -- cgit v1.1 From a6a25dd3270944f3c4182ffcbe0f60482471e849 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:40 +0200 Subject: x86, io_apic: Convert setup_ioapic_entry to function pointer This pointer is changed to a different function when IRQ remapping is enabled. 
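To make the recurring pattern of this series concrete, here is a minimal standalone sketch of the call-back dispatch (hypothetical, simplified types and names; not the kernel's actual definitions):

struct route_entry_sketch;      /* stand-in for struct IO_APIC_route_entry */

/* simplified stand-in for struct x86_io_apic_ops */
struct io_apic_ops_sketch {
        int (*setup_entry)(int irq, struct route_entry_sketch *entry);
};

int native_setup_entry_sketch(int irq, struct route_entry_sketch *entry)
{
        return 0;       /* would fill in a plain redirection table entry */
}

int remapped_setup_entry_sketch(int irq, struct route_entry_sketch *entry)
{
        return 0;       /* would build an interrupt remapping entry instead */
}

/* the native routine is the compile-time default ... */
struct io_apic_ops_sketch io_apic_ops_sketch = {
        .setup_entry = native_setup_entry_sketch,
};

/* ... and the remapping driver overrides it once during its own init, so
 * core code calls io_apic_ops_sketch.setup_entry() with no mode checks */
void irq_remap_init_sketch(void)
{
        io_apic_ops_sketch.setup_entry = remapped_setup_entry_sketch;
}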
Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/io_apic.h | 5 +++++ arch/x86/include/asm/x86_init.h | 5 +++++ arch/x86/kernel/apic/io_apic.c | 14 +++++--------- arch/x86/kernel/x86_init.c | 1 + 4 files changed, 16 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index a744cbb..71f5f08 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -149,6 +149,10 @@ extern int io_apic_set_pci_routing(struct device *dev, int irq, void setup_IO_APIC_irq_extra(u32 gsi); extern void ioapic_insert_resources(void); +extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, + unsigned int, int, + struct io_apic_irq_attr *); + int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); @@ -232,6 +236,7 @@ static inline void disable_ioapic_support(void) { } #define native_disable_io_apic NULL #define native_io_apic_print_entries NULL #define native_ioapic_set_affinity NULL +#define native_setup_ioapic_entry NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 20d9f97..17da29c 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -190,6 +190,8 @@ struct x86_msi_ops { int (*setup_hpet_msi)(unsigned int irq, unsigned int id); }; +struct IO_APIC_route_entry; +struct io_apic_irq_attr; struct irq_data; struct cpumask; @@ -203,6 +205,9 @@ struct x86_io_apic_ops { int (*set_affinity)(struct irq_data *data, const struct cpumask *mask, bool force); + int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d9ca3be..9a7131f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1315,14 +1315,10 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi ? "fasteoi" : "edge"); } -static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, - unsigned int destination, int vector, - struct io_apic_irq_attr *attr) +int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry, + unsigned int destination, int vector, + struct io_apic_irq_attr *attr) { - if (irq_remapping_enabled) - return setup_ioapic_remapped_entry(irq, entry, destination, - vector, attr); - memset(entry, 0, sizeof(*entry)); entry->delivery_mode = apic->irq_delivery_mode; @@ -1370,8 +1366,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg, attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin, cfg->vector, irq, attr->trigger, attr->polarity, dest); - if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) { - pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", + if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) { + pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n", mpc_ioapic_id(attr->ioapic), attr->ioapic_pin); __clear_irq_vector(irq, cfg); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 2ca3475..06db44f 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -127,4 +127,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .disable = native_disable_io_apic, .print_entries = native_io_apic_print_entries, .set_affinity = native_ioapic_set_affinity, + .setup_entry = native_setup_ioapic_entry, }; -- cgit v1.1 From 6a9f5de27216801b4e38ccd8aa0168a5dd8eca9b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:41 +0200 Subject: x86, io_apic: Move irq_remapping_enabled checks out of check_timer() Move these checks to IRQ remapping code by introducing the panic_if_irq_remap() function. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/irq_remapping.h | 5 +++++ arch/x86/kernel/apic/io_apic.c | 6 ++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index f1afa04..fb99a73 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,7 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); +extern void panic_if_irq_remap(const char *msg); #else /* CONFIG_IRQ_REMAP */ @@ -75,6 +76,10 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { return -ENODEV; } + +static inline void panic_if_irq_remap(const char *msg) +{ +} #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9a7131f..aa2b753 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2777,8 +2777,7 @@ static inline void __init check_timer(void) * 8259A.
*/ if (pin1 == -1) { - if (irq_remapping_enabled) - panic("BIOS bug: timer not connected to IO-APIC"); + panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC"); pin1 = pin2; apic1 = apic2; no_pin1 = 1; @@ -2810,8 +2809,7 @@ static inline void __init check_timer(void) clear_IO_APIC_pin(0, pin1); goto out; } - if (irq_remapping_enabled) - panic("timer doesn't work through Interrupt-remapped IO-APIC"); + panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC"); local_irq_disable(); clear_IO_APIC_pin(apic1, pin1); if (!no_pin1) -- cgit v1.1 From 1d254428c0ba30a0fbb8112d875ba64f4e60db25 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:42 +0200 Subject: x86, io_apic: Remove irq_remapping_enabled check in setup_timer_IRQ0_pin This function is only called when irq-remapping is disabled. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/kernel/apic/io_apic.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index aa2b753..ee0757d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1475,9 +1475,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx, struct IO_APIC_route_entry entry; unsigned int dest; - if (irq_remapping_enabled) - return; - memset(&entry, 0, sizeof(entry)); /* -- cgit v1.1 From 078e1ee26a061663bd7a4773c06b33cdb997380d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:43 +0200 Subject: x86, irq: Move irq_remapping_enabled declaration to iommu code Remove the last left-over of this flag from x86 code. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/irq_remapping.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index fb99a73..6f4b48b 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -26,8 +26,6 @@ #ifdef CONFIG_IRQ_REMAP -extern int irq_remapping_enabled; - extern void setup_irq_remapping_ops(void); extern int irq_remapping_supported(void); extern int irq_remapping_prepare(void); @@ -49,8 +47,6 @@ extern void panic_if_irq_remap(const char *msg); #else /* CONFIG_IRQ_REMAP */ -#define irq_remapping_enabled 0 - static inline void setup_irq_remapping_ops(void) { } static inline int irq_remapping_supported(void) { return 0; } static inline int irq_remapping_prepare(void) { return -ENODEV; } -- cgit v1.1 From 819508d302e5b6d6dacb5c3d5e4756091e32cc7d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:44 +0200 Subject: x86, irq: Add data structure to keep AMD specific irq remapping information Add a data structure to store information the IOMMU driver can use to get from a 'struct irq_cfg' to the remapping entry.
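For illustration, a hedged sketch of the lookup these two fields enable on the AMD side (struct irte_entry and irte_table_for_devid() are hypothetical stand-ins, not kernel symbols):

#include <stdint.h>
#include <stddef.h>

struct irte_entry { uint64_t lo, hi; };  /* hypothetical 128-bit IRTE slot */

struct irq_2_irte_sketch {
        uint16_t devid;         /* selects the per-device IRTE table */
        uint16_t index;         /* slot within that table */
};

/* hypothetical per-device table lookup */
static struct irte_entry *irte_table_for_devid(uint16_t devid)
{
        static struct irte_entry tables[4][256];

        return devid < 4 ? tables[devid] : NULL;
}

/* with devid and index stored in irq_cfg, the driver can get from an
 * interrupt straight to its remapping entry (index assumed in range) */
struct irte_entry *get_irte_sketch(const struct irq_2_irte_sketch *data)
{
        struct irte_entry *table = irte_table_for_devid(data->devid);

        return table ? &table[data->index] : NULL;
}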
Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/hw_irq.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index eb92a6e..fc89a2a 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, irq_attr->polarity = polarity; } +/* Intel specific interrupt remapping information */ struct irq_2_iommu { struct intel_iommu *iommu; u16 irte_index; @@ -108,6 +109,12 @@ struct irq_2_iommu { u8 irte_mask; }; +/* AMD specific interrupt remapping information */ +struct irq_2_irte { + u16 devid; /* Device ID for IRTE table */ + u16 index; /* Index into IRTE table*/ +}; + /* * This is performance-critical, we want to do it O(1) * @@ -120,7 +127,10 @@ struct irq_cfg { u8 vector; u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP - struct irq_2_iommu irq_2_iommu; + union { + struct irq_2_iommu irq_2_iommu; + struct irq_2_irte irq_2_irte; + }; #endif }; -- cgit v1.1 From 9b1b0e42f54bc452817f4bb6a8d939afe4f04303 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:45 +0200 Subject: x86, io-apic: Move CONFIG_IRQ_REMAP code out of x86 core Move all the code either to the header file asm/irq_remapping.h or to drivers/iommu/. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/hw_irq.h | 1 + arch/x86/include/asm/io_apic.h | 5 ++++ arch/x86/include/asm/irq_remapping.h | 17 ++++++++++++++ arch/x86/kernel/apic/io_apic.c | 44 +----------------------------------- 4 files changed, 24 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index fc89a2a..10a78c3 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -127,6 +127,7 @@ struct irq_cfg { u8 vector; u8 move_in_progress : 1; #ifdef CONFIG_IRQ_REMAP + u8 remapped : 1; union { struct irq_2_iommu irq_2_iommu; struct irq_2_irte irq_2_irte; diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 71f5f08..36fb5ab 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -144,6 +144,7 @@ extern int timer_through_8259; (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) struct io_apic_irq_attr; +struct irq_cfg; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); void setup_IO_APIC_irq_extra(u32 gsi); @@ -152,6 +153,10 @@ extern void ioapic_insert_resources(void); extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, unsigned int, int, struct io_apic_irq_attr *); +extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, + unsigned int, int, + struct io_apic_irq_attr *); +extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 6f4b48b..562db68 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -45,6 +45,13 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev, extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); extern void panic_if_irq_remap(const char *msg); +static inline bool 
irq_remapped(struct irq_cfg *cfg) +{ + return (cfg->remapped == 1); +} + +void irq_remap_modify_chip_defaults(struct irq_chip *chip); + #else /* CONFIG_IRQ_REMAP */ static inline void setup_irq_remapping_ops(void) { } @@ -76,6 +83,16 @@ static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) static inline void panic_if_irq_remap(const char *msg) { } + +static inline bool irq_remapped(struct irq_cfg *cfg) +{ + return false; +} + +static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) +{ +} + #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ee0757d..0fd5f30 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -68,22 +68,6 @@ #define for_each_irq_pin(entry, head) \ for (entry = head; entry; entry = entry->next) -#ifdef CONFIG_IRQ_REMAP -static void irq_remap_modify_chip_defaults(struct irq_chip *chip); -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return cfg->irq_2_iommu.iommu != NULL; -} -#else -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return false; -} -static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ -} -#endif - /* * Is the SiS APIC rmw bug present ? * -1 = don't know, 0 = no, 1 = yes @@ -606,7 +590,7 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) } } -static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { struct irq_pin_list *entry; unsigned long flags; @@ -2542,32 +2526,6 @@ static void ack_apic_level(struct irq_data *data) ioapic_irqd_unmask(data, cfg, masked); } -#ifdef CONFIG_IRQ_REMAP -static void ir_ack_apic_edge(struct irq_data *data) -{ - ack_APIC_irq(); -} - -static void ir_ack_apic_level(struct irq_data *data) -{ - ack_APIC_irq(); - eoi_ioapic_irq(data->irq, data->chip_data); -} - -static void ir_print_prefix(struct irq_data *data, struct seq_file *p) -{ - seq_printf(p, " IR-%s", data->chip->name); -} - -static void irq_remap_modify_chip_defaults(struct irq_chip *chip) -{ - chip->irq_print_chip = ir_print_prefix; - chip->irq_ack = ir_ack_apic_edge; - chip->irq_eoi = ir_ack_apic_level; - chip->irq_set_affinity = x86_io_apic_ops.set_affinity; -} -#endif /* CONFIG_IRQ_REMAP */ - static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .irq_startup = startup_ioapic_irq, -- cgit v1.1 From 9f9d39e403faf5e1a22334fe4df96516e4f389a8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:46 +0200 Subject: x86, io-apic: Remove !irq_remapped() check from __target_IO_APIC_irq() This function is only called from default_ioapic_set_affinity() which is only used when interrupt remapping is disabled since the introduction of the set_affinity function pointer. So the check will always evaluate as true and can be removed. 
Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/kernel/apic/io_apic.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 0fd5f30..5b7eb70 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2299,12 +2299,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq apic = entry->apic; pin = entry->pin; - /* - * With interrupt-remapping, destination information comes - * from interrupt-remapping table entry. - */ - if (!irq_remapped(cfg)) - io_apic_write(apic, 0x11 + pin*2, dest); + + io_apic_write(apic, 0x11 + pin*2, dest); reg = io_apic_read(apic, 0x10 + pin*2); reg &= ~IO_APIC_REDIR_VECTOR_MASK; reg |= vector; -- cgit v1.1 From 11b4a1cc3836ac71a214446d350e923c76012368 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:47 +0200 Subject: x86, irq: Move irq_remapped() check into free_remapped_irq The function is called unconditionally now in IO-APIC code removing another irq_remapped() check from x86 core code. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/kernel/apic/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5b7eb70..1104839 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3003,8 +3003,8 @@ void destroy_irq(unsigned int irq) irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); - if (irq_remapped(cfg)) - free_remapped_irq(irq); + free_remapped_irq(irq); + raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); raw_spin_unlock_irqrestore(&vector_lock, flags); -- cgit v1.1 From 2976fd8417f5744de3bb9109e4f30f353a36b1c0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:48 +0200 Subject: x86, irq: Introduce setup_remapped_irq() This function does irq-remapping specific interrupt setup like modifying the chip defaults. 
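A plausible body for the new helper, sketched under the assumption that it merely bundles the two chip adjustments behind the remapping check (the real implementation moves into the IOMMU code; all identifiers below appear elsewhere in this series):

/* hedged sketch, kernel context assumed */
bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip)
{
        if (!irq_remapped(cfg))
                return false;           /* caller keeps its native setup */

        irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
        irq_remap_modify_chip_defaults(chip);
        return true;                    /* caller can pick fasteoi handling */
}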
Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/irq_remapping.h | 9 +++++++++ arch/x86/kernel/apic/io_apic.c | 13 +++---------- 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 562db68..b30fca1 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -44,6 +44,9 @@ extern void compose_remapped_msi_msg(struct pci_dev *pdev, struct msi_msg *msg, u8 hpet_id); extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id); extern void panic_if_irq_remap(const char *msg); +extern bool setup_remapped_irq(int irq, + struct irq_cfg *cfg, + struct irq_chip *chip); static inline bool irq_remapped(struct irq_cfg *cfg) { @@ -93,6 +96,12 @@ static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) { } +static inline bool setup_remapped_irq(int irq, + struct irq_cfg *cfg, + struct irq_chip *chip) +{ + return false; +} #endif /* CONFIG_IRQ_REMAP */ #endif /* __X86_IRQ_REMAPPING_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1104839..3725122 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1288,11 +1288,8 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, fasteoi = false; } - if (irq_remapped(cfg)) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); + if (setup_remapped_irq(irq, cfg, chip)) fasteoi = trigger != 0; - } hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; irq_set_chip_and_handler_name(irq, chip, hdl, @@ -3131,10 +3128,7 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, if (!irq_offset) write_msi_msg(irq, &msg); - if (irq_remapped(irq_get_chip_data(irq))) { - irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - irq_remap_modify_chip_defaults(chip); - } + setup_remapped_irq(irq, irq_get_chip_data(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); @@ -3272,8 +3266,7 @@ int default_setup_hpet_msi(unsigned int irq, unsigned int id) hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - if (irq_remapped(irq_get_chip_data(irq))) - irq_remap_modify_chip_defaults(chip); + setup_remapped_irq(irq, irq_get_chip_data(irq), chip); irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; -- cgit v1.1 From 7601384f91be1a5ea60cb4ef6e28cad628e6cd1e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:49 +0200 Subject: x86, msi: Introduce x86_msi.compose_msi_msg call-back This call-back points to the right function for initializing the msi_msg structure. The old code for msi_msg generation was split up into the irq-remapped and the default case. The irq-remapped case just calls into the specific Intel or AMD implementation when the device is behind an IOMMU. Otherwise the default function is called. 
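The resulting control flow, sketched in simplified form (pick_destination() is a hypothetical stand-in for the vector assignment and destination calculation that stay in the generic path):

/* hedged sketch, kernel context assumed */
static int msi_compose_sketch(struct pci_dev *pdev, unsigned int irq,
                              struct msi_msg *msg, u8 hpet_id)
{
        unsigned int dest;
        int err = pick_destination(irq, &dest); /* hypothetical helper */

        if (err)
                return err;

        /* native_compose_msi_msg() by default; an IOMMU-specific routine
         * when the device sits behind remapping hardware */
        x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
        return 0;
}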
Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/io_apic.h | 4 +++ arch/x86/include/asm/x86_init.h | 4 +++ arch/x86/kernel/apic/io_apic.c | 57 ++++++++++++++++++++++------------------- arch/x86/kernel/x86_init.c | 1 + 4 files changed, 39 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 36fb5ab..1838e88 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -158,6 +158,9 @@ extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *, struct io_apic_irq_attr *); extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); +extern void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id); int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); @@ -242,6 +245,7 @@ static inline void disable_ioapic_support(void) { } #define native_io_apic_print_entries NULL #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL +#define native_compose_msi_msg NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 17da29c..c9f87be 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -181,9 +181,13 @@ struct x86_platform_ops { }; struct pci_dev; +struct msi_msg; struct x86_msi_ops { int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type); + void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq, + unsigned int dest, struct msi_msg *msg, + u8 hpet_id); void (*teardown_msi_irq)(unsigned int irq); void (*teardown_msi_irqs)(struct pci_dev *dev); void (*restore_msi_irqs)(struct pci_dev *dev, int irq); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 3725122..b832810 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3019,37 +3019,16 @@ void destroy_irqs(unsigned int irq, unsigned int count) /* * MSI message composition */ -#ifdef CONFIG_PCI_MSI -static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, - struct msi_msg *msg, u8 hpet_id) +void native_compose_msi_msg(struct pci_dev *pdev, + unsigned int irq, unsigned int dest, + struct msi_msg *msg, u8 hpet_id) { - struct irq_cfg *cfg; - int err; - unsigned dest; + struct irq_cfg *cfg = irq_cfg(irq); - if (disable_apic) - return -ENXIO; - - cfg = irq_cfg(irq); - err = assign_irq_vector(irq, cfg, apic->target_cpus()); - if (err) - return err; - - err = apic->cpu_mask_to_apicid_and(cfg->domain, - apic->target_cpus(), &dest); - if (err) - return err; - - if (irq_remapped(cfg)) { - compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id); - return 0; - } + msg->address_hi = MSI_ADDR_BASE_HI; if (x2apic_enabled()) - msg->address_hi = MSI_ADDR_BASE_HI | - MSI_ADDR_EXT_DEST_ID(dest); - else - msg->address_hi = MSI_ADDR_BASE_HI; + msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest); msg->address_lo = MSI_ADDR_BASE_LO | @@ -3068,6 +3047,30 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, MSI_DATA_DELIVERY_FIXED: MSI_DATA_DELIVERY_LOWPRI) | MSI_DATA_VECTOR(cfg->vector); +} + +#ifdef CONFIG_PCI_MSI +static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, + struct msi_msg *msg, u8 hpet_id) +{ + struct irq_cfg *cfg; + int err; + unsigned dest; + + if (disable_apic) + return -ENXIO; + + cfg = 
irq_cfg(irq); + err = assign_irq_vector(irq, cfg, apic->target_cpus()); + if (err) + return err; + + err = apic->cpu_mask_to_apicid_and(cfg->domain, + apic->target_cpus(), &dest); + if (err) + return err; + + x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id); return 0; } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 06db44f..ee4a17c 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -113,6 +113,7 @@ struct x86_platform_ops x86_platform = { EXPORT_SYMBOL_GPL(x86_platform); struct x86_msi_ops x86_msi = { .setup_msi_irqs = native_setup_msi_irqs, + .compose_msi_msg = native_compose_msi_msg, .teardown_msi_irq = native_teardown_msi_irq, .teardown_msi_irqs = default_teardown_msi_irqs, .restore_msi_irqs = default_restore_msi_irqs, -- cgit v1.1 From da165322dfb6cbc50042b1051f07b837a26f3bb8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:50 +0200 Subject: x86, io_apic: Introduce eoi_ioapic_pin call-back This callback replaces the old __eoi_ioapic_pin function which needs a special path for interrupt remapping. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/io_apic.h | 5 +++++ arch/x86/include/asm/x86_init.h | 1 + arch/x86/kernel/apic/io_apic.c | 20 ++++++-------------- arch/x86/kernel/x86_init.c | 1 + 4 files changed, 13 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 1838e88..459e50a 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -161,6 +161,7 @@ extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg); extern void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq, unsigned int dest, struct msi_msg *msg, u8 hpet_id); +extern void native_eoi_ioapic_pin(int apic, int pin, int vector); int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr); extern int save_ioapic_entries(void); @@ -211,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned { x86_io_apic_ops.modify(apic, reg, value); } + +extern void io_apic_eoi(unsigned int apic, unsigned int vector); + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 @@ -246,6 +250,7 @@ static inline void disable_ioapic_support(void) { } #define native_ioapic_set_affinity NULL #define native_setup_ioapic_entry NULL #define native_compose_msi_msg NULL +#define native_eoi_ioapic_pin NULL #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c9f87be..7669941 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -212,6 +212,7 @@ struct x86_io_apic_ops { int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry, unsigned int destination, int vector, struct io_apic_irq_attr *attr); + void (*eoi_ioapic_pin)(int apic, int pin, int vector); }; extern struct x86_init_ops x86_init; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b832810..9ed796c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -310,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mpc_ioapic_addr(idx) & ~PAGE_MASK); } -static inline void io_apic_eoi(unsigned int apic, unsigned int vector) +void io_apic_eoi(unsigned int apic, unsigned int vector) { struct io_apic __iomem *io_apic = io_apic_base(apic); 
writel(vector, &io_apic->eoi); @@ -557,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data) * Otherwise, we simulate the EOI message manually by changing the trigger * mode to edge and then back to level, with RTE being masked during this. */ -static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg) +void native_eoi_ioapic_pin(int apic, int pin, int vector) { if (mpc_ioapic_ver(apic) >= 0x20) { - /* - * Intr-remapping uses pin number as the virtual vector - * in the RTE. Actual vector is programmed in - * intr-remapping table entry. Hence for the io-apic - * EOI we use the pin number. - */ - if (cfg && irq_remapped(cfg)) - io_apic_eoi(apic, pin); - else - io_apic_eoi(apic, vector); + io_apic_eoi(apic, vector); } else { struct IO_APIC_route_entry entry, entry1; @@ -597,7 +588,8 @@ void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, cfg->irq_2_pin) - __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg); + x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin, + cfg->vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -634,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) } raw_spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_pin(apic, pin, entry.vector, NULL); + x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector); raw_spin_unlock_irqrestore(&ioapic_lock, flags); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ee4a17c..d065d67 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -129,4 +129,5 @@ struct x86_io_apic_ops x86_io_apic_ops = { .print_entries = native_io_apic_print_entries, .set_affinity = native_ioapic_set_affinity, .setup_entry = native_setup_ioapic_entry, + .eoi_ioapic_pin = native_eoi_ioapic_pin, }; -- cgit v1.1 From a1bb20c232d066de0762f8e7cf332e5ce8385210 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Sep 2012 12:44:51 +0200 Subject: x86, irq: Move irq_remapped out of x86 core code The irq_remapped function is only used in IOMMU code after the last patch. So move its definition there too. Signed-off-by: Joerg Roedel Acked-by: Sebastian Andrzej Siewior Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/irq_remapping.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index b30fca1..95fd352 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -48,11 +48,6 @@ extern bool setup_remapped_irq(int irq, struct irq_cfg *cfg, struct irq_chip *chip); -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return (cfg->remapped == 1); -} - void irq_remap_modify_chip_defaults(struct irq_chip *chip); #else /* CONFIG_IRQ_REMAP */ @@ -87,11 +82,6 @@ static inline void panic_if_irq_remap(const char *msg) { } -static inline bool irq_remapped(struct irq_cfg *cfg) -{ - return false; -} - static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip) { } -- cgit v1.1 From 83a57a4de1a222c351667ef9a0fedaac1295e85b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 20 Dec 2012 01:16:20 +0000 Subject: x86: Enable ARCH_USE_BUILTIN_BSWAP With -mmovbe enabled (implicit with -march=atom), this allows the compiler to use the movbe instruction. 
This doesn't have a significant effect on code size (unlike on PowerPC), because the movbe instruction actually takes as many bytes to encode as a simple mov and a bswap. But for Atom in particular I believe it should give a performance win over the mov+bswap alternative. That was kind of why movbe was invented in the first place, after all... I've done basic functionality testing with IPv6 and Legacy IP, but no performance testing. The EFI firmware on my test box unfortunately no longer starts up. Signed-off-by: David Woodhouse Link: http://lkml.kernel.org/r/1355966180.18919.102.camel@shinybook.infradead.org Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..3e941aa 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -114,6 +114,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select GENERIC_SIGALTSTACK + select ARCH_USE_BUILTIN_BSWAP config INSTRUCTION_DECODER def_bool y -- cgit v1.1 From 2b9b6d8c715b23fa119261c32ad360681f4464a9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 28 Jan 2013 17:49:50 +0000 Subject: x86: Require MOVBE feature in cpuid when we use it Add MOVBE to asm/required-features.h so we check for it during startup and don't bother checking for it later. CONFIG_MATOM is used because it corresponds to -march=atom in the Makefiles. If the rules get more complicated it may be necessary to make this an explicit Kconfig option which uses -mmovbe/-mno-movbe to control the use of this instruction explicitly. Signed-off-by: David Woodhouse Link: http://lkml.kernel.org/r/1359395390.3529.65.camel@shinybook.infradead.org [ hpa: added a patch description ] Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/required-features.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 6c7fc25..5c6e4fb 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -47,6 +47,12 @@ # define NEED_NOPL 0 #endif +#ifdef CONFIG_MATOM +# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31)) +#else +# define NEED_MOVBE 0 +#endif + #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -80,7 +86,7 @@ #define REQUIRED_MASK2 0 #define REQUIRED_MASK3 (NEED_NOPL) -#define REQUIRED_MASK4 0 +#define REQUIRED_MASK4 (NEED_MOVBE) #define REQUIRED_MASK5 0 #define REQUIRED_MASK6 0 #define REQUIRED_MASK7 0 -- cgit v1.1 From 3b4a505821615b6c055536a0c23ea37c349bb6a9 Mon Sep 17 00:00:00 2001 From: Alok N Kataria Date: Mon, 28 Jan 2013 18:59:12 -0800 Subject: x86, kvm: Fix initialization warnings in kvm.c With commit: 4cca6ea04d31 ("x86/apic: Allow x2apic without IR on VMware platform") we started seeing "incompatible initialization" warning messages, since x2apic_available() expects a bool return type while kvm_para_available() returns an int.
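The mismatch in miniature, as a standalone sketch (hypothetical names, not the kernel's declarations):

#include <stdbool.h>

struct apic_sketch {
        bool (*x2apic_available)(void); /* the hook expects bool */
};

int  para_available_as_int(void)  { return 1; }
bool para_available_as_bool(void) { return true; }

/* initializing with the int version draws the "incompatible
 * initialization" warning; the bool version is clean */
struct apic_sketch apic_sketch_ops = {
        .x2apic_available = para_available_as_bool,
};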
Reported by: kbuild test robot Signed-off-by: Alok N Kataria Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kvm_para.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 5ed1f161..65231e1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -85,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, return ret; } -static inline int kvm_para_available(void) +static inline bool kvm_para_available(void) { unsigned int eax, ebx, ecx, edx; char signature[13]; if (boot_cpu_data.cpuid_level < 0) - return 0; /* So we don't blow up on old processors */ + return false; /* So we don't blow up on old processors */ if (cpu_has_hypervisor) { cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx); @@ -101,10 +101,10 @@ static inline int kvm_para_available(void) signature[12] = 0; if (strcmp(signature, "KVMKVMKVM") == 0) - return 1; + return true; } - return 0; + return false; } static inline unsigned int kvm_arch_para_features(void) -- cgit v1.1 From 83d4c286931c9d28c5be21bac3c73a2332cab681 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Fri, 25 Jan 2013 10:18:49 +0800 Subject: x86, apicv: add APICv register virtualization support - APIC read doesn't cause VM-Exit - APIC write becomes trap-like Reviewed-by: Marcelo Tosatti Signed-off-by: Kevin Tian Signed-off-by: Yang Zhang Signed-off-by: Gleb Natapov --- arch/x86/include/asm/vmx.h | 2 ++ arch/x86/kvm/lapic.c | 15 +++++++++++++++ arch/x86/kvm/lapic.h | 2 ++ arch/x86/kvm/vmx.c | 33 ++++++++++++++++++++++++++++++++- 4 files changed, 51 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index e385df9..44c3f7e 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -66,6 +66,7 @@ #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 #define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 #define EXIT_REASON_INVPCID 58 #define VMX_EXIT_REASONS \ @@ -141,6 +142,7 @@ #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9392f52..0664c13 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1212,6 +1212,21 @@ void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi); +/* emulate APIC access in a trap manner */ +void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset) +{ + u32 val = 0; + + /* hw has done the conditional check and inst decode */ + offset &= 0xff0; + + apic_reg_read(vcpu->arch.apic, offset, 4, &val); + + /* TODO: optimize to just emulate side effect w/o one more write */ + apic_reg_write(vcpu->arch.apic, offset, val); +} +EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode); + void kvm_free_lapic(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index e5ebf9f..9a8ee22 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -64,6 +64,8 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); +void 
kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); + void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 02eeba8..5ad7c85 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,6 +84,9 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); +static bool __read_mostly enable_apicv_reg = 1; +module_param(enable_apicv_reg, bool, S_IRUGO); + /* * If nested=1, nested virtualization is supported, i.e., guests may use * VMX and be a hypervisor for its own guests. If nested=0, guests may not @@ -764,6 +767,12 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; } +static inline bool cpu_has_vmx_apic_register_virt(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_APIC_REGISTER_VIRT; +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && @@ -2540,7 +2549,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_UNRESTRICTED_GUEST | SECONDARY_EXEC_PAUSE_LOOP_EXITING | SECONDARY_EXEC_RDTSCP | - SECONDARY_EXEC_ENABLE_INVPCID; + SECONDARY_EXEC_ENABLE_INVPCID | + SECONDARY_EXEC_APIC_REGISTER_VIRT; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -2551,6 +2561,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) _cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW; #endif + + if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) + _cpu_based_2nd_exec_control &= ~( + SECONDARY_EXEC_APIC_REGISTER_VIRT); + if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { /* CR3 accesses and invlpg don't need to cause VM Exits when EPT enabled */ @@ -2748,6 +2763,9 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; + if (!cpu_has_vmx_apic_register_virt()) + enable_apicv_reg = 0; + if (nested) nested_vmx_setup_ctls_msrs(); @@ -3829,6 +3847,8 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; + if (!enable_apicv_reg || !irqchip_in_kernel(vmx->vcpu.kvm)) + exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT; return exec_control; } @@ -4787,6 +4807,16 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return emulate_instruction(vcpu, 0) == EMULATE_DONE; } +static int handle_apic_write(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + u32 offset = exit_qualification & 0xfff; + + /* APIC-write VM exit is trap-like and thus no need to adjust IP */ + kvm_apic_write_nodecode(vcpu, offset); + return 1; +} + static int handle_task_switch(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -5721,6 +5751,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_VMON] = handle_vmon, [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, + [EXIT_REASON_APIC_WRITE] = handle_apic_write, [EXIT_REASON_WBINVD] = handle_wbinvd, [EXIT_REASON_XSETBV] = handle_xsetbv, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, -- cgit v1.1 From 8d14695f9542e9e0195d6e41ddaa52c32322adf5 Mon Sep 17 00:00:00 2001 From: Yang 
Zhang Date: Fri, 25 Jan 2013 10:18:50 +0800 Subject: x86, apicv: add virtual x2apic support Basically, to benefit from APICv we need to enable virtualized x2apic mode. Currently, we only enable it when the guest is actually using x2apic. Also, clear the MSR bitmap for the corresponding x2apic MSRs when the guest has enabled x2apic: 0x800 - 0x8ff: no read intercept for apicv register virtualization, except APIC ID and TMCCT, which need software's assistance to get the right value. Reviewed-by: Marcelo Tosatti Signed-off-by: Kevin Tian Signed-off-by: Yang Zhang Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/lapic.c | 19 ++-- arch/x86/kvm/lapic.h | 5 + arch/x86/kvm/svm.c | 6 ++ arch/x86/kvm/vmx.c | 198 ++++++++++++++++++++++++++++++++++++---- 6 files changed, 201 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 77d56a4..d42c283 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -699,6 +699,7 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 44c3f7e..0a54df0 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -139,6 +139,7 @@ #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 #define SECONDARY_EXEC_ENABLE_EPT 0x00000002 #define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 #define SECONDARY_EXEC_ENABLE_VPID 0x00000020 #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0664c13..f69fc50 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -140,11 +140,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -static inline int apic_x2apic_mode(struct kvm_lapic *apic) -{ - return apic->vcpu->arch.apic_base & X2APIC_ENABLE; -} - static inline int kvm_apic_id(struct kvm_lapic *apic) { return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; @@ -1303,6 +1298,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu) void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) { + u64 old_value = vcpu->arch.apic_base; struct kvm_lapic *apic = vcpu->arch.apic; if (!apic) { @@ -1324,11 +1320,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) value &= ~MSR_IA32_APICBASE_BSP; vcpu->arch.apic_base = value; - if (apic_x2apic_mode(apic)) { - u32 id = kvm_apic_id(apic); - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); - kvm_apic_set_ldr(apic, ldr); + if ((old_value ^ value) & X2APIC_ENABLE) { + if (value & X2APIC_ENABLE) { + u32 id = kvm_apic_id(apic); + u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + kvm_apic_set_ldr(apic, ldr); + kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true); + } else + kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false); } + apic->base_address = apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_BASE; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 9a8ee22..22a5397 100644 --- 
a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -126,4 +126,9 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu) return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic); } +static inline int apic_x2apic_mode(struct kvm_lapic *apic) +{ + return apic->vcpu->arch.apic_base & X2APIC_ENABLE; +} + #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d29d3cd..38407e9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } +static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +{ + return; +} + static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4290,6 +4295,7 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, + .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5ad7c85..3ce8a16 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -643,6 +643,8 @@ static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; +static unsigned long *vmx_msr_bitmap_legacy_x2apic; +static unsigned long *vmx_msr_bitmap_longmode_x2apic; static bool cpu_has_load_ia32_efer; static bool cpu_has_load_perf_global_ctrl; @@ -767,6 +769,12 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void) SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; } +static inline bool cpu_has_vmx_virtualize_x2apic_mode(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; +} + static inline bool cpu_has_vmx_apic_register_virt(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -1830,6 +1838,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) vmx->guest_msrs[from] = tmp; } +static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) +{ + unsigned long *msr_bitmap; + + if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) { + if (is_long_mode(vcpu)) + msr_bitmap = vmx_msr_bitmap_longmode_x2apic; + else + msr_bitmap = vmx_msr_bitmap_legacy_x2apic; + } else { + if (is_long_mode(vcpu)) + msr_bitmap = vmx_msr_bitmap_longmode; + else + msr_bitmap = vmx_msr_bitmap_legacy; + } + + vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); +} + /* * Set up the vmcs to automatically save and restore system * msrs. 
Don't touch the 64-bit msrs if the guest is in legacy @@ -1838,7 +1865,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) static void setup_msrs(struct vcpu_vmx *vmx) { int save_nmsrs, index; - unsigned long *msr_bitmap; save_nmsrs = 0; #ifdef CONFIG_X86_64 @@ -1870,14 +1896,8 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx->save_nmsrs = save_nmsrs; - if (cpu_has_vmx_msr_bitmap()) { - if (is_long_mode(&vmx->vcpu)) - msr_bitmap = vmx_msr_bitmap_longmode; - else - msr_bitmap = vmx_msr_bitmap_legacy; - - vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); - } + if (cpu_has_vmx_msr_bitmap()) + vmx_set_msr_bitmap(&vmx->vcpu); } /* @@ -2543,6 +2563,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) { min2 = 0; opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_WBINVD_EXITING | SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_ENABLE_EPT | @@ -2564,7 +2585,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) _cpu_based_2nd_exec_control &= ~( - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { /* CR3 accesses and invlpg don't need to cause VM Exits when EPT @@ -3725,7 +3747,10 @@ static void free_vpid(struct vcpu_vmx *vmx) spin_unlock(&vmx_vpid_lock); } -static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) +#define MSR_TYPE_R 1 +#define MSR_TYPE_W 2 +static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) { int f = sizeof(unsigned long); @@ -3738,20 +3763,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. */ if (msr <= 0x1fff) { - __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ - __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ + if (type & MSR_TYPE_R) + /* read-low */ + __clear_bit(msr, msr_bitmap + 0x000 / f); + + if (type & MSR_TYPE_W) + /* write-low */ + __clear_bit(msr, msr_bitmap + 0x800 / f); + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { msr &= 0x1fff; - __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ - __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ + if (type & MSR_TYPE_R) + /* read-high */ + __clear_bit(msr, msr_bitmap + 0x400 / f); + + if (type & MSR_TYPE_W) + /* write-high */ + __clear_bit(msr, msr_bitmap + 0xc00 / f); + + } +} + +static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, + u32 msr, int type) +{ + int f = sizeof(unsigned long); + + if (!cpu_has_vmx_msr_bitmap()) + return; + + /* + * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals + * have the write-low and read-high bitmap offsets the wrong way round. + * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. 
+ */ + if (msr <= 0x1fff) { + if (type & MSR_TYPE_R) + /* read-low */ + __set_bit(msr, msr_bitmap + 0x000 / f); + + if (type & MSR_TYPE_W) + /* write-low */ + __set_bit(msr, msr_bitmap + 0x800 / f); + + } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { + msr &= 0x1fff; + if (type & MSR_TYPE_R) + /* read-high */ + __set_bit(msr, msr_bitmap + 0x400 / f); + + if (type & MSR_TYPE_W) + /* write-high */ + __set_bit(msr, msr_bitmap + 0xc00 / f); + } } static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) { if (!longmode_only) - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); - __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, + msr, MSR_TYPE_R | MSR_TYPE_W); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, + msr, MSR_TYPE_R | MSR_TYPE_W); +} + +static void vmx_enable_intercept_msr_read_x2apic(u32 msr) +{ + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, + msr, MSR_TYPE_R); + __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, + msr, MSR_TYPE_R); +} + +static void vmx_disable_intercept_msr_read_x2apic(u32 msr) +{ + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, + msr, MSR_TYPE_R); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, + msr, MSR_TYPE_R); +} + +static void vmx_disable_intercept_msr_write_x2apic(u32 msr) +{ + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic, + msr, MSR_TYPE_W); + __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic, + msr, MSR_TYPE_W); } /* @@ -3849,6 +3947,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; if (!enable_apicv_reg || !irqchip_in_kernel(vmx->vcpu.kvm)) exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT; + exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; return exec_control; } @@ -6101,6 +6200,34 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) vmcs_write32(TPR_THRESHOLD, irr); } +static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) +{ + u32 sec_exec_control; + + /* + * There is not point to enable virtualize x2apic without enable + * apicv + */ + if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg) + return; + + if (!vm_need_tpr_shadow(vcpu->kvm)) + return; + + sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL); + + if (set) { + sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + } else { + sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; + sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + } + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); + + vmx_set_msr_bitmap(vcpu); +} + static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -7364,6 +7491,7 @@ static struct kvm_x86_ops vmx_x86_ops = { .enable_nmi_window = enable_nmi_window, .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, + .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, @@ -7396,7 +7524,7 @@ static struct kvm_x86_ops vmx_x86_ops = { static int __init vmx_init(void) { - int r, i; + int r, i, msr; rdmsrl_safe(MSR_EFER, &host_efer); @@ -7417,11 +7545,19 @@ static int __init vmx_init(void) if (!vmx_msr_bitmap_legacy) goto out1; + vmx_msr_bitmap_legacy_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if 
(!vmx_msr_bitmap_legacy_x2apic) + goto out2; vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_msr_bitmap_longmode) - goto out2; + goto out3; + vmx_msr_bitmap_longmode_x2apic = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx_msr_bitmap_longmode_x2apic) + goto out4; /* * Allow direct access to the PC debug port (it is often used for I/O @@ -7453,6 +7589,24 @@ static int __init vmx_init(void) vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); + memcpy(vmx_msr_bitmap_legacy_x2apic, + vmx_msr_bitmap_legacy, PAGE_SIZE); + memcpy(vmx_msr_bitmap_longmode_x2apic, + vmx_msr_bitmap_longmode, PAGE_SIZE); + + if (enable_apicv_reg) { + for (msr = 0x800; msr <= 0x8ff; msr++) + vmx_disable_intercept_msr_read_x2apic(msr); + + /* According SDM, in x2apic mode, the whole id reg is used. * But in KVM, it only use the highest eight bits. Need to * intercept it */ + vmx_enable_intercept_msr_read_x2apic(0x802); + /* TMCCT */ + vmx_enable_intercept_msr_read_x2apic(0x839); + /* TPR */ + vmx_disable_intercept_msr_write_x2apic(0x808); + } if (enable_ept) { kvm_mmu_set_mask_ptes(0ull, @@ -7466,8 +7620,10 @@ static int __init vmx_init(void) return 0; -out3: +out4: free_page((unsigned long)vmx_msr_bitmap_longmode); +out3: + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); out2: free_page((unsigned long)vmx_msr_bitmap_legacy); out1: @@ -7479,6 +7635,8 @@ out: static void __exit vmx_exit(void) { + free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic); + free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); free_page((unsigned long)vmx_msr_bitmap_legacy); free_page((unsigned long)vmx_msr_bitmap_longmode); free_page((unsigned long)vmx_io_bitmap_b); -- cgit v1.1 From c7c9c56ca26f7b9458711b2d78b60b60e0d38ba7 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Fri, 25 Jan 2013 10:18:51 +0800 Subject: x86, apicv: add virtual interrupt delivery support Virtual interrupt delivery means KVM no longer has to inject vAPIC interrupts manually; that is fully taken care of by the hardware. This needs some special awareness in the existing interrupt injection path: - for a pending interrupt, instead of injecting it directly, we may need to update architecture-specific indicators before resuming the guest. - a pending interrupt that is masked by the ISR should also be considered in the update above, since the hardware will decide when to inject it at the right time. The current has_interrupt and get_interrupt only return a valid vector from the injection point of view.
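For illustration (the helper below is hypothetical and not part of the patch): the bit arithmetic in the __vmx_disable_intercept_for_msr()/__vmx_enable_intercept_for_msr() pair a patch earlier follows the VMX MSR-bitmap layout, a single 4 KiB page holding four 1 KiB regions with one bit per MSR -- read-low at byte 0x000 and write-low at 0x800 for MSRs 0x00000000-0x00001fff, read-high at 0x400 and write-high at 0xc00 for MSRs 0xc0000000-0xc0001fff. A sketch of the same computation:

static long msr_bitmap_bit(u32 msr, bool write)
{
	unsigned long base = write ? 0x800 : 0x000;	/* write regions sit 0x800 above read */

	if (msr <= 0x1fff)				/* low range: bit index is the MSR number */
		return base * 8 + msr;
	if (msr >= 0xc0000000 && msr <= 0xc0001fff)	/* high range lives 0x400 bytes further in */
		return (base + 0x400) * 8 + (msr & 0x1fff);
	return -1;					/* outside both ranges: access always exits */
}

A clear bit means "do not intercept"; the legacy/longmode x2apic bitmaps allocated in vmx_init() above start as copies of the plain bitmaps and then have the 0x800-0x8ff read bits cleared when enable_apicv_reg is set.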
Reviewed-by: Marcelo Tosatti Signed-off-by: Kevin Tian Signed-off-by: Yang Zhang Signed-off-by: Gleb Natapov --- arch/x86/include/asm/kvm_host.h | 5 ++ arch/x86/include/asm/vmx.h | 11 ++++ arch/x86/kvm/irq.c | 56 +++++++++++++++++-- arch/x86/kvm/lapic.c | 106 +++++++++++++++++++++++++++-------- arch/x86/kvm/lapic.h | 27 +++++++++ arch/x86/kvm/svm.c | 18 ++++++ arch/x86/kvm/vmx.c | 119 ++++++++++++++++++++++++++++++++++++---- arch/x86/kvm/x86.c | 23 +++++++- 8 files changed, 325 insertions(+), 40 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d42c283..635a74d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -699,6 +699,10 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); + int (*vm_has_apicv)(struct kvm *kvm); + void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); + void (*hwapic_isr_update)(struct kvm *kvm, int isr); + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); @@ -994,6 +998,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long hva); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 0a54df0..694586c 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -62,6 +62,7 @@ #define EXIT_REASON_MCE_DURING_VMENTRY 41 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 #define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 @@ -144,6 +145,7 @@ #define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 #define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 #define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 #define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 @@ -181,6 +183,7 @@ enum vmcs_field { GUEST_GS_SELECTOR = 0x0000080a, GUEST_LDTR_SELECTOR = 0x0000080c, GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, HOST_ES_SELECTOR = 0x00000c00, HOST_CS_SELECTOR = 0x00000c02, HOST_SS_SELECTOR = 0x00000c04, @@ -208,6 +211,14 @@ enum vmcs_field { APIC_ACCESS_ADDR_HIGH = 0x00002015, EPT_POINTER = 0x0000201a, EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, GUEST_PHYSICAL_ADDRESS = 0x00002400, GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, VMCS_LINK_POINTER = 0x00002800, diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index b111aee..484bc87 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -38,6 +38,38 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) 
EXPORT_SYMBOL(kvm_cpu_has_pending_timer); /* + * check if there is pending interrupt from + * non-APIC source without intack. + */ +static int kvm_cpu_has_extint(struct kvm_vcpu *v) +{ + if (kvm_apic_accept_pic_intr(v)) + return pic_irqchip(v->kvm)->output; /* PIC */ + else + return 0; +} + +/* + * check if there is injectable interrupt: + * when virtual interrupt delivery enabled, + * interrupt from apic will handled by hardware, + * we don't need to check it here. + */ +int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) +{ + if (!irqchip_in_kernel(v->kvm)) + return v->arch.interrupt.pending; + + if (kvm_cpu_has_extint(v)) + return 1; + + if (kvm_apic_vid_enabled(v->kvm)) + return 0; + + return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ +} + +/* * check if there is pending interrupt without * intack. */ @@ -46,27 +78,41 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.pending; - if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output) - return pic_irqchip(v->kvm)->output; /* PIC */ + if (kvm_cpu_has_extint(v)) + return 1; return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ } EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt); /* + * Read pending interrupt(from non-APIC source) + * vector and intack. + */ +static int kvm_cpu_get_extint(struct kvm_vcpu *v) +{ + if (kvm_cpu_has_extint(v)) + return kvm_pic_read_irq(v->kvm); /* PIC */ + return -1; +} + +/* * Read pending interrupt vector and intack. */ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) { + int vector; + if (!irqchip_in_kernel(v->kvm)) return v->arch.interrupt.nr; - if (kvm_apic_accept_pic_intr(v) && pic_irqchip(v->kvm)->output) - return kvm_pic_read_irq(v->kvm); /* PIC */ + vector = kvm_cpu_get_extint(v); + + if (kvm_apic_vid_enabled(v->kvm) || vector != -1) + return vector; /* PIC */ return kvm_get_apic_interrupt(v); /* APIC */ } -EXPORT_SYMBOL_GPL(kvm_cpu_get_interrupt); void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f69fc50..02b51dd 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -145,21 +145,51 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } -static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) +void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, + struct kvm_lapic_irq *irq, + u64 *eoi_exit_bitmap) { - u16 cid; - ldr >>= 32 - map->ldr_bits; - cid = (ldr >> map->cid_shift) & map->cid_mask; + struct kvm_lapic **dst; + struct kvm_apic_map *map; + unsigned long bitmap = 1; + int i; - BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); + rcu_read_lock(); + map = rcu_dereference(vcpu->kvm->arch.apic_map); - return cid; -} + if (unlikely(!map)) { + __set_bit(irq->vector, (unsigned long *)eoi_exit_bitmap); + goto out; + } -static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) -{ - ldr >>= (32 - map->ldr_bits); - return ldr & map->lid_mask; + if (irq->dest_mode == 0) { /* physical mode */ + if (irq->delivery_mode == APIC_DM_LOWEST || + irq->dest_id == 0xff) { + __set_bit(irq->vector, + (unsigned long *)eoi_exit_bitmap); + goto out; + } + dst = &map->phys_map[irq->dest_id & 0xff]; + } else { + u32 mda = irq->dest_id << (32 - map->ldr_bits); + + dst = map->logical_map[apic_cluster_id(map, mda)]; + + bitmap = apic_logical_id(map, mda); + } + + for_each_set_bit(i, &bitmap, 16) { + if (!dst[i]) + continue; + if (dst[i]->vcpu == vcpu) { + __set_bit(irq->vector, + (unsigned long *)eoi_exit_bitmap); + break; 
+ } + } + +out: + rcu_read_unlock(); } static void recalculate_apic_map(struct kvm *kvm) @@ -225,6 +255,8 @@ out: if (old) kfree_rcu(old, rcu); + + kvm_ioapic_make_eoibitmap_request(kvm); } static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id) @@ -340,6 +372,10 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) { int result; + /* + * Note that irr_pending is just a hint. It will be always + * true with virtual interrupt delivery enabled. + */ if (!apic->irr_pending) return -1; @@ -456,6 +492,8 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) static inline int apic_find_highest_isr(struct kvm_lapic *apic) { int result; + + /* Note that isr_count is always 1 with vid enabled */ if (!apic->isr_count) return -1; if (likely(apic->highest_isr_cache != -1)) @@ -735,6 +773,19 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; } +static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) +{ + if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && + kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { + int trigger_mode; + if (apic_test_vector(vector, apic->regs + APIC_TMR)) + trigger_mode = IOAPIC_LEVEL_TRIG; + else + trigger_mode = IOAPIC_EDGE_TRIG; + kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); + } +} + static int apic_set_eoi(struct kvm_lapic *apic) { int vector = apic_find_highest_isr(apic); @@ -751,19 +802,26 @@ static int apic_set_eoi(struct kvm_lapic *apic) apic_clear_isr(vector, apic); apic_update_ppr(apic); - if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) && - kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) { - int trigger_mode; - if (apic_test_vector(vector, apic->regs + APIC_TMR)) - trigger_mode = IOAPIC_LEVEL_TRIG; - else - trigger_mode = IOAPIC_EDGE_TRIG; - kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode); - } + kvm_ioapic_send_eoi(apic, vector); kvm_make_request(KVM_REQ_EVENT, apic->vcpu); return vector; } +/* + * this interface assumes a trap-like exit, which has already finished + * desired side effect including vISR and vPPR update. + */ +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + trace_kvm_eoi(apic, vector); + + kvm_ioapic_send_eoi(apic, vector); + kvm_make_request(KVM_REQ_EVENT, apic->vcpu); +} +EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated); + static void apic_send_ipi(struct kvm_lapic *apic) { u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR); @@ -1375,8 +1433,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } - apic->irr_pending = false; - apic->isr_count = 0; + apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm); + apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm); apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1591,8 +1649,10 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; - apic->isr_count = count_vectors(apic->regs + APIC_ISR); + apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ? 
+ 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; + kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); kvm_make_request(KVM_REQ_EVENT, vcpu); } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 22a5397..1676d34 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -65,6 +65,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset); +void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector); void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); @@ -131,4 +132,30 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } +static inline bool kvm_apic_vid_enabled(struct kvm *kvm) +{ + return kvm_x86_ops->vm_has_apicv(kvm); +} + +static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) +{ + u16 cid; + ldr >>= 32 - map->ldr_bits; + cid = (ldr >> map->cid_shift) & map->cid_mask; + + BUG_ON(cid >= ARRAY_SIZE(map->logical_map)); + + return cid; +} + +static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) +{ + ldr >>= (32 - map->ldr_bits); + return ldr & map->lid_mask; +} + +void kvm_calculate_eoi_exitmap(struct kvm_vcpu *vcpu, + struct kvm_lapic_irq *irq, + u64 *eoi_bitmap); + #endif diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 38407e9..e1b1ce2 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3576,6 +3576,21 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) return; } +static int svm_vm_has_apicv(struct kvm *kvm) +{ + return 0; +} + +static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +{ + return; +} + +static void svm_hwapic_isr_update(struct kvm *kvm, int isr) +{ + return; +} + static int svm_nmi_allowed(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4296,6 +4311,9 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, + .vm_has_apicv = svm_vm_has_apicv, + .load_eoi_exitmap = svm_load_eoi_exitmap, + .hwapic_isr_update = svm_hwapic_isr_update, .set_tss_addr = svm_set_tss_addr, .get_tdp_level = get_npt_level, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3ce8a16..0cf74a6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,8 +84,8 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv_reg = 1; -module_param(enable_apicv_reg, bool, S_IRUGO); +static bool __read_mostly enable_apicv_reg_vid = 1; +module_param(enable_apicv_reg_vid, bool, S_IRUGO); /* * If nested=1, nested virtualization is supported, i.e., guests may use @@ -781,6 +781,12 @@ static inline bool cpu_has_vmx_apic_register_virt(void) SECONDARY_EXEC_APIC_REGISTER_VIRT; } +static inline bool cpu_has_vmx_virtual_intr_delivery(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; +} + static inline bool cpu_has_vmx_flexpriority(void) { return cpu_has_vmx_tpr_shadow() && @@ -2571,7 +2577,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) SECONDARY_EXEC_PAUSE_LOOP_EXITING | SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_ENABLE_INVPCID | - SECONDARY_EXEC_APIC_REGISTER_VIRT; 
+ SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY; if (adjust_vmx_controls(min2, opt2, MSR_IA32_VMX_PROCBASED_CTLS2, &_cpu_based_2nd_exec_control) < 0) @@ -2586,7 +2593,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)) _cpu_based_2nd_exec_control &= ~( SECONDARY_EXEC_APIC_REGISTER_VIRT | - SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE); + SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) { /* CR3 accesses and invlpg don't need to cause VM Exits when EPT @@ -2785,8 +2793,14 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; - if (!cpu_has_vmx_apic_register_virt()) - enable_apicv_reg = 0; + if (!cpu_has_vmx_apic_register_virt() || + !cpu_has_vmx_virtual_intr_delivery()) + enable_apicv_reg_vid = 0; + + if (enable_apicv_reg_vid) + kvm_x86_ops->update_cr8_intercept = NULL; + else + kvm_x86_ops->hwapic_irr_update = NULL; if (nested) nested_vmx_setup_ctls_msrs(); @@ -3928,6 +3942,11 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) return exec_control; } +static int vmx_vm_has_apicv(struct kvm *kvm) +{ + return enable_apicv_reg_vid && irqchip_in_kernel(kvm); +} + static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_2nd_exec_ctrl; @@ -3945,8 +3964,9 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!enable_apicv_reg || !irqchip_in_kernel(vmx->vcpu.kvm)) - exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT; + if (!vmx_vm_has_apicv(vmx->vcpu.kvm)) + exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | + SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; return exec_control; } @@ -3992,6 +4012,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx_secondary_exec_control(vmx)); } + if (enable_apicv_reg_vid) { + vmcs_write64(EOI_EXIT_BITMAP0, 0); + vmcs_write64(EOI_EXIT_BITMAP1, 0); + vmcs_write64(EOI_EXIT_BITMAP2, 0); + vmcs_write64(EOI_EXIT_BITMAP3, 0); + + vmcs_write16(GUEST_INTR_STATUS, 0); + } + if (ple_gap) { vmcs_write32(PLE_GAP, ple_gap); vmcs_write32(PLE_WINDOW, ple_window); @@ -4906,6 +4935,16 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) return emulate_instruction(vcpu, 0) == EMULATE_DONE; } +static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) +{ + unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + int vector = exit_qualification & 0xff; + + /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ + kvm_apic_set_eoi_accelerated(vcpu, vector); + return 1; +} + static int handle_apic_write(struct kvm_vcpu *vcpu) { unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); @@ -5851,6 +5890,7 @@ static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_APIC_WRITE] = handle_apic_write, + [EXIT_REASON_EOI_INDUCED] = handle_apic_eoi_induced, [EXIT_REASON_WBINVD] = handle_wbinvd, [EXIT_REASON_XSETBV] = handle_xsetbv, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, @@ -6208,7 +6248,8 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) * There is not point to enable virtualize x2apic without enable * apicv */ - if 
(!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg) + if (!cpu_has_vmx_virtualize_x2apic_mode() || + !vmx_vm_has_apicv(vcpu->kvm)) return; if (!vm_need_tpr_shadow(vcpu->kvm)) @@ -6228,6 +6269,56 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) vmx_set_msr_bitmap(vcpu); } +static void vmx_hwapic_isr_update(struct kvm *kvm, int isr) +{ + u16 status; + u8 old; + + if (!vmx_vm_has_apicv(kvm)) + return; + + if (isr == -1) + isr = 0; + + status = vmcs_read16(GUEST_INTR_STATUS); + old = status >> 8; + if (isr != old) { + status &= 0xff; + status |= isr << 8; + vmcs_write16(GUEST_INTR_STATUS, status); + } +} + +static void vmx_set_rvi(int vector) +{ + u16 status; + u8 old; + + status = vmcs_read16(GUEST_INTR_STATUS); + old = (u8)status & 0xff; + if ((u8)vector != old) { + status &= ~0xff; + status |= (u8)vector; + vmcs_write16(GUEST_INTR_STATUS, status); + } +} + +static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) +{ + if (max_irr == -1) + return; + + vmx_set_rvi(max_irr); +} + +static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +{ + vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); + vmcs_write64(EOI_EXIT_BITMAP1, eoi_exit_bitmap[1]); + vmcs_write64(EOI_EXIT_BITMAP2, eoi_exit_bitmap[2]); + vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); +} + static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -7492,6 +7583,10 @@ static struct kvm_x86_ops vmx_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, + .vm_has_apicv = vmx_vm_has_apicv, + .load_eoi_exitmap = vmx_load_eoi_exitmap, + .hwapic_irr_update = vmx_hwapic_irr_update, + .hwapic_isr_update = vmx_hwapic_isr_update, .set_tss_addr = vmx_set_tss_addr, .get_tdp_level = get_ept_level, @@ -7594,7 +7689,7 @@ static int __init vmx_init(void) memcpy(vmx_msr_bitmap_longmode_x2apic, vmx_msr_bitmap_longmode, PAGE_SIZE); - if (enable_apicv_reg) { + if (enable_apicv_reg_vid) { for (msr = 0x800; msr <= 0x8ff; msr++) vmx_disable_intercept_msr_read_x2apic(msr); @@ -7606,6 +7701,10 @@ static int __init vmx_init(void) vmx_enable_intercept_msr_read_x2apic(0x839); /* TPR */ vmx_disable_intercept_msr_write_x2apic(0x808); + /* EOI */ + vmx_disable_intercept_msr_write_x2apic(0x80b); + /* SELF-IPI */ + vmx_disable_intercept_msr_write_x2apic(0x83f); } if (enable_ept) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b9f5529..cf512e70 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5565,7 +5565,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) vcpu->arch.nmi_injected = true; kvm_x86_ops->set_nmi(vcpu); } - } else if (kvm_cpu_has_interrupt(vcpu)) { + } else if (kvm_cpu_has_injectable_intr(vcpu)) { if (kvm_x86_ops->interrupt_allowed(vcpu)) { kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); @@ -5633,6 +5633,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) #endif } +static void update_eoi_exitmap(struct kvm_vcpu *vcpu) +{ + u64 eoi_exit_bitmap[4]; + + memset(eoi_exit_bitmap, 0, 32); + + kvm_ioapic_calculate_eoi_exitmap(vcpu, eoi_exit_bitmap); + kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); +} + static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; @@ -5686,6 +5696,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_handle_pmu_event(vcpu); if (kvm_check_request(KVM_REQ_PMI, vcpu)) kvm_deliver_pmi(vcpu); + if (kvm_check_request(KVM_REQ_EOIBITMAP, vcpu)) + update_eoi_exitmap(vcpu); } 
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -5694,10 +5706,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) /* enable NMI/IRQ window open exits if needed */ if (vcpu->arch.nmi_pending) kvm_x86_ops->enable_nmi_window(vcpu); - else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) + else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) kvm_x86_ops->enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { + /* + * Update architecture specific hints for APIC + * virtual interrupt delivery. + */ + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + kvm_lapic_find_highest_irr(vcpu)); update_cr8_intercept(vcpu); kvm_lapic_sync_to_vapic(vcpu); } -- cgit v1.1 From 5dcd14ecd41ea2b3ae3295a9b30d98769d52165f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 29 Jan 2013 01:05:24 -0800 Subject: x86, boot: Sanitize boot_params if not zeroed on creation Use the new sentinel field to detect bootloaders which fail to follow protocol and don't initialize fields in struct boot_params that they do not explicitly initialize to zero. Based on an original patch and research by Yinghai Lu. Changed by hpa to be invoked both in the decompression path and in the kernel proper; the latter for the case where a bootloader takes over decompression. Originally-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-26-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/misc.c | 2 ++ arch/x86/boot/compressed/misc.h | 1 + arch/x86/include/asm/bootparam_utils.h | 38 ++++++++++++++++++++++++++++++++++ arch/x86/kernel/head32.c | 3 +++ arch/x86/kernel/head64.c | 2 ++ 5 files changed, 46 insertions(+) create mode 100644 arch/x86/include/asm/bootparam_utils.h (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 88f7ff6..7cb56c6 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, { real_mode = rmode; + sanitize_boot_params(real_mode); + if (real_mode->screen_info.orig_video_mode == 7) { vidmem = (char *) 0xb0000; vidport = 0x3b4; diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 0e6dc0e..674019d 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -18,6 +18,7 @@ #include #include #include +#include #define BOOT_BOOT_H #include "../ctype.h" diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h new file mode 100644 index 0000000..5b5e9cb --- /dev/null +++ b/arch/x86/include/asm/bootparam_utils.h @@ -0,0 +1,38 @@ +#ifndef _ASM_X86_BOOTPARAM_UTILS_H +#define _ASM_X86_BOOTPARAM_UTILS_H + +#include + +/* + * This file is included from multiple environments. Do not + * add completing #includes to make it standalone. + */ + +/* + * Deal with bootloaders which fail to initialize unknown fields in + * boot_params to zero. The list fields in this list are taken from + * analysis of kexec-tools; if other broken bootloaders initialize a + * different set of fields we will need to figure out how to disambiguate. 
+ * + */ +static void sanitize_boot_params(struct boot_params *boot_params) +{ + if (boot_params->sentinel) { + /*fields in boot_params are not valid, clear them */ + memset(&boot_params->olpc_ofw_header, 0, + (char *)&boot_params->alt_mem_k - + (char *)&boot_params->olpc_ofw_header); + memset(&boot_params->kbd_status, 0, + (char *)&boot_params->hdr - + (char *)&boot_params->kbd_status); + memset(&boot_params->_pad7[0], 0, + (char *)&boot_params->edd_mbr_sig_buffer[0] - + (char *)&boot_params->_pad7[0]); + memset(&boot_params->_pad8[0], 0, + (char *)&boot_params->eddbuf[0] - + (char *)&boot_params->_pad8[0]); + memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9)); + } +} + +#endif /* _ASM_X86_BOOTPARAM_UTILS_H */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index c18f59d..6773c91 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -18,6 +18,7 @@ #include #include #include +#include static void __init i386_default_early_setup(void) { @@ -30,6 +31,8 @@ static void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { + sanitize_boot_params(&boot_params); + memblock_reserve(__pa_symbol(&_text), __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 037df57..849fc9e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -25,6 +25,7 @@ #include #include #include +#include static void __init zap_identity_mappings(void) { @@ -46,6 +47,7 @@ static void __init copy_bootdata(char *real_mode_data) char * command_line; memcpy(&boot_params, real_mode_data, sizeof boot_params); + sanitize_boot_params(&boot_params); if (boot_params.hdr.cmd_line_ptr) { command_line = __va(boot_params.hdr.cmd_line_ptr); memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); -- cgit v1.1 From ffca80b5567c5f4405b3316129aac6890a42750d Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Tue, 29 Jan 2013 10:40:22 +0100 Subject: x86, efi: fix comment typo in head_32.S Signed-off-by: Olaf Hering Signed-off-by: Jiri Kosina --- arch/x86/boot/compressed/head_32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index aa4aaf1..7315e11 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -38,7 +38,7 @@ ENTRY(startup_32) .balign 0x10 /* * We don't need the return address, so set up the stack so - * efi_main() can find its arugments. + * efi_main() can find its arguments. */ add $0x4, %esp -- cgit v1.1 From 6f16eebe1ff82176339a0439c98ebec9768b0ee2 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 25 Jan 2013 17:08:12 -0800 Subject: timekeeping: Switch HAS_PERSISTENT_CLOCK to ALWAYS_USE_PERSISTENT_CLOCK Jason pointed out the HAS_PERSISTENT_CLOCK name isn't quite accurate for the config, as some systems may have the persistent_clock in some cases, but not always. So change the config name to the more clear ALWAYS_USE_PERSISTENT_CLOCK. 
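For illustration, looking back at the sanitize_boot_params() patch a little earlier: its memset arithmetic clears every byte from one field up to, but not including, a later field, using the difference of the two field addresses as the length. A minimal sketch of the same pattern (the struct below is hypothetical, not the real struct boot_params layout):

struct fake_params {
	u8  sentinel;		/* non-zero when the bootloader failed to zero the struct */
	u32 trusted_a;		/* explicitly initialized by all bootloaders */
	u32 junk_b;		/* possibly stale memory */
	u32 junk_c;
	u32 trusted_d;		/* explicitly initialized by all bootloaders */
};

static void sanitize_fake_params(struct fake_params *p)
{
	/* Wipe the untrusted span [junk_b, trusted_d) in one memset. */
	if (p->sentinel)
		memset(&p->junk_b, 0,
		       (char *)&p->trusted_d - (char *)&p->junk_b);
}

The real function simply repeats this pattern for each run of fields that broken bootloaders are known to leave uninitialized, leaving the explicitly initialized fields (and hdr) untouched.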
Signed-off-by: John Stultz --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a4135b5..335da90 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -108,7 +108,7 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_RCU_USER_QS if X86_64 select HAVE_IRQ_TIME_ACCOUNTING - select HAS_PERSISTENT_CLOCK + select ALWAYS_USE_PERSISTENT_CLOCK select GENERIC_KERNEL_THREAD select GENERIC_KERNEL_EXECVE select MODULES_USE_ELF_REL if X86_32 -- cgit v1.1 From c9b3234a6abadaa12684083d39552939baaed1f4 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:42 -0800 Subject: x86, mm: Fix page table early allocation offset checking While debugging loading the kernel above 4G, I found that one page in the pre-allocated BRK area for early page allocation was never used. pgt_buf_top is the first address that cannot be used, so the check should be whether the new end is above that top; otherwise the last page is never used. Fix that check, and also print out allocations from the pre-allocated BRK area to catch possible bugs later. But after we get that page back for page tables, it triggers a bug in pgt allocation with Xen: we must avoid using a page as a pgt to map a range that overlaps that very pgt page. Add a check for the overlap; when it happens, use a memblock allocation instead. That fixes the crash on a Xen PV guest with 2G of memory that Stefan found. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-2-git-send-email-yinghai@kernel.org Acked-by: Stefano Stabellini Tested-by: Stefano Stabellini Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 6f85de8..78d1ef3 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -25,6 +25,8 @@ static unsigned long __initdata pgt_buf_top; static unsigned long min_pfn_mapped; +static bool __initdata can_use_brk_pgt = true; + /* * Pages returned are already directly mapped. * @@ -47,7 +49,7 @@ __ref void *alloc_low_pages(unsigned int num) __GFP_ZERO, order); } - if ((pgt_buf_end + num) >= pgt_buf_top) { + if ((pgt_buf_end + num) > pgt_buf_top || !can_use_brk_pgt) { unsigned long ret; if (min_pfn_mapped >= max_pfn_mapped) panic("alloc_low_page: ran out of memory"); @@ -61,6 +63,8 @@ __ref void *alloc_low_pages(unsigned int num) } else { pfn = pgt_buf_end; pgt_buf_end += num; + printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", + pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); } for (i = 0; i < num; i++) { @@ -370,8 +374,15 @@ static unsigned long __init init_range_memory_mapping( if (start >= end) continue; + /* + * if it is overlapping with brk pgt, we need to + * alloc pgt buf from memblock instead. + */ + can_use_brk_pgt = max(start, (u64)pgt_buf_end<<PAGE_SHIFT) >= + min(end, (u64)pgt_buf_top<<PAGE_SHIFT); -- cgit v1.1 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:45 -0800 Subject: x86: Factor out e820_add_kernel_range() Separate out the reservation of the kernel static memory areas into its own function. Also add support for the case where memmap=xxM$yyM is used without exactmap: the reserved range has to be removed first, before the E820_RAM range is added, otherwise the added E820_RAM range will be ignored. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-5-git-send-email-yinghai@kernel.org Cc: Jacob Shin Signed-off-by: H.
Peter Anvin --- arch/x86/kernel/setup.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2681937..5552d04 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -702,6 +702,27 @@ static void __init trim_bios_range(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } +/* called before trim_bios_range() to spare extra sanitize */ +static void __init e820_add_kernel_range(void) +{ + u64 start = __pa_symbol(_text); + u64 size = __pa_symbol(_end) - start; + + /* + * Complain if .text .data and .bss are not marked as E820_RAM and + * attempt to fix it by adding the range. We may have a confused BIOS, + * or the user may have used memmap=exactmap or memmap=xxM$yyM to + * exclude kernel range. If we really are running on top non-RAM, + * we will crash later anyways. + */ + if (e820_all_mapped(start, start + size, E820_RAM)) + return; + + pr_warn(".text .data .bss are not marked as E820_RAM!\n"); + e820_remove_range(start, size, E820_RAM, 0); + e820_add_region(start, size, E820_RAM); +} + static int __init parse_reservelow(char *p) { unsigned long long size; @@ -897,20 +918,7 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); - /* - * Complain if .text .data and .bss are not marked as E820_RAM and - * attempt to fix it by adding the range. We may have a confused BIOS, - * or the user may have incorrectly supplied it via memmap=exactmap. If - * we really are running on top non-RAM, we will crash later anyways. - */ - if (!e820_all_mapped(code_resource.start, __pa(__brk_limit), E820_RAM)) { - pr_warn(".text .data .bss are not marked as E820_RAM!\n"); - - e820_add_region(code_resource.start, - __pa(__brk_limit) - code_resource.start + 1, - E820_RAM); - } - + e820_add_kernel_range(); trim_bios_range(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) { -- cgit v1.1 From c2bdee594ebcf4a531afe795baf18da509438392 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:46 -0800 Subject: x86, 64bit, mm: Make pgd next calculation consistent with pud/pmd Just like the way we calculate next for pud and pmd, aka round down and add size. Also, do not do boundary-checking with 'next', and just pass 'end' down to phys_pud_init() instead. Because the loop in phys_pud_init() stops at PTRS_PER_PUD and thus can handle a possibly bigger 'end' properly. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-6-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/init_64.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 191ab12..d7af907 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -530,9 +530,7 @@ kernel_physical_mapping_init(unsigned long start, pgd_t *pgd = pgd_offset_k(start); pud_t *pud; - next = (start + PGDIR_SIZE) & PGDIR_MASK; - if (next > end) - next = end; + next = (start & PGDIR_MASK) + PGDIR_SIZE; if (pgd_val(*pgd)) { pud = (pud_t *)pgd_page_vaddr(*pgd); @@ -542,7 +540,7 @@ kernel_physical_mapping_init(unsigned long start, } pud = alloc_low_page(); - last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), + last_map_addr = phys_pud_init(pud, __pa(start), __pa(end), page_size_mask); spin_lock(&init_mm.page_table_lock); -- cgit v1.1 From 231b3642a3c73fb9f1221dcb96fe8c0fbb658dfd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:47 -0800 Subject: x86, realmode: Set real_mode permissions early Trampoline code is executed by APs with the kernel low mapping on 64-bit, so we need to mark the trampoline code EXEC early, before we boot the APs. The problem was found after switching to the #PF handler for setting up page tables, since we no longer set the initial kernel low mapping with EXEC in arch/x86/kernel/head_64.S. Change to early_initcall instead, which makes sure the trampoline has EXEC set in time. -v2: Merge two comments according to Borislav Petkov Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-7-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/realmode/init.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index cbca565..c44ea7c 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -84,10 +84,12 @@ void __init setup_real_mode(void) } /* - * set_real_mode_permissions() gets called very early, to guarantee the - * availability of low memory. This is before the proper kernel page + * setup_real_mode() gets called very early, to guarantee the + * availability of low memory. This is before the proper kernel page * tables are set up, so we cannot set page permissions in that - * function. Thus, we use an arch_initcall instead. + * function. Also trampoline code will be executed by APs so we + * need to mark it executable at do_pre_smp_initcalls() at least, + * thus run it as a early_initcall(). */ static int __init set_real_mode_permissions(void) { @@ -111,5 +113,4 @@ static int __init set_real_mode_permissions(void) return 0; } - -arch_initcall(set_real_mode_permissions); +early_initcall(set_real_mode_permissions); -- cgit v1.1 From aece27851d44bde62fc0587e06f5e8e27fd96e5f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:48 -0800 Subject: x86, 64bit, mm: Add generic kernel/ident mapping helper It is a simple version of kernel_physical_mapping_init: it builds one page table that will be used later. Use mapping_info to control: 1. the alloc_pgt_page method, 2. whether the PMD is EXEC, 3. whether the pgd carries the kernel low mapping or an ident mapping. This will be used to replace some local versions in kexec, hibernation, etc. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-8-git-send-email-yinghai@kernel.org Signed-off-by: H.
Peter Anvin --- arch/x86/include/asm/init.h | 9 ++++++ arch/x86/mm/init_64.c | 74 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h index bac770b..2230420 100644 --- a/arch/x86/include/asm/init.h +++ b/arch/x86/include/asm/init.h @@ -1,5 +1,14 @@ #ifndef _ASM_X86_INIT_H #define _ASM_X86_INIT_H +struct x86_mapping_info { + void *(*alloc_pgt_page)(void *); /* allocate buf for page table */ + void *context; /* context for alloc_pgt_page */ + unsigned long pmd_flag; /* page flag for PMD entry */ + bool kernel_mapping; /* kernel mapping or ident mapping */ +}; + +int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, + unsigned long addr, unsigned long end); #endif /* _ASM_X86_INIT_H */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d7af907..9fbb85c 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -56,6 +56,80 @@ #include "mm_internal.h" +static void ident_pmd_init(unsigned long pmd_flag, pmd_t *pmd_page, + unsigned long addr, unsigned long end) +{ + addr &= PMD_MASK; + for (; addr < end; addr += PMD_SIZE) { + pmd_t *pmd = pmd_page + pmd_index(addr); + + if (!pmd_present(*pmd)) + set_pmd(pmd, __pmd(addr | pmd_flag)); + } +} +static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, + unsigned long addr, unsigned long end) +{ + unsigned long next; + + for (; addr < end; addr = next) { + pud_t *pud = pud_page + pud_index(addr); + pmd_t *pmd; + + next = (addr & PUD_MASK) + PUD_SIZE; + if (next > end) + next = end; + + if (pud_present(*pud)) { + pmd = pmd_offset(pud, 0); + ident_pmd_init(info->pmd_flag, pmd, addr, next); + continue; + } + pmd = (pmd_t *)info->alloc_pgt_page(info->context); + if (!pmd) + return -ENOMEM; + ident_pmd_init(info->pmd_flag, pmd, addr, next); + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + + return 0; +} + +int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, + unsigned long addr, unsigned long end) +{ + unsigned long next; + int result; + int off = info->kernel_mapping ? pgd_index(__PAGE_OFFSET) : 0; + + for (; addr < end; addr = next) { + pgd_t *pgd = pgd_page + pgd_index(addr) + off; + pud_t *pud; + + next = (addr & PGDIR_MASK) + PGDIR_SIZE; + if (next > end) + next = end; + + if (pgd_present(*pgd)) { + pud = pud_offset(pgd, 0); + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + continue; + } + + pud = (pud_t *)info->alloc_pgt_page(info->context); + if (!pud) + return -ENOMEM; + result = ident_pud_init(info, pud, addr, next); + if (result) + return result; + set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); + } + + return 0; +} + static int __init parse_direct_gbpages_off(char *arg) { direct_gbpages = 0; -- cgit v1.1 From fa2bbce985ca97943305cdc81d9626e6810ed7f2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:49 -0800 Subject: x86, 64bit: Copy struct boot_params early We want to support struct boot_params (formerly known as the zero-page, or real-mode data) above the 4 GiB mark. We will have a #PF handler to set up page tables for not-yet-accessible RAM early, but we want to confine it to the code before x86_64_start_reservations, to limit the change to the native path only. Also, we will need the ramdisk info in struct boot_params to access the microcode blob in the ramdisk from x86_64_start_kernel, so copying struct boot_params early keeps the ramdisk info easy to access.
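For context, here is a sketch of how a consumer might drive the kernel_ident_mapping_init() helper from the previous patch; the static pool, the bump allocator and both function names are hypothetical, not kernel code. The helper calls back into alloc_pgt_page() whenever it needs a fresh page-table page (which must come back zeroed, since entries are only written when not already present) and returns -ENOMEM if the callback yields NULL:

static char pgt_pool[16 * PAGE_SIZE] __aligned(PAGE_SIZE);
static unsigned long pgt_used;

static void *example_alloc_pgt_page(void *context)
{
	void *p;

	if (pgt_used + PAGE_SIZE > sizeof(pgt_pool))
		return NULL;			/* makes the helper return -ENOMEM */
	p = pgt_pool + pgt_used;
	pgt_used += PAGE_SIZE;
	memset(p, 0, PAGE_SIZE);		/* helper expects zeroed pages */
	return p;
}

static int example_build_ident_map(pgd_t *pgd, unsigned long start,
				   unsigned long end)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page	= example_alloc_pgt_page,
		.context	= NULL,
		.pmd_flag	= __PAGE_KERNEL_LARGE_EXEC,	/* 2M executable pages */
		.kernel_mapping	= false,	/* identity map, no __PAGE_OFFSET shift */
	};

	return kernel_ident_mapping_init(&info, pgd, start, end);
}

This is roughly the shape the kexec and hibernation conversions mentioned in the changelog would take, with the pool replaced by whatever page source those callers already have.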
Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-9-git-send-email-yinghai@kernel.org Cc: Alexander Duyck Cc: Fenghua Yu Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head64.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 849fc9e..7785e668 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -89,6 +89,8 @@ void __init x86_64_start_kernel(char * real_mode_data) } load_idt((const struct desc_ptr *)&idt_descr); + copy_bootdata(__va(real_mode_data)); + if (console_loglevel == 10) early_printk("Kernel alive\n"); @@ -97,7 +99,9 @@ void __init x86_64_start_kernel(char * real_mode_data) void __init x86_64_start_reservations(char *real_mode_data) { - copy_bootdata(__va(real_mode_data)); + /* version is always not zero if it is copied */ + if (!boot_params.hdr.version) + copy_bootdata(__va(real_mode_data)); memblock_reserve(__pa_symbol(&_text), __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); -- cgit v1.1 From 9735e91e9c29c0d8fe432aef1152e43e50bdb316 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:50 -0800 Subject: x86, 64bit, realmode: Use init_level4_pgt to set trampoline_pgd directly with #PF handler way to set early page table, level3_ident will go away with 64bit native path. So just use entries in init_level4_pgt to set them in trampoline_pgd. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-10-git-send-email-yinghai@kernel.org Cc: Jarkko Sakkinen Acked-by: Jarkko Sakkinen Signed-off-by: H. Peter Anvin --- arch/x86/realmode/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index c44ea7c..ffee06a 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -78,8 +78,8 @@ void __init setup_real_mode(void) *trampoline_cr4_features = read_cr4(); trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); - trampoline_pgd[0] = __pa(level3_ident_pgt) + _KERNPG_TABLE; - trampoline_pgd[511] = __pa(level3_kernel_pgt) + _KERNPG_TABLE; + trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; + trampoline_pgd[511] = init_level4_pgt[511].pgd; #endif } -- cgit v1.1 From 4f7b92263ad68cdc72b11808320d9c881bfa857e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:51 -0800 Subject: x86, realmode: Separate real_mode reserve and setup After we switch to use #PF handler help to set page table, init_level4_pgt will only have entries set after init_mem_mapping(). We need to move copying init_level4_pgt to trampoline_pgd after that. So split reserve and setup, and move the setup after init_mem_mapping() Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-11-git-send-email-yinghai@kernel.org Cc: Jarkko Sakkinen Acked-by: Jarkko Sakkinen Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/realmode.h | 3 ++- arch/x86/kernel/setup.c | 4 +++- arch/x86/realmode/init.c | 32 ++++++++++++++++++++------------ 3 files changed, 25 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index fe1ec5b..9c6b890 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -58,6 +58,7 @@ extern unsigned char boot_gdt[]; extern unsigned char secondary_startup_64[]; #endif -extern void __init setup_real_mode(void); +void reserve_real_mode(void); +void setup_real_mode(void); #endif /* _ARCH_X86_REALMODE_H */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5552d04..85a8290 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -999,12 +999,14 @@ void __init setup_arch(char **cmdline_p) printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", (max_pfn_mapped<<PAGE_SHIFT) - 1); -- cgit v1.1 Date: Thu, 24 Jan 2013 12:19:52 -0800 Subject: x86, 64bit: Use a #PF handler to materialize early mappings on demand Linear mode (CR0.PG = 0) is mutually exclusive with 64-bit mode; all 64-bit code has to use page tables. This makes it awkward, before we have first set up properly all-covering page tables, to access objects that are outside the static kernel range. So far we have dealt with that simply by mapping a fixed amount of low memory, but that fails in at least two upcoming use cases: 1. We will support loading and running the kernel, struct boot_params, ramdisk, command line, etc. above the 4 GiB mark. 2. We need to access the ramdisk early, to get at the microcode and apply that update as early as possible. We could use early_iomap to access them too, but that would make the code messy and hard to unify with 32 bit. Hence, set up a #PF handler and use a fixed number of buffers to set up page tables on demand. If the buffers fill up then we simply flush them and start over. These buffers are all in __initdata, so it does not increase RAM usage at runtime. Thus, with the help of the #PF handler, we can set up the final kernel mapping from blank, and switch to init_level4_pgt later. During the switchover in head_64.S, before the #PF handler is available, we use three pages to handle the kernel crossing the 1G and 512G boundaries with a shared page, by playing games with page aliasing: the same page is mapped twice in the higher-level tables with appropriate wraparound. The kernel region itself will be properly mapped; other mappings may be spurious. early_make_pgtable uses the kernel high mapping address to access the pages it sets page table entries in. -v4: Add phys_base offset to make kexec happy, and add init_mapping_kernel() - Yinghai -v5: fix compiling with xen, and add back ident level3 and level2 for xen; also move init_level4_pgt back from BSS to DATA again, because we have to clear it anyway. - Yinghai -v6: switch to init_level4_pgt in init_mem_mapping. - Yinghai -v7: remove the unneeded clear_page for init_level4_pgt; it is already filled with 512,8,0 in head_64.S - Yinghai -v8: we need to keep that handler alive until init_mem_mapping, and must not let early_trap_init trash the early #PF handler. So split early_trap_pf_init out and move it down. - Yinghai -v9: make the switchover cover only kernel space instead of 1G, so it avoids touching possible memory holes. - Yinghai -v11: change the far jmp back to a far return to initial_code; that is needed to fix the failure reported by Konrad on AMD systems.
- Yinghai Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-12-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable_64_types.h | 4 + arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/head64.c | 81 ++++++++++-- arch/x86/kernel/head_64.S | 210 +++++++++++++++++++------------- arch/x86/kernel/setup.c | 2 + arch/x86/kernel/traps.c | 9 ++ arch/x86/mm/init.c | 3 +- 7 files changed, 219 insertions(+), 91 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 766ea16..2d88344 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_PGTABLE_64_DEFS_H #define _ASM_X86_PGTABLE_64_DEFS_H +#include + #ifndef __ASSEMBLY__ #include @@ -60,4 +62,6 @@ typedef struct { pteval_t pte; } pte_t; #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) +#define EARLY_DYNAMIC_PAGE_TABLES 64 + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 888184b..bdee8bd 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -731,6 +731,7 @@ extern void enable_sep_cpu(void); extern int sysenter_setup(void); extern void early_trap_init(void); +void early_trap_pf_init(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7785e668..f57df05 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -27,11 +27,73 @@ #include #include -static void __init zap_identity_mappings(void) +/* + * Manage page tables very early on. + */ +extern pgd_t early_level4_pgt[PTRS_PER_PGD]; +extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD]; +static unsigned int __initdata next_early_pgt = 2; + +/* Wipe all early page tables except for the kernel symbol map */ +static void __init reset_early_page_tables(void) { - pgd_t *pgd = pgd_offset_k(0UL); - pgd_clear(pgd); - __flush_tlb_all(); + unsigned long i; + + for (i = 0; i < PTRS_PER_PGD-1; i++) + early_level4_pgt[i].pgd = 0; + + next_early_pgt = 0; + + write_cr3(__pa(early_level4_pgt)); +} + +/* Create a new PMD entry */ +int __init early_make_pgtable(unsigned long address) +{ + unsigned long physaddr = address - __PAGE_OFFSET; + unsigned long i; + pgdval_t pgd, *pgd_p; + pudval_t *pud_p; + pmdval_t pmd, *pmd_p; + + /* Invalid address or early pgt is done ? */ + if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt)) + return -1; + + i = (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1); + pgd_p = &early_level4_pgt[i].pgd; + pgd = *pgd_p; + + /* + * The use of __START_KERNEL_map rather than __PAGE_OFFSET here is + * critical -- __PAGE_OFFSET would point us back into the dynamic + * range and we might end up looping forever... 
+ */ + if (pgd && next_early_pgt < EARLY_DYNAMIC_PAGE_TABLES) { + pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); + } else { + if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES-1) + reset_early_page_tables(); + + pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; + for (i = 0; i < PTRS_PER_PUD; i++) + pud_p[i] = 0; + + *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; + } + i = (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); + pud_p += i; + + pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; + pmd = (physaddr & PUD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL); + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd_p[i] = pmd; + pmd += PMD_SIZE; + } + + *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; + + return 0; } /* Don't add a printk in there. printk relies on the PDA which is not initialized @@ -72,12 +134,13 @@ void __init x86_64_start_kernel(char * real_mode_data) (__START_KERNEL & PGDIR_MASK))); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); + /* Kill off the identity-map trampoline */ + reset_early_page_tables(); + /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); - /* Make NULL pointers segfault */ - zap_identity_mappings(); - + /* XXX - this is wrong... we need to build page tables from scratch */ max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { @@ -94,6 +157,10 @@ void __init x86_64_start_kernel(char * real_mode_data) if (console_loglevel == 10) early_printk("Kernel alive\n"); + clear_page(init_level4_pgt); + /* set init_level4_pgt kernel high mapping*/ + init_level4_pgt[511] = early_level4_pgt[511]; + x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 980053c..d94f6d6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -47,14 +47,13 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) .code64 .globl startup_64 startup_64: - /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, * and someone has loaded an identity mapped page table * for us. These identity mapped page tables map all of the * kernel pages and possibly all of memory. * - * %esi holds a physical pointer to real_mode_data. + * %rsi holds a physical pointer to real_mode_data. * * We come here either directly from a 64bit bootloader, or from * arch/x86_64/boot/compressed/head.S. @@ -66,7 +65,8 @@ startup_64: * tables and then reload them. */ - /* Compute the delta between the address I am compiled to run at and the + /* + * Compute the delta between the address I am compiled to run at and the * address I am actually running at. */ leaq _text(%rip), %rbp @@ -78,45 +78,62 @@ startup_64: testl %eax, %eax jnz bad_address - /* Is the address too large? */ - leaq _text(%rip), %rdx - movq $PGDIR_SIZE, %rax - cmpq %rax, %rdx - jae bad_address - - /* Fixup the physical addresses in the page table + /* + * Is the address too large? 
*/ - addq %rbp, init_level4_pgt + 0(%rip) - addq %rbp, init_level4_pgt + (L4_PAGE_OFFSET*8)(%rip) - addq %rbp, init_level4_pgt + (L4_START_KERNEL*8)(%rip) + leaq _text(%rip), %rax + shrq $MAX_PHYSMEM_BITS, %rax + jnz bad_address - addq %rbp, level3_ident_pgt + 0(%rip) + /* + * Fixup the physical addresses in the page table + */ + addq %rbp, early_level4_pgt + (L4_START_KERNEL*8)(%rip) addq %rbp, level3_kernel_pgt + (510*8)(%rip) addq %rbp, level3_kernel_pgt + (511*8)(%rip) addq %rbp, level2_fixmap_pgt + (506*8)(%rip) - /* Add an Identity mapping if I am above 1G */ + /* + * Set up the identity mapping for the switchover. These + * entries should *NOT* have the global bit set! This also + * creates a bunch of nonsense entries but that is fine -- + * it avoids problems around wraparound. + */ leaq _text(%rip), %rdi - andq $PMD_PAGE_MASK, %rdi + leaq early_level4_pgt(%rip), %rbx movq %rdi, %rax - shrq $PUD_SHIFT, %rax - andq $(PTRS_PER_PUD - 1), %rax - jz ident_complete + shrq $PGDIR_SHIFT, %rax - leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx - leaq level3_ident_pgt(%rip), %rbx - movq %rdx, 0(%rbx, %rax, 8) + leaq (4096 + _KERNPG_TABLE)(%rbx), %rdx + movq %rdx, 0(%rbx,%rax,8) + movq %rdx, 8(%rbx,%rax,8) + addq $4096, %rdx movq %rdi, %rax - shrq $PMD_SHIFT, %rax - andq $(PTRS_PER_PMD - 1), %rax - leaq __PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx - leaq level2_spare_pgt(%rip), %rbx - movq %rdx, 0(%rbx, %rax, 8) -ident_complete: + shrq $PUD_SHIFT, %rax + andl $(PTRS_PER_PUD-1), %eax + movq %rdx, (4096+0)(%rbx,%rax,8) + movq %rdx, (4096+8)(%rbx,%rax,8) + + addq $8192, %rbx + movq %rdi, %rax + shrq $PMD_SHIFT, %rdi + addq $(__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL), %rax + leaq (_end - 1)(%rip), %rcx + shrq $PMD_SHIFT, %rcx + subq %rdi, %rcx + incl %ecx + +1: + andq $(PTRS_PER_PMD - 1), %rdi + movq %rax, (%rbx,%rdi,8) + incq %rdi + addq $PMD_SIZE, %rax + decl %ecx + jnz 1b /* * Fixup the kernel text+data virtual addresses. Note that @@ -124,7 +141,6 @@ ident_complete: * cleanup_highmap() fixes this up along with the mappings * beyond _end. */ - leaq level2_kernel_pgt(%rip), %rdi leaq 4096(%rdi), %r8 /* See if it is a valid page table entry */ @@ -139,17 +155,14 @@ ident_complete: /* Fixup phys_base */ addq %rbp, phys_base(%rip) - /* Due to ENTRY(), sometimes the empty space gets filled with - * zeros. Better take a jmp than relying on empty space being - * filled with 0x90 (nop) - */ - jmp secondary_startup_64 + movq $(early_level4_pgt - __START_KERNEL_map), %rax + jmp 1f ENTRY(secondary_startup_64) /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, * and someone has loaded a mapped page table. * - * %esi holds a physical pointer to real_mode_data. + * %rsi holds a physical pointer to real_mode_data. * * We come here either from startup_64 (using physical addresses) * or from trampoline.S (using virtual addresses). @@ -159,12 +172,14 @@ ENTRY(secondary_startup_64) * after the boot processor executes this code. */ + movq $(init_level4_pgt - __START_KERNEL_map), %rax +1: + /* Enable PAE mode and PGE */ - movl $(X86_CR4_PAE | X86_CR4_PGE), %eax - movq %rax, %cr4 + movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx + movq %rcx, %cr4 /* Setup early boot stage 4 level pagetables. 
*/ - movq $(init_level4_pgt - __START_KERNEL_map), %rax addq phys_base(%rip), %rax movq %rax, %cr3 @@ -196,7 +211,7 @@ ENTRY(secondary_startup_64) movq %rax, %cr0 /* Setup a boot time stack */ - movq stack_start(%rip),%rsp + movq stack_start(%rip), %rsp /* zero EFLAGS after setting rsp */ pushq $0 @@ -236,15 +251,33 @@ ENTRY(secondary_startup_64) movl initial_gs+4(%rip),%edx wrmsr - /* esi is pointer to real mode structure with interesting info. + /* rsi is pointer to real mode structure with interesting info. pass it to C */ - movl %esi, %edi + movq %rsi, %rdi /* Finally jump to run C code and to be on real kernel address * Since we are running on identity-mapped space we have to jump * to the full 64bit address, this is only possible as indirect * jump. In addition we need to ensure %cs is set so we make this * a far return. + * + * Note: do not change to far jump indirect with 64bit offset. + * + * AMD does not support far jump indirect with 64bit offset. + * AMD64 Architecture Programmer's Manual, Volume 3: states only + * JMP FAR mem16:16 FF /5 Far jump indirect, + * with the target specified by a far pointer in memory. + * JMP FAR mem16:32 FF /5 Far jump indirect, + * with the target specified by a far pointer in memory. + * + * Intel64 does support 64bit offset. + * Software Developer Manual Vol 2: states: + * FF /5 JMP m16:16 Jump far, absolute indirect, + * address given in m16:16 + * FF /5 JMP m16:32 Jump far, absolute indirect, + * address given in m16:32. + * REX.W + FF /5 JMP m16:64 Jump far, absolute indirect, + * address given in m16:64. */ movq initial_code(%rip),%rax pushq $0 # fake return address to stop unwinder @@ -270,13 +303,13 @@ ENDPROC(start_cpu0) /* SMP bootup changes these two */ __REFDATA - .align 8 - ENTRY(initial_code) + .balign 8 + GLOBAL(initial_code) .quad x86_64_start_kernel - ENTRY(initial_gs) + GLOBAL(initial_gs) .quad INIT_PER_CPU_VAR(irq_stack_union) - ENTRY(stack_start) + GLOBAL(stack_start) .quad init_thread_union+THREAD_SIZE-8 .word 0 __FINITDATA @@ -284,7 +317,7 @@ ENDPROC(start_cpu0) bad_address: jmp bad_address - .section ".init.text","ax" + __INIT .globl early_idt_handlers early_idt_handlers: # 104(%rsp) %rflags @@ -321,14 +354,22 @@ ENTRY(early_idt_handler) pushq %r11 # 0(%rsp) cmpl $__KERNEL_CS,96(%rsp) - jne 10f + jne 11f + + cmpl $14,72(%rsp) # Page fault? 
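+	# (Trap number 14 is #PF: the code below hands the faulting address
+	# from CR2 to early_make_pgtable(); a zero return means the missing
+	# mapping was built and we resume at 20:. A non-#PF trap, or a
+	# failed call, falls through to the regular early fixup path at 10:.)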
+ jnz 10f + GET_CR2_INTO(%rdi) # can clobber any volatile register if pv + call early_make_pgtable + andl %eax,%eax + jz 20f # All good +10: leaq 88(%rsp),%rdi # Pointer to %rip call early_fixup_exception andl %eax,%eax jnz 20f # Found an exception entry -10: +11: #ifdef CONFIG_EARLY_PRINTK GET_CR2_INTO(%r9) # can clobber any volatile register if pv movl 80(%rsp),%r8d # error code @@ -350,7 +391,7 @@ ENTRY(early_idt_handler) 1: hlt jmp 1b -20: # Exception table entry found +20: # Exception table entry found or page table generated popq %r11 popq %r10 popq %r9 @@ -364,6 +405,8 @@ ENTRY(early_idt_handler) decl early_recursion_flag(%rip) INTERRUPT_RETURN + __INITDATA + .balign 4 early_recursion_flag: .long 0 @@ -374,11 +417,10 @@ early_idt_msg: early_idt_ripmsg: .asciz "RIP %s\n" #endif /* CONFIG_EARLY_PRINTK */ - .previous #define NEXT_PAGE(name) \ .balign PAGE_SIZE; \ -ENTRY(name) +GLOBAL(name) /* Automate the creation of 1 to 1 mapping pmd entries */ #define PMDS(START, PERM, COUNT) \ @@ -388,24 +430,37 @@ ENTRY(name) i = i + 1 ; \ .endr + __INITDATA +NEXT_PAGE(early_level4_pgt) + .fill 511,8,0 + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + +NEXT_PAGE(early_dynamic_pgts) + .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0 + .data - /* - * This default setting generates an ident mapping at address 0x100000 - * and a mapping for the kernel that precisely maps virtual address - * 0xffffffff80000000 to physical address 0x000000. (always using - * 2Mbyte large pages provided by PAE mode) - */ + +#ifndef CONFIG_XEN NEXT_PAGE(init_level4_pgt) - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 - .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .org init_level4_pgt + L4_START_KERNEL*8, 0 + .fill 512,8,0 +#else +NEXT_PAGE(init_level4_pgt) + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_PAGE_OFFSET*8, 0 + .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE + .org init_level4_pgt + L4_START_KERNEL*8, 0 /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ - .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE + .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE NEXT_PAGE(level3_ident_pgt) .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE - .fill 511,8,0 + .fill 511, 8, 0 +NEXT_PAGE(level2_ident_pgt) + /* Since I easily can, map the first 1G. + * Don't set NX because code runs from these pages. + */ + PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +#endif NEXT_PAGE(level3_kernel_pgt) .fill L3_START_KERNEL,8,0 @@ -413,21 +468,6 @@ NEXT_PAGE(level3_kernel_pgt) .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE -NEXT_PAGE(level2_fixmap_pgt) - .fill 506,8,0 - .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE - /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ - .fill 5,8,0 - -NEXT_PAGE(level1_fixmap_pgt) - .fill 512,8,0 - -NEXT_PAGE(level2_ident_pgt) - /* Since I easily can, map the first 1G. - * Don't set NX because code runs from these pages. - */ - PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) - NEXT_PAGE(level2_kernel_pgt) /* * 512 MB kernel mapping. 
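 * (At 2 MB per PMD entry that is KERNEL_IMAGE_SIZE/PMD_SIZE entries --
 * 256 for the default 512 MB image size.)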
We spend a full page on this pagetable @@ -442,11 +482,16 @@ NEXT_PAGE(level2_kernel_pgt) PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE) -NEXT_PAGE(level2_spare_pgt) - .fill 512, 8, 0 +NEXT_PAGE(level2_fixmap_pgt) + .fill 506,8,0 + .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE + /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */ + .fill 5,8,0 + +NEXT_PAGE(level1_fixmap_pgt) + .fill 512,8,0 #undef PMDS -#undef NEXT_PAGE .data .align 16 @@ -472,6 +517,5 @@ ENTRY(nmi_idt_table) .skip IDT_ENTRIES * 16 __PAGE_ALIGNED_BSS - .align PAGE_SIZE -ENTRY(empty_zero_page) +NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 85a8290..db9c41d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1005,6 +1005,8 @@ void __init setup_arch(char **cmdline_p) init_mem_mapping(); + early_trap_pf_init(); + setup_real_mode(); memblock.current_limit = get_max_mapped(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index ecffca1..68bda7a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -688,10 +688,19 @@ void __init early_trap_init(void) set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK); /* int3 can be called from all */ set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK); +#ifdef CONFIG_X86_32 set_intr_gate(X86_TRAP_PF, &page_fault); +#endif load_idt(&idt_descr); } +void __init early_trap_pf_init(void) +{ +#ifdef CONFIG_X86_64 + set_intr_gate(X86_TRAP_PF, &page_fault); +#endif +} + void __init trap_init(void) { int i; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 78d1ef3..3364a76 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -446,9 +446,10 @@ void __init init_mem_mapping(void) } #else early_ioremap_page_table_range_init(); +#endif + load_cr3(swapper_pg_dir); __flush_tlb_all(); -#endif early_memtest(0, max_pfn_mapped << PAGE_SHIFT); } -- cgit v1.1 From 6b9c75aca6cba4d99a6e8d8274b1788d4d4b50d9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:53 -0800 Subject: x86, 64bit: #PF handler set page to cover only 2M per #PF We only map a single 2 MiB page per #PF, even though we should be able to do this a full gigabyte at a time with no additional memory cost. This is a workaround for a broken AMD reference BIOS (and its derivatives in shipping system) which maps a large chunk of memory as WB in the MTRR system but will #MC if the processor wanders off and tries to prefetch that memory, which can happen any time the memory is mapped in the TLB. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-13-git-send-email-yinghai@kernel.org Cc: Alexander Duyck [ hpa: rewrote the patch description ] Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head64.c | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index f57df05..816fc85 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -53,15 +53,15 @@ int __init early_make_pgtable(unsigned long address) unsigned long physaddr = address - __PAGE_OFFSET; unsigned long i; pgdval_t pgd, *pgd_p; - pudval_t *pud_p; + pudval_t pud, *pud_p; pmdval_t pmd, *pmd_p; /* Invalid address or early pgt is done ? 
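 * (Once init_mem_mapping() has called load_cr3(swapper_pg_dir),
 * read_cr3() no longer matches early_level4_pgt and the early #PF
 * handler must refuse to grow the early page tables.)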
*/ if (physaddr >= MAXMEM || read_cr3() != __pa(early_level4_pgt)) return -1; - i = (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1); - pgd_p = &early_level4_pgt[i].pgd; +again: + pgd_p = &early_level4_pgt[pgd_index(address)].pgd; pgd = *pgd_p; /* @@ -69,29 +69,37 @@ int __init early_make_pgtable(unsigned long address) * critical -- __PAGE_OFFSET would point us back into the dynamic * range and we might end up looping forever... */ - if (pgd && next_early_pgt < EARLY_DYNAMIC_PAGE_TABLES) { + if (pgd) pud_p = (pudval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); - } else { - if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES-1) + else { + if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) { reset_early_page_tables(); + goto again; + } pud_p = (pudval_t *)early_dynamic_pgts[next_early_pgt++]; for (i = 0; i < PTRS_PER_PUD; i++) pud_p[i] = 0; - *pgd_p = (pgdval_t)pud_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; } - i = (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); - pud_p += i; - - pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; - pmd = (physaddr & PUD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL); - for (i = 0; i < PTRS_PER_PMD; i++) { - pmd_p[i] = pmd; - pmd += PMD_SIZE; - } + pud_p += pud_index(address); + pud = *pud_p; - *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; + if (pud) + pmd_p = (pmdval_t *)((pud & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); + else { + if (next_early_pgt >= EARLY_DYNAMIC_PAGE_TABLES) { + reset_early_page_tables(); + goto again; + } + + pmd_p = (pmdval_t *)early_dynamic_pgts[next_early_pgt++]; + for (i = 0; i < PTRS_PER_PMD; i++) + pmd_p[i] = 0; + *pud_p = (pudval_t)pmd_p - __START_KERNEL_map + phys_base + _KERNPG_TABLE; + } + pmd = (physaddr & PMD_MASK) + (__PAGE_KERNEL_LARGE & ~_PAGE_GLOBAL); + pmd_p[pmd_index(address)] = pmd; return 0; } -- cgit v1.1 From 100542306f644fc580857a8ca4896fb12b794d41 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:54 -0800 Subject: x86, 64bit: Don't set max_pfn_mapped wrong value early on native path We are not having max_pfn_mapped set correctly until init_memory_mapping. So don't print its initial value for 64bit Also need to use KERNEL_IMAGE_SIZE directly for highmap cleanup. -v2: update comments about max_pfn_mapped according to Stefano Stabellini. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-14-git-send-email-yinghai@kernel.org Acked-by: Borislav Petkov Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head64.c | 3 --- arch/x86/kernel/setup.c | 2 ++ arch/x86/mm/init_64.c | 10 +++++++++- 3 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 816fc85..f3b1968 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -148,9 +148,6 @@ void __init x86_64_start_kernel(char * real_mode_data) /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); - /* XXX - this is wrong... 
we need to build page tables from scratch */ - max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; - for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { #ifdef CONFIG_EARLY_PRINTK set_intr_gate(i, &early_idt_handlers[i]); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index db9c41d..d58083a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -996,8 +996,10 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif +#ifdef CONFIG_X86_32 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", (max_pfn_mapped< Date: Thu, 24 Jan 2013 12:19:55 -0800 Subject: x86: Merge early_reserve_initrd for 32bit and 64bit They are the same, so they can be moved out of head32/64.c into setup.c. We are using memblock, which handles overlapping ranges properly, so we no longer need an early placeholder reservation; we only need to make sure the initrd is reserved before memblock is asked to find free memory. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-15-git-send-email-yinghai@kernel.org Reviewed-by: Pekka Enberg Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head32.c | 11 ----------- arch/x86/kernel/head64.c | 11 ----------- arch/x86/kernel/setup.c | 22 ++++++++++++++++++---- 3 files changed, 18 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 6773c91..a795b54 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -36,17 +36,6 @@ void __init i386_start_kernel(void) memblock_reserve(__pa_symbol(&_text), __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); -#ifdef CONFIG_BLK_DEV_INITRD - /* Reserve INITRD */ - if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { - /* Assume only end is not page aligned */ - u64 ramdisk_image = boot_params.hdr.ramdisk_image; - u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); - } -#endif - /* Call the subarch specific early setup function */ switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_MRST: diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index f3b1968..b88a1fa 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -178,17 +178,6 @@ void __init x86_64_start_reservations(char *real_mode_data) memblock_reserve(__pa_symbol(&_text), __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); -#ifdef CONFIG_BLK_DEV_INITRD - /* Reserve INITRD */ - if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { - /* Assume only end is not page aligned */ - unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; - unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; - unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); - } -#endif - reserve_ebda_region(); /* diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d58083a..8e35692 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -360,6 +360,19 @@ static u64 __init get_mem_size(unsigned long limit_pfn) return mapped_pages << PAGE_SHIFT; } +static void __init early_reserve_initrd(void) +{ + /* Assume only end is not page aligned */ + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); + + if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size) + return; /* No initrd provided by bootloader */ + + memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); +} static void __init reserve_initrd(void) { /* Assume only end is not page aligned */ @@ -386,10 +399,6 @@ static void __init reserve_initrd(void) if (pfn_range_is_mapped(PFN_DOWN(ramdisk_image), PFN_DOWN(ramdisk_end))) { /* All are mapped, easy case */ - /* - * don't need to reserve again, already reserved early - * in i386_start_kernel - */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; return; @@ -400,6 +409,9 @@ static void __init reserve_initrd(void) memblock_free(ramdisk_image, ramdisk_end - ramdisk_image); } #else +static void __init early_reserve_initrd(void) +{ +} static void __init reserve_initrd(void) { } @@ -760,6 +772,8 @@ early_param("reservelow", parse_reservelow); void __init setup_arch(char **cmdline_p) { + early_reserve_initrd(); + #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); visws_early_detect(); -- cgit v1.1 From a8a51a88d5152aa40e5e07dcdd939c7fafc42224 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:56 -0800 Subject: x86: Add get_ramdisk_image/size() There are several places to find ramdisk information early for reserving and relocating. Use accessor functions to make code more readable and consistent. Later will add ext_ramdisk_image/size in those functions to support loading ramdisk above 4g. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-16-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8e35692..83b3861 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -294,12 +294,25 @@ static void __init reserve_brk(void) #ifdef CONFIG_BLK_DEV_INITRD +static u64 __init get_ramdisk_image(void) +{ + u64 ramdisk_image = boot_params.hdr.ramdisk_image; + + return ramdisk_image; +} +static u64 __init get_ramdisk_size(void) +{ + u64 ramdisk_size = boot_params.hdr.ramdisk_size; + + return ramdisk_size; +} + #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) static void __init relocate_initrd(void) { /* Assume only end is not page aligned */ - u64 ramdisk_image = boot_params.hdr.ramdisk_image; - u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_image = get_ramdisk_image(); + u64 ramdisk_size = get_ramdisk_size(); u64 area_size = PAGE_ALIGN(ramdisk_size); u64 ramdisk_here; unsigned long slop, clen, mapaddr; @@ -338,8 +351,8 @@ static void __init relocate_initrd(void) ramdisk_size -= clen; } - ramdisk_image = boot_params.hdr.ramdisk_image; - ramdisk_size = boot_params.hdr.ramdisk_size; + ramdisk_image = get_ramdisk_image(); + ramdisk_size = get_ramdisk_size(); printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" " [mem %#010llx-%#010llx]\n", ramdisk_image, ramdisk_image + ramdisk_size - 1, @@ -363,8 +376,8 @@ static u64 __init get_mem_size(unsigned long limit_pfn) static void __init early_reserve_initrd(void) { /* Assume only end is not page aligned */ - u64 ramdisk_image = boot_params.hdr.ramdisk_image; - u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_image = get_ramdisk_image(); + u64 ramdisk_size = get_ramdisk_size(); u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); if (!boot_params.hdr.type_of_loader || @@ -376,8 +389,8 @@ static 
void __init early_reserve_initrd(void) static void __init reserve_initrd(void) { /* Assume only end is not page aligned */ - u64 ramdisk_image = boot_params.hdr.ramdisk_image; - u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 ramdisk_image = get_ramdisk_image(); + u64 ramdisk_size = get_ramdisk_size(); u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); u64 mapped_size; -- cgit v1.1 From f1da834cd902f5e5df0b11a3948fc43c6071b590 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:57 -0800 Subject: x86, boot: Add get_cmd_line_ptr() Add an accessor function for the command line address. Later we will add support for holding a 64-bit address via ext_cmd_line_ptr. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-17-git-send-email-yinghai@kernel.org Cc: Gokul Caushik Cc: Josh Triplett Cc: Joe Millenbach Cc: Alexander Duyck Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/cmdline.c | 10 ++++++++-- arch/x86/kernel/head64.c | 13 +++++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index 10f6b117..b4c913c 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -13,13 +13,19 @@ static inline char rdfs8(addr_t addr) return *((char *)(fs + addr)); } #include "../cmdline.c" +static unsigned long get_cmd_line_ptr(void) +{ + unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr; + + return cmd_line_ptr; +} int cmdline_find_option(const char *option, char *buffer, int bufsize) { - return __cmdline_find_option(real_mode->hdr.cmd_line_ptr, option, buffer, bufsize); + return __cmdline_find_option(get_cmd_line_ptr(), option, buffer, bufsize); } int cmdline_find_option_bool(const char *option) { - return __cmdline_find_option_bool(real_mode->hdr.cmd_line_ptr, option); + return __cmdline_find_option_bool(get_cmd_line_ptr(), option); } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index b88a1fa..62c8ce4 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -112,14 +112,23 @@ static void __init clear_bss(void) (unsigned long) __bss_stop - (unsigned long) __bss_start); } +static unsigned long get_cmd_line_ptr(void) +{ + unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; + + return cmd_line_ptr; +} + static void __init copy_bootdata(char *real_mode_data) { char * command_line; + unsigned long cmd_line_ptr; memcpy(&boot_params, real_mode_data, sizeof boot_params); sanitize_boot_params(&boot_params); - if (boot_params.hdr.cmd_line_ptr) { - command_line = __va(boot_params.hdr.cmd_line_ptr); + cmd_line_ptr = get_cmd_line_ptr(); + if (cmd_line_ptr) { + command_line = __va(cmd_line_ptr); memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } } -- cgit v1.1 From 16a4baa642cf448742aaf150c4daa093f9dbebbb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:58 -0800 Subject: x86, boot: Move checking of cmd_line_ptr out of common path cmdline.c::__cmdline_find_option... are shared between 16-bit setup code and 32/64 bit decompressor code. for 32/64 only path via kexec, we should not check if ptr is less 1M. as those cmdline could be put above 1M, or even 4G. Move out accessible checking out of __cmdline_find_option() So decompressor in misc.c can parse cmdline correctly. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-18-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/boot.h | 14 ++++++++++++-- arch/x86/boot/cmdline.c | 8 ++++---- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 18997e5..7fadf80 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -289,12 +289,22 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option); static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) { - return __cmdline_find_option(boot_params.hdr.cmd_line_ptr, option, buffer, bufsize); + u32 cmd_line_ptr = boot_params.hdr.cmd_line_ptr; + + if (cmd_line_ptr >= 0x100000) + return -1; /* inaccessible */ + + return __cmdline_find_option(cmd_line_ptr, option, buffer, bufsize); } static inline int cmdline_find_option_bool(const char *option) { - return __cmdline_find_option_bool(boot_params.hdr.cmd_line_ptr, option); + u32 cmd_line_ptr = boot_params.hdr.cmd_line_ptr; + + if (cmd_line_ptr >= 0x100000) + return -1; /* inaccessible */ + + return __cmdline_find_option_bool(cmd_line_ptr); } diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 6b3b6f7..768f00f 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -41,8 +41,8 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int st_bufcpy /* Copying this to buffer */ } state = st_wordstart; - if (!cmdline_ptr || cmdline_ptr >= 0x100000) - return -1; /* No command line, or inaccessible */ + if (!cmdline_ptr) + return -1; /* No command line */ cptr = cmdline_ptr & 0xf; set_fs(cmdline_ptr >> 4); @@ -111,8 +111,8 @@ int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option) st_wordskip, /* Miscompare, skip */ } state = st_wordstart; - if (!cmdline_ptr || cmdline_ptr >= 0x100000) - return -1; /* No command line, or inaccessible */ + if (!cmdline_ptr) + return -1; /* No command line */ cptr = cmdline_ptr & 0xf; set_fs(cmdline_ptr >> 4); -- cgit v1.1 From 3db07e70f0b4742f8daeda5c4aa8fbe7aeb3799e Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:19:59 -0800 Subject: x86, boot: Pass cmd_line_ptr with unsigned long instead boot/compressed/misc.c is used for bzImage on both 64bit and 32bit, and cmd_line_ptr could point to a buffer above 4G, so cmd_line_ptr must be 64bit or the high 32 bits get truncated. Change the data type to unsigned long, which is 64bit on a 64bit kernel and therefore holds the full address of the command line buffer. This is still correct for a 32bit bzImage, because unsigned long on a 32bit kernel is still 32bit. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-19-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/boot.h | 8 ++++---- arch/x86/boot/cmdline.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 7fadf80..5b75319 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -285,11 +285,11 @@ struct biosregs { void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); /* cmdline.c */ -int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize); -int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option); +int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize); +int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option); static inline int cmdline_find_option(const char *option, char *buffer, int bufsize) { - u32 cmd_line_ptr = boot_params.hdr.cmd_line_ptr; + unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; if (cmd_line_ptr >= 0x100000) return -1; /* inaccessible */ @@ -299,7 +299,7 @@ static inline int cmdline_find_option(const char *option, char *buffer, int bufs static inline int cmdline_find_option_bool(const char *option) { - u32 cmd_line_ptr = boot_params.hdr.cmd_line_ptr; + unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; if (cmd_line_ptr >= 0x100000) return -1; /* inaccessible */ diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 768f00f..625d21b 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -27,7 +27,7 @@ static inline int myisspace(u8 c) * Returns the length of the argument (regardless of if it was * truncated to fit in the buffer), or -1 on not found. */ -int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int bufsize) +int __cmdline_find_option(unsigned long cmdline_ptr, const char *option, char *buffer, int bufsize) { addr_t cptr; char c; @@ -99,7 +99,7 @@ int __cmdline_find_option(u32 cmdline_ptr, const char *option, char *buffer, int * Returns the position of that option (starts counting with 1) * or 0 on not found */ -int __cmdline_find_option_bool(u32 cmdline_ptr, const char *option) +int __cmdline_find_option_bool(unsigned long cmdline_ptr, const char *option) { addr_t cptr; char c; -- cgit v1.1 From 187a8a73cee295b9407de0d6bfba65471a1f39d6 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:00 -0800 Subject: x86, boot: Move verify_cpu.S and no_longmode down We need to move some code to 32bit section in following patch: x86, boot: Move lldt/ltr out of 64bit code section but that will push startup_64 down from 0x200. According to hpa, we can not change startup_64 position and that is an ABI. We could move function verify_cpu and no_longmode down, because verify_cpu is used via function call and no_longmode will not return, then we don't need to add extra code for jumping back. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-20-git-send-email-yinghai@kernel.org Cc: Matt Fleming Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/compressed/head_64.S | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 2c4b171..fb984c0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -176,14 +176,6 @@ ENTRY(startup_32) lret ENDPROC(startup_32) -no_longmode: - /* This isn't an x86-64 CPU so hang */ -1: - hlt - jmp 1b - -#include "../../kernel/verify_cpu.S" - /* * Be careful here startup_64 needs to be at a predictable * address so I can export it in an ELF header. Bootloaders @@ -349,6 +341,15 @@ relocated: */ jmp *%rbp + .code32 +no_longmode: + /* This isn't an x86-64 CPU so hang */ +1: + hlt + jmp 1b + +#include "../../kernel/verify_cpu.S" + .data gdt: .word gdt_end - gdt -- cgit v1.1 From d3c433bf9a01b6951286ec2cbf52e3549623d878 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:01 -0800 Subject: x86, boot: Move lldt/ltr out of 64bit code section commit 08da5a2ca x86_64: Early segment setup for VT sets up LDT and TR into a valid state in order to speed up boot decompression under VT. Those code are put in code64, and it is using GDT that is only loaded from code32 path. That breaks booting with 64bit bootloader that does not go through code32 path and jump to startup_64 directly, and it has different GDT. Move those lines into code32 after their GDT is loaded. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-21-git-send-email-yinghai@kernel.org Cc: Zachary Amsden Cc: Matt Fleming Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_64.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fb984c0..5c80b94 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -154,6 +154,12 @@ ENTRY(startup_32) btsl $_EFER_LME, %eax wrmsr + /* After gdt is loaded */ + xorl %eax, %eax + lldt %ax + movl $0x20, %eax + ltr %ax + /* * Setup for the jump to 64bit mode * @@ -239,9 +245,6 @@ preferred_addr: movl %eax, %ss movl %eax, %fs movl %eax, %gs - lldt %ax - movl $0x20, %eax - ltr %ax /* * Compute the decompressed kernel start address. It is where -- cgit v1.1 From 577af55d802d9fe114287e750504e09e7c677c9c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:02 -0800 Subject: x86, kexec: Remove 1024G limitation for kexec buffer on 64bit Now 64bit kernel supports more than 1T ram and kexec tools could find buffer above 1T, remove that obsolete limitation. and use MAXMEM instead. Tested on system with more than 1024G ram. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-22-git-send-email-yinghai@kernel.org Cc: Eric W. Biederman Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/kexec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 6080d26..17483a4 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -48,11 +48,11 @@ # define vmcore_elf_check_arch_cross(x) ((x)->e_machine == EM_X86_64) #else /* Maximum physical address we can use pages from */ -# define KEXEC_SOURCE_MEMORY_LIMIT (0xFFFFFFFFFFUL) +# define KEXEC_SOURCE_MEMORY_LIMIT (MAXMEM-1) /* Maximum address we can reach in physical address mode */ -# define KEXEC_DESTINATION_MEMORY_LIMIT (0xFFFFFFFFFFUL) +# define KEXEC_DESTINATION_MEMORY_LIMIT (MAXMEM-1) /* Maximum address we can use for the control pages */ -# define KEXEC_CONTROL_MEMORY_LIMIT (0xFFFFFFFFFFUL) +# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1) /* Allocate one page for the pdp and the second for the code */ # define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL) -- cgit v1.1 From 084d1283986a530828b8898f206adf44d5d3146d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:03 -0800 Subject: x86, kexec: Set ident mapping for kernel that is above max_pfn When first kernel is booted with memmap= or mem= to limit max_pfn. kexec can load second kernel above that max_pfn. We need to set ident mapping for whole image in this case instead of just for first 2M. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-23-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/machine_kexec_64.c | 43 ++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index b3ea9db..be14ee1 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -56,6 +56,25 @@ out: return result; } +static int ident_mapping_init(struct kimage *image, pgd_t *level4p, + unsigned long mstart, unsigned long mend) +{ + int result; + + mstart = round_down(mstart, PMD_SIZE); + mend = round_up(mend - 1, PMD_SIZE); + + while (mstart < mend) { + result = init_one_level2_page(image, level4p, mstart); + if (result) + return result; + + mstart += PMD_SIZE; + } + + return 0; +} + static void init_level2_page(pmd_t *level2p, unsigned long addr) { unsigned long end_addr; @@ -184,22 +203,34 @@ err: return result; } - static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { + unsigned long mstart, mend; pgd_t *level4p; int result; + int i; + level4p = (pgd_t *)__va(start_pgtable); result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); if (result) return result; + /* - * image->start may be outside 0 ~ max_pfn, for example when - * jump back to original kernel from kexeced kernel + * segments's mem ranges could be outside 0 ~ max_pfn, + * for example when jump back to original kernel from kexeced kernel. + * or first kernel is booted with user mem map, and second kernel + * could be loaded out of that range. 
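+ * (Both cases are covered the same way below: each image->segment[i]
+ * range [mem, mem + memsz) gets its own identity mapping.)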
*/ - result = init_one_level2_page(image, level4p, image->start); - if (result) - return result; + for (i = 0; i < image->nr_segments; i++) { + mstart = image->segment[i].mem; + mend = mstart + image->segment[i].memsz; + + result = ident_mapping_init(image, level4p, mstart, mend); + + if (result) + return result; + } + return init_transition_pgtable(image, level4p); } -- cgit v1.1 From 9ebdc79f7a177d3098b89ba8ef2dd2b235163685 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:04 -0800 Subject: x86, kexec: Replace ident_mapping_init and init_level4_page Now ident_mapping_init is checking if pgd/pud is present for every 2M, so several 2Ms are in same PUD, it will keep checking if pud is there with same pud. init_level4_page just does not check existing pgd/pud. We could use generic mapping_init with different settings in info to replace those two local grown version functions. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-24-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/machine_kexec_64.c | 161 ++++++------------------------------- 1 file changed, 26 insertions(+), 135 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index be14ee1..d2d7e02 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -16,144 +16,12 @@ #include #include +#include #include #include #include #include -static int init_one_level2_page(struct kimage *image, pgd_t *pgd, - unsigned long addr) -{ - pud_t *pud; - pmd_t *pmd; - struct page *page; - int result = -ENOMEM; - - addr &= PMD_MASK; - pgd += pgd_index(addr); - if (!pgd_present(*pgd)) { - page = kimage_alloc_control_pages(image, 0); - if (!page) - goto out; - pud = (pud_t *)page_address(page); - clear_page(pud); - set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); - } - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) { - page = kimage_alloc_control_pages(image, 0); - if (!page) - goto out; - pmd = (pmd_t *)page_address(page); - clear_page(pmd); - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - } - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); - result = 0; -out: - return result; -} - -static int ident_mapping_init(struct kimage *image, pgd_t *level4p, - unsigned long mstart, unsigned long mend) -{ - int result; - - mstart = round_down(mstart, PMD_SIZE); - mend = round_up(mend - 1, PMD_SIZE); - - while (mstart < mend) { - result = init_one_level2_page(image, level4p, mstart); - if (result) - return result; - - mstart += PMD_SIZE; - } - - return 0; -} - -static void init_level2_page(pmd_t *level2p, unsigned long addr) -{ - unsigned long end_addr; - - addr &= PAGE_MASK; - end_addr = addr + PUD_SIZE; - while (addr < end_addr) { - set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); - addr += PMD_SIZE; - } -} - -static int init_level3_page(struct kimage *image, pud_t *level3p, - unsigned long addr, unsigned long last_addr) -{ - unsigned long end_addr; - int result; - - result = 0; - addr &= PAGE_MASK; - end_addr = addr + PGDIR_SIZE; - while ((addr < last_addr) && (addr < end_addr)) { - struct page *page; - pmd_t *level2p; - - page = kimage_alloc_control_pages(image, 0); - if (!page) { - result = -ENOMEM; - goto out; - } - level2p = (pmd_t *)page_address(page); - init_level2_page(level2p, addr); - set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE)); - addr += PUD_SIZE; - } - /* clear the unused 
entries */ - while (addr < end_addr) { - pud_clear(level3p++); - addr += PUD_SIZE; - } -out: - return result; -} - - -static int init_level4_page(struct kimage *image, pgd_t *level4p, - unsigned long addr, unsigned long last_addr) -{ - unsigned long end_addr; - int result; - - result = 0; - addr &= PAGE_MASK; - end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE); - while ((addr < last_addr) && (addr < end_addr)) { - struct page *page; - pud_t *level3p; - - page = kimage_alloc_control_pages(image, 0); - if (!page) { - result = -ENOMEM; - goto out; - } - level3p = (pud_t *)page_address(page); - result = init_level3_page(image, level3p, addr, last_addr); - if (result) - goto out; - set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); - addr += PGDIR_SIZE; - } - /* clear the unused entries */ - while (addr < end_addr) { - pgd_clear(level4p++); - addr += PGDIR_SIZE; - } -out: - return result; -} - static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.pud); @@ -203,15 +71,37 @@ err: return result; } +static void *alloc_pgt_page(void *data) +{ + struct kimage *image = (struct kimage *)data; + struct page *page; + void *p = NULL; + + page = kimage_alloc_control_pages(image, 0); + if (page) { + p = page_address(page); + clear_page(p); + } + + return p; +} + static int init_pgtable(struct kimage *image, unsigned long start_pgtable) { + struct x86_mapping_info info = { + .alloc_pgt_page = alloc_pgt_page, + .context = image, + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + }; unsigned long mstart, mend; pgd_t *level4p; int result; int i; level4p = (pgd_t *)__va(start_pgtable); - result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); + clear_page(level4p); + result = kernel_ident_mapping_init(&info, level4p, + 0, max_pfn << PAGE_SHIFT); if (result) return result; @@ -225,7 +115,8 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) mstart = image->segment[i].mem; mend = mstart + image->segment[i].memsz; - result = ident_mapping_init(image, level4p, mstart, mend); + result = kernel_ident_mapping_init(&info, + level4p, mstart, mend); if (result) return result; -- cgit v1.1 From 0e691cf824f76adefb4498fe39c300aba2c2575a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:05 -0800 Subject: x86, kexec, 64bit: Only set ident mapping for ram. We should set mappings only for usable memory ranges under max_pfn Otherwise causes same problem that is fixed by x86, mm: Only direct map addresses that are marked as E820_RAM This patch exposes pfn_mapped array, and only sets ident mapping for ranges in that array. This patch relies on new kernel_ident_mapping_init that could handle existing pgd/pud between different calls. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-25-git-send-email-yinghai@kernel.org Cc: Alexander Duyck Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/page.h | 4 ++++ arch/x86/kernel/machine_kexec_64.c | 13 +++++++++---- arch/x86/mm/init.c | 4 ++-- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 8ca8283..100a20c 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -17,6 +17,10 @@ struct page; +#include +extern struct range pfn_mapped[]; +extern int nr_pfn_mapped; + static inline void clear_user_page(void *page, unsigned long vaddr, struct page *pg) { diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index d2d7e02..4eabc16 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -100,10 +100,15 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable) level4p = (pgd_t *)__va(start_pgtable); clear_page(level4p); - result = kernel_ident_mapping_init(&info, level4p, - 0, max_pfn << PAGE_SHIFT); - if (result) - return result; + for (i = 0; i < nr_pfn_mapped; i++) { + mstart = pfn_mapped[i].start << PAGE_SHIFT; + mend = pfn_mapped[i].end << PAGE_SHIFT; + + result = kernel_ident_mapping_init(&info, + level4p, mstart, mend); + if (result) + return result; + } /* * segments's mem ranges could be outside 0 ~ max_pfn, diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 3364a76..d418152 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -302,8 +302,8 @@ static int __meminit split_mem_range(struct map_range *mr, int nr_range, return nr_range; } -static struct range pfn_mapped[E820_X_MAX]; -static int nr_pfn_mapped; +struct range pfn_mapped[E820_X_MAX]; +int nr_pfn_mapped; static void add_pfn_range_mapped(unsigned long start_pfn, unsigned long end_pfn) { -- cgit v1.1 From ee92d815027a76ef92f3ec7b155b0c8aa345f239 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 28 Jan 2013 20:16:44 -0800 Subject: x86, boot: Support loading bzImage, boot_params and ramdisk above 4G xloadflags bit 1 indicates that we can load the kernel and all data structures above 4G; it is set if kernel is relocatable and 64bit. bootloader will check if xloadflags bit 1 is set to decide if it could load ramdisk and kernel high above 4G. bootloader will fill value to ext_ramdisk_image/size for high 32bits when it load ramdisk above 4G. kernel use get_ramdisk_image/size to use ext_ramdisk_image/size to get right positon for ramdisk. Signed-off-by: Yinghai Lu Cc: Rob Landley Cc: Matt Fleming Cc: Gokul Caushik Cc: Josh Triplett Cc: Joe Millenbach Link: http://lkml.kernel.org/r/1359058816-7615-26-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/boot/compressed/cmdline.c | 2 ++ arch/x86/boot/header.S | 10 +++++++++- arch/x86/kernel/head64.c | 2 ++ arch/x86/kernel/setup.c | 4 ++++ 4 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index b4c913c..bffd73b 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -17,6 +17,8 @@ static unsigned long get_cmd_line_ptr(void) { unsigned long cmd_line_ptr = real_mode->hdr.cmd_line_ptr; + cmd_line_ptr |= (u64)real_mode->ext_cmd_line_ptr << 32; + return cmd_line_ptr; } int cmdline_find_option(const char *option, char *buffer, int bufsize) diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 944ce59..9ec06a1 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -374,6 +374,14 @@ xloadflags: #else # define XLF0 0 #endif + +#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) + /* kernel/boot_param/ramdisk could be loaded above 4g */ +# define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G +#else +# define XLF1 0 +#endif + #ifdef CONFIG_EFI_STUB # ifdef CONFIG_X86_64 # define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ @@ -383,7 +391,7 @@ xloadflags: #else # define XLF23 0 #endif - .word XLF0 | XLF23 + .word XLF0 | XLF1 | XLF23 cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, #added with boot protocol diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 62c8ce4..6873b07 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -116,6 +116,8 @@ static unsigned long get_cmd_line_ptr(void) { unsigned long cmd_line_ptr = boot_params.hdr.cmd_line_ptr; + cmd_line_ptr |= (u64)boot_params.ext_cmd_line_ptr << 32; + return cmd_line_ptr; } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 83b3861..519f2bc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -298,12 +298,16 @@ static u64 __init get_ramdisk_image(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; + ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32; + return ramdisk_image; } static u64 __init get_ramdisk_size(void) { u64 ramdisk_size = boot_params.hdr.ramdisk_size; + ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32; + return ramdisk_size; } -- cgit v1.1 From 8ee2f2dfdbdfe1851fcc0191b2d4faa4c26a39fb Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:07 -0800 Subject: x86, boot: Update comments about entries for 64bit image Now 64bit entry is fixed on 0x200, can not be changed anymore. Update the comments to reflect that. Also put info about it in boot.txt -v2: fix some grammar error Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-27-git-send-email-yinghai@kernel.org Cc: Rob Landley Cc: Matt Fleming Signed-off-by: H. Peter Anvin --- arch/x86/boot/compressed/head_64.S | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 5c80b94..d9ae9a4 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -37,6 +37,12 @@ __HEAD .code32 ENTRY(startup_32) + /* + * 32bit entry is 0 and it is ABI so immutable! + * If we come here directly from a bootloader, + * kernel(text+data+bss+brk) ramdisk, zero_page, command line + * all need to be under the 4G limit. 
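+	 * (Presumably because a loader entering here is executing 32bit
+	 * code, it could not have placed any of these above 4G in the
+	 * first place; the 64bit entry at 0x200 documents the relaxed
+	 * rules.)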
+ */ cld /* * Test KEEP_SEGMENTS flag to see if the bootloader is asking @@ -182,20 +188,18 @@ ENTRY(startup_32) lret ENDPROC(startup_32) - /* - * Be careful here startup_64 needs to be at a predictable - * address so I can export it in an ELF header. Bootloaders - * should look at the ELF header to find this address, as - * it may change in the future. - */ .code64 .org 0x200 ENTRY(startup_64) /* + * 64bit entry is 0x200 and it is ABI so immutable! * We come here either from startup_32 or directly from a - * 64bit bootloader. If we come here from a bootloader we depend on - * an identity mapped page table being provied that maps our - * entire text+data+bss and hopefully all of memory. + * 64bit bootloader. + * If we come here from a bootloader, kernel(text+data+bss+brk), + * ramdisk, zero_page, command line could be above 4G. + * We depend on an identity mapped page table being provided + * that maps our entire kernel(text+data+bss+brk), zero page + * and command line. */ #ifdef CONFIG_EFI_STUB /* -- cgit v1.1 From d1af6d045fba6b070fa81f54dfe9227214be99ea Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:08 -0800 Subject: x86, boot: Not need to check setup_header version for setup_data That is for bootloaders. setup_data is in setup_header, and bootloader is copying that from bzImage. So for old bootloader should keep that as 0 already. old kexec-tools till now for elf image set setup_data to 0, so it is ok. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-28-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 519f2bc..b80bee1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -439,8 +439,6 @@ static void __init parse_setup_data(void) struct setup_data *data; u64 pa_data; - if (boot_params.hdr.version < 0x0209) - return; pa_data = boot_params.hdr.setup_data; while (pa_data) { u32 data_len, map_len; @@ -476,8 +474,6 @@ static void __init e820_reserve_setup_data(void) u64 pa_data; int found = 0; - if (boot_params.hdr.version < 0x0209) - return; pa_data = boot_params.hdr.setup_data; while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); @@ -501,8 +497,6 @@ static void __init memblock_x86_reserve_range_setup_data(void) struct setup_data *data; u64 pa_data; - if (boot_params.hdr.version < 0x0209) - return; pa_data = boot_params.hdr.setup_data; while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); -- cgit v1.1 From 595ad9af8584908ea5fb698b836169d05b99f186 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:09 -0800 Subject: memblock: Add memblock_mem_size() Use it to get mem size under the limit_pfn. to replace local version in x86 reserved_initrd. -v2: remove not needed cast that is pointed out by HPA. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-29-git-send-email-yinghai@kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/setup.c | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b80bee1..bbe8cdf 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -363,20 +363,6 @@ static void __init relocate_initrd(void) ramdisk_here, ramdisk_here + ramdisk_size - 1); } -static u64 __init get_mem_size(unsigned long limit_pfn) -{ - int i; - u64 mapped_pages = 0; - unsigned long start_pfn, end_pfn; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { - start_pfn = min_t(unsigned long, start_pfn, limit_pfn); - end_pfn = min_t(unsigned long, end_pfn, limit_pfn); - mapped_pages += end_pfn - start_pfn; - } - - return mapped_pages << PAGE_SHIFT; -} static void __init early_reserve_initrd(void) { /* Assume only end is not page aligned */ @@ -404,7 +390,7 @@ static void __init reserve_initrd(void) initrd_start = 0; - mapped_size = get_mem_size(max_pfn_mapped); + mapped_size = memblock_mem_size(max_pfn_mapped); if (ramdisk_size >= (mapped_size>>1)) panic("initrd too large to handle, " "disabling initrd (%lld needed, %lld available)\n", -- cgit v1.1 From 7d41a8a4a2b2438621a9159477bff36a11d79a42 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:10 -0800 Subject: x86, kdump: Remove crashkernel range find limit for 64bit Now the kexeced kernel/ramdisk can be above 4G, so remove the 896 MiB limit for 64bit. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-30-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbe8cdf..4778dde 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -501,13 +501,11 @@ static void __init memblock_x86_reserve_range_setup_data(void) /* * Keep the crash kernel below this limit. On 32 bits earlier kernels * would limit the kernel to the low 512 MiB due to mapping restrictions. - * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this - * limit once kexec-tools are fixed. */ #ifdef CONFIG_X86_32 # define CRASH_KERNEL_ADDR_MAX (512 << 20) #else -# define CRASH_KERNEL_ADDR_MAX (896 << 20) +# define CRASH_KERNEL_ADDR_MAX MAXMEM #endif -- cgit v1.1 From 0212f9159694be61c6bc52e925fa76643e0c1abf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:11 -0800 Subject: x86: Add Crash kernel low reservation During the kdump kernel's boot stage, it needs to find low RAM for the swiotlb buffer when the system does not support Intel IOMMU/DMAR remapping. kexec-tools appends memmap=exactmap and the range from /proc/iomem tagged "Crash kernel", and that range is above 4G for 64bit after boot protocol 2.12. We need to add another range to /proc/iomem, "Crash kernel low", so kexec-tools can find that info and append it to the kdump kernel command line. Try to reserve some memory under 4G if the normal "Crash kernel" range is above 4G. The user can specify the size with crashkernel_low=XX[KMG]. -v2: fix a warning found by Fengguang's test robot. -v3: move the get_mem_size change out to another patch, to solve a compile warning found by Borislav Petkov. -v4: the user must specify crashkernel_low if the system supports neither Intel nor AMD IOMMU. 
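To make the interface concrete, here is a minimal user-space sketch of the consumer side (illustrative only; this is not the actual kexec-tools parser):

	#include <stdio.h>
	#include <string.h>

	/* Sketch: scan /proc/iomem for the "Crash kernel low" resource that
	 * reserve_crashkernel_low() inserts, and print its bounds. Error
	 * handling is trimmed for brevity; reading the real addresses
	 * typically requires root. */
	int main(void)
	{
		unsigned long long start, end;
		char line[128];
		FILE *f = fopen("/proc/iomem", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (strstr(line, "Crash kernel low") &&
			    sscanf(line, "%llx-%llx", &start, &end) == 2)
				printf("crashkernel low: [mem %#llx-%#llx]\n",
				       start, end);
		fclose(f);
		return 0;
	}
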
Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-31-git-send-email-yinghai@kernel.org Cc: Eric Biederman Cc: Rob Landley Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 40 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4778dde..5dc47c3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -508,8 +508,44 @@ static void __init memblock_x86_reserve_range_setup_data(void) # define CRASH_KERNEL_ADDR_MAX MAXMEM #endif +static void __init reserve_crashkernel_low(void) +{ +#ifdef CONFIG_X86_64 + const unsigned long long alignment = 16<<20; /* 16M */ + unsigned long long low_base = 0, low_size = 0; + unsigned long total_low_mem; + unsigned long long base; + int ret; + + total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT)); + ret = parse_crashkernel_low(boot_command_line, total_low_mem, + &low_size, &base); + if (ret != 0 || low_size <= 0) + return; + + low_base = memblock_find_in_range(low_size, (1ULL<<32), + low_size, alignment); + + if (!low_base) { + pr_info("crashkernel low reservation failed - No suitable area found.\n"); + + return; + } + + memblock_reserve(low_base, low_size); + pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", + (unsigned long)(low_size >> 20), + (unsigned long)(low_base >> 20), + (unsigned long)(total_low_mem >> 20)); + crashk_low_res.start = low_base; + crashk_low_res.end = low_base + low_size - 1; + insert_resource(&iomem_resource, &crashk_low_res); +#endif +} + static void __init reserve_crashkernel(void) { + const unsigned long long alignment = 16<<20; /* 16M */ unsigned long long total_mem; unsigned long long crash_size, crash_base; int ret; @@ -523,8 +559,6 @@ static void __init reserve_crashkernel(void) /* 0 means: find the address automatically */ if (crash_base <= 0) { - const unsigned long long alignment = 16<<20; /* 16M */ - /* * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX */ @@ -535,6 +569,7 @@ static void __init reserve_crashkernel(void) pr_info("crashkernel reservation failed - No suitable area found.\n"); return; } + } else { unsigned long long start; @@ -556,6 +591,9 @@ static void __init reserve_crashkernel(void) crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; insert_resource(&iomem_resource, &crashk_res); + + if (crash_base >= (1ULL<<32)) + reserve_crashkernel_low(); } #else static void __init reserve_crashkernel(void) -- cgit v1.1 From 6c902b656c4a808d9c6f40a387b166455efecd62 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:12 -0800 Subject: x86: Merge early kernel reserve for 32bit and 64bit They are the same, and we could move them out from head32/64.c to setup.c. We are using memblock, and it could handle overlapping properly, so we don't need to reserve some at first to hold the location, and just need to make sure we reserve them before we are using memblock to find free mem to use. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-32-git-send-email-yinghai@kernel.org Cc: Alexander Duyck Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/head32.c | 9 --------- arch/x86/kernel/head64.c | 9 --------- arch/x86/kernel/setup.c | 9 +++++++++ 3 files changed, 9 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index a795b54..138463a 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -33,9 +33,6 @@ void __init i386_start_kernel(void) { sanitize_boot_params(&boot_params); - memblock_reserve(__pa_symbol(&_text), - __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); - /* Call the subarch specific early setup function */ switch (boot_params.hdr.hardware_subarch) { case X86_SUBARCH_MRST: @@ -49,11 +46,5 @@ void __init i386_start_kernel(void) break; } - /* - * At this point everything still needed from the boot loader - * or BIOS or kernel text should be early reserved or marked not - * RAM in e820. All other memory is free game. - */ - start_kernel(); } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6873b07..57334f4c 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -186,16 +186,7 @@ void __init x86_64_start_reservations(char *real_mode_data) if (!boot_params.hdr.version) copy_bootdata(__va(real_mode_data)); - memblock_reserve(__pa_symbol(&_text), - __pa_symbol(&__bss_stop) - __pa_symbol(&_text)); - reserve_ebda_region(); - /* - * At this point everything still needed from the boot loader - * or BIOS or kernel text should be early reserved or marked not - * RAM in e820. All other memory is free game. - */ - start_kernel(); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5dc47c3..a74701a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -805,8 +805,17 @@ early_param("reservelow", parse_reservelow); void __init setup_arch(char **cmdline_p) { + memblock_reserve(__pa_symbol(_text), + (unsigned long)__bss_stop - (unsigned long)_text); + early_reserve_initrd(); + /* + * At this point everything still needed from the boot loader + * or BIOS or kernel text should be early reserved or marked not + * RAM in e820. All other memory is free game. + */ + #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); visws_early_detect(); -- cgit v1.1 From 72212675d1c96f5db8ec6fb35701879911193158 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:13 -0800 Subject: x86, 64bit, mm: Mark data/bss/brk to nx HPA said, we should not have RW and +x set at the time. 
for kernel layout: [ 0.000000] Kernel Layout: [ 0.000000] .text: [0x01000000-0x021434f8] [ 0.000000] .rodata: [0x02200000-0x02a13fff] [ 0.000000] .data: [0x02c00000-0x02dc763f] [ 0.000000] .init: [0x02dc9000-0x0312cfff] [ 0.000000] .bss: [0x0313b000-0x03dd6fff] [ 0.000000] .brk: [0x03dd7000-0x03dfffff] before the patch, we have ---[ High Kernel Mapping ]--- 0xffffffff80000000-0xffffffff81000000 16M pmd 0xffffffff81000000-0xffffffff82200000 18M ro PSE GLB x pmd 0xffffffff82200000-0xffffffff82c00000 10M ro PSE GLB NX pmd 0xffffffff82c00000-0xffffffff82dc9000 1828K RW GLB x pte 0xffffffff82dc9000-0xffffffff82e00000 220K RW GLB NX pte 0xffffffff82e00000-0xffffffff83000000 2M RW PSE GLB NX pmd 0xffffffff83000000-0xffffffff8313a000 1256K RW GLB NX pte 0xffffffff8313a000-0xffffffff83200000 792K RW GLB x pte 0xffffffff83200000-0xffffffff83e00000 12M RW PSE GLB x pmd 0xffffffff83e00000-0xffffffffa0000000 450M pmd after patch,, we get ---[ High Kernel Mapping ]--- 0xffffffff80000000-0xffffffff81000000 16M pmd 0xffffffff81000000-0xffffffff82200000 18M ro PSE GLB x pmd 0xffffffff82200000-0xffffffff82c00000 10M ro PSE GLB NX pmd 0xffffffff82c00000-0xffffffff82e00000 2M RW GLB NX pte 0xffffffff82e00000-0xffffffff83000000 2M RW PSE GLB NX pmd 0xffffffff83000000-0xffffffff83200000 2M RW GLB NX pte 0xffffffff83200000-0xffffffff83e00000 12M RW PSE GLB NX pmd 0xffffffff83e00000-0xffffffffa0000000 450M pmd so data, bss, brk get NX ... Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-33-git-send-email-yinghai@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/mm/init_64.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dc67337..e2fcbc3 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -810,6 +810,7 @@ void mark_rodata_ro(void) unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table); unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata); unsigned long data_start = (unsigned long) &_sdata; + unsigned long all_end = PFN_ALIGN(&_end); printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); @@ -818,10 +819,10 @@ void mark_rodata_ro(void) kernel_set_to_readonly = 1; /* - * The rodata section (but not the kernel text!) should also be - * not-executable. + * The rodata/data/bss/brk section (but not the kernel text!) + * should also be not-executable. */ - set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); rodata_test(); -- cgit v1.1 From 8b78c21d72d9dbcb7230e97423a2cd8d8402c20c Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 24 Jan 2013 12:20:14 -0800 Subject: x86, 64bit, mm: hibernate use generic mapping_init We should set mappings only for usable memory ranges under max_pfn Otherwise causes same problem that is fixed by x86, mm: Only direct map addresses that are marked as E820_RAM Make it only map range in pfn_mapped array. Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/1359058816-7615-34-git-send-email-yinghai@kernel.org Cc: Pavel Machek Cc: Rafael J. Wysocki Cc: linux-pm@vger.kernel.org Signed-off-by: H. 
Peter Anvin --- arch/x86/power/hibernate_64.c | 66 +++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 44 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index 460f314..a0fde91 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -11,6 +11,8 @@ #include #include #include + +#include #include #include #include @@ -39,41 +41,21 @@ pgd_t *temp_level4_pgt; void *relocated_restore_code; -static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) +static void *alloc_pgt_page(void *context) { - long i, j; - - i = pud_index(address); - pud = pud + i; - for (; i < PTRS_PER_PUD; pud++, i++) { - unsigned long paddr; - pmd_t *pmd; - - paddr = address + i*PUD_SIZE; - if (paddr >= end) - break; - - pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); - if (!pmd) - return -ENOMEM; - set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); - for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { - unsigned long pe; - - if (paddr >= end) - break; - pe = __PAGE_KERNEL_LARGE_EXEC | paddr; - pe &= __supported_pte_mask; - set_pmd(pmd, __pmd(pe)); - } - } - return 0; + return (void *)get_safe_page(GFP_ATOMIC); } static int set_up_temporary_mappings(void) { - unsigned long start, end, next; - int error; + struct x86_mapping_info info = { + .alloc_pgt_page = alloc_pgt_page, + .pmd_flag = __PAGE_KERNEL_LARGE_EXEC, + .kernel_mapping = true, + }; + unsigned long mstart, mend; + int result; + int i; temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); if (!temp_level4_pgt) @@ -84,21 +66,17 @@ static int set_up_temporary_mappings(void) init_level4_pgt[pgd_index(__START_KERNEL_map)]); /* Set up the direct mapping from scratch */ - start = (unsigned long)pfn_to_kaddr(0); - end = (unsigned long)pfn_to_kaddr(max_pfn); - - for (; start < end; start = next) { - pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); - if (!pud) - return -ENOMEM; - next = start + PGDIR_SIZE; - if (next > end) - next = end; - if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) - return error; - set_pgd(temp_level4_pgt + pgd_index(start), - mk_kernel_pgd(__pa(pud))); + for (i = 0; i < nr_pfn_mapped; i++) { + mstart = pfn_mapped[i].start << PAGE_SHIFT; + mend = pfn_mapped[i].end << PAGE_SHIFT; + + result = kernel_ident_mapping_init(&info, temp_level4_pgt, + mstart, mend); + + if (result) + return result; } + return 0; } -- cgit v1.1 From 3b58908a92e00840bcd9050808f3dc86fd547029 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 30 Jan 2013 17:51:44 -0800 Subject: x86: bpf_jit_comp: add pkt_type support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Supporting access to skb->pkt_type is a bit tricky if we want to have a generic code, allowing pkt_type to be moved in struct sk_buff pkt_type is a bit field, so compiler cannot really help us to find its offset. Let's use a helper for this : It will throw a one time message if pkt_type no longer starts at a byte boundary or is no longer a 3bit field. Reported-by: Willem de Bruijn Signed-off-by: Eric Dumazet Cc: Maciej Å»enczykowski Signed-off-by: David S. 
Miller --- arch/x86/net/bpf_jit_comp.c | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d11a470..3cbe4538 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1,6 +1,6 @@ /* bpf_jit_comp.c : BPF JIT compiler * - * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -124,6 +124,26 @@ static inline void bpf_flush_icache(void *start, void *end) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) +/* Helper to find the offset of pkt_type in sk_buff + * We want to make sure its still a 3bit field starting at a byte boundary. + */ +#define PKT_TYPE_MAX 7 +static int pkt_type_offset(void) +{ + struct sk_buff skb_probe = { + .pkt_type = ~0, + }; + char *ct = (char *)&skb_probe; + unsigned int off; + + for (off = 0; off < sizeof(struct sk_buff); off++) { + if (ct[off] == PKT_TYPE_MAX) + return off; + } + pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n"); + return -1; +} + void bpf_jit_compile(struct sk_filter *fp) { u8 temp[64]; @@ -216,6 +236,7 @@ void bpf_jit_compile(struct sk_filter *fp) case BPF_S_ANC_VLAN_TAG: case BPF_S_ANC_VLAN_TAG_PRESENT: case BPF_S_ANC_QUEUE: + case BPF_S_ANC_PKTTYPE: case BPF_S_LD_W_ABS: case BPF_S_LD_H_ABS: case BPF_S_LD_B_ABS: @@ -536,6 +557,23 @@ void bpf_jit_compile(struct sk_filter *fp) EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */ } break; + case BPF_S_ANC_PKTTYPE: + { + int off = pkt_type_offset(); + + if (off < 0) + goto out; + if (is_imm8(off)) { + /* movzbl off8(%rdi),%eax */ + EMIT4(0x0f, 0xb6, 0x47, off); + } else { + /* movbl off32(%rdi),%eax */ + EMIT3(0x0f, 0xb6, 0x87); + EMIT(off, 4); + } + EMIT3(0x83, 0xe0, PKT_TYPE_MAX); /* and $0x7,%eax */ + break; + } case BPF_S_LD_W_ABS: func = CHOOSE_LOAD_FUNC(K, sk_load_word); common_load: seen |= SEEN_DATAREF; -- cgit v1.1 From 40a1ef95da85843696fc3ebe5fce39b0db32669f Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 30 Jan 2013 07:55:53 +0000 Subject: x86-64: Replace left over sti/cli in ia32 audit exit code For some reason they didn't get replaced so far by their paravirt equivalents, resulting in code to be run with interrupts disabled that doesn't expect so (causing, in the observed case, a BUG_ON() to trigger) when syscall auditing is enabled. David (Cc-ed) came up with an identical fix, so likely this can be taken to count as an ack from him. 
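The rule the fix restores is that the interrupt flag is toggled only through paravirt-aware helpers once CONFIG_PARAVIRT is in play. The actual fix is in the ia32 assembly above; the following is only a contrived C-side illustration of the same distinction (raw_toggle() and pv_safe_toggle() are invented names):

        #include <linux/irqflags.h>

        /* Illustrative only: a raw cli/sti pair bypasses the paravirt layer
         * (e.g. when running under Xen), while the generic helpers expand to
         * paravirt calls when CONFIG_PARAVIRT is enabled. */
        static void raw_toggle(void)
        {
                asm volatile("cli");    /* wrong: the hypervisor never sees this */
                asm volatile("sti");
        }

        static void pv_safe_toggle(void)
        {
                local_irq_disable();    /* routed through the paravirt ops */
                local_irq_enable();
        }
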
Reported-by: Peter Moody Signed-off-by: Jan Beulich Cc: David Vrabel Cc: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/5108E01902000078000BA9C5@nat28.tlf.novell.com Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Cc: Konrad Rzeszutek Wilk Cc: David Vrabel Tested-by: Peter Moody --- arch/x86/ia32/ia32entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 102ff7c..142c4ce 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -207,7 +207,7 @@ sysexit_from_sys_call: testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jnz ia32_ret_from_sys_call TRACE_IRQS_ON - sti + ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%esi /* second arg, syscall return value */ cmpl $-MAX_ERRNO,%eax /* is it an error ? */ jbe 1f @@ -217,7 +217,7 @@ sysexit_from_sys_call: call __audit_syscall_exit movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi - cli + DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET) jz \exit -- cgit v1.1 From 1e9209edc71b851d81f0316ca03a0e6335c0ef9a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 27 Jan 2013 01:18:21 +0100 Subject: x86/numa: Use __pa_nodebug() instead MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... and fix the following warning: arch/x86/mm/numa.c: In function ‘setup_node_data’: arch/x86/mm/numa.c:222:3: warning: passing argument 1 of ‘__phys_addr_nodebug’ makes integer from pointer without a cast Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Link: http://lkml.kernel.org/r/1359245901-8512-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 76604eb..b2313c6 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -219,7 +219,7 @@ static void __init setup_node_data(int nid, u64 start, u64 end) */ nd = alloc_remap(nid, nd_size); if (nd) { - nd_pa = __phys_addr_nodebug(nd); + nd_pa = __pa_nodebug(nd); remapped = true; } else { nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); -- cgit v1.1 From 2663960c159f23cbfb8e196c96e9fc9f3b5f1a8d Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 22 Jan 2013 22:24:23 -0800 Subject: perf: Make EVENT_ATTR global Rename EVENT_ATTR() to PMU_EVENT_ATTR() and make it global so it is available to all architectures. Further to allow architectures flexibility, have PMU_EVENT_ATTR() pass in the variable name as a parameter. 
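With the variable name passed in explicitly, an architecture can declare a sysfs event attribute in a single statement. A hypothetical direct use, assuming the global PMU_EVENT_ATTR() definition lands in <linux/perf_event.h> and with events_sysfs_show standing in for the architecture's own show routine (the x86 EVENT_ATTR() wrapper in the diff below expands to exactly this shape):

        #include <linux/perf_event.h>   /* PMU_EVENT_ATTR(), struct perf_pmu_events_attr */

        /* Hypothetical direct use: declares event_attr_CPU_CYCLES, a
         * struct perf_pmu_events_attr whose sysfs name is "cpu-cycles". */
        PMU_EVENT_ATTR(cpu-cycles, event_attr_CPU_CYCLES,
                       PERF_COUNT_HW_CPU_CYCLES, events_sysfs_show);
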
Changelog[v2] - [Jiri Olsa] No need to define PMU_EVENT_PTR() Signed-off-by: Sukadev Bhattiprolu Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Anton Blanchard Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Robert Richter Cc: Stephane Eranian Cc: linuxppc-dev@ozlabs.org Link: http://lkml.kernel.org/r/20130123062422.GC13720@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- arch/x86/kernel/cpu/perf_event.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6774c17..c0df5ed2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1310,11 +1310,6 @@ static struct attribute_group x86_pmu_format_group = { .attrs = NULL, }; -struct perf_pmu_events_attr { - struct device_attribute attr; - u64 id; -}; - /* * Remove all undefined events (x86_pmu.event_map(id) == 0) * out of events_attr attributes. @@ -1348,11 +1343,9 @@ static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *at #define EVENT_VAR(_id) event_attr_##_id #define EVENT_PTR(_id) &event_attr_##_id.attr.attr -#define EVENT_ATTR(_name, _id) \ -static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ - .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ - .id = PERF_COUNT_HW_##_id, \ -}; +#define EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ + events_sysfs_show) EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); -- cgit v1.1 From 9cd4d78e21cfdc709b1af516214ec4f69ee0e6bd Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:22 -0800 Subject: x86/microcode_intel.h: Define functions and macros for early loading ucode Define some functions and macros that will be used in early loading ucode. Some of them are moved from microcode_intel.c driver in order to be called in early boot phase before module can be called. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-3-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/microcode_intel.h | 85 ++++++++++++++ arch/x86/kernel/Makefile | 3 + arch/x86/kernel/microcode_core.c | 7 +- arch/x86/kernel/microcode_intel.c | 198 +++++---------------------------- 4 files changed, 122 insertions(+), 171 deletions(-) create mode 100644 arch/x86/include/asm/microcode_intel.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h new file mode 100644 index 0000000..5356f92 --- /dev/null +++ b/arch/x86/include/asm/microcode_intel.h @@ -0,0 +1,85 @@ +#ifndef _ASM_X86_MICROCODE_INTEL_H +#define _ASM_X86_MICROCODE_INTEL_H + +#include + +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int reserved[3]; +}; + +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[0]; +}; + +/* microcode format is extended from prescott processors */ +struct extended_signature { + unsigned int sig; + unsigned int pf; + unsigned int cksum; +}; + +struct extended_sigtable { + unsigned int count; + unsigned int cksum; + unsigned int reserved[3]; + struct extended_signature sigs[0]; +}; + +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) +#define DWSIZE (sizeof(u32)) + +#define get_totalsize(mc) \ + (((struct microcode_intel *)mc)->hdr.totalsize ? \ + ((struct microcode_intel *)mc)->hdr.totalsize : \ + DEFAULT_UCODE_TOTALSIZE) + +#define get_datasize(mc) \ + (((struct microcode_intel *)mc)->hdr.datasize ? 
\ + ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) + +#define sigmatch(s1, s2, p1, p2) \ + (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) + +#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) + +extern int +get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev); +extern int microcode_sanity_check(void *mc, int print_err); +extern int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev); +extern int +update_match_revision(struct microcode_header_intel *mc_header, int rev); + +#ifdef CONFIG_MICROCODE_INTEL_EARLY +extern void __init load_ucode_intel_bsp(void); +extern void __cpuinit load_ucode_intel_ap(void); +extern void show_ucode_info_early(void); +#else +static inline __init void load_ucode_intel_bsp(void) {} +static inline __cpuinit void load_ucode_intel_ap(void) {} +static inline void show_ucode_info_early(void) {} +#endif + +#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) +extern int save_mc_for_early(u8 *mc); +#else +static inline int save_mc_for_early(u8 *mc) +{ + return 0; +} +#endif + +#endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 34e923a..052abee 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -88,6 +88,9 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o +obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o +obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o +obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o microcode-y := microcode_core.o microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 3a04b22..22db92b 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -364,10 +364,7 @@ static struct attribute_group mc_attr_group = { static void microcode_fini_cpu(int cpu) { - struct ucode_cpu_info *uci = ucode_cpu_info + cpu; - microcode_ops->microcode_fini_cpu(cpu); - uci->valid = 0; } static enum ucode_state microcode_resume_cpu(int cpu) @@ -383,6 +380,10 @@ static enum ucode_state microcode_resume_cpu(int cpu) static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw) { enum ucode_state ustate; + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (uci && uci->valid) + return UCODE_OK; if (collect_cpu_info(cpu)) return UCODE_ERROR; diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 3544aed..5fb2ceb 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -79,7 +79,7 @@ #include #include -#include +#include #include #include @@ -87,59 +87,6 @@ MODULE_DESCRIPTION("Microcode Update Driver"); MODULE_AUTHOR("Tigran Aivazian "); MODULE_LICENSE("GPL"); -struct microcode_header_intel { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int reserved[3]; -}; - -struct microcode_intel { - struct microcode_header_intel hdr; - unsigned int bits[0]; -}; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - 
struct extended_signature sigs[0]; -}; - -#define DEFAULT_UCODE_DATASIZE (2000) -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) -#define DWSIZE (sizeof(u32)) - -#define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.totalsize ? \ - ((struct microcode_intel *)mc)->hdr.totalsize : \ - DEFAULT_UCODE_TOTALSIZE) - -#define get_datasize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define sigmatch(s1, s2, p1, p2) \ - (((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0)))) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu_num); @@ -162,128 +109,25 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) return 0; } -static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf) -{ - return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; -} - -static inline int -update_match_revision(struct microcode_header_intel *mc_header, int rev) -{ - return (mc_header->rev <= rev) ? 0 : 1; -} - -static int microcode_sanity_check(void *mc) -{ - unsigned long total_size, data_size, ext_table_size; - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - int sum, orig_sum, ext_sigcount = 0, i; - struct extended_signature *ext_sig; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - - if (data_size + MC_HEADER_SIZE > total_size) { - pr_err("error! Bad data size in microcode data file\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - pr_err("error! Unknown microcode update format\n"); - return -EINVAL; - } - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - if ((ext_table_size < EXT_HEADER_SIZE) - || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - pr_err("error! Small exttable size in microcode data file\n"); - return -EINVAL; - } - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - pr_err("error! 
Bad exttable size in microcode data file\n"); - return -EFAULT; - } - ext_sigcount = ext_header->count; - } - - /* check extended table checksum */ - if (ext_table_size) { - int ext_table_sum = 0; - int *ext_tablep = (int *)ext_header; - - i = ext_table_size / DWSIZE; - while (i--) - ext_table_sum += ext_tablep[i]; - if (ext_table_sum) { - pr_warning("aborting, bad extended signature table checksum\n"); - return -EINVAL; - } - } - - /* calculate the checksum */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / DWSIZE; - while (i--) - orig_sum += ((int *)mc)[i]; - if (orig_sum) { - pr_err("aborting, bad checksum\n"); - return -EINVAL; - } - if (!ext_table_size) - return 0; - /* check extended signature checksum */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - sum = orig_sum - - (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - pr_err("aborting, bad checksum\n"); - return -EINVAL; - } - } - return 0; -} - /* * return 0 - no update found * return 1 - found update */ -static int -get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) +static int get_matching_mc(struct microcode_intel *mc_intel, int cpu) { - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header; - unsigned long total_size = get_totalsize(mc_header); - int ext_sigcount, i; - struct extended_signature *ext_sig; - - if (!update_match_revision(mc_header, rev)) - return 0; - - if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf)) - return 1; + struct cpu_signature cpu_sig; + unsigned int csig, cpf, crev; - /* Look for ext. headers: */ - if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) - return 0; + collect_cpu_info(cpu, &cpu_sig); - ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + csig = cpu_sig.sig; + cpf = cpu_sig.pf; + crev = cpu_sig.rev; - for (i = 0; i < ext_sigcount; i++) { - if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf)) - return 1; - ext_sig++; - } - return 0; + return get_matching_microcode(csig, cpf, mc_intel, crev); } -static int apply_microcode(int cpu) +int apply_microcode(int cpu) { struct microcode_intel *mc_intel; struct ucode_cpu_info *uci; @@ -300,6 +144,14 @@ static int apply_microcode(int cpu) if (mc_intel == NULL) return 0; + /* + * Microcode on this CPU could be updated earlier. Only apply the + * microcode patch in mc_intel when it is newer than the one on this + * CPU. 
+ */ + if (get_matching_mc(mc_intel, cpu) == 0) + return 0; + /* write microcode via MSR 0x79 */ wrmsr(MSR_IA32_UCODE_WRITE, (unsigned long) mc_intel->bits, @@ -338,6 +190,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, unsigned int leftover = size; enum ucode_state state = UCODE_OK; unsigned int curr_mc_size = 0; + unsigned int csig, cpf; while (leftover) { struct microcode_header_intel mc_header; @@ -362,11 +215,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, } if (get_ucode_data(mc, ucode_ptr, mc_size) || - microcode_sanity_check(mc) < 0) { + microcode_sanity_check(mc, 1) < 0) { break; } - if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) { + csig = uci->cpu_sig.sig; + cpf = uci->cpu_sig.pf; + if (get_matching_microcode(csig, cpf, mc, new_rev)) { vfree(new_mc); new_rev = mc_header.rev; new_mc = mc; @@ -393,6 +248,13 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, vfree(uci->mc); uci->mc = (struct microcode_intel *)new_mc; + /* + * If early loading microcode is supported, save this mc into + * permanent memory. So it will be loaded early when a CPU is hot added + * or resumes. + */ + save_mc_for_early(new_mc); + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); out: -- cgit v1.1 From d288e1cf8e62f3e4034f1f021f047009c4ac0b3c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:23 -0800 Subject: x86/common.c: Make have_cpuid_p() a global function Remove static declaration in have_cpuid_p() to make it a global function. The function will be called in early loading microcode. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-4-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/processor.h | 8 ++++++++ arch/x86/kernel/cpu/common.c | 9 +++------ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bdee8bd..3cdf4aa 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -190,6 +190,14 @@ extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); extern void detect_extended_topology(struct cpuinfo_x86 *c); extern void detect_ht(struct cpuinfo_x86 *c); +#ifdef CONFIG_X86_32 +extern int have_cpuid_p(void); +#else +static inline int have_cpuid_p(void) +{ + return 1; +} +#endif static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9c3ab43..d7fd246 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -37,6 +37,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_LOCAL_APIC #include @@ -213,7 +215,7 @@ static inline int flag_is_changeable_p(u32 flag) } /* Probe for the CPUID instruction */ -static int __cpuinit have_cpuid_p(void) +int __cpuinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -249,11 +251,6 @@ static inline int flag_is_changeable_p(u32 flag) { return 1; } -/* Probe for the CPUID instruction */ -static inline int have_cpuid_p(void) -{ - return 1; -} static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { } -- cgit v1.1 From e6ebf5deaaaa33b661f0db86380c232b162bd68c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:24 -0800 Subject: x86/common.c: load ucode in 64 bit or show loading ucode info in 32 bit on AP In 64 bit, load ucode on AP in cpu_init(). In 32 bit, show ucode loading info on AP in cpu_init(). Microcode has been loaded earlier before paging. Now it is safe to show the loading microcode info on this AP. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-5-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/common.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d7fd246..d814772 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1220,6 +1220,12 @@ void __cpuinit cpu_init(void) int cpu; int i; + /* + * Load microcode on this cpu if a valid microcode is available. + * This is early microcode loading procedure. + */ + load_ucode_ap(); + cpu = stack_smp_processor_id(); t = &per_cpu(init_tss, cpu); oist = &per_cpu(orig_ist, cpu); @@ -1311,6 +1317,8 @@ void __cpuinit cpu_init(void) struct tss_struct *t = &per_cpu(init_tss, cpu); struct thread_struct *thread = &curr->thread; + show_ucode_info_early(); + if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); for (;;) -- cgit v1.1 From a8ebf6d1d6971b90a20f5bd0465e6d520377e33b Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:25 -0800 Subject: x86/microcode_core_early.c: Define interfaces for early loading ucode Define interfaces load_ucode_bsp() and load_ucode_ap() to load ucode on BSP and AP in early boot time. These are generic interfaces. Internally they call vendor specific implementations. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-6-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/microcode.h | 14 +++++++ arch/x86/kernel/microcode_core_early.c | 76 ++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 arch/x86/kernel/microcode_core_early.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 43d921b..6825e2e 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -57,4 +57,18 @@ static inline struct microcode_ops * __init init_amd_microcode(void) static inline void __exit exit_amd_microcode(void) {} #endif +#ifdef CONFIG_MICROCODE_EARLY +#define MAX_UCODE_COUNT 128 +extern void __init load_ucode_bsp(void); +extern __init void load_ucode_ap(void); +extern int __init save_microcode_in_initrd(void); +#else +static inline void __init load_ucode_bsp(void) {} +static inline __init void load_ucode_ap(void) {} +static inline int __init save_microcode_in_initrd(void) +{ + return 0; +} +#endif + #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c new file mode 100644 index 0000000..577db84 --- /dev/null +++ b/arch/x86/kernel/microcode_core_early.c @@ -0,0 +1,76 @@ +/* + * X86 CPU microcode early update for Linux + * + * Copyright (C) 2012 Fenghua Yu + * H Peter Anvin" + * + * This driver allows to early upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture + * Software Developer's Manual. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) +#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') +#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') +#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') +#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') +#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') +#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') + +#define CPUID_IS(a, b, c, ebx, ecx, edx) \ + (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) + +/* + * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. + * x86_vendor() gets vendor id for BSP. + * + * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify + * coding, we still use x86_vendor() to get vendor id for AP. + * + * x86_vendor() gets vendor information directly through cpuid. 
+ */ +static int __cpuinit x86_vendor(void) +{ + u32 eax = 0x00000000; + u32 ebx, ecx = 0, edx; + + if (!have_cpuid_p()) + return X86_VENDOR_UNKNOWN; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) + return X86_VENDOR_INTEL; + + if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) + return X86_VENDOR_AMD; + + return X86_VENDOR_UNKNOWN; +} + +void __init load_ucode_bsp(void) +{ + int vendor = x86_vendor(); + + if (vendor == X86_VENDOR_INTEL) + load_ucode_intel_bsp(); +} + +void __cpuinit load_ucode_ap(void) +{ + int vendor = x86_vendor(); + + if (vendor == X86_VENDOR_INTEL) + load_ucode_intel_ap(); +} -- cgit v1.1 From e666dfa273db1b12711eaec91facac5fec2ec851 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:26 -0800 Subject: x86/microcode_intel_lib.c: Early update ucode on Intel's CPU Define interfaces microcode_sanity_check() and get_matching_microcode(). They are called both in early boot time and in microcode Intel driver. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-7-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_intel_lib.c | 174 ++++++++++++++++++++++++++++++++++ 1 file changed, 174 insertions(+) create mode 100644 arch/x86/kernel/microcode_intel_lib.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/microcode_intel_lib.c new file mode 100644 index 0000000..ce69320 --- /dev/null +++ b/arch/x86/kernel/microcode_intel_lib.c @@ -0,0 +1,174 @@ +/* + * Intel CPU Microcode Update Driver for Linux + * + * Copyright (C) 2012 Fenghua Yu + * H Peter Anvin" + * + * This driver allows to upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture + * Software Developer's Manual + * Order Number 253668 or free download from: + * + * http://developer.intel.com/Assets/PDF/manual/253668.pdf + * + * For more information, go to http://www.urbanmyth.org/microcode + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include +#include +#include +#include + +#include +#include +#include + +static inline int +update_match_cpu(unsigned int csig, unsigned int cpf, + unsigned int sig, unsigned int pf) +{ + return (!sigmatch(sig, csig, pf, cpf)) ? 0 : 1; +} + +int +update_match_revision(struct microcode_header_intel *mc_header, int rev) +{ + return (mc_header->rev <= rev) ? 0 : 1; +} + +int microcode_sanity_check(void *mc, int print_err) +{ + unsigned long total_size, data_size, ext_table_size; + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + int sum, orig_sum, ext_sigcount = 0, i; + struct extended_signature *ext_sig; + + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); + + if (data_size + MC_HEADER_SIZE > total_size) { + if (print_err) + pr_err("error! Bad data size in microcode data file\n"); + return -EINVAL; + } + + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + if (print_err) + pr_err("error! 
Unknown microcode update format\n"); + return -EINVAL; + } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + if ((ext_table_size < EXT_HEADER_SIZE) + || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + if (print_err) + pr_err("error! Small exttable size in microcode data file\n"); + return -EINVAL; + } + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + if (print_err) + pr_err("error! Bad exttable size in microcode data file\n"); + return -EFAULT; + } + ext_sigcount = ext_header->count; + } + + /* check extended table checksum */ + if (ext_table_size) { + int ext_table_sum = 0; + int *ext_tablep = (int *)ext_header; + + i = ext_table_size / DWSIZE; + while (i--) + ext_table_sum += ext_tablep[i]; + if (ext_table_sum) { + if (print_err) + pr_warn("aborting, bad extended signature table checksum\n"); + return -EINVAL; + } + } + + /* calculate the checksum */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / DWSIZE; + while (i--) + orig_sum += ((int *)mc)[i]; + if (orig_sum) { + if (print_err) + pr_err("aborting, bad checksum\n"); + return -EINVAL; + } + if (!ext_table_size) + return 0; + /* check extended signature checksum */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (void *)ext_header + EXT_HEADER_SIZE + + EXT_SIGNATURE_SIZE * i; + sum = orig_sum + - (mc_header->sig + mc_header->pf + mc_header->cksum) + + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + if (print_err) + pr_err("aborting, bad checksum\n"); + return -EINVAL; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(microcode_sanity_check); + +/* + * return 0 - no update found + * return 1 - found update + */ +int get_matching_sig(unsigned int csig, int cpf, void *mc, int rev) +{ + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + + if (update_match_cpu(csig, cpf, mc_header->sig, mc_header->pf)) + return 1; + + /* Look for ext. headers: */ + if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE) + return 0; + + ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (i = 0; i < ext_sigcount; i++) { + if (update_match_cpu(csig, cpf, ext_sig->sig, ext_sig->pf)) + return 1; + ext_sig++; + } + return 0; +} + +/* + * return 0 - no update found + * return 1 - found update + */ +int get_matching_microcode(unsigned int csig, int cpf, void *mc, int rev) +{ + struct microcode_header_intel *mc_header = mc; + + if (!update_match_revision(mc_header, rev)) + return 0; + + return get_matching_sig(csig, cpf, mc, rev); +} +EXPORT_SYMBOL_GPL(get_matching_microcode); -- cgit v1.1 From 086fc8f8037bf16f55f82c66b26a8b834f7349ec Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:27 -0800 Subject: x86/tlbflush.h: Define __native_flush_tlb_global_irq_disabled() This function is called in __native_flush_tlb_global() and after apply_microcode_early(). Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-8-git-send-email-fenghua.yu@intel.com Signed-off-by: H. 
Peter Anvin --- arch/x86/include/asm/tlbflush.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 0fee48e..50a7fc0 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -20,10 +20,20 @@ static inline void __native_flush_tlb(void) native_write_cr3(native_read_cr3()); } +static inline void __native_flush_tlb_global_irq_disabled(void) +{ + unsigned long cr4; + + cr4 = native_read_cr4(); + /* clear PGE */ + native_write_cr4(cr4 & ~X86_CR4_PGE); + /* write old PGE again and flush TLBs */ + native_write_cr4(cr4); +} + static inline void __native_flush_tlb_global(void) { unsigned long flags; - unsigned long cr4; /* * Read-modify-write to CR4 - protect it from preemption and @@ -32,11 +42,7 @@ static inline void __native_flush_tlb_global(void) */ raw_local_irq_save(flags); - cr4 = native_read_cr4(); - /* clear PGE */ - native_write_cr4(cr4 & ~X86_CR4_PGE); - /* write old PGE again and flush TLBs */ - native_write_cr4(cr4); + __native_flush_tlb_global_irq_disabled(); raw_local_irq_restore(flags); } -- cgit v1.1 From ec400ddeff200b068ddc6c70f7321f49ecf32ed5 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:28 -0800 Subject: x86/microcode_intel_early.c: Early update ucode on Intel's CPU Implementation of early update ucode on Intel's CPU. load_ucode_intel_bsp() scans ucode in initrd image file which is a cpio format ucode followed by ordinary initrd image file. The binary ucode file is stored in kernel/x86/microcode/GenuineIntel.bin in the cpio data. All ucode patches with the same model as BSP are saved in memory. A matching ucode patch is updated on BSP. load_ucode_intel_ap() reads saved ucoded patches and updates ucode on AP. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-9-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_intel_early.c | 796 ++++++++++++++++++++++++++++++++ 1 file changed, 796 insertions(+) create mode 100644 arch/x86/kernel/microcode_intel_early.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c new file mode 100644 index 0000000..7890bc8 --- /dev/null +++ b/arch/x86/kernel/microcode_intel_early.c @@ -0,0 +1,796 @@ +/* + * Intel CPU microcode early update for Linux + * + * Copyright (C) 2012 Fenghua Yu + * H Peter Anvin" + * + * This allows to early upgrade microcode on Intel processors + * belonging to IA-32 family - PentiumPro, Pentium II, + * Pentium III, Xeon, Pentium 4, etc. + * + * Reference: Section 9.11 of Volume 3, IA-32 Intel Architecture + * Software Developer's Manual. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long mc_saved_in_initrd[MAX_UCODE_COUNT]; +struct mc_saved_data { + unsigned int mc_saved_count; + struct microcode_intel **mc_saved; +} mc_saved_data; + +static enum ucode_state __cpuinit +generic_load_microcode_early(struct microcode_intel **mc_saved_p, + unsigned int mc_saved_count, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *ucode_ptr, *new_mc = NULL; + int new_rev = uci->cpu_sig.rev; + enum ucode_state state = UCODE_OK; + unsigned int mc_size; + struct microcode_header_intel *mc_header; + unsigned int csig = uci->cpu_sig.sig; + unsigned int cpf = uci->cpu_sig.pf; + int i; + + for (i = 0; i < mc_saved_count; i++) { + ucode_ptr = mc_saved_p[i]; + + mc_header = (struct microcode_header_intel *)ucode_ptr; + mc_size = get_totalsize(mc_header); + if (get_matching_microcode(csig, cpf, ucode_ptr, new_rev)) { + new_rev = mc_header->rev; + new_mc = ucode_ptr; + } + } + + if (!new_mc) { + state = UCODE_NFOUND; + goto out; + } + + uci->mc = (struct microcode_intel *)new_mc; +out: + return state; +} + +static void __cpuinit +microcode_pointer(struct microcode_intel **mc_saved, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start, int mc_saved_count) +{ + int i; + + for (i = 0; i < mc_saved_count; i++) + mc_saved[i] = (struct microcode_intel *) + (mc_saved_in_initrd[i] + initrd_start); +} + +#ifdef CONFIG_X86_32 +static void __cpuinit +microcode_phys(struct microcode_intel **mc_saved_tmp, + struct mc_saved_data *mc_saved_data) +{ + int i; + struct microcode_intel ***mc_saved; + + mc_saved = (struct microcode_intel ***) + __pa_symbol(&mc_saved_data->mc_saved); + for (i = 0; i < mc_saved_data->mc_saved_count; i++) { + struct microcode_intel *p; + + p = *(struct microcode_intel **) + __pa(mc_saved_data->mc_saved + i); + mc_saved_tmp[i] = (struct microcode_intel *)__pa(p); + } +} +#endif + +static enum ucode_state __cpuinit +load_microcode(struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int count = mc_saved_data->mc_saved_count; + + if (!mc_saved_data->mc_saved) { + microcode_pointer(mc_saved_tmp, mc_saved_in_initrd, + initrd_start, count); + + return generic_load_microcode_early(mc_saved_tmp, count, uci); + } else { +#ifdef CONFIG_X86_32 + microcode_phys(mc_saved_tmp, mc_saved_data); + return generic_load_microcode_early(mc_saved_tmp, count, uci); +#else + return generic_load_microcode_early(mc_saved_data->mc_saved, + count, uci); +#endif + } +} + +static u8 get_x86_family(unsigned long sig) +{ + u8 x86; + + x86 = (sig >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (sig >> 20) & 0xff; + + return x86; +} + +static u8 get_x86_model(unsigned long sig) +{ + u8 x86, x86_model; + + x86 = get_x86_family(sig); + x86_model = (sig >> 4) & 0xf; + + if (x86 == 0x6 || x86 == 0xf) + x86_model += ((sig >> 16) & 0xf) << 4; + + return x86_model; +} + +/* + * Given CPU signature and a microcode patch, this function finds if the + * microcode patch has matching family and model with the CPU. 
+ */ +static enum ucode_state +matching_model_microcode(struct microcode_header_intel *mc_header, + unsigned long sig) +{ + u8 x86, x86_model; + u8 x86_ucode, x86_model_ucode; + struct extended_sigtable *ext_header; + unsigned long total_size = get_totalsize(mc_header); + unsigned long data_size = get_datasize(mc_header); + int ext_sigcount, i; + struct extended_signature *ext_sig; + + x86 = get_x86_family(sig); + x86_model = get_x86_model(sig); + + x86_ucode = get_x86_family(mc_header->sig); + x86_model_ucode = get_x86_model(mc_header->sig); + + if (x86 == x86_ucode && x86_model == x86_model_ucode) + return UCODE_OK; + + /* Look for ext. headers: */ + if (total_size <= data_size + MC_HEADER_SIZE) + return UCODE_NFOUND; + + ext_header = (struct extended_sigtable *) + mc_header + data_size + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (i = 0; i < ext_sigcount; i++) { + x86_ucode = get_x86_family(ext_sig->sig); + x86_model_ucode = get_x86_model(ext_sig->sig); + + if (x86 == x86_ucode && x86_model == x86_model_ucode) + return UCODE_OK; + + ext_sig++; + } + + return UCODE_NFOUND; +} + +static int +save_microcode(struct mc_saved_data *mc_saved_data, + struct microcode_intel **mc_saved_src, + unsigned int mc_saved_count) +{ + int i, j; + struct microcode_intel **mc_saved_p; + int ret; + + if (!mc_saved_count) + return -EINVAL; + + /* + * Copy new microcode data. + */ + mc_saved_p = kmalloc(mc_saved_count*sizeof(struct microcode_intel *), + GFP_KERNEL); + if (!mc_saved_p) + return -ENOMEM; + + for (i = 0; i < mc_saved_count; i++) { + struct microcode_intel *mc = mc_saved_src[i]; + struct microcode_header_intel *mc_header = &mc->hdr; + unsigned long mc_size = get_totalsize(mc_header); + mc_saved_p[i] = kmalloc(mc_size, GFP_KERNEL); + if (!mc_saved_p[i]) { + ret = -ENOMEM; + goto err; + } + if (!mc_saved_src[i]) { + ret = -EINVAL; + goto err; + } + memcpy(mc_saved_p[i], mc, mc_size); + } + + /* + * Point to newly saved microcode. + */ + mc_saved_data->mc_saved = mc_saved_p; + mc_saved_data->mc_saved_count = mc_saved_count; + + return 0; + +err: + for (j = 0; j <= i; j++) + kfree(mc_saved_p[j]); + kfree(mc_saved_p); + + return ret; +} + +/* + * A microcode patch in ucode_ptr is saved into mc_saved + * - if it has matching signature and newer revision compared to an existing + * patch mc_saved. + * - or if it is a newly discovered microcode patch. + * + * The microcode patch should have matching model with CPU. + */ +static void _save_mc(struct microcode_intel **mc_saved, u8 *ucode_ptr, + unsigned int *mc_saved_count_p) +{ + int i; + int found = 0; + unsigned int mc_saved_count = *mc_saved_count_p; + struct microcode_header_intel *mc_header; + + mc_header = (struct microcode_header_intel *)ucode_ptr; + for (i = 0; i < mc_saved_count; i++) { + unsigned int sig, pf; + unsigned int new_rev; + struct microcode_header_intel *mc_saved_header = + (struct microcode_header_intel *)mc_saved[i]; + sig = mc_saved_header->sig; + pf = mc_saved_header->pf; + new_rev = mc_header->rev; + + if (get_matching_sig(sig, pf, ucode_ptr, new_rev)) { + found = 1; + if (update_match_revision(mc_header, new_rev)) { + /* + * Found an older ucode saved before. + * Replace the older one with this newer + * one. + */ + mc_saved[i] = + (struct microcode_intel *)ucode_ptr; + break; + } + } + } + if (i >= mc_saved_count && !found) + /* + * This ucode is first time discovered in ucode file. + * Save it to memory. 
+ */ + mc_saved[mc_saved_count++] = + (struct microcode_intel *)ucode_ptr; + + *mc_saved_count_p = mc_saved_count; +} + +/* + * Get microcode matching with BSP's model. Only CPUs with the same model as + * BSP can stay in the platform. + */ +static enum ucode_state __init +get_matching_model_microcode(int cpu, unsigned long start, + void *data, size_t size, + struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + struct ucode_cpu_info *uci) +{ + u8 *ucode_ptr = data; + unsigned int leftover = size; + enum ucode_state state = UCODE_OK; + unsigned int mc_size; + struct microcode_header_intel *mc_header; + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int mc_saved_count = mc_saved_data->mc_saved_count; + int i; + + while (leftover) { + mc_header = (struct microcode_header_intel *)ucode_ptr; + + mc_size = get_totalsize(mc_header); + if (!mc_size || mc_size > leftover || + microcode_sanity_check(ucode_ptr, 0) < 0) + break; + + leftover -= mc_size; + + /* + * Since APs with same family and model as the BSP may boot in + * the platform, we need to find and save microcode patches + * with the same family and model as the BSP. + */ + if (matching_model_microcode(mc_header, uci->cpu_sig.sig) != + UCODE_OK) { + ucode_ptr += mc_size; + continue; + } + + _save_mc(mc_saved_tmp, ucode_ptr, &mc_saved_count); + + ucode_ptr += mc_size; + } + + if (leftover) { + state = UCODE_ERROR; + goto out; + } + + if (mc_saved_count == 0) { + state = UCODE_NFOUND; + goto out; + } + + for (i = 0; i < mc_saved_count; i++) + mc_saved_in_initrd[i] = (unsigned long)mc_saved_tmp[i] - start; + + mc_saved_data->mc_saved_count = mc_saved_count; +out: + return state; +} + +#define native_rdmsr(msr, val1, val2) \ +do { \ + u64 __val = native_read_msr((msr)); \ + (void)((val1) = (u32)__val); \ + (void)((val2) = (u32)(__val >> 32)); \ +} while (0) + +#define native_wrmsr(msr, low, high) \ + native_write_msr(msr, low, high); + +static int __cpuinit collect_cpu_info_early(struct ucode_cpu_info *uci) +{ + unsigned int val[2]; + u8 x86, x86_model; + struct cpu_signature csig; + unsigned int eax, ebx, ecx, edx; + + csig.sig = 0; + csig.pf = 0; + csig.rev = 0; + + memset(uci, 0, sizeof(*uci)); + + eax = 0x00000001; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + csig.sig = eax; + + x86 = get_x86_family(csig.sig); + x86_model = get_x86_model(csig.sig); + + if ((x86_model >= 5) || (x86 > 6)) { + /* get processor flags from MSR 0x17 */ + native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + csig.pf = 1 << ((val[1] >> 18) & 7); + } + native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + + csig.rev = val[1]; + + uci->cpu_sig = csig; + uci->valid = 1; + + return 0; +} + +#ifdef DEBUG +static void __ref show_saved_mc(void) +{ + int i, j; + unsigned int sig, pf, rev, total_size, data_size, date; + struct ucode_cpu_info uci; + + if (mc_saved_data.mc_saved_count == 0) { + pr_debug("no micorcode data saved.\n"); + return; + } + pr_debug("Total microcode saved: %d\n", mc_saved_data.mc_saved_count); + + collect_cpu_info_early(&uci); + + sig = uci.cpu_sig.sig; + pf = uci.cpu_sig.pf; + rev = uci.cpu_sig.rev; + pr_debug("CPU%d: sig=0x%x, pf=0x%x, rev=0x%x\n", + smp_processor_id(), sig, pf, rev); + + for (i = 0; i < mc_saved_data.mc_saved_count; i++) { + struct microcode_header_intel *mc_saved_header; + struct extended_sigtable *ext_header; + int 
ext_sigcount; + struct extended_signature *ext_sig; + + mc_saved_header = (struct microcode_header_intel *) + mc_saved_data.mc_saved[i]; + sig = mc_saved_header->sig; + pf = mc_saved_header->pf; + rev = mc_saved_header->rev; + total_size = get_totalsize(mc_saved_header); + data_size = get_datasize(mc_saved_header); + date = mc_saved_header->date; + + pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, toal size=0x%x, date = %04x-%02x-%02x\n", + i, sig, pf, rev, total_size, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); + + /* Look for ext. headers: */ + if (total_size <= data_size + MC_HEADER_SIZE) + continue; + + ext_header = (struct extended_sigtable *) + mc_saved_header + data_size + MC_HEADER_SIZE; + ext_sigcount = ext_header->count; + ext_sig = (void *)ext_header + EXT_HEADER_SIZE; + + for (j = 0; j < ext_sigcount; j++) { + sig = ext_sig->sig; + pf = ext_sig->pf; + + pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", + j, sig, pf); + + ext_sig++; + } + + } +} +#else +static inline void show_saved_mc(void) +{ +} +#endif + +#if defined(CONFIG_MICROCODE_INTEL_EARLY) && defined(CONFIG_HOTPLUG_CPU) +/* + * Save this mc into mc_saved_data. So it will be loaded early when a CPU is + * hot added or resumes. + * + * Please make sure this mc should be a valid microcode patch before calling + * this function. + */ +int save_mc_for_early(u8 *mc) +{ + struct microcode_intel *mc_saved_tmp[MAX_UCODE_COUNT]; + unsigned int mc_saved_count_init; + unsigned int mc_saved_count; + struct microcode_intel **mc_saved; + int ret = 0; + int i; + + /* + * Hold hotplug lock so mc_saved_data is not accessed by a CPU in + * hotplug. + */ + cpu_hotplug_driver_lock(); + + mc_saved_count_init = mc_saved_data.mc_saved_count; + mc_saved_count = mc_saved_data.mc_saved_count; + mc_saved = mc_saved_data.mc_saved; + + if (mc_saved && mc_saved_count) + memcpy(mc_saved_tmp, mc_saved, + mc_saved_count * sizeof(struct mirocode_intel *)); + /* + * Save the microcode patch mc in mc_save_tmp structure if it's a newer + * version. + */ + + _save_mc(mc_saved_tmp, mc, &mc_saved_count); + + /* + * Save the mc_save_tmp in global mc_saved_data. + */ + ret = save_microcode(&mc_saved_data, mc_saved_tmp, mc_saved_count); + if (ret) { + pr_err("Can not save microcode patch.\n"); + goto out; + } + + show_saved_mc(); + + /* + * Free old saved microcod data. + */ + if (mc_saved) { + for (i = 0; i < mc_saved_count_init; i++) + kfree(mc_saved[i]); + kfree(mc_saved); + } + +out: + cpu_hotplug_driver_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(save_mc_for_early); +#endif + +static __initdata char ucode_name[] = "kernel/x86/microcode/GenuineIntel.bin"; +static __init enum ucode_state +scan_microcode(unsigned long start, unsigned long end, + struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + struct ucode_cpu_info *uci) +{ + unsigned int size = end - start + 1; + struct cpio_data cd; + long offset = 0; +#ifdef CONFIG_X86_32 + char *p = (char *)__pa_symbol(ucode_name); +#else + char *p = ucode_name; +#endif + + cd.data = NULL; + cd.size = 0; + + cd = find_cpio_data(p, (void *)start, size, &offset); + if (!cd.data) + return UCODE_ERROR; + + + return get_matching_model_microcode(0, start, cd.data, cd.size, + mc_saved_data, mc_saved_in_initrd, + uci); +} + +/* + * Print ucode update info. 
+ */ +static void __cpuinit +print_ucode_info(struct ucode_cpu_info *uci, unsigned int date) +{ + int cpu = smp_processor_id(); + + pr_info("CPU%d microcode updated early to revision 0x%x, date = %04x-%02x-%02x\n", + cpu, + uci->cpu_sig.rev, + date & 0xffff, + date >> 24, + (date >> 16) & 0xff); +} + +#ifdef CONFIG_X86_32 + +static int delay_ucode_info; +static int current_mc_date; + +/* + * Print early updated ucode info after printk works. This is delayed info dump. + */ +void __cpuinit show_ucode_info_early(void) +{ + struct ucode_cpu_info uci; + + if (delay_ucode_info) { + collect_cpu_info_early(&uci); + print_ucode_info(&uci, current_mc_date); + delay_ucode_info = 0; + } +} + +/* + * At this point, we can not call printk() yet. Keep microcode patch number in + * mc_saved_data.mc_saved and delay printing microcode info in + * show_ucode_info_early() until printk() works. + */ +static void __cpuinit print_ucode(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + int *delay_ucode_info_p; + int *current_mc_date_p; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return; + + delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info); + current_mc_date_p = (int *)__pa_symbol(¤t_mc_date); + + *delay_ucode_info_p = 1; + *current_mc_date_p = mc_intel->hdr.date; +} +#else + +/* + * Flush global tlb. We only do this in x86_64 where paging has been enabled + * already and PGE should be enabled as well. + */ +static inline void __cpuinit flush_tlb_early(void) +{ + __native_flush_tlb_global_irq_disabled(); +} + +static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return; + + print_ucode_info(uci, mc_intel->hdr.date); +} +#endif + +static int apply_microcode_early(struct mc_saved_data *mc_saved_data, + struct ucode_cpu_info *uci) +{ + struct microcode_intel *mc_intel; + unsigned int val[2]; + + mc_intel = uci->mc; + if (mc_intel == NULL) + return 0; + + /* write microcode via MSR 0x79 */ + native_wrmsr(MSR_IA32_UCODE_WRITE, + (unsigned long) mc_intel->bits, + (unsigned long) mc_intel->bits >> 16 >> 16); + native_wrmsr(MSR_IA32_UCODE_REV, 0, 0); + + /* As documented in the SDM: Do a CPUID 1 here */ + sync_core(); + + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); + if (val[1] != mc_intel->hdr.rev) + return -1; + +#ifdef CONFIG_X86_64 + /* Flush global tlb. This is precaution. */ + flush_tlb_early(); +#endif + uci->cpu_sig.rev = val[1]; + + print_ucode(uci); + + return 0; +} + +/* + * This function converts microcode patch offsets previously stored in + * mc_saved_in_initrd to pointers and stores the pointers in mc_saved_data. 
+ */ +int __init save_microcode_in_initrd(void) +{ + unsigned int count = mc_saved_data.mc_saved_count; + struct microcode_intel *mc_saved[MAX_UCODE_COUNT]; + int ret = 0; + + if (count == 0) + return ret; + + microcode_pointer(mc_saved, mc_saved_in_initrd, initrd_start, count); + ret = save_microcode(&mc_saved_data, mc_saved, count); + if (ret) + pr_err("Cannot save microcode patches from initrd.\n"); + + show_saved_mc(); + + return ret; +} + +static void __init +_load_ucode_intel_bsp(struct mc_saved_data *mc_saved_data, + unsigned long *mc_saved_in_initrd, + unsigned long initrd_start_early, + unsigned long initrd_end_early, + struct ucode_cpu_info *uci) +{ + collect_cpu_info_early(uci); + scan_microcode(initrd_start_early, initrd_end_early, mc_saved_data, + mc_saved_in_initrd, uci); + load_microcode(mc_saved_data, mc_saved_in_initrd, + initrd_start_early, uci); + apply_microcode_early(mc_saved_data, uci); +} + +void __init +load_ucode_intel_bsp(void) +{ + u64 ramdisk_image, ramdisk_size; + unsigned long initrd_start_early, initrd_end_early; + struct ucode_cpu_info uci; +#ifdef CONFIG_X86_32 + struct boot_params *boot_params_p; + + boot_params_p = (struct boot_params *)__pa_symbol(&boot_params); + ramdisk_image = boot_params_p->hdr.ramdisk_image; + ramdisk_size = boot_params_p->hdr.ramdisk_size; + initrd_start_early = ramdisk_image; + initrd_end_early = initrd_start_early + ramdisk_size; + + _load_ucode_intel_bsp( + (struct mc_saved_data *)__pa_symbol(&mc_saved_data), + (unsigned long *)__pa_symbol(&mc_saved_in_initrd), + initrd_start_early, initrd_end_early, &uci); +#else + ramdisk_image = boot_params.hdr.ramdisk_image; + ramdisk_size = boot_params.hdr.ramdisk_size; + initrd_start_early = ramdisk_image + PAGE_OFFSET; + initrd_end_early = initrd_start_early + ramdisk_size; + + _load_ucode_intel_bsp(&mc_saved_data, mc_saved_in_initrd, + initrd_start_early, initrd_end_early, &uci); +#endif +} + +void __cpuinit load_ucode_intel_ap(void) +{ + struct mc_saved_data *mc_saved_data_p; + struct ucode_cpu_info uci; + unsigned long *mc_saved_in_initrd_p; + unsigned long initrd_start_addr; +#ifdef CONFIG_X86_32 + unsigned long *initrd_start_p; + + mc_saved_in_initrd_p = + (unsigned long *)__pa_symbol(mc_saved_in_initrd); + mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data); + initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start); + initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p); +#else + mc_saved_data_p = &mc_saved_data; + mc_saved_in_initrd_p = mc_saved_in_initrd; + initrd_start_addr = initrd_start; +#endif + + /* + * If there is no valid ucode previously saved in memory, no need to + * update ucode on this AP. + */ + if (mc_saved_data_p->mc_saved_count == 0) + return; + + collect_cpu_info_early(&uci); + load_microcode(mc_saved_data_p, mc_saved_in_initrd_p, + initrd_start_addr, &uci); + apply_microcode_early(mc_saved_data_p, &uci); +} -- cgit v1.1 From 63b553c68db5a8d4febcd1010b194333d2b02e1c Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:29 -0800 Subject: x86/head_32.S: Early update ucode in 32-bit This updates ucode in the 32-bit kernel on the BSP and APs. At this point, there is no paging and no virtual addressing yet. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-10-git-send-email-fenghua.yu@intel.com Signed-off-by: H.
Peter Anvin --- arch/x86/kernel/head_32.S | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8e7f655..2f70530 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -144,6 +144,11 @@ ENTRY(startup_32) movl %eax, pa(olpc_ofw_pgd) #endif +#ifdef CONFIG_MICROCODE_EARLY + /* Early load ucode on BSP. */ + call load_ucode_bsp +#endif + /* * Initialize page tables. This creates a PDE and a set of page * tables, which are located immediately beyond __brk_base. The variable @@ -299,6 +304,12 @@ ENTRY(startup_32_smp) movl %eax,%ss leal -__PAGE_OFFSET(%ecx),%esp +#ifdef CONFIG_MICROCODE_EARLY + /* Early load ucode on AP. */ + call load_ucode_ap +#endif + + default_entry: /* * New page tables may be in 4Mbyte page mode and may -- cgit v1.1 From feddc9de8bf69415da6f96eca0219186b70d145a Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:30 -0800 Subject: x86/head64.c: Early update ucode in 64-bit This updates ucode on the BSP in 64-bit mode. Paging and virtual addressing are working now. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-11-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head64.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 57334f4c..3ff3570 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * Manage page tables very early on. @@ -170,6 +171,11 @@ void __init x86_64_start_kernel(char * real_mode_data) copy_bootdata(__va(real_mode_data)); + /* + * Load microcode early on BSP. + */ + load_ucode_bsp(); + if (console_loglevel == 10) early_printk("Kernel alive\n"); -- cgit v1.1 From cd745be89e1580e8a1b47454a39f97f9c5c4b1e0 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:31 -0800 Subject: x86/mm/init.c: Copy ucode from initrd image to kernel memory Before the initrd image is freed, copy valid ucode patches from it to kernel memory. The saved ucode will be used to update APs on resume or hotplug. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-12-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index d418152..4903a03 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -16,6 +16,7 @@ #include #include #include /* for MAX_DMA_PFN */ +#include #include "mm_internal.h" @@ -534,6 +535,15 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void __init free_initrd_mem(unsigned long start, unsigned long end) { +#ifdef CONFIG_MICROCODE_EARLY + /* + * Remember, initrd memory may contain microcode or other useful things. + * Before we lose initrd mem, we need to find a place to hold them + * now that normal virtual memory is enabled. + */ + save_microcode_in_initrd(); +#endif + /* * end might not be aligned, and we cannot align it; the * decompressor could be confused by an aligned initrd_end -- cgit v1.1 From da76f64e7eb28b718501d15c1b79af560b7ca4ea Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Thu, 20 Dec 2012 23:44:32 -0800 Subject: x86/Kconfig: Make early microcode loading a configuration feature MICROCODE_INTEL_LIB, MICROCODE_INTEL_EARLY, and MICROCODE_EARLY are three new configuration options that enable or disable the feature.
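(Editorial sketch, not part of the original patch: a boot-path call site keys off the umbrella option with the usual guard, mirroring the assembly guard added to head_32.S earlier in this series; load_ucode_bsp() is the entry point named there.)

#ifdef CONFIG_MICROCODE_EARLY
	/* compiled in only when early microcode loading is configured */
	load_ucode_bsp();
#endif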
Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1356075872-3054-13-git-send-email-fenghua.yu@intel.com Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..e243da7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1029,6 +1029,24 @@ config MICROCODE_OLD_INTERFACE def_bool y depends on MICROCODE +config MICROCODE_INTEL_LIB + def_bool y + depends on MICROCODE_INTEL + +config MICROCODE_INTEL_EARLY + bool "Early load microcode" + depends on MICROCODE_INTEL && BLK_DEV_INITRD + default y + help + This option provides functionality to read additional microcode data + at the beginning of the initrd image. The data tells the kernel to load + microcode to CPUs as early as possible. There is no functional change if + no microcode data is glued to the initrd, therefore it's safe to say Y. + +config MICROCODE_EARLY + def_bool y + depends on MICROCODE_INTEL_EARLY + config X86_MSR tristate "/dev/cpu/*/msr - Model-specific register support" ---help--- -- cgit v1.1 From 6bf08a8dcd1ef13e542f08fc3b1ce6cf64ae63b6 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 29 Jan 2013 16:32:16 -0500 Subject: x86, AMD: Clean up init_amd() Clean up multiple declarations of the variable used for rd/wrmsr. Signed-off-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1359495136-23244-1-git-send-email-ostr@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 15239ff..dd4a5b6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -518,10 +518,9 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) static void __cpuinit init_amd(struct cpuinfo_x86 *c) { u32 dummy; - -#ifdef CONFIG_SMP unsigned long long value; +#ifdef CONFIG_SMP /* * Disable TLB flush filter by setting HWCR.FFDIS on K8 * bit 6 of msr C001_0015 @@ -559,12 +558,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * (AMD Erratum #110, docId: 25759).
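 * The fix below clears the feature bit and bit 32 of MSR 0xc001100d.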
*/ if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { - u64 val; - clear_cpu_cap(c, X86_FEATURE_LAHF_LM); - if (!rdmsrl_amd_safe(0xc001100d, &val)) { - val &= ~(1ULL << 32); - wrmsrl_amd_safe(0xc001100d, val); + if (!rdmsrl_amd_safe(0xc001100d, &value)) { + value &= ~(1ULL << 32); + wrmsrl_amd_safe(0xc001100d, value); } } @@ -617,13 +614,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if ((c->x86 == 0x15) && (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && !cpu_has(c, X86_FEATURE_TOPOEXT)) { - u64 val; - if (!rdmsrl_safe(0xc0011005, &val)) { - val |= 1ULL << 54; - wrmsrl_safe(0xc0011005, val); - rdmsrl(0xc0011005, val); - if (val & (1ULL << 54)) { + if (!rdmsrl_safe(0xc0011005, &value)) { + value |= 1ULL << 54; + wrmsrl_safe(0xc0011005, value); + rdmsrl(0xc0011005, value); + if (value & (1ULL << 54)) { set_cpu_cap(c, X86_FEATURE_TOPOEXT); printk(KERN_INFO FW_INFO "CPU: Re-enabling " "disabled Topology Extensions Support\n"); @@ -637,11 +633,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) */ if ((c->x86 == 0x15) && (c->x86_model >= 0x02) && (c->x86_model < 0x20)) { - u64 val; - if (!rdmsrl_safe(0xc0011021, &val) && !(val & 0x1E)) { - val |= 0x1E; - wrmsrl_safe(0xc0011021, val); + if (!rdmsrl_safe(0xc0011021, &value) && !(value & 0x1E)) { + value |= 0x1E; + wrmsrl_safe(0xc0011021, value); } } -- cgit v1.1 From f0322bd341fd63261527bf84afd3272bcc2e8dd3 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Tue, 29 Jan 2013 16:32:49 -0500 Subject: x86, AMD: Enable WC+ memory type on family 10 processors In some cases BIOS may not enable WC+ memory type on family 10 processors, instead converting what would be WC+ memory to CD type. On guests using nested pages this could result in performance degradation. This patch enables WC+. Signed-off-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1359495169-23278-1-git-send-email-ostr@amd64.org Signed-off-by: H. Peter Anvin --- arch/x86/include/uapi/asm/msr-index.h | 1 + arch/x86/kernel/cpu/amd.c | 21 ++++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 433a59f..158cde9 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -173,6 +173,7 @@ #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_BU_CFG2 0xc001102a #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index dd4a5b6..721ef32 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -698,13 +698,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); - /* - * Disable GART TLB Walk Errors on Fam10h. We do this here - * because this is always needed when GART is enabled, even in a - * kernel which has no MCE support built in. - */ if (c->x86 == 0x10) { /* + * Disable GART TLB Walk Errors on Fam10h. We do this here + * because this is always needed when GART is enabled, even in a + * kernel which has no MCE support built in. * BIOS should disable GartTlbWlk Errors themself. If * it doesn't do it here as suggested by the BKDG. 
* @@ -718,6 +716,19 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) mask |= (1 << 10); wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); } + + /* + * On family 10h BIOS may not have properly enabled WC+ support, + * causing it to be converted to CD memtype. This may result in + * performance degradation for certain nested-paging guests. + * Prevent this conversion by clearing bit 24 in + * MSR_AMD64_BU_CFG2. + */ + if (c->x86 == 0x10) { + rdmsrl(MSR_AMD64_BU_CFG2, value); + value &= ~(1ULL << 24); + wrmsrl(MSR_AMD64_BU_CFG2, value); + } } rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); -- cgit v1.1 From f03574f2d5b2d6229dcdf2d322848065f72953c7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 30 Jan 2013 16:56:16 -0800 Subject: x86-32, mm: Rip out x86_32 NUMA remapping code This code was an optimization for 32-bit NUMA systems. It has probably been the cause of a number of subtle bugs over the years, although the conditions to excite them would have been hard to trigger. Essentially, we remap part of the kernel linear mapping area, and then sometimes part of that area gets freed back into the bootmem allocator. If those pages get used by kernel data structures (say mem_map[] or a dentry), there's no big deal. But, if anyone ever tried to use the linear mapping for these pages _and_ cared about their physical address, bad things happen. For instance, say you passed __GFP_ZERO to the page allocator and then happened to get handed one of these pages, it would zero the remapped page, but it would make a pte to the _old_ page. There are probably a hundred other ways that it could screw with things. We don't need to hang on to performance optimizations for these old boxes any more. All my 32-bit NUMA systems are long dead and buried, and I probably had access to more than most people. This code is causing real things to break today: https://lkml.org/lkml/2013/1/9/376 I looked into actually fixing this, but it requires surgery to way too much brittle code, as well as stuff like per_cpu_ptr_to_phys(). [ hpa: Cc: this for -stable, since it is a memory corruption issue. However, an alternative is to simply mark NUMA as depends BROKEN rather than EXPERIMENTAL in the X86_32 subclause... ] Link: http://lkml.kernel.org/r/20130131005616.1C79F411@kernel.stglabs.ibm.com Signed-off-by: H. Peter Anvin Cc: --- arch/x86/Kconfig | 4 -- arch/x86/mm/numa.c | 3 - arch/x86/mm/numa_32.c | 161 -------------------------------------------- arch/x86/mm/numa_internal.h | 6 -- 4 files changed, 174 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..108efcb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1253,10 +1253,6 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables.
-config HAVE_ARCH_ALLOC_REMAP - def_bool y - depends on X86_32 && NUMA - config ARCH_HAVE_MEMORY_PRESENT def_bool y depends on X86_32 && DISCONTIGMEM diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index b2313c6..61c2b6f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -205,9 +205,6 @@ static void __init setup_node_data(int nid, u64 start, u64 end) if (end && (end - start) < NODE_MIN_SIZE) return; - /* initialize remap allocator before aligning to ZONE_ALIGN */ - init_alloc_remap(nid, start, end); - start = roundup(start, ZONE_ALIGN); printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 534255a..73a6d73 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -73,167 +73,6 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, extern unsigned long highend_pfn, highstart_pfn; -#define LARGE_PAGE_BYTES (PTRS_PER_PTE * PAGE_SIZE) - -static void *node_remap_start_vaddr[MAX_NUMNODES]; -void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); - -/* - * Remap memory allocator - */ -static unsigned long node_remap_start_pfn[MAX_NUMNODES]; -static void *node_remap_end_vaddr[MAX_NUMNODES]; -static void *node_remap_alloc_vaddr[MAX_NUMNODES]; - -/** - * alloc_remap - Allocate remapped memory - * @nid: NUMA node to allocate memory from - * @size: The size of allocation - * - * Allocate @size bytes from the remap area of NUMA node @nid. The - * size of the remap area is predetermined by init_alloc_remap() and - * only the callers considered there should call this function. For - * more info, please read the comment on top of init_alloc_remap(). - * - * The caller must be ready to handle allocation failure from this - * function and fall back to regular memory allocator in such cases. - * - * CONTEXT: - * Single CPU early boot context. - * - * RETURNS: - * Pointer to the allocated memory on success, %NULL on failure. - */ -void *alloc_remap(int nid, unsigned long size) -{ - void *allocation = node_remap_alloc_vaddr[nid]; - - size = ALIGN(size, L1_CACHE_BYTES); - - if (!allocation || (allocation + size) > node_remap_end_vaddr[nid]) - return NULL; - - node_remap_alloc_vaddr[nid] += size; - memset(allocation, 0, size); - - return allocation; -} - -#ifdef CONFIG_HIBERNATION -/** - * resume_map_numa_kva - add KVA mapping to the temporary page tables created - * during resume from hibernation - * @pgd_base - temporary resume page directory - */ -void resume_map_numa_kva(pgd_t *pgd_base) -{ - int node; - - for_each_online_node(node) { - unsigned long start_va, start_pfn, nr_pages, pfn; - - start_va = (unsigned long)node_remap_start_vaddr[node]; - start_pfn = node_remap_start_pfn[node]; - nr_pages = (node_remap_end_vaddr[node] - - node_remap_start_vaddr[node]) >> PAGE_SHIFT; - - printk(KERN_DEBUG "%s: node %d\n", __func__, node); - - for (pfn = 0; pfn < nr_pages; pfn += PTRS_PER_PTE) { - unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); - pgd_t *pgd = pgd_base + pgd_index(vaddr); - pud_t *pud = pud_offset(pgd, vaddr); - pmd_t *pmd = pmd_offset(pud, vaddr); - - set_pmd(pmd, pfn_pmd(start_pfn + pfn, - PAGE_KERNEL_LARGE_EXEC)); - - printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", - __func__, vaddr, start_pfn + pfn); - } - } -} -#endif - -/** - * init_alloc_remap - Initialize remap allocator for a NUMA node - * @nid: NUMA node to initizlie remap allocator for - * - * NUMA nodes may end up without any lowmem. 
As allocating pgdat and - * memmap on a different node with lowmem is inefficient, a special - * remap allocator is implemented which can be used by alloc_remap(). - * - * For each node, the amount of memory which will be necessary for - * pgdat and memmap is calculated and two memory areas of the size are - * allocated - one in the node and the other in lowmem; then, the area - * in the node is remapped to the lowmem area. - * - * As pgdat and memmap must be allocated in lowmem anyway, this - * doesn't waste lowmem address space; however, the actual lowmem - * which gets remapped over is wasted. The amount shouldn't be - * problematic on machines this feature will be used. - * - * Initialization failure isn't fatal. alloc_remap() is used - * opportunistically and the callers will fall back to other memory - * allocation mechanisms on failure. - */ -void __init init_alloc_remap(int nid, u64 start, u64 end) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long end_pfn = end >> PAGE_SHIFT; - unsigned long size, pfn; - u64 node_pa, remap_pa; - void *remap_va; - - /* - * The acpi/srat node info can show hot-add memroy zones where - * memory could be added but not currently present. - */ - printk(KERN_DEBUG "node %d pfn: [%lx - %lx]\n", - nid, start_pfn, end_pfn); - - /* calculate the necessary space aligned to large page size */ - size = node_memmap_size_bytes(nid, start_pfn, end_pfn); - size += ALIGN(sizeof(pg_data_t), PAGE_SIZE); - size = ALIGN(size, LARGE_PAGE_BYTES); - - /* allocate node memory and the lowmem remap area */ - node_pa = memblock_find_in_range(start, end, size, LARGE_PAGE_BYTES); - if (!node_pa) { - pr_warning("remap_alloc: failed to allocate %lu bytes for node %d\n", - size, nid); - return; - } - memblock_reserve(node_pa, size); - - remap_pa = memblock_find_in_range(min_low_pfn << PAGE_SHIFT, - max_low_pfn << PAGE_SHIFT, - size, LARGE_PAGE_BYTES); - if (!remap_pa) { - pr_warning("remap_alloc: failed to allocate %lu bytes remap area for node %d\n", - size, nid); - memblock_free(node_pa, size); - return; - } - memblock_reserve(remap_pa, size); - remap_va = phys_to_virt(remap_pa); - - /* perform actual remap */ - for (pfn = 0; pfn < size >> PAGE_SHIFT; pfn += PTRS_PER_PTE) - set_pmd_pfn((unsigned long)remap_va + (pfn << PAGE_SHIFT), - (node_pa >> PAGE_SHIFT) + pfn, - PAGE_KERNEL_LARGE); - - /* initialize remap allocator parameters */ - node_remap_start_pfn[nid] = node_pa >> PAGE_SHIFT; - node_remap_start_vaddr[nid] = remap_va; - node_remap_end_vaddr[nid] = remap_va + size; - node_remap_alloc_vaddr[nid] = remap_va; - - printk(KERN_DEBUG "remap_alloc: node %d [%08llx-%08llx) -> [%p-%p)\n", - nid, node_pa, node_pa + size, remap_va, remap_va + size); -} - void __init initmem_init(void) { x86_numa_init(); diff --git a/arch/x86/mm/numa_internal.h b/arch/x86/mm/numa_internal.h index 7178c3a..ad86ec9 100644 --- a/arch/x86/mm/numa_internal.h +++ b/arch/x86/mm/numa_internal.h @@ -21,12 +21,6 @@ void __init numa_reset_distance(void); void __init x86_numa_init(void); -#ifdef CONFIG_X86_64 -static inline void init_alloc_remap(int nid, u64 start, u64 end) { } -#else -void __init init_alloc_remap(int nid, u64 start, u64 end); -#endif - #ifdef CONFIG_NUMA_EMU void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt); -- cgit v1.1 From bb112aec5ee41427e9b9726e3d57b896709598ed Mon Sep 17 00:00:00 2001 From: "H. 
Peter Anvin" Date: Thu, 31 Jan 2013 13:53:10 -0800 Subject: x86-32, mm: Remove reference to resume_map_numa_kva() Remove reference to removed function resume_map_numa_kva(). Signed-off-by: H. Peter Anvin Cc: Dave Hansen Cc: Link: http://lkml.kernel.org/r/20130131005616.1C79F411@kernel.stglabs.ibm.com --- arch/x86/include/asm/mmzone_32.h | 6 ------ arch/x86/power/hibernate_32.c | 2 -- 2 files changed, 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index eb05fb3..8a9b3e2 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -14,12 +14,6 @@ extern struct pglist_data *node_data[]; #include -extern void resume_map_numa_kva(pgd_t *pgd); - -#else /* !CONFIG_NUMA */ - -static inline void resume_map_numa_kva(pgd_t *pgd) {} - #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index 74202c1..7d28c88 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -129,8 +129,6 @@ static int resume_physical_mapping_init(pgd_t *pgd_base) } } - resume_map_numa_kva(pgd_base); - return 0; } -- cgit v1.1 From 07f4207a305c834f528d08428df4531744e25678 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 31 Jan 2013 14:00:48 -0800 Subject: x86-32, mm: Remove reference to alloc_remap() We have removed the remap allocator for x86-32, and x86-64 never had it (and doesn't need it). Remove residual reference to it. Reported-by: Yinghai Lu Signed-off-by: H. Peter Anvin Cc: Dave Hansen Cc: Link: http://lkml.kernel.org/r/CAE9FiQVn6_QZi3fNQ-JHYiR-7jeDJ5hT0SyT_%2BzVvfOj=PzF3w@mail.gmail.com --- arch/x86/mm/numa.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 61c2b6f..8504f36 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -193,7 +193,6 @@ int __init numa_add_memblk(int nid, u64 start, u64 end) static void __init setup_node_data(int nid, u64 start, u64 end) { const size_t nd_size = roundup(sizeof(pg_data_t), PAGE_SIZE); - bool remapped = false; u64 nd_pa; void *nd; int tnid; @@ -211,28 +210,22 @@ static void __init setup_node_data(int nid, u64 start, u64 end) nid, start, end - 1); /* - * Allocate node data. Try remap allocator first, node-local - * memory and then any node. Never allocate in DMA zone. + * Allocate node data. Try node-local memory and then any node. + * Never allocate in DMA zone. */ - nd = alloc_remap(nid, nd_size); - if (nd) { - nd_pa = __pa_nodebug(nd); - remapped = true; - } else { - nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); - if (!nd_pa) { - pr_err("Cannot find %zu bytes in node %d\n", - nd_size, nid); - return; - } - nd = __va(nd_pa); + nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); + if (!nd_pa) { + pr_err("Cannot find %zu bytes in node %d\n", + nd_size, nid); + return; } + nd = __va(nd_pa); /* report and initialize */ - printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]%s\n", - nd_pa, nd_pa + nd_size - 1, remapped ? 
" (remapped)" : ""); + printk(KERN_INFO " NODE_DATA [mem %#010Lx-%#010Lx]\n", + nd_pa, nd_pa + nd_size - 1); tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT); - if (!remapped && tnid != nid) + if (tnid != nid) printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nid, tnid); node_data[nid] = nd; -- cgit v1.1 From eaca6eae3e0c41d41fcb9d1d70e00934988dff2e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Nov 2012 23:12:10 -0500 Subject: sanitize rt_sigaction() situation a bit Switch from __ARCH_WANT_SYS_RT_SIGACTION to the opposite (!CONFIG_ODD_RT_SIGACTION); the only two architectures that need it are alpha and sparc. The reason for use of CONFIG_... instead of __ARCH_... is that it's needed only kernel-side and doing it that way avoids a mess with include order on many architectures. Signed-off-by: Al Viro --- arch/x86/um/shared/sysdep/syscalls_32.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h index 8436079..68fd2cf 100644 --- a/arch/x86/um/shared/sysdep/syscalls_32.h +++ b/arch/x86/um/shared/sysdep/syscalls_32.h @@ -8,11 +8,6 @@ typedef long syscall_handler_t(struct pt_regs); -/* Not declared on x86, incompatible declarations on x86_64, so these have - * to go here rather than in sys_call_table.c - */ -extern syscall_handler_t sys_rt_sigaction; - extern syscall_handler_t *sys_call_table[]; #define EXECUTE_SYSCALL(syscall, regs) \ -- cgit v1.1 From 92a3ce4a1e0047215aa0a0b30cc333bd32b866a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Nov 2012 21:20:05 -0500 Subject: consolidate declarations of k_sigaction Only alpha and sparc are unusual - they have ka_restorer in them. And nobody needs that exposed to userland. Signed-off-by: Al Viro --- arch/x86/include/asm/signal.h | 5 ----- arch/x86/include/uapi/asm/signal.h | 4 ---- 2 files changed, 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 216bf36..e7cf500 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -46,11 +46,6 @@ struct sigaction { sigset_t sa_mask; /* mask last for extensibility */ }; -struct k_sigaction { - struct sigaction sa; -}; - -#else /* __i386__ */ #endif /* !__i386__ */ #include diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h index aa7d6ae..e52443f 100644 --- a/arch/x86/include/uapi/asm/signal.h +++ b/arch/x86/include/uapi/asm/signal.h @@ -122,10 +122,6 @@ struct sigaction { sigset_t sa_mask; /* mask last for extensibility */ }; -struct k_sigaction { - struct sigaction sa; -}; - #endif /* !__i386__ */ typedef struct sigaltstack { -- cgit v1.1 From 574c4866e33d648520a8bd5bf6f573ea6e554e88 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Nov 2012 22:24:19 -0500 Subject: consolidate kernel-side struct sigaction declarations Signed-off-by: Al Viro --- arch/x86/include/asm/signal.h | 10 +++------- arch/x86/include/uapi/asm/signal.h | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index e7cf500..9bda822 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -31,6 +31,9 @@ typedef sigset_t compat_sigset_t; #include #ifndef __ASSEMBLY__ extern void do_notify_resume(struct pt_regs *, void *, __u32); + +#define __ARCH_HAS_SA_RESTORER + #ifdef __i386__ struct old_sigaction { __sighandler_t sa_handler; @@ -39,13 +42,6 @@ struct old_sigaction {
__sigrestore_t sa_restorer; }; -struct sigaction { - __sighandler_t sa_handler; - unsigned long sa_flags; - __sigrestore_t sa_restorer; - sigset_t sa_mask; /* mask last for extensibility */ -}; - #endif /* !__i386__ */ #include diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h index e52443f..8264f47 100644 --- a/arch/x86/include/uapi/asm/signal.h +++ b/arch/x86/include/uapi/asm/signal.h @@ -95,9 +95,9 @@ typedef unsigned long sigset_t; #ifndef __ASSEMBLY__ -#ifdef __i386__ # ifndef __KERNEL__ /* Here we must cater to libcs that poke about in kernel headers. */ +#ifdef __i386__ struct sigaction { union { @@ -112,7 +112,6 @@ struct sigaction { #define sa_handler _u._sa_handler #define sa_sigaction _u._sa_sigaction -# endif /* ! __KERNEL__ */ #else /* __i386__ */ struct sigaction { @@ -123,6 +122,7 @@ struct sigaction { }; #endif /* !__i386__ */ +# endif /* ! __KERNEL__ */ typedef struct sigaltstack { void __user *ss_sp; -- cgit v1.1 From ea93a6e2e73c5a1a8d1d60098f1ca5618fd5aca1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Nov 2012 14:19:03 -0500 Subject: amd64: get rid of useless RESTORE_TOP_OF_STACK in stub_execve() we are not going to return via SYSRET anyway. Signed-off-by: Al Viro --- arch/x86/kernel/entry_64.S | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 07a7a04..2f2f57a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -886,7 +886,6 @@ ENTRY(stub_execve) SAVE_REST FIXUP_TOP_OF_STACK %r11 call sys_execve - RESTORE_TOP_OF_STACK %r11 movq %rax,RAX(%rsp) RESTORE_REST jmp int_ret_from_sys_call -- cgit v1.1 From b3af11afe06abdcf980b5f5c0b44dc46c496ddc3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 19 Nov 2012 22:00:52 -0500 Subject: x86: get rid of pt_regs argument of iopl(2) Signed-off-by: Al Viro --- arch/x86/ia32/ia32entry.S | 1 - arch/x86/include/asm/syscalls.h | 2 +- arch/x86/kernel/entry_32.S | 1 - arch/x86/kernel/entry_64.S | 31 +++++++++++++------------------ arch/x86/kernel/ioport.c | 3 ++- arch/x86/syscalls/syscall_32.tbl | 2 +- arch/x86/um/sys_call_table_32.c | 1 - 7 files changed, 17 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 102ff7c..74bc91e 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -467,7 +467,6 @@ GLOBAL(\label) PTREGSCALL stub32_execve, compat_sys_execve, %rcx PTREGSCALL stub32_fork, sys_fork, %rdi PTREGSCALL stub32_vfork, sys_vfork, %rdi - PTREGSCALL stub32_iopl, sys_iopl, %rsi ALIGN GLOBAL(stub32_clone) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 58b7e3e..76be6f2 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -18,7 +18,7 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); -long sys_iopl(unsigned int, struct pt_regs *); +asmlinkage long sys_iopl(unsigned int); /* kernel/ldt.c */ asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6ed91d9..415c634 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -738,7 +738,6 @@ ENTRY(ptregs_##name) ; \ CFI_ENDPROC; \ ENDPROC(ptregs_##name) -PTREGSCALL1(iopl) PTREGSCALL0(sigreturn) PTREGSCALL0(rt_sigreturn) PTREGSCALL2(vm86) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 
2f2f57a..4486fee 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -828,23 +828,6 @@ int_restore_rest: CFI_ENDPROC END(system_call) -/* - * Certain special system calls that need to save a complete full stack frame. - */ - .macro PTREGSCALL label,func,arg -ENTRY(\label) - PARTIAL_FRAME 1 8 /* offset 8: return address */ - subq $REST_SKIP, %rsp - CFI_ADJUST_CFA_OFFSET REST_SKIP - call save_rest - DEFAULT_FRAME 0 8 /* offset 8: return address */ - leaq 8(%rsp), \arg /* pt_regs pointer */ - call \func - jmp ptregscall_common - CFI_ENDPROC -END(\label) - .endm - .macro FORK_LIKE func ENTRY(stub_\func) CFI_STARTPROC @@ -861,10 +844,22 @@ ENTRY(stub_\func) END(stub_\func) .endm + .macro FIXED_FRAME label,func +ENTRY(\label) + CFI_STARTPROC + PARTIAL_FRAME 0 8 /* offset 8: return address */ + FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET + call \func + RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET + ret + CFI_ENDPROC +END(\label) + .endm + FORK_LIKE clone FORK_LIKE fork FORK_LIKE vfork - PTREGSCALL stub_iopl, sys_iopl, %rsi + FIXED_FRAME stub_iopl, sys_iopl ENTRY(ptregscall_common) DEFAULT_FRAME 1 8 /* offset 8: return address */ diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 8c96897..4ddaf66 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -93,8 +93,9 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) * on system-call entry - see also fork() and the signal handling * code. */ -long sys_iopl(unsigned int level, struct pt_regs *regs) +SYSCALL_DEFINE1(iopl, unsigned int, level) { + struct pt_regs *regs = current_pt_regs(); unsigned int old = (regs->flags >> 12) & 3; struct thread_struct *t = &current->thread; diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 28e3fa9..aa15a7a 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -116,7 +116,7 @@ 107 i386 lstat sys_newlstat compat_sys_newlstat 108 i386 fstat sys_newfstat compat_sys_newfstat 109 i386 olduname sys_uname -110 i386 iopl ptregs_iopl stub32_iopl +110 i386 iopl sys_iopl 111 i386 vhangup sys_vhangup 112 i386 idle 113 i386 vm86old ptregs_vm86old sys32_vm86_warning diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index a0c3b0d..5cc2bce 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -24,7 +24,6 @@ #define old_mmap sys_old_mmap -#define ptregs_iopl sys_iopl #define ptregs_vm86old sys_vm86old #define ptregs_vm86 sys_vm86 -- cgit v1.1 From 3fe26fa34da029263067b48836e740c1ddffbd91 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Nov 2012 14:32:42 -0500 Subject: x86: get rid of pt_regs argument in sigreturn variants Signed-off-by: Al Viro --- arch/x86/ia32/ia32_signal.c | 6 ++++-- arch/x86/ia32/ia32entry.S | 11 +++++------ arch/x86/include/asm/sys_ia32.h | 4 ++-- arch/x86/include/asm/syscalls.h | 4 ++-- arch/x86/kernel/entry_32.S | 23 ----------------------- arch/x86/kernel/entry_64.S | 2 -- arch/x86/kernel/signal.c | 9 ++++++--- arch/x86/syscalls/syscall_32.tbl | 4 ++-- arch/x86/um/signal.c | 15 ++------------- 9 files changed, 23 insertions(+), 55 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index a1daf4a..15fdb3f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -215,8 +215,9 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, return err; } -asmlinkage long sys32_sigreturn(struct pt_regs *regs) +asmlinkage long
sys32_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); sigset_t set; unsigned int ax; @@ -241,8 +242,9 @@ badframe: return 0; } -asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) +asmlinkage long sys32_rt_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_ia32 __user *frame; sigset_t set; unsigned int ax; diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 74bc91e..c05e16b 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -456,17 +456,16 @@ ia32_badsys: ALIGN GLOBAL(\label) leaq \func(%rip),%rax - leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ jmp ia32_ptregs_common .endm CFI_STARTPROC32 - PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi - PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi - PTREGSCALL stub32_execve, compat_sys_execve, %rcx - PTREGSCALL stub32_fork, sys_fork, %rdi - PTREGSCALL stub32_vfork, sys_vfork, %rdi + PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn + PTREGSCALL stub32_sigreturn, sys32_sigreturn + PTREGSCALL stub32_execve, compat_sys_execve + PTREGSCALL stub32_fork, sys_fork + PTREGSCALL stub32_vfork, sys_vfork ALIGN GLOBAL(stub32_clone) diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 31f61f9..93e142a 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -69,8 +69,8 @@ asmlinkage long sys32_fallocate(int, int, unsigned, /* ia32/ia32_signal.c */ asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); -asmlinkage long sys32_sigreturn(struct pt_regs *); -asmlinkage long sys32_rt_sigreturn(struct pt_regs *); +asmlinkage long sys32_sigreturn(void); +asmlinkage long sys32_rt_sigreturn(void); /* ia32/ipc32.c */ asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 76be6f2..de2e1ab 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -24,7 +24,7 @@ asmlinkage long sys_iopl(unsigned int); asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); /* kernel/signal.c */ -long sys_rt_sigreturn(struct pt_regs *); +long sys_rt_sigreturn(void); /* kernel/tls.c */ asmlinkage int sys_set_thread_area(struct user_desc __user *); @@ -37,7 +37,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); -unsigned long sys_sigreturn(struct pt_regs *); +unsigned long sys_sigreturn(void); /* kernel/vm86_32.c */ int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 415c634..e132cf6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -702,12 +702,6 @@ END(syscall_badsys) /* * System calls that need a pt_regs pointer. 
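 * They fetch it from the stack via the PTREGSCALL* stubs defined here.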
*/ -#define PTREGSCALL0(name) \ -ENTRY(ptregs_##name) ; \ - leal 4(%esp),%eax; \ - jmp sys_##name; \ -ENDPROC(ptregs_##name) - #define PTREGSCALL1(name) \ ENTRY(ptregs_##name) ; \ leal 4(%esp),%edx; \ @@ -723,23 +717,6 @@ ENTRY(ptregs_##name) ; \ jmp sys_##name; \ ENDPROC(ptregs_##name) -#define PTREGSCALL3(name) \ -ENTRY(ptregs_##name) ; \ - CFI_STARTPROC; \ - leal 4(%esp),%eax; \ - pushl_cfi %eax; \ - movl PT_EDX(%eax),%ecx; \ - movl PT_ECX(%eax),%edx; \ - movl PT_EBX(%eax),%eax; \ - call sys_##name; \ - addl $4,%esp; \ - CFI_ADJUST_CFA_OFFSET -4; \ - ret; \ - CFI_ENDPROC; \ -ENDPROC(ptregs_##name) - -PTREGSCALL0(sigreturn) -PTREGSCALL0(rt_sigreturn) PTREGSCALL2(vm86) PTREGSCALL1(vm86old) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4486fee..1975122 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -896,7 +896,6 @@ ENTRY(stub_rt_sigreturn) addq $8, %rsp PARTIAL_FRAME 0 SAVE_REST - movq %rsp,%rdi FIXUP_TOP_OF_STACK %r11 call sys_rt_sigreturn movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer @@ -911,7 +910,6 @@ ENTRY(stub_x32_rt_sigreturn) addq $8, %rsp PARTIAL_FRAME 0 SAVE_REST - movq %rsp,%rdi FIXUP_TOP_OF_STACK %r11 call sys32_x32_rt_sigreturn movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d6bf1f3..38ba7e5 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -597,8 +597,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, * Do a signal return; undo the signal stack. */ #ifdef CONFIG_X86_32 -unsigned long sys_sigreturn(struct pt_regs *regs) +unsigned long sys_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct sigframe __user *frame; unsigned long ax; sigset_t set; @@ -625,8 +626,9 @@ badframe: } #endif /* CONFIG_X86_32 */ -long sys_rt_sigreturn(struct pt_regs *regs) +long sys_rt_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; unsigned long ax; sigset_t set; @@ -843,8 +845,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) } #ifdef CONFIG_X86_X32_ABI -asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs) +asmlinkage long sys32_x32_rt_sigreturn(void) { + struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_x32 __user *frame; sigset_t set; unsigned long ax; diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index aa15a7a..4e8ab08 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -125,7 +125,7 @@ 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo 117 i386 ipc sys_ipc sys32_ipc 118 i386 fsync sys_fsync -119 i386 sigreturn ptregs_sigreturn stub32_sigreturn +119 i386 sigreturn sys_sigreturn stub32_sigreturn 120 i386 clone sys_clone stub32_clone 121 i386 setdomainname sys_setdomainname 122 i386 uname sys_newuname @@ -179,7 +179,7 @@ 170 i386 setresgid sys_setresgid16 171 i386 getresgid sys_getresgid16 172 i386 prctl sys_prctl -173 i386 rt_sigreturn ptregs_rt_sigreturn stub32_rt_sigreturn +173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c index 71cef48..ae7319d 100644 --- a/arch/x86/um/signal.c +++ b/arch/x86/um/signal.c @@ -464,7 +464,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, return 
0; } -long sys_sigreturn(struct pt_regs *regs) +long sys_sigreturn(void) { unsigned long sp = PT_REGS_SP(&current->thread.regs); struct sigframe __user *frame = (struct sigframe __user *)(sp - 8); @@ -577,7 +577,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig, } #endif -long sys_rt_sigreturn(struct pt_regs *regs) +long sys_rt_sigreturn(void) { unsigned long sp = PT_REGS_SP(&current->thread.regs); struct rt_sigframe __user *frame = @@ -601,14 +601,3 @@ long sys_rt_sigreturn(struct pt_regs *regs) force_sig(SIGSEGV, current); return 0; } - -#ifdef CONFIG_X86_32 -long ptregs_sigreturn(void) -{ - return sys_sigreturn(NULL); -} -long ptregs_rt_sigreturn(void) -{ - return sys_rt_sigreturn(NULL); -} -#endif -- cgit v1.1 From 49cb25e92902ba24bd38c350114f8695493b262f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 12 Nov 2012 14:38:28 -0500 Subject: x86: get rid of pt_regs argument in vm86/vm86old Signed-off-by: Al Viro --- arch/x86/include/asm/syscalls.h | 4 ++-- arch/x86/kernel/entry_32.S | 21 --------------------- arch/x86/kernel/vm86_32.c | 8 ++++---- arch/x86/syscalls/syscall_32.tbl | 4 ++-- arch/x86/um/sys_call_table_32.c | 3 --- 5 files changed, 8 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index de2e1ab..f755423 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -40,8 +40,8 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, unsigned long sys_sigreturn(void); /* kernel/vm86_32.c */ -int sys_vm86old(struct vm86_struct __user *, struct pt_regs *); -int sys_vm86(unsigned long, unsigned long, struct pt_regs *); +int sys_vm86old(struct vm86_struct __user *); +int sys_vm86(unsigned long, unsigned long); #else /* CONFIG_X86_32 */ diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index e132cf6..352e5a9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -699,27 +699,6 @@ END(syscall_badsys) */ .popsection -/* - * System calls that need a pt_regs pointer. - */ -#define PTREGSCALL1(name) \ -ENTRY(ptregs_##name) ; \ - leal 4(%esp),%edx; \ - movl (PT_EBX+4)(%esp),%eax; \ - jmp sys_##name; \ -ENDPROC(ptregs_##name) - -#define PTREGSCALL2(name) \ -ENTRY(ptregs_##name) ; \ - leal 4(%esp),%ecx; \ - movl (PT_ECX+4)(%esp),%edx; \ - movl (PT_EBX+4)(%esp),%eax; \ - jmp sys_##name; \ -ENDPROC(ptregs_##name) - -PTREGSCALL2(vm86) -PTREGSCALL1(vm86old) - .macro FIXUP_ESPFIX_STACK /* * Switch back for ESPFIX stack to the normal zerobased stack diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 1dfe69c..1cf5766 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -202,7 +202,7 @@ out: static int do_vm86_irq_handling(int subfunction, int irqnumber); static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); -int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) +int sys_vm86old(struct vm86_struct __user *v86) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space.
@@ -222,7 +222,7 @@ int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) if (tmp) goto out; memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); - info.regs32 = regs; + info.regs32 = current_pt_regs(); tsk->thread.vm86_info = v86; do_sys_vm86(&info, tsk); ret = 0; /* we never return here */ @@ -231,7 +231,7 @@ out: } -int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) +int sys_vm86(unsigned long cmd, unsigned long arg) { struct kernel_vm86_struct info; /* declare this _on top_, * this avoids wasting of stack space. @@ -272,7 +272,7 @@ int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) ret = -EFAULT; if (tmp) goto out; - info.regs32 = regs; + info.regs32 = current_pt_regs(); info.vm86plus.is_vm86pus = 1; tsk->thread.vm86_info = (struct vm86_struct __user *)v86; do_sys_vm86(&info, tsk); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 4e8ab08..250e2d9 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -119,7 +119,7 @@ 110 i386 iopl sys_iopl 111 i386 vhangup sys_vhangup 112 i386 idle -113 i386 vm86old ptregs_vm86old sys32_vm86_warning +113 i386 vm86old sys_vm86old sys32_vm86_warning 114 i386 wait4 sys_wait4 compat_sys_wait4 115 i386 swapoff sys_swapoff 116 i386 sysinfo sys_sysinfo compat_sys_sysinfo @@ -172,7 +172,7 @@ 163 i386 mremap sys_mremap 164 i386 setresuid sys_setresuid16 165 i386 getresuid sys_getresuid16 -166 i386 vm86 ptregs_vm86 sys32_vm86_warning +166 i386 vm86 sys_vm86 sys32_vm86_warning 167 i386 query_module 168 i386 poll sys_poll 169 i386 nfsservctl diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c index 5cc2bce..531d426 100644 --- a/arch/x86/um/sys_call_table_32.c +++ b/arch/x86/um/sys_call_table_32.c @@ -24,9 +24,6 @@ #define old_mmap sys_old_mmap -#define ptregs_vm86old sys_vm86old -#define ptregs_vm86 sys_vm86 - #define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ; #include -- cgit v1.1 From f45adb0499dedd4082b46522e7b166cff5dc64eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 25 Dec 2012 14:46:17 -0500 Subject: x86: switch to generic compat rt_sigpending() Signed-off-by: Al Viro --- arch/x86/Kconfig | 1 + arch/x86/ia32/sys_ia32.c | 24 ------------------------ arch/x86/include/asm/sys_ia32.h | 1 - arch/x86/syscalls/syscall_32.tbl | 2 +- arch/x86/syscalls/syscall_64.tbl | 2 +- 5 files changed, 3 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af..38018ec 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -114,6 +114,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select GENERIC_SIGALTSTACK + select GENERIC_COMPAT_RT_SIGPENDING config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index d0b689b..b417fc1 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -310,30 +310,6 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, return ret; } -asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *set, - compat_size_t sigsetsize) -{ - sigset_t s; - compat_sigset_t s32; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret = sys_rt_sigpending((sigset_t __user *)&s, sigsetsize); - set_fs(old_fs); - if (!ret) { - switch (_NSIG_WORDS) { - case 4: s32.sig[7] = (s.sig[3] >> 32); s32.sig[6] = s.sig[3]; - case 3: s32.sig[5] = (s.sig[2] >> 32); s32.sig[4] = s.sig[2]; - case 2: 
s32.sig[3] = (s.sig[1] >> 32); s32.sig[2] = s.sig[1]; - case 1: s32.sig[1] = (s.sig[0] >> 32); s32.sig[0] = s.sig[0]; - } - if (copy_to_user(set, &s32, sizeof(compat_sigset_t))) - return -EFAULT; - } - return ret; -} - asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, compat_siginfo_t __user *uinfo) { diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 93e142a..ff429b0 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -45,7 +45,6 @@ asmlinkage long sys32_sysfs(int, u32, u32); asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, struct compat_timespec __user *); -asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 250e2d9..5a55e84 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -182,7 +182,7 @@ 173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn 174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask -176 i386 rt_sigpending sys_rt_sigpending sys32_rt_sigpending +176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait 178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo 179 i386 rt_sigsuspend sys_rt_sigsuspend diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index dc97328..dbde26d 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -335,7 +335,7 @@ 519 x32 recvmsg compat_sys_recvmsg 520 x32 execve stub_x32_execve 521 x32 ptrace compat_sys_ptrace -522 x32 rt_sigpending sys32_rt_sigpending +522 x32 rt_sigpending compat_sys_rt_sigpending 523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait 524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo 525 x32 sigaltstack compat_sys_sigaltstack -- cgit v1.1 From 7b83d1a297ac1afda2bb9a9979ca73173148623f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 25 Dec 2012 15:26:55 -0500 Subject: x86: switch to generic compat rt_sigqueueinfo() Signed-off-by: Al Viro --- arch/x86/Kconfig | 1 + arch/x86/ia32/sys_ia32.c | 15 --------------- arch/x86/include/asm/sys_ia32.h | 1 - arch/x86/syscalls/syscall_32.tbl | 2 +- arch/x86/syscalls/syscall_64.tbl | 2 +- 5 files changed, 3 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 38018ec..9f03111 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -114,6 +114,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select GENERIC_SIGALTSTACK + select GENERIC_COMPAT_RT_SIGQUEUEINFO select GENERIC_COMPAT_RT_SIGPENDING config INSTRUCTION_DECODER diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index b417fc1..0306fdc 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -310,21 +310,6 @@ asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, return ret; } -asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, - compat_siginfo_t __user *uinfo) -{ - siginfo_t info; - int ret; - mm_segment_t old_fs = get_fs(); - - if (copy_siginfo_from_user32(&info, uinfo)) - return -EFAULT; - set_fs(KERNEL_DS); - ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *)&info); - set_fs(old_fs); - return ret; -} - /* warning: next two 
assume little endian */ asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index ff429b0..e3a8931 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -45,7 +45,6 @@ asmlinkage long sys32_sysfs(int, u32, u32); asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, struct compat_timespec __user *); -asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 5a55e84..9c707c4 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -184,7 +184,7 @@ 175 i386 rt_sigprocmask sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait -178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo sys32_rt_sigqueueinfo +178 i386 rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo 179 i386 rt_sigsuspend sys_rt_sigsuspend 180 i386 pread64 sys_pread64 sys32_pread 181 i386 pwrite64 sys_pwrite64 sys32_pwrite diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index dbde26d..6ceaa63 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -337,7 +337,7 @@ 521 x32 ptrace compat_sys_ptrace 522 x32 rt_sigpending compat_sys_rt_sigpending 523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait -524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo +524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo 525 x32 sigaltstack compat_sys_sigaltstack 526 x32 timer_create compat_sys_timer_create 527 x32 mq_notify compat_sys_mq_notify -- cgit v1.1 From 15ce1f7154c80693cec4f8a5309e8c8a06fb2541 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 25 Dec 2012 16:09:20 -0500 Subject: x86,um: switch to generic old sigsuspend() Signed-off-by: Al Viro --- arch/x86/Kconfig | 1 + arch/x86/ia32/ia32_signal.c | 7 ------- arch/x86/include/asm/sys_ia32.h | 1 - arch/x86/include/asm/syscalls.h | 1 - arch/x86/kernel/signal.c | 11 ----------- arch/x86/syscalls/syscall_32.tbl | 2 +- arch/x86/um/Kconfig | 1 + 7 files changed, 3 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f03111..5bda2d7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -116,6 +116,7 @@ config X86 select GENERIC_SIGALTSTACK select GENERIC_COMPAT_RT_SIGQUEUEINFO select GENERIC_COMPAT_RT_SIGPENDING + select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 15fdb3f..b0460cd 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -129,13 +129,6 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return err; } -asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask) -{ - sigset_t blocked; - siginitset(&blocked, mask); - return sigsuspend(&blocked); -} - /* * Do a signal return; undo the signal stack. 
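 * (i.e. restore the sigcontext that was pushed on the user stack).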
*/ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index e3a8931..d4c3837 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -66,7 +66,6 @@ asmlinkage long sys32_fallocate(int, int, unsigned, unsigned, unsigned, unsigned); /* ia32/ia32_signal.c */ -asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); asmlinkage long sys32_sigreturn(void); asmlinkage long sys32_rt_sigreturn(void); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index f755423..7ed7dec 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -34,7 +34,6 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); #ifdef CONFIG_X86_32 /* kernel/signal.c */ -asmlinkage int sys_sigsuspend(int, int, old_sigset_t); asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, struct old_sigaction __user *); unsigned long sys_sigreturn(void); diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 38ba7e5..0577129 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -536,17 +536,6 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, } #ifdef CONFIG_X86_32 -/* - * Atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int -sys_sigsuspend(int history0, int history1, old_sigset_t mask) -{ - sigset_t blocked; - siginitset(&blocked, mask); - return sigsuspend(&blocked); -} - asmlinkage int sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact) diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 9c707c4..fae76a3 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -78,7 +78,7 @@ 69 i386 ssetmask sys_ssetmask 70 i386 setreuid sys_setreuid16 71 i386 setregid sys_setregid16 -72 i386 sigsuspend sys_sigsuspend sys32_sigsuspend +72 i386 sigsuspend sys_sigsuspend sys_sigsuspend 73 i386 sigpending sys_sigpending compat_sys_sigpending 74 i386 sethostname sys_sethostname 75 i386 setrlimit sys_setrlimit compat_sys_setrlimit diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 53c90fd..a372582 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -25,6 +25,7 @@ config X86_32 select ARCH_WANT_IPC_PARSE_VERSION select MODULES_USE_ELF_REL select CLONE_BACKWARDS + select OLD_SIGSUSPEND3 config X86_64 def_bool 64BIT -- cgit v1.1 From d7c43e4afb411db68bcf652c96e4206c6085f5e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 25 Dec 2012 17:19:57 -0500 Subject: x86: switch to generic compat sched_rr_get_interval() Signed-off-by: Al Viro --- arch/x86/ia32/sys_ia32.c | 17 ----------------- arch/x86/include/asm/sys_ia32.h | 3 --- arch/x86/syscalls/syscall_32.tbl | 2 +- 3 files changed, 1 insertion(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 0306fdc..cdf22ba 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -293,23 +293,6 @@ asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, return compat_sys_wait4(pid, stat_addr, options, NULL); } -/* 32-bit timeval and related flotsam. 
*/ - -asmlinkage long sys32_sched_rr_get_interval(compat_pid_t pid, - struct compat_timespec __user *interval) -{ - struct timespec t; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - ret = sys_sched_rr_get_interval(pid, (struct timespec __user *)&t); - set_fs(old_fs); - if (put_compat_timespec(&t, interval)) - return -EFAULT; - return ret; -} - /* warning: next two assume little endian */ asmlinkage long sys32_pread(unsigned int fd, char __user *ubuf, u32 count, u32 poslo, u32 poshi) diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index d4c3837..c603c8f 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -43,9 +43,6 @@ asmlinkage long sys32_alarm(unsigned int); asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); asmlinkage long sys32_sysfs(int, u32, u32); -asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, - struct compat_timespec __user *); - asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index fae76a3..f6006b2 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -167,7 +167,7 @@ 158 i386 sched_yield sys_sched_yield 159 i386 sched_get_priority_max sys_sched_get_priority_max 160 i386 sched_get_priority_min sys_sched_get_priority_min -161 i386 sched_rr_get_interval sys_sched_rr_get_interval sys32_sched_rr_get_interval +161 i386 sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval 162 i386 nanosleep sys_nanosleep compat_sys_nanosleep 163 i386 mremap sys_mremap 164 i386 setresuid sys_setresuid16 -- cgit v1.1 From 29fd448084e2da6d19ab675cf01d4a65fe2fcc44 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 25 Dec 2012 18:42:26 -0500 Subject: x86: switch to generic compat rt_sigaction() Signed-off-by: Al Viro --- arch/x86/Kconfig | 1 + arch/x86/ia32/sys_ia32.c | 76 ---------------------------------------- arch/x86/include/asm/ia32.h | 8 ----- arch/x86/include/asm/sys_ia32.h | 3 -- arch/x86/syscalls/syscall_32.tbl | 2 +- arch/x86/syscalls/syscall_64.tbl | 2 +- 6 files changed, 3 insertions(+), 89 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5bda2d7..0271a14 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -114,6 +114,7 @@ config X86 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 select GENERIC_SIGALTSTACK + select GENERIC_COMPAT_RT_SIGACTION select GENERIC_COMPAT_RT_SIGQUEUEINFO select GENERIC_COMPAT_RT_SIGPENDING select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index cdf22ba..ffe9751 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -172,82 +172,6 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len, return sys_mprotect(start, len, prot); } -asmlinkage long sys32_rt_sigaction(int sig, struct sigaction32 __user *act, - struct sigaction32 __user *oact, - unsigned int sigsetsize) -{ - struct k_sigaction new_ka, old_ka; - int ret; - compat_sigset_t set32; - - /* XXX: Don't preclude handling different sized sigset_t's. 
*/ - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - - if (act) { - compat_uptr_t handler, restorer; - - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(restorer, &act->sa_restorer) || - __copy_from_user(&set32, &act->sa_mask, - sizeof(compat_sigset_t))) - return -EFAULT; - new_ka.sa.sa_handler = compat_ptr(handler); - new_ka.sa.sa_restorer = compat_ptr(restorer); - - /* - * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= - * than _NSIG_WORDS << 1 - */ - switch (_NSIG_WORDS) { - case 4: new_ka.sa.sa_mask.sig[3] = set32.sig[6] - | (((long)set32.sig[7]) << 32); - case 3: new_ka.sa.sa_mask.sig[2] = set32.sig[4] - | (((long)set32.sig[5]) << 32); - case 2: new_ka.sa.sa_mask.sig[1] = set32.sig[2] - | (((long)set32.sig[3]) << 32); - case 1: new_ka.sa.sa_mask.sig[0] = set32.sig[0] - | (((long)set32.sig[1]) << 32); - } - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - /* - * FIXME: here we rely on _COMPAT_NSIG_WORS to be >= - * than _NSIG_WORDS << 1 - */ - switch (_NSIG_WORDS) { - case 4: - set32.sig[7] = (old_ka.sa.sa_mask.sig[3] >> 32); - set32.sig[6] = old_ka.sa.sa_mask.sig[3]; - case 3: - set32.sig[5] = (old_ka.sa.sa_mask.sig[2] >> 32); - set32.sig[4] = old_ka.sa.sa_mask.sig[2]; - case 2: - set32.sig[3] = (old_ka.sa.sa_mask.sig[1] >> 32); - set32.sig[2] = old_ka.sa.sa_mask.sig[1]; - case 1: - set32.sig[1] = (old_ka.sa.sa_mask.sig[0] >> 32); - set32.sig[0] = old_ka.sa.sa_mask.sig[0]; - } - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), - &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), - &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __copy_to_user(&oact->sa_mask, &set32, - sizeof(compat_sigset_t))) - return -EFAULT; - } - - return ret; -} - asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact) { diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 4c6da2e..259372d 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -13,14 +13,6 @@ #include /* signal.h */ -struct sigaction32 { - unsigned int sa_handler; /* Really a pointer, but need to deal - with 32 bits */ - unsigned int sa_flags; - unsigned int sa_restorer; /* Another 32 bit pointer */ - compat_sigset_t sa_mask; /* A 32 bit mask */ -}; - struct old_sigaction32 { unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index c603c8f..2bf18f1 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -32,10 +32,7 @@ struct mmap_arg_struct32; asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); -struct sigaction32; struct old_sigaction32; -asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, - struct sigaction32 __user *, unsigned int); asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, struct old_sigaction32 __user *); asmlinkage long sys32_alarm(unsigned int); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index f6006b2..2fada84 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -180,7 +180,7 @@ 171 i386 getresgid sys_getresgid16 172 i386 
prctl sys_prctl 173 i386 rt_sigreturn sys_rt_sigreturn stub32_rt_sigreturn -174 i386 rt_sigaction sys_rt_sigaction sys32_rt_sigaction +174 i386 rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction 175 i386 rt_sigprocmask sys_rt_sigprocmask 176 i386 rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending 177 i386 rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 6ceaa63..38ae65d 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -325,7 +325,7 @@ # x32-specific system call numbers start at 512 to avoid cache impact # for native 64-bit operation. # -512 x32 rt_sigaction sys32_rt_sigaction +512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn stub_x32_rt_sigreturn 514 x32 ioctl compat_sys_ioctl 515 x32 readv compat_sys_readv -- cgit v1.1 From 5b3eb3ade4444c3b1419ffa23598a57f6f4bf494 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 25 Dec 2012 19:14:55 -0500 Subject: x86: switch to generic old sigaction Signed-off-by: Al Viro --- arch/x86/Kconfig | 2 ++ arch/x86/ia32/sys_ia32.c | 39 --------------------------------- arch/x86/include/asm/ia32.h | 7 ------ arch/x86/include/asm/signal.h | 9 -------- arch/x86/include/asm/sys_ia32.h | 3 --- arch/x86/include/asm/syscalls.h | 2 -- arch/x86/kernel/signal.c | 47 ---------------------------------------- arch/x86/syscalls/syscall_32.tbl | 2 +- arch/x86/um/Kconfig | 1 + arch/x86/um/Makefile | 4 ++-- arch/x86/um/syscalls_32.c | 38 -------------------------------- 11 files changed, 6 insertions(+), 148 deletions(-) delete mode 100644 arch/x86/um/syscalls_32.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0271a14..87d0917 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -118,6 +118,8 @@ config X86 select GENERIC_COMPAT_RT_SIGQUEUEINFO select GENERIC_COMPAT_RT_SIGPENDING select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION + select OLD_SIGACTION if X86_32 + select COMPAT_OLD_SIGACTION if IA32_EMULATION config INSTRUCTION_DECODER def_bool y diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index ffe9751..592f5a9 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -172,45 +172,6 @@ asmlinkage long sys32_mprotect(unsigned long start, size_t len, return sys_mprotect(start, len, prot); } -asmlinkage long sys32_sigaction(int sig, struct old_sigaction32 __user *act, - struct old_sigaction32 __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - compat_old_sigset_t mask; - compat_uptr_t handler, restorer; - - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(restorer, &act->sa_restorer) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - - new_ka.sa.sa_handler = compat_ptr(handler); - new_ka.sa.sa_restorer = compat_ptr(restorer); - - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), - &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), - &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) - return -EFAULT; - } - - return ret; -} - asmlinkage long sys32_waitpid(compat_pid_t pid, unsigned int __user *stat_addr, int options) { diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 259372d..d0e8e01 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -13,13 +13,6 @@ #include /* signal.h */ -struct old_sigaction32 { - unsigned int sa_handler; /* Really a pointer, but need to deal - with 32 bits */ - compat_old_sigset_t sa_mask; /* A 32 bit mask */ - unsigned int sa_flags; - unsigned int sa_restorer; /* Another 32 bit pointer */ -}; struct ucontext_ia32 { unsigned int uc_flags; diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 9bda822..35e67a4 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -34,15 +34,6 @@ extern void do_notify_resume(struct pt_regs *, void *, __u32); #define __ARCH_HAS_SA_RESTORER -#ifdef __i386__ -struct old_sigaction { - __sighandler_t sa_handler; - old_sigset_t sa_mask; - unsigned long sa_flags; - __sigrestore_t sa_restorer; -}; - -#endif /* !__i386__ */ #include #ifdef __i386__ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 2bf18f1..0218d91 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -32,9 +32,6 @@ struct mmap_arg_struct32; asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); -struct old_sigaction32; -asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, - struct old_sigaction32 __user *); asmlinkage long sys32_alarm(unsigned int); asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 7ed7dec..6cf0a9c 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -34,8 +34,6 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *); #ifdef CONFIG_X86_32 /* kernel/signal.c */ -asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, - struct old_sigaction __user *); unsigned long sys_sigreturn(void); /* kernel/vm86_32.c */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 0577129..d5b1f8a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -535,53 +535,6 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, return 0; } -#ifdef CONFIG_X86_32 -asmlinkage int -sys_sigaction(int sig, const struct old_sigaction __user *act, - struct old_sigaction __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret = 0; - - if (act) { - old_sigset_t mask; - - if (!access_ok(VERIFY_READ, act, sizeof(*act))) - return -EFAULT; - - get_user_try { - get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); - get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); - get_user_ex(mask, &act->sa_mask); - get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); - } get_user_catch(ret); - - if (ret) - return -EFAULT; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) - return -EFAULT; - - put_user_try { - put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); - put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); - put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); - } put_user_catch(ret); - - if (ret) - return -EFAULT; - } - - return ret; -} -#endif /* CONFIG_X86_32 */ - /* * Do a signal return; undo the signal stack. */ diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 2fada84..f2fe78f 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -73,7 +73,7 @@ 64 i386 getppid sys_getppid 65 i386 getpgrp sys_getpgrp 66 i386 setsid sys_setsid -67 i386 sigaction sys_sigaction sys32_sigaction +67 i386 sigaction sys_sigaction compat_sys_sigaction 68 i386 sgetmask sys_sgetmask 69 i386 ssetmask sys_ssetmask 70 i386 setreuid sys_setreuid16 diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index a372582..cf0f273 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -26,6 +26,7 @@ config X86_32 select MODULES_USE_ELF_REL select CLONE_BACKWARDS select OLD_SIGSUSPEND3 + select OLD_SIGACTION config X86_64 def_bool 64BIT diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index 5d065b2..eafa324 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -10,7 +10,7 @@ endif obj-y = bug.o bugs_$(BITS).o delay.o fault.o ksyms.o ldt.o \ ptrace_$(BITS).o ptrace_user.o setjmp_$(BITS).o signal.o \ - stub_$(BITS).o stub_segv.o syscalls_$(BITS).o \ + stub_$(BITS).o stub_segv.o \ sys_call_table_$(BITS).o sysrq_$(BITS).o tls_$(BITS).o \ mem_$(BITS).o subarch.o os-$(OS)/ @@ -25,7 +25,7 @@ subarch-$(CONFIG_HIGHMEM) += ../mm/highmem_32.o else -obj-y += vdso/ +obj-y += syscalls_64.o vdso/ subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../lib/thunk_64.o \ ../lib/rwsem.o diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c deleted file mode 100644 index e8bcea9..0000000 --- a/arch/x86/um/syscalls_32.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) - * Licensed under the GPL - */ - -#include -#include - -long sys_sigaction(int sig, const struct old_sigaction __user *act, - struct old_sigaction __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) - return -EFAULT; - } - - return ret; -} -- cgit v1.1 From f76e39c531304b114e27c3dd3e0036f56cd33df1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Feb 2013 10:13:15 +0100 Subject: x86/intel/cacheinfo: Shut up annoying warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've been getting the following warning when doing randbuilds since forever. 
Now it finally pissed me off just the perfect amount so that I can fix it. arch/x86/kernel/cpu/intel_cacheinfo.c:489:27: warning: ‘cache_disable_0’ defined but not used [-Wunused-variable] arch/x86/kernel/cpu/intel_cacheinfo.c:491:27: warning: ‘cache_disable_1’ defined but not used [-Wunused-variable] arch/x86/kernel/cpu/intel_cacheinfo.c:524:27: warning: ‘subcaches’ defined but not used [-Wunused-variable] It happens because in randconfigs where CONFIG_SYSFS is not set, the whole sysfs-interface to L3 cache index disabling is remaining unused and gcc correctly warns about it. Make it optional, depending on CONFIG_SYSFS too, as is the case with other sysfs-related machinery in this file. Signed-off-by: Borislav Petkov Cc: Andreas Herrmann Link: http://lkml.kernel.org/r/1359969195-27362-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index fe9edec..84c1309c 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -298,8 +298,7 @@ struct _cache_attr { unsigned int); }; -#ifdef CONFIG_AMD_NB - +#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) /* * L3 cache descriptors */ @@ -524,9 +523,9 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count, static struct _cache_attr subcaches = __ATTR(subcaches, 0644, show_subcaches, store_subcaches); -#else /* CONFIG_AMD_NB */ +#else #define amd_init_l3_cache(x, y) -#endif /* CONFIG_AMD_NB */ +#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ static int __cpuinit cpuid4_cache_lookup_regs(int index, -- cgit v1.1 From feb3eb704a86d97edb296502e95da42d622dac61 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 30 Jan 2013 16:45:00 +0200 Subject: KVM: MMU: make spte_is_locklessly_modifiable() more clear spte_is_locklessly_modifiable() checks that both SPTE_HOST_WRITEABLE and SPTE_MMU_WRITEABLE are present on spte. Make it more explicit. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9f628f7..2fa82b0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -448,7 +448,8 @@ static bool __check_direct_spte_mmio_pf(u64 spte) static bool spte_is_locklessly_modifiable(u64 spte) { - return !(~spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)); + return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) == + (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE); } static bool spte_has_volatile_bits(u64 spte) -- cgit v1.1 From 9bb4f6b15ec038ab9afcf346aa6a590406ad6c17 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 30 Jan 2013 16:45:01 +0200 Subject: KVM: MMU: drop unneeded checks. 
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2fa82b0..40737b3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2328,9 +2328,8 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, if (s->role.level != PT_PAGE_TABLE_LEVEL) return 1; - if (!need_unsync && !s->unsync) { + if (!s->unsync) need_unsync = true; - } } if (need_unsync) kvm_unsync_pages(vcpu, gfn); @@ -4008,7 +4007,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, !((sp->role.word ^ vcpu->arch.mmu.base_role.word) & mask.word) && rmap_can_add(vcpu)) mmu_pte_write_new_pte(vcpu, sp, spte, &gentry); - if (!remote_flush && need_remote_flush(entry, *spte)) + if (need_remote_flush(entry, *spte)) remote_flush = true; ++spte; } -- cgit v1.1 From 2c9afa52ef081334925905d6370d36b6602c328c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 30 Jan 2013 16:45:02 +0200 Subject: KVM: MMU: set base_role.nxe during mmu initialization. Move base_role.nxe initialisation to where all other roles are initialized. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 1 + arch/x86/kvm/x86.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 40737b3..8028ac6 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3687,6 +3687,7 @@ int kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context) else r = paging32_init_context(vcpu, context); + vcpu->arch.mmu.base_role.nxe = is_nx(vcpu); vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); vcpu->arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); vcpu->arch.mmu.base_role.smep_andnot_wp diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cf512e70..373e17a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -870,8 +870,6 @@ static int set_efer(struct kvm_vcpu *vcpu, u64 efer) kvm_x86_ops->set_efer(vcpu, efer); - vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; - /* Update reserved bits */ if ((efer ^ old_efer) & EFER_NX) kvm_mmu_reset_context(vcpu); -- cgit v1.1 From 116eb3d30e7e121bfc6117ac037833865dad4971 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 30 Jan 2013 16:45:03 +0200 Subject: KVM: MMU: drop superfluous min() call. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8028ac6..42ba85c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3854,7 +3854,7 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ *gpa &= ~(gpa_t)7; *bytes = 8; - r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, min(*bytes, 8)); + r = kvm_read_guest(vcpu->kvm, *gpa, &gentry, 8); if (r) gentry = 0; new = (const u8 *)&gentry; -- cgit v1.1 From eb3fce87ccc5d38b1ad340f32e34abc09911fb83 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 30 Jan 2013 16:45:04 +0200 Subject: KVM: MMU: drop superfluous is_present_gpte() check. The guest page walker puts only present ptes into the ptes[] array, so there is no need to check them again.
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ca69dcc..34c5c99 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -409,9 +409,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, unsigned direct_access, access = gw->pt_access; int top_level, emulate = 0; - if (!is_present_gpte(gw->ptes[gw->level - 1])) - return 0; - direct_access = gw->pte_access; top_level = vcpu->arch.mmu.root_level; -- cgit v1.1 From 834be0d83f9451573e6fadb381fe0714211c7e90 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 30 Jan 2013 16:45:05 +0200 Subject: Revert "KVM: MMU: split kvm_mmu_free_page" This reverts commit bd4c86eaa6ff10abc4e00d0f45d2a28b10b09df4. There is no user of kvm_mmu_isolate_page() any more. Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 42ba85c..0242a8a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1461,28 +1461,14 @@ static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, int nr) percpu_counter_add(&kvm_total_used_mmu_pages, nr); } -/* - * Remove the sp from shadow page cache, after call it, - * we can not find this sp from the cache, and the shadow - * page table is still valid. - * It should be under the protection of mmu lock. - */ -static void kvm_mmu_isolate_page(struct kvm_mmu_page *sp) +static void kvm_mmu_free_page(struct kvm_mmu_page *sp) { ASSERT(is_empty_shadow_page(sp->spt)); hlist_del(&sp->hash_link); - if (!sp->role.direct) - free_page((unsigned long)sp->gfns); -} - -/* - * Free the shadow page table and the sp, we can do it - * out of the protection of mmu lock. - */ -static void kvm_mmu_free_page(struct kvm_mmu_page *sp) -{ list_del(&sp->link); free_page((unsigned long)sp->spt); + if (!sp->role.direct) + free_page((unsigned long)sp->gfns); kmem_cache_free(mmu_page_header_cache, sp); } @@ -2126,7 +2112,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, do { sp = list_first_entry(invalid_list, struct kvm_mmu_page, link); WARN_ON(!sp->role.invalid || sp->root_count); - kvm_mmu_isolate_page(sp); kvm_mmu_free_page(sp); } while (!list_empty(invalid_list)); } -- cgit v1.1 From c08800a56cb8622bb61577abb4a120c6fdc4b9be Mon Sep 17 00:00:00 2001 From: Dongxiao Xu Date: Mon, 4 Feb 2013 11:50:43 +0800 Subject: KVM: VMX: disable SMEP feature when guest is in non-paging mode SMEP is disabled if the CPU is in non-paging mode in hardware. However, KVM always uses paging mode to emulate guest non-paging mode with TDP. To emulate this behavior, SMEP needs to be manually disabled when the guest switches to non-paging mode. We hit an issue where an SMP Linux guest with a recent kernel (SMEP support enabled, for example 3.5.3) would crash with a triple fault if unrestricted_guest=0 was set. This is because KVM uses an identity-mapping page table to emulate the non-paging mode, and that page table is set up with the USER flag. If SMEP is still enabled in this case, the guest takes an unhandleable page fault and then crashes.
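In rough pseudo-C, the rule the patch implements looks like this (a minimal sketch only: X86_CR4_SMEP is the real architectural bit, but the helper and its arguments are illustrative, not KVM's actual interface):

    /* Sketch: mask SMEP out of the CR4 value loaded into hardware
     * whenever the guest believes paging is off, because the identity
     * map used to emulate that mode carries the USER flag. */
    static unsigned long fixup_hw_cr4(unsigned long hw_cr4, int guest_paging)
    {
            if (!guest_paging)
                    hw_cr4 &= ~X86_CR4_SMEP;
            return hw_cr4;
    }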
Reviewed-by: Gleb Natapov Reviewed-by: Paolo Bonzini Signed-off-by: Dongxiao Xu Signed-off-by: Xiantao Zhang Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0cf74a6..fe9a9cf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3227,6 +3227,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (!is_paging(vcpu)) { hw_cr4 &= ~X86_CR4_PAE; hw_cr4 |= X86_CR4_PSE; + /* + * SMEP is disabled if CPU is in non-paging mode in + * hardware. However KVM always uses paging mode to + * emulate guest non-paging mode with TDP. + * To emulate this behavior, SMEP needs to be manually + * disabled when guest switches to non-paging mode. + */ + hw_cr4 &= ~X86_CR4_SMEP; } else if (!(cr4 & X86_CR4_PAE)) { hw_cr4 &= ~X86_CR4_PAE; } -- cgit v1.1 From b0da5bec30eca7ffbb2c89afa6fe503fd418d3a6 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Sun, 3 Feb 2013 18:17:17 +0200 Subject: KVM: VMX: add missing exit names to VMX_EXIT_REASONS array Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/vmx.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 694586c..5c9dbad 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -105,7 +105,12 @@ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \ - { EXIT_REASON_WBINVD, "WBINVD" } + { EXIT_REASON_WBINVD, "WBINVD" }, \ + { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \ + { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \ + { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \ + { EXIT_REASON_INVD, "INVD" }, \ + { EXIT_REASON_INVPCID, "INVPCID" } #ifdef __KERNEL__ -- cgit v1.1 From 2c53c3dd0b6497484b29fd49d34ef98acbc14577 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 6 Feb 2013 11:26:24 -0600 Subject: perf/x86/amd: Rework northbridge event constraints handler Code simplification. No functional changes. 
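The pattern at the heart of this handler is a lock-free claim of a per-node counter slot: each entry in a shared owners[] array is taken with cmpxchg() so that CPUs sharing a northbridge never race on assignment. A standalone sketch of the claim/reuse/release idea, using C11 atomics in place of the kernel's cmpxchg() (slot count and names are illustrative; the kernel version additionally prefers the event's previous hardware index, which is omitted here):

    #include <stdatomic.h>

    #define NUM_SLOTS 4                         /* one per northbridge counter */

    static _Atomic(void *) owners[NUM_SLOTS];   /* NULL means "free" */

    /* Claim a free slot for @event, or reuse one it already owns. */
    static int claim_slot(void *event)
    {
            for (int idx = 0; idx < NUM_SLOTS; idx++) {
                    void *expected = NULL;

                    /* atomically take the slot iff nobody owns it ... */
                    if (atomic_compare_exchange_strong(&owners[idx], &expected, event))
                            return idx;
                    /* ... or keep it if we claimed it on an earlier pass */
                    if (expected == event)
                            return idx;
            }
            return -1;      /* fully constrained: every slot is owned */
    }

    /* Release: only the current owner clears its slot. */
    static void release_slot(void *event, int idx)
    {
            void *expected = event;

            atomic_compare_exchange_strong(&owners[idx], &expected, NULL);
    }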
Signed-off-by: Robert Richter Signed-off-by: Jacob Shin Acked-by: Stephane Eranian Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Robert Richter Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1360171589-6381-2-git-send-email-jacob.shin@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd.c | 68 ++++++++++++++---------------------- 1 file changed, 26 insertions(+), 42 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index c93bc4e..e7963c7 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -256,9 +256,8 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct amd_nb *nb = cpuc->amd_nb; - struct perf_event *old = NULL; - int max = x86_pmu.num_counters; - int i, j, k = -1; + struct perf_event *old; + int idx, new = -1; /* * if not NB event or no NB, then no constraints @@ -276,48 +275,33 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) * because of successive calls to x86_schedule_events() from * hw_perf_group_sched_in() without hw_perf_enable() */ - for (i = 0; i < max; i++) { - /* - * keep track of first free slot - */ - if (k == -1 && !nb->owners[i]) - k = i; + for (idx = 0; idx < x86_pmu.num_counters; idx++) { + if (new == -1 || hwc->idx == idx) + /* assign free slot, prefer hwc->idx */ + old = cmpxchg(nb->owners + idx, NULL, event); + else if (nb->owners[idx] == event) + /* event already present */ + old = event; + else + continue; + + if (old && old != event) + continue; + + /* reassign to this slot */ + if (new != -1) + cmpxchg(nb->owners + new, event, NULL); + new = idx; /* already present, reuse */ - if (nb->owners[i] == event) - goto done; - } - /* - * not present, so grab a new slot - * starting either at: - */ - if (hwc->idx != -1) { - /* previous assignment */ - i = hwc->idx; - } else if (k != -1) { - /* start from free slot found */ - i = k; - } else { - /* - * event not found, no slot found in - * first pass, try again from the - * beginning - */ - i = 0; - } - j = i; - do { - old = cmpxchg(nb->owners+i, NULL, event); - if (!old) + if (old == event) break; - if (++i == max) - i = 0; - } while (i != j); -done: - if (!old) - return &nb->event_constraints[i]; - - return &emptyconstraint; + } + + if (new == -1) + return &emptyconstraint; + + return &nb->event_constraints[new]; } static struct amd_nb *amd_alloc_nb(int cpu) -- cgit v1.1 From 4dd4c2ae555d8a91e8c5bf1cd56807a35764436a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 6 Feb 2013 11:26:25 -0600 Subject: perf/x86/amd: Generalize northbridge constraints code for family 15h Generalize northbridge constraints code for family 10h so that later we can reuse the same code path with other AMD processor families that have the same northbridge event constraints. 
Signed-off-by: Robert Richter Signed-off-by: Jacob Shin Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1360171589-6381-3-git-send-email-jacob.shin@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd.c | 43 +++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index e7963c7..f8c9dfb 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -188,20 +188,13 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc) return nb && nb->nb_id != -1; } -static void amd_put_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) +static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; struct amd_nb *nb = cpuc->amd_nb; int i; /* - * only care about NB events - */ - if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) - return; - - /* * need to scan whole list because event may not have * been assigned during scheduling * @@ -247,12 +240,13 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, * * Given that resources are allocated (cmpxchg), they must be * eventually freed for others to use. This is accomplished by - * calling amd_put_event_constraints(). + * calling __amd_put_nb_event_constraints() * * Non NB events are not impacted by this restriction. */ static struct event_constraint * -amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, + struct event_constraint *c) { struct hw_perf_event *hwc = &event->hw; struct amd_nb *nb = cpuc->amd_nb; @@ -260,12 +254,6 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) int idx, new = -1; /* - * if not NB event or no NB, then no constraints - */ - if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) - return &unconstrained; - - /* * detect if already present, if so reuse * * cannot merge with actual allocation @@ -275,7 +263,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) * because of successive calls to x86_schedule_events() from * hw_perf_group_sched_in() without hw_perf_enable() */ - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { if (new == -1 || hwc->idx == idx) /* assign free slot, prefer hwc->idx */ old = cmpxchg(nb->owners + idx, NULL, event); @@ -391,6 +379,25 @@ static void amd_pmu_cpu_dead(int cpu) } } +static struct event_constraint * +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + /* + * if not NB event or no NB, then no constraints + */ + if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) + return &unconstrained; + + return __amd_get_nb_event_constraints(cpuc, event, &unconstrained); +} + +static void amd_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)) + __amd_put_nb_event_constraints(cpuc, event); +} + PMU_FORMAT_ATTR(event, "config:0-7,32-35"); PMU_FORMAT_ATTR(umask, "config:8-15" ); PMU_FORMAT_ATTR(edge, "config:18" ); -- cgit v1.1 From 9f19010af8c651879ac2c36f1a808a3a4419cd40 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Wed, 6 Feb 2013 11:26:26 -0600 Subject: 
perf/x86/amd: Use proper naming scheme for AMD bit field definitions Update these AMD bit field names to be consistent with naming convention followed by the rest of the file. Signed-off-by: Jacob Shin Acked-by: Stephane Eranian Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1360171589-6381-4-git-send-email-jacob.shin@amd.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 4 ++-- arch/x86/kernel/cpu/perf_event_amd.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 4fabcdf..2234eaaec 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,8 +29,8 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL -#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40) -#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41) +#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) +#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index f8c9dfb..aea8c20 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -156,9 +156,9 @@ static int amd_pmu_hw_config(struct perf_event *event) event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | ARCH_PERFMON_EVENTSEL_OS); else if (event->attr.exclude_host) - event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY; + event->hw.config |= AMD64_EVENTSEL_GUESTONLY; else if (event->attr.exclude_guest) - event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY; + event->hw.config |= AMD64_EVENTSEL_HOSTONLY; if (event->attr.type != PERF_TYPE_RAW) return 0; @@ -336,7 +336,7 @@ static void amd_pmu_cpu_starting(int cpu) struct amd_nb *nb; int i, nb_id; - cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; + cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; if (boot_cpu_data.x86_max_cores < 2) return; @@ -669,7 +669,7 @@ void amd_pmu_disable_virt(void) * SVM is disabled the Guest-only bits still gets set and the counter * will not count anything. */ - cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; + cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; /* Reload all events */ x86_pmu_disable_all(); -- cgit v1.1 From 4c1fd17a1cb32bc4f429c7a5ff9a91a3bffdb8fa Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Wed, 6 Feb 2013 11:26:27 -0600 Subject: perf/x86: Move MSR address offset calculation to architecture specific files Move counter index to MSR address offset calculation to architecture specific files. This prepares the way for perf_event_amd to enable counter addresses that are not contiguous -- for example AMD Family 15h processors have 6 core performance counters starting at 0xc0010200 and 4 northbridge performance counters starting at 0xc0010240. 
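A quick worked example of the address math involved (bases and strides taken from the description above; the helper itself is only an illustration, not the kernel's):

    /* Eventsel/counter MSR layout as described in this series:
     *   legacy CPUs:       4 counters from 0xc0010000, stride 1
     *   perfctr_core CPUs: 6 counters from 0xc0010200, stride 2 */
    static unsigned int perfctl_msr(int index, int has_perfctr_core)
    {
            if (has_perfctr_core)
                    return 0xc0010200 + (index << 1);
            return 0xc0010000 + index;
    }
    /* e.g. index 3: legacy -> 0xc0010003, core extension -> 0xc0010206 */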
Signed-off-by: Jacob Shin Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1360171589-6381-5-git-send-email-jacob.shin@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 21 +++++------------- arch/x86/kernel/cpu/perf_event_amd.c | 42 ++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 115c1ea..a7f06a9 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -325,6 +325,7 @@ struct x86_pmu { int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; + int (*addr_offset)(int index, bool eventsel); u64 (*event_map)(int); int max_events; int num_counters; @@ -446,28 +447,16 @@ extern u64 __read_mostly hw_cache_extra_regs u64 x86_perf_event_update(struct perf_event *event); -static inline int x86_pmu_addr_offset(int index) -{ - int offset; - - /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */ - alternative_io(ASM_NOP2, - "shll $1, %%eax", - X86_FEATURE_PERFCTR_CORE, - "=a" (offset), - "a" (index)); - - return offset; -} - static inline unsigned int x86_pmu_config_addr(int index) { - return x86_pmu.eventsel + x86_pmu_addr_offset(index); + return x86_pmu.eventsel + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, true) : index); } static inline unsigned int x86_pmu_event_addr(int index) { - return x86_pmu.perfctr + x86_pmu_addr_offset(index); + return x86_pmu.perfctr + (x86_pmu.addr_offset ? + x86_pmu.addr_offset(index, false) : index); } int x86_setup_perfctr(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index aea8c20..b60f31c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -132,6 +132,47 @@ static u64 amd_pmu_event_map(int hw_event) return amd_perfmon_event_map[hw_event]; } +/* + * Previously calculated offsets + */ +static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; +static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; + +/* + * Legacy CPUs: + * 4 counters starting at 0xc0010000 each offset by 1 + * + * CPUs with core performance counter extensions: + * 6 counters starting at 0xc0010200 each offset by 2 + */ +static inline int amd_pmu_addr_offset(int index, bool eventsel) +{ + int offset; + + if (!index) + return index; + + if (eventsel) + offset = event_offsets[index]; + else + offset = count_offsets[index]; + + if (offset) + return offset; + + if (!cpu_has_perfctr_core) + offset = index; + else + offset = index << 1; + + if (eventsel) + event_offsets[index] = offset; + else + count_offsets[index] = offset; + + return offset; +} + static int amd_pmu_hw_config(struct perf_event *event) { int ret; @@ -578,6 +619,7 @@ static __initconst const struct x86_pmu amd_pmu = { .schedule_events = x86_schedule_events, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, + .addr_offset = amd_pmu_addr_offset, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), .num_counters = AMD64_NUM_COUNTERS, -- cgit v1.1 From 0fbdad078a70ed72248c3d30fe32e45e83be00d1 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Wed, 6 Feb 2013 11:26:28 -0600 Subject: perf/x86: Allow for architecture specific RDPMC indexes Similar to config_base and event_base, allow architecture specific RDPMC ECX values. 
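For background, the RDPMC instruction takes the counter index in ECX and returns the counter value in EDX:EAX, which is why a per-architecture remapping of the index stored in hwc->event_base_rdpmc is needed at all. A minimal sketch of reading a counter this way (an illustrative user-level helper, not kernel code):

    static inline unsigned long long read_pmc(unsigned int index)
    {
            unsigned int lo, hi;

            /* ECX selects the counter; the value comes back in EDX:EAX */
            __asm__ __volatile__("rdpmc" : "=a" (lo), "=d" (hi) : "c" (index));
            return ((unsigned long long)hi << 32) | lo;
    }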
Signed-off-by: Jacob Shin Acked-by: Stephane Eranian Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1360171589-6381-6-git-send-email-jacob.shin@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 2 +- arch/x86/kernel/cpu/perf_event.h | 6 ++++++ arch/x86/kernel/cpu/perf_event_amd.c | 6 ++++++ 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c0df5ed2..bf0f01a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -829,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event, } else { hwc->config_base = x86_pmu_config_addr(hwc->idx); hwc->event_base = x86_pmu_event_addr(hwc->idx); - hwc->event_base_rdpmc = hwc->idx; + hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx); } } diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index a7f06a9..7f5c75c 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -326,6 +326,7 @@ struct x86_pmu { unsigned eventsel; unsigned perfctr; int (*addr_offset)(int index, bool eventsel); + int (*rdpmc_index)(int index); u64 (*event_map)(int); int max_events; int num_counters; @@ -459,6 +460,11 @@ static inline unsigned int x86_pmu_event_addr(int index) x86_pmu.addr_offset(index, false) : index); } +static inline int x86_pmu_rdpmc_index(int index) +{ + return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index; +} + int x86_setup_perfctr(struct perf_event *event); int x86_pmu_hw_config(struct perf_event *event); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index b60f31c..05462f0 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -173,6 +173,11 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) return offset; } +static inline int amd_pmu_rdpmc_index(int index) +{ + return index; +} + static int amd_pmu_hw_config(struct perf_event *event) { int ret; @@ -620,6 +625,7 @@ static __initconst const struct x86_pmu amd_pmu = { .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .addr_offset = amd_pmu_addr_offset, + .rdpmc_index = amd_pmu_rdpmc_index, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), .num_counters = AMD64_NUM_COUNTERS, -- cgit v1.1 From 5037878e2223278aa627162aa0bf106dffac19d4 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 4 Feb 2013 16:00:28 +0200 Subject: KVM: VMX: cleanup vmx_set_cr0(). When calculating hw_cr0, the current code masks bits that should always be on and re-adds them immediately after. Clean up the code by masking only those bits that should be dropped from hw_cr0. This allows us to get rid of some defines.
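The equivalence being exploited is a simple bitwise identity: (x & ~(A | D)) | A == (x & ~D) | A, because every bit cleared on account of A is restored by the final OR, so only the genuinely dropped bits D need to appear in the mask. A self-contained sketch, where always_on and dropped stand in for the KVM_VM_CR0_ALWAYS_ON* and KVM_GUEST_CR0_MASK sets (the function name is made up for illustration):

    /* Both shapes produce the same hw_cr0; the second needs a smaller mask. */
    static unsigned long make_hw_cr0(unsigned long cr0,
                                     unsigned long always_on,
                                     unsigned long dropped)
    {
            /* old shape: (cr0 & ~(always_on | dropped)) | always_on */
            return (cr0 & ~dropped) | always_on;
    }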
Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fe9a9cf..fe09fdc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -95,12 +95,8 @@ module_param(enable_apicv_reg_vid, bool, S_IRUGO); static bool __read_mostly nested = 0; module_param(nested, bool, S_IRUGO); -#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \ - (X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD) -#define KVM_GUEST_CR0_MASK \ - (KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) -#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \ - (X86_CR0_WP | X86_CR0_NE) +#define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD) +#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE) #define KVM_VM_CR0_ALWAYS_ON \ (KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE) #define KVM_CR4_GUEST_OWNED_BITS \ @@ -3137,11 +3133,11 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long hw_cr0; + hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK); if (enable_unrestricted_guest) - hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST) - | KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; + hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST; else { - hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON; + hw_cr0 |= KVM_VM_CR0_ALWAYS_ON; if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE)) enter_pmode(vcpu); -- cgit v1.1 From caf6900f2d8aaebe404c976753f6813ccd31d95e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 5 Feb 2013 15:11:09 +0800 Subject: KVM: MMU: lazily drop large spte Currently, kvm zaps the large spte if write protection is needed, so a later read can fault on that spte. Instead, we can make the large spte read-only rather than non-present; the page fault caused by a read access can then be avoided. The idea is from Avi: | As I mentioned before, write-protecting a large spte is a good idea, | since it moves some work from protect-time to fault-time, so it reduces | jitter. This removes the need for the return value. Reviewed-by: Gleb Natapov Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 0242a8a..2a8d99a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1106,8 +1106,7 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) /* * Write-protect on the specified @sptep, @pt_protect indicates whether - * spte writ-protection is caused by protecting shadow page table. - * @flush indicates whether tlb need be flushed. + * spte write-protection is caused by protecting shadow page table. * * Note: write protection is difference between drity logging and spte * protection: @@ -1116,10 +1115,9 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) * - for spte protection, the spte can be writable only after unsync-ing * shadow page. * - * Return true if the spte is dropped. + * Return true if tlb need be flushed.
*/ -static bool -spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) +static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect) { u64 spte = *sptep; @@ -1129,17 +1127,11 @@ spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); - if (__drop_large_spte(kvm, sptep)) { - *flush |= true; - return true; - } - if (pt_protect) spte &= ~SPTE_MMU_WRITEABLE; spte = spte & ~PT_WRITABLE_MASK; - *flush |= mmu_spte_update(sptep, spte); - return false; + return mmu_spte_update(sptep, spte); } static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, @@ -1151,11 +1143,8 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); - if (spte_write_protect(kvm, sptep, &flush, pt_protect)) { - sptep = rmap_get_first(*rmapp, &iter); - continue; - } + flush |= spte_write_protect(kvm, sptep, pt_protect); sptep = rmap_get_next(&iter); } @@ -2596,6 +2585,8 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, break; } + drop_large_spte(vcpu, iterator.sptep); + if (!is_shadow_present_pte(*iterator.sptep)) { u64 base_addr = iterator.addr; -- cgit v1.1 From 55dd98c3a81e243447d5fbf496fa8b909ef3b7f6 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 5 Feb 2013 15:26:54 +0800 Subject: KVM: MMU: cleanup mapping-level Use min() to cleanup mapping_level Reviewed-by: Gleb Natapov Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2a8d99a..5356d8d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -832,8 +832,7 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn) if (host_level == PT_PAGE_TABLE_LEVEL) return host_level; - max_level = kvm_x86_ops->get_lpage_level() < host_level ? 
- kvm_x86_ops->get_lpage_level() : host_level; + max_level = min(kvm_x86_ops->get_lpage_level(), host_level); for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level) if (has_wrprotected_page(vcpu->kvm, large_gfn, level)) -- cgit v1.1 From f761620377ebc791afba7ded078947d2116f48ce Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 5 Feb 2013 15:27:27 +0800 Subject: KVM: MMU: remove pt_access in mmu_set_spte It is only used in debug code, so drop it Reviewed-by: Gleb Natapov Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 17 +++++++---------- arch/x86/kvm/paging_tmpl.h | 9 ++++----- 2 files changed, 11 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5356d8d..e956e9b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2388,16 +2388,15 @@ done: } static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pt_access, unsigned pte_access, - int write_fault, int *emulate, int level, gfn_t gfn, - pfn_t pfn, bool speculative, bool host_writable) + unsigned pte_access, int write_fault, int *emulate, + int level, gfn_t gfn, pfn_t pfn, bool speculative, + bool host_writable) { int was_rmapped = 0; int rmap_count; - pgprintk("%s: spte %llx access %x write_fault %d gfn %llx\n", - __func__, *sptep, pt_access, - write_fault, gfn); + pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, + *sptep, write_fault, gfn); if (is_rmap_spte(*sptep)) { /* @@ -2513,7 +2512,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, return -1; for (i = 0; i < ret; i++, gfn++, start++) - mmu_set_spte(vcpu, start, ACC_ALL, access, 0, NULL, + mmu_set_spte(vcpu, start, access, 0, NULL, sp->role.level, gfn, page_to_pfn(pages[i]), true, true); @@ -2574,9 +2573,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { - unsigned pte_access = ACC_ALL; - - mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, + mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, write, &emulate, level, gfn, pfn, prefault, map_writable); direct_pte_prefetch(vcpu, iterator.sptep); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 34c5c99..105dd5b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -326,8 +326,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. 
*/ - mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, - NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true); + mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, + gfn, pfn, true, true); return true; } @@ -470,9 +470,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, } clear_sp_write_flooding_count(it.sptep); - mmu_set_spte(vcpu, it.sptep, access, gw->pte_access, - write_fault, &emulate, it.level, - gw->gfn, pfn, prefault, map_writable); + mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, + it.level, gw->gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); return emulate; -- cgit v1.1 From 24db2734ad8123b6858ca98d690483ecdcceebb5 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 5 Feb 2013 15:28:02 +0800 Subject: KVM: MMU: cleanup __direct_map Use link_shadow_page to link the sp to the spte in __direct_map Reviewed-by: Gleb Natapov Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e956e9b..1cda1f3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1947,9 +1947,9 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) { u64 spte; - spte = __pa(sp->spt) - | PT_PRESENT_MASK | PT_ACCESSED_MASK - | PT_WRITABLE_MASK | PT_USER_MASK; + spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | + shadow_user_mask | shadow_x_mask | shadow_accessed_mask; + mmu_spte_set(sptep, spte); } @@ -2592,11 +2592,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - mmu_spte_set(iterator.sptep, - __pa(sp->spt) - | PT_PRESENT_MASK | PT_WRITABLE_MASK - | shadow_user_mask | shadow_x_mask - | shadow_accessed_mask); + link_shadow_page(iterator.sptep, sp); } } return emulate; -- cgit v1.1 From e575a86fdc50d013bf3ad3aa81d9100e8e6cc60d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 7 Feb 2013 09:44:13 -0800 Subject: x86: Do not leak kernel page mapping locations Without this patch, it is trivial to determine kernel page mappings by examining the error code reported to dmesg[1]. Instead, declare the entire kernel memory space as a violation of a present page. Additionally, since show_unhandled_signals is enabled by default, switch branch hinting to the more realistic expectation, and unobfuscate the setting of the PF_PROT bit to improve readability. [1] http://vulnfactory.org/blog/2013/02/06/a-linux-memory-trick/ Reported-by: Dan Rosenberg Suggested-by: Brad Spengler Signed-off-by: Kees Cook Cc: stable@vger.kernel.org Acked-by: H. Peter Anvin Cc: Paul E. McKenney Cc: Frederic Weisbecker Cc: Eric W. 
Biederman Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20130207174413.GA12485@www.outflux.net Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 027088f..fb674fd 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -748,13 +748,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, return; } #endif + /* Kernel addresses are always protection faults: */ + if (address >= TASK_SIZE) + error_code |= PF_PROT; - if (unlikely(show_unhandled_signals)) + if (likely(show_unhandled_signals)) show_signal_msg(regs, error_code, address, tsk); - /* Kernel addresses are always protection faults: */ tsk->thread.cr2 = address; - tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_PF; force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0); -- cgit v1.1 From 96477b4cd705c5416346aef262b0a1116cfcdd80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 12 Dec 2012 13:34:03 +0200 Subject: x86-32: Add support for 64bit get_user() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement __get_user_8() for x86-32. It will return the 64-bit result in edx:eax register pair, and ecx is used to pass in the address and return the error value. For consistency, change the register assignment for all other __get_user_x() variants, so that address is passed in ecx/rcx, the error value is returned in ecx/rcx, and eax/rax contains the actual value. [ hpa: I modified the patch so that it does NOT change the calling conventions for the existing callsites, this also means that the code is completely unchanged for 64 bits. Instead, continue to use eax for address input/error output and use the ecx:edx register pair for the output. ] This is a partial refresh of a patch [1] by Jamie Lokier from 2004. Only the minimal changes to implement 64bit get_user() were picked from the original patch. [1] http://article.gmane.org/gmane.linux.kernel/198823 Originally-by: Jamie Lokier Signed-off-by: Ville Syrjälä Link: http://lkml.kernel.org/r/1355312043-11467-1-git-send-email-ville.syrjala@linux.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uaccess.h | 19 +++++++++++++++---- arch/x86/kernel/i386_ksyms_32.c | 1 + arch/x86/lib/getuser.S | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 48 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 1709801..1e96326 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -151,8 +151,15 @@ extern int __get_user_bad(void); * On error, the variable @x is set to zero. 
*/ #ifdef CONFIG_X86_32 -#define __get_user_8(__ret_gu, __val_gu, ptr) \ - __get_user_x(X, __ret_gu, __val_gu, ptr) +#define __get_user_8(ret, x, ptr) \ +do { \ + register unsigned long long __xx asm("%edx"); \ + asm volatile("call __get_user_8" \ + : "=a" (ret), "=r" (__xx) \ + : "0" (ptr)); \ + (x) = __xx; \ +} while (0) + #else #define __get_user_8(__ret_gu, __val_gu, ptr) \ __get_user_x(8, __ret_gu, __val_gu, ptr) @@ -162,6 +169,7 @@ extern int __get_user_bad(void); ({ \ int __ret_gu; \ unsigned long __val_gu; \ + unsigned long long __val_gu8; \ __chk_user_ptr(ptr); \ might_fault(); \ switch (sizeof(*(ptr))) { \ @@ -175,13 +183,16 @@ extern int __get_user_bad(void); __get_user_x(4, __ret_gu, __val_gu, ptr); \ break; \ case 8: \ - __get_user_8(__ret_gu, __val_gu, ptr); \ + __get_user_8(__ret_gu, __val_gu8, ptr); \ break; \ default: \ __get_user_x(X, __ret_gu, __val_gu, ptr); \ break; \ } \ - (x) = (__typeof__(*(ptr)))__val_gu; \ + if (sizeof(*(ptr)) == 8) \ + (x) = (__typeof__(*(ptr)))__val_gu8; \ + else \ + (x) = (__typeof__(*(ptr)))__val_gu; \ __ret_gu; \ }) diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 9c3bd4a..0fa6912 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -26,6 +26,7 @@ EXPORT_SYMBOL(csum_partial_copy_generic); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); EXPORT_SYMBOL(__get_user_4); +EXPORT_SYMBOL(__get_user_8); EXPORT_SYMBOL(__put_user_1); EXPORT_SYMBOL(__put_user_2); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index 156b9c8..d3bf9f9 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -15,11 +15,10 @@ * __get_user_X * * Inputs: %[r|e]ax contains the address. - * The register is modified, but all changes are undone - * before returning because the C code doesn't know about it. * * Outputs: %[r|e]ax is error code (0 or -EFAULT) * %[r|e]dx contains zero-extended value + * %ecx contains the high half for 32-bit __get_user_8 * * * These functions should not modify any other registers, @@ -79,22 +78,35 @@ ENTRY(__get_user_4) CFI_ENDPROC ENDPROC(__get_user_4) -#ifdef CONFIG_X86_64 ENTRY(__get_user_8) CFI_STARTPROC +#ifdef CONFIG_X86_64 add $7,%_ASM_AX jc bad_get_user GET_THREAD_INFO(%_ASM_DX) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX - jae bad_get_user + jae bad_get_user ASM_STAC 4: movq -7(%_ASM_AX),%_ASM_DX xor %eax,%eax ASM_CLAC ret +#else + add $7,%_ASM_AX + jc bad_get_user_8 + GET_THREAD_INFO(%_ASM_DX) + cmp TI_addr_limit(%_ASM_DX),%_ASM_AX + jae bad_get_user_8 + ASM_STAC +4: mov -7(%_ASM_AX),%edx +5: mov -3(%_ASM_AX),%ecx + xor %eax,%eax + ASM_CLAC + ret +#endif CFI_ENDPROC ENDPROC(__get_user_8) -#endif + bad_get_user: CFI_STARTPROC @@ -105,9 +117,24 @@ bad_get_user: CFI_ENDPROC END(bad_get_user) +#ifdef CONFIG_X86_32 +bad_get_user_8: + CFI_STARTPROC + xor %edx,%edx + xor %ecx,%ecx + mov $(-EFAULT),%_ASM_AX + ASM_CLAC + ret + CFI_ENDPROC +END(bad_get_user_8) +#endif + _ASM_EXTABLE(1b,bad_get_user) _ASM_EXTABLE(2b,bad_get_user) _ASM_EXTABLE(3b,bad_get_user) #ifdef CONFIG_X86_64 _ASM_EXTABLE(4b,bad_get_user) +#else + _ASM_EXTABLE(4b,bad_get_user_8) + _ASM_EXTABLE(5b,bad_get_user_8) #endif -- cgit v1.1 From cf31ec3f7fece93f3fce3ee5964e27857141ea47 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Dec 2012 15:21:22 +0100 Subject: uprobes/x86: Change __skip_sstep() to actually skip the whole insn __skip_sstep() doesn't update regs->ip. Currently this is correct but only "by accident" and it doesn't skip the whole insn. 
Change it to advance ->ip by the length of the detected 0x66*0x90 sequence. Signed-off-by: Oleg Nesterov Acked-by: Srikar Dronamraju --- arch/x86/kernel/uprobes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index c71025b..4e33a35d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -680,8 +680,11 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) if (auprobe->insn[i] == 0x66) continue; - if (auprobe->insn[i] == 0x90) + if (auprobe->insn[i] == 0x90) { + regs->ip = uprobe_get_swbp_addr(regs); + regs->ip += i + 1; return true; + } break; } -- cgit v1.1 From 74e59dfc6b19e3472a7c16ad57bc831e6e647895 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Sun, 30 Dec 2012 15:54:08 +0100 Subject: uprobes: Change handle_swbp() to expose bp_vaddr to handler_chain() Change handle_swbp() to set regs->ip = bp_vaddr in advance, this is what consumer->handler() needs but uprobe_get_swbp_addr() is not exported. This also simplifies the code and makes it more consistent across the supported architectures. handle_swbp() becomes the only caller of uprobe_get_swbp_addr(). Signed-off-by: Oleg Nesterov Acked-by: Ananth N Mavinakayanahalli --- arch/x86/kernel/uprobes.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 4e33a35d..0ba4cfb 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -681,7 +681,6 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) continue; if (auprobe->insn[i] == 0x90) { - regs->ip = uprobe_get_swbp_addr(regs); regs->ip += i + 1; return true; } -- cgit v1.1 From 679204183472af16e8e75d2b1479459ad19bc67c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 31 Jan 2013 15:22:15 -0500 Subject: tools/power turbostat: decode MSR_IA32_POWER_CTL When verbose is enabled, print the C1E-Enable bit in MSR_IA32_POWER_CTL. also delete some redundant tests on the verbose variable. Signed-off-by: Len Brown --- arch/x86/include/uapi/asm/msr-index.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 433a59f..7bdaf7c 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -103,6 +103,8 @@ #define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) #define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define MSR_IA32_POWER_CTL 0x000001fc + #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 #define MSR_IA32_MC0_ADDR 0x00000402 -- cgit v1.1 From 137ecc779c80138723677209730738d76262e810 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Feb 2013 21:35:35 -0500 Subject: intel_idle: remove use and definition of MWAIT_MAX_NUM_CSTATES Cosmetic only. Replace use of MWAIT_MAX_NUM_CSTATES with CPUIDLE_STATE_MAX. They are both 8, so this patch has no functional change. The reason to change is that intel_idle will soon be able to export more than the 8 "major" states supported by MWAIT. When we hit that limit, it is important to know where the limit comes from. 
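As background for these MWAIT patches, here is a minimal stand-alone sketch of how an MWAIT hint encodes its target C-state, using the field layout from mwait.h shown in the diffs below: a 4-bit sub-state in the low nibble and the C-state selector in the next nibble. This is the same decoding performed by the MWAIT_HINT2CSTATE()/MWAIT_HINT2SUBSTATE() macros added in the next patch; the program below is illustrative only, not kernel code.

    #include <stdio.h>

    #define MWAIT_SUBSTATE_MASK 0xf
    #define MWAIT_CSTATE_MASK   0xf
    #define MWAIT_SUBSTATE_SIZE 4

    /* Same decoding as the kernel's MWAIT_HINT2CSTATE() and
     * MWAIT_HINT2SUBSTATE() macros. */
    static unsigned int hint2cstate(unsigned int hint)
    {
        return (hint >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK;
    }

    static unsigned int hint2substate(unsigned int hint)
    {
        return hint & MWAIT_SUBSTATE_MASK;
    }

    int main(void)
    {
        unsigned int hint = 0x21;   /* arbitrary example value */

        printf("hint 0x%02x -> cstate field %u, sub-state %u\n",
               hint, hint2cstate(hint), hint2substate(hint));
        return 0;
    }

Because more than one exported idle state may map to the same MWAIT hint, a table indexed by hint value cannot represent them all, which is the limitation the next patch removes.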
Signed-off-by: Len Brown --- arch/x86/include/asm/mwait.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index bcdff99..3f44732 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -4,7 +4,6 @@ #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 -#define MWAIT_MAX_NUM_CSTATES 8 #define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 -- cgit v1.1 From e022e7eb90f3edb83f9ff77825eda3d1b3a2f2e0 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 1 Feb 2013 23:37:30 -0500 Subject: intel_idle: remove assumption of one C-state per MWAIT flag Remove the assumption that cstate_tables are indexed by MWAIT flag values. Each entry identifies itself via its own flags value. This change is needed to support multiple states that share the same MWAIT flags. Note that this can have an effect on what state is described by 'N' on cmdline intel_idle.max_cstate=N on some systems. intel_idle.max_cstate=0 still disables the driver intel_idle.max_cstate=1 still results in just C1(E) However, "place holders" in the sparse C-state name-space (eg. Atom) have been removed. Signed-off-by: Len Brown --- arch/x86/include/asm/mwait.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 3f44732..2f366d0 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -4,6 +4,8 @@ #define MWAIT_SUBSTATE_MASK 0xf #define MWAIT_CSTATE_MASK 0xf #define MWAIT_SUBSTATE_SIZE 4 +#define MWAIT_HINT2CSTATE(hint) (((hint) >> MWAIT_SUBSTATE_SIZE) & MWAIT_CSTATE_MASK) +#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK) #define CPUID_MWAIT_LEAF 5 #define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1 -- cgit v1.1 From 6a377ddc4e4ede2eeb9cd46ada23bbe417704fc9 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 9 Feb 2013 23:08:07 -0500 Subject: xen idle: make xen-specific macro xen-specific This macro is only invoked by Xen, so make its definition specific to Xen. 
> set_pm_idle_to_default() < xen_set_default_idle() Signed-off-by: Len Brown Cc: xen-devel@lists.xensource.com --- arch/x86/include/asm/processor.h | 6 +++++- arch/x86/kernel/process.c | 4 +++- arch/x86/xen/setup.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 888184b..c2f7f47 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -998,7 +998,11 @@ extern unsigned long arch_align_stack(unsigned long sp); extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); -bool set_pm_idle_to_default(void); +#ifdef CONFIG_XEN +bool xen_set_default_idle(void); +#else +#define xen_set_default_idle 0 +#endif void stop_this_cpu(void *dummy); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2ed787f..7ed9f6b0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -396,7 +396,8 @@ void default_idle(void) EXPORT_SYMBOL(default_idle); #endif -bool set_pm_idle_to_default(void) +#ifdef CONFIG_XEN +bool xen_set_default_idle(void) { bool ret = !!pm_idle; @@ -404,6 +405,7 @@ bool set_pm_idle_to_default(void) return ret; } +#endif void stop_this_cpu(void *dummy) { local_irq_disable(); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8971a26..2b73b5c 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -561,7 +561,7 @@ void __init xen_arch_setup(void) #endif disable_cpuidle(); disable_cpufreq(); - WARN_ON(set_pm_idle_to_default()); + WARN_ON(xen_set_default_idle()); fiddle_vdso(); #ifdef CONFIG_NUMA numa_off = 1; -- cgit v1.1 From 69fb3676df3329a7142803bb3502fa59dc0db2e3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 10 Feb 2013 01:38:39 -0500 Subject: x86 idle: remove mwait_idle() and "idle=mwait" cmdline param mwait_idle() is a C1-only idle loop intended to be more efficient than HLT, starting on Pentium-4 HT-enabled processors. But mwait_idle() has been replaced by the more general mwait_idle_with_hints(), which handles both C1 and deeper C-states. ACPI processor_idle and intel_idle use only mwait_idle_with_hints(), and no longer use mwait_idle(). Here we simplify the x86 native idle code by removing mwait_idle(), and the "idle=mwait" bootparam used to invoke it. Since Linux 3.0 there has been a boot-time warning when "idle=mwait" was invoked saying it would be removed in 2012. This removal was also noted in the (now removed:-) feature-removal-schedule.txt. After this change, kernels configured with (CONFIG_ACPI=n && CONFIG_INTEL_IDLE=n) when run on hardware that supports MWAIT will simply use HLT. If MWAIT is desired on those systems, cpuidle and the cpuidle drivers above can be enabled. 
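For readers unfamiliar with the pattern being removed, the following is a simplified sketch of the MONITOR/MWAIT C1 idle loop that mwait_idle() implemented. It assumes kernel context (MONITOR and MWAIT are privileged instructions), and NEED_RESCHED_BIT is a stand-in for the kernel's _TIF_NEED_RESCHED flag; the real code additionally handled tracing and the CLFLUSH-before-MONITOR erratum.

    #define NEED_RESCHED_BIT (1UL << 3)  /* illustrative bit position */

    /* Arm a monitor on the thread's flags word, then re-check the
     * resched flag: a wakeup landing between the check and the MWAIT
     * writes the monitored line, so MWAIT falls through instead of
     * sleeping. "sti; mwait" enters C1 (hint 0) with interrupts
     * re-enabled in the STI shadow. */
    static void mwait_c1_idle(volatile unsigned long *flags)
    {
        /* monitor: eax = address, ecx = extensions, edx = hints */
        asm volatile("monitor" : : "a" (flags), "c" (0), "d" (0));

        if (!(*flags & NEED_RESCHED_BIT))
            asm volatile("sti; mwait" : : "a" (0), "c" (0));
        else
            asm volatile("sti");
    }

The mwait_idle_with_hints() replacement follows the same shape but takes the hint as a parameter, which is what lets it serve deeper C-states as well as C1.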
Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/process.c | 79 +--------------------------------------- arch/x86/kernel/smpboot.c | 2 +- 3 files changed, 3 insertions(+), 80 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c2f7f47..8a28fea 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -725,7 +725,7 @@ extern unsigned long boot_option_idle_override; extern bool amd_e400_c1e_detected; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, - IDLE_POLL, IDLE_FORCE_MWAIT}; + IDLE_POLL}; extern void enable_sep_cpu(void); extern int sysenter_setup(void); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7ed9f6b0..cd5a4c9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -421,27 +421,6 @@ void stop_this_cpu(void *dummy) } } -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - if (!need_resched()) { - trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id()); - trace_cpu_idle_rcuidle(1, smp_processor_id()); - if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) - clflush((void *)¤t_thread_info()->flags); - - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(0, 0); - else - local_irq_enable(); - trace_power_end_rcuidle(smp_processor_id()); - trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); - } else - local_irq_enable(); -} - /* * On SMP it's slightly faster (but much more power-consuming!) * to poll the ->work.need_resched flag instead of waiting for the @@ -458,53 +437,6 @@ static void poll_idle(void) trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } -/* - * mwait selection logic: - * - * It depends on the CPU. For AMD CPUs that support MWAIT this is - * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings - * then depend on a clock divisor and current Pstate of the core. If - * all cores of a processor are in halt state (C1) the processor can - * enter the C1E (C1 enhanced) state. If mwait is used this will never - * happen. - * - * idle=mwait overrides this decision and forces the usage of mwait. - */ - -#define MWAIT_INFO 0x05 -#define MWAIT_ECX_EXTENDED_INFO 0x01 -#define MWAIT_EDX_C1 0xf0 - -int mwait_usable(const struct cpuinfo_x86 *c) -{ - u32 eax, ebx, ecx, edx; - - /* Use mwait if idle=mwait boot option is given */ - if (boot_option_idle_override == IDLE_FORCE_MWAIT) - return 1; - - /* - * Any idle= boot option other than idle=mwait means that we must not - * use mwait. Eg: idle=halt or idle=poll or idle=nomwait - */ - if (boot_option_idle_override != IDLE_NO_OVERRIDE) - return 0; - - if (c->cpuid_level < MWAIT_INFO) - return 0; - - cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx); - /* Check, whether EDX has extended info about MWAIT */ - if (!(ecx & MWAIT_ECX_EXTENDED_INFO)) - return 1; - - /* - * edx enumeratios MONITOR/MWAIT extensions. 
Check, whether - * C1 supports MWAIT - */ - return (edx & MWAIT_EDX_C1); -} - bool amd_e400_c1e_detected; EXPORT_SYMBOL(amd_e400_c1e_detected); @@ -576,13 +508,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) if (pm_idle) return; - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * One CPU supports mwait => All CPUs supports mwait - */ - pr_info("using mwait in idle threads\n"); - pm_idle = mwait_idle; - } else if (cpu_has_amd_erratum(amd_erratum_400)) { + if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ pr_info("using AMD E400 aware idle routine\n"); pm_idle = amd_e400_idle; @@ -606,9 +532,6 @@ static int __init idle_setup(char *str) pr_info("using polling idle threads\n"); pm_idle = poll_idle; boot_option_idle_override = IDLE_POLL; - } else if (!strcmp(str, "mwait")) { - boot_option_idle_override = IDLE_FORCE_MWAIT; - WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ed0fe38..a6ceaed 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1369,7 +1369,7 @@ static inline void mwait_play_dead(void) void *mwait_ptr; struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) + if (!this_cpu_has(X86_FEATURE_MWAIT)) return; if (!this_cpu_has(X86_FEATURE_CLFLSH)) return; -- cgit v1.1 From 27be457000211a6903968dfce06d5f73f051a217 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 10 Feb 2013 02:28:46 -0500 Subject: x86 idle: remove 32-bit-only "no-hlt" parameter, hlt_works_ok flag Remove the 32-bit-only x86 cmdline param "no-hlt" and the cpuinfo_x86.hlt_works_ok flag that it sets. If a user wants to avoid HLT, then "idle=poll" is much more useful, as it avoids invocation of HLT in idle, while "no-hlt" failed to do so. Indeed, hlt_works_ok was consulted in only 3 places. First, in /proc/cpuinfo, where "hlt_bug yes" would be printed if and only if the user booted the system with "no-hlt" -- as there was no other code to set that flag. Second, check_hlt() would not invoke halt() if "no-hlt" were on the cmdline. Third, it was consulted in stop_this_cpu(), which is invoked by native_machine_halt()/reboot_interrupt()/smp_stop_nmi_callback() -- all cases where the machine is being shut down or reset. The flag was not consulted in the more frequently invoked play_dead()/hlt_play_dead() used in processor offline and suspend. Since Linux 3.0 there has been a run-time notice upon "no-hlt" invocations indicating that it would be removed in 2012.
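As background for the mwait_usable() logic deleted in the previous patch, here is a minimal user-space sketch of the CPUID leaf 5 probe it performed (the boot-option checks are omitted), assuming GCC's <cpuid.h>:

    #include <stdio.h>
    #include <stdbool.h>
    #include <cpuid.h>

    #define MWAIT_INFO              0x05
    #define MWAIT_ECX_EXTENDED_INFO 0x01
    #define MWAIT_EDX_C1            0xf0

    /* Mirrors the removed mwait_usable(): leaf 5 ECX bit 0 says EDX
     * enumerates per-C-state MWAIT sub-states; a non-zero EDX[7:4]
     * then means C1 supports MWAIT. */
    static bool c1_mwait_supported(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx))
            return false;   /* leaf 5 not available */
        if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
            return true;    /* no extended info: assume usable */
        return edx & MWAIT_EDX_C1;
    }

    int main(void)
    {
        printf("C1 MWAIT %ssupported\n",
               c1_mwait_supported() ? "" : "not ");
        return 0;
    }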
Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/include/asm/processor.h | 10 ---------- arch/x86/kernel/cpu/bugs.c | 27 --------------------------- arch/x86/kernel/cpu/proc.c | 2 -- arch/x86/kernel/process.c | 6 ++---- arch/x86/xen/setup.c | 3 --- 5 files changed, 2 insertions(+), 46 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8a28fea..b9e7d27 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -89,7 +89,6 @@ struct cpuinfo_x86 { char wp_works_ok; /* It doesn't on 386's */ /* Problems on some 486Dx4's and old 386's: */ - char hlt_works_ok; char hard_math; char rfu; char fdiv_bug; @@ -165,15 +164,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); extern const struct seq_operations cpuinfo_op; -static inline int hlt_works(int cpu) -{ -#ifdef CONFIG_X86_32 - return cpu_data(cpu).hlt_works_ok; -#else - return 1; -#endif -} - #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 92dfec9..af6455e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -17,15 +17,6 @@ #include #include -static int __init no_halt(char *s) -{ - WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n"); - boot_cpu_data.hlt_works_ok = 0; - return 1; -} - -__setup("no-hlt", no_halt); - static int __init no_387(char *s) { boot_cpu_data.hard_math = 0; @@ -89,23 +80,6 @@ static void __init check_fpu(void) pr_warn("Hmm, FPU with FDIV bug\n"); } -static void __init check_hlt(void) -{ - if (boot_cpu_data.x86 >= 5 || paravirt_enabled()) - return; - - pr_info("Checking 'hlt' instruction... "); - if (!boot_cpu_data.hlt_works_ok) { - pr_cont("disabled\n"); - return; - } - halt(); - halt(); - halt(); - halt(); - pr_cont("OK\n"); -} - /* * Check whether we are able to run this kernel safely on SMP. * @@ -129,7 +103,6 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); #endif check_config(); - check_hlt(); init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 3286a92..e280253 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -28,7 +28,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) { seq_printf(m, "fdiv_bug\t: %s\n" - "hlt_bug\t\t: %s\n" "f00f_bug\t: %s\n" "coma_bug\t: %s\n" "fpu\t\t: %s\n" @@ -36,7 +35,6 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) "cpuid level\t: %d\n" "wp\t\t: %s\n", c->fdiv_bug ? "yes" : "no", - c->hlt_works_ok ? "no" : "yes", c->f00f_bug ? "yes" : "no", c->coma_bug ? "yes" : "no", c->hard_math ? 
"yes" : "no", diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cd5a4c9..aef852e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -415,10 +415,8 @@ void stop_this_cpu(void *dummy) set_cpu_online(smp_processor_id(), false); disable_local_APIC(); - for (;;) { - if (hlt_works(smp_processor_id())) - halt(); - } + for (;;) + halt(); } /* diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 2b73b5c..94eac5c 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -556,9 +556,6 @@ void __init xen_arch_setup(void) COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE); /* Set up idle, making sure it calls safe_halt() pvop */ -#ifdef CONFIG_X86_32 - boot_cpu_data.hlt_works_ok = 1; -#endif disable_cpuidle(); disable_cpufreq(); WARN_ON(xen_set_default_idle()); -- cgit v1.1 From 257090f70233084488f7b3ebe99be8c159a23281 Mon Sep 17 00:00:00 2001 From: Yang Zhang Date: Sun, 10 Feb 2013 22:57:18 +0800 Subject: KVM: VMX: disable apicv by default Without Posted Interrupt, current code is broken. Just disable by default until Posted Interrupt is ready. Signed-off-by: Yang Zhang Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fe09fdc..c794478 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -84,8 +84,7 @@ module_param(vmm_exclusive, bool, S_IRUGO); static bool __read_mostly fasteoi = 1; module_param(fasteoi, bool, S_IRUGO); -static bool __read_mostly enable_apicv_reg_vid = 1; -module_param(enable_apicv_reg_vid, bool, S_IRUGO); +static bool __read_mostly enable_apicv_reg_vid; /* * If nested=1, nested virtualization is supported, i.e., guests may use -- cgit v1.1 From 7a905b1485adf863607b5fc9e32a3fa3838bcc23 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 7 Feb 2013 18:55:57 +0900 Subject: KVM: Remove user_alloc from struct kvm_memory_slot This field was needed to differentiate memory slots created by the new API, KVM_SET_USER_MEMORY_REGION, from those by the old equivalent, KVM_SET_MEMORY_REGION, whose support was dropped long before: commit b74a07beed0e64bfba413dcb70dd6749c57f43dc KVM: Remove kernel-allocated memory regions Although we also have private memory slots to which KVM allocates memory with vm_mmap(), !user_alloc slots in other words, the slot id should be enough for differentiating them. Note: corresponding function parameters will be removed later. Reviewed-by: Marcelo Tosatti Signed-off-by: Takuya Yoshikawa Signed-off-by: Gleb Natapov --- arch/x86/kvm/x86.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 373e17a..3c5bb6f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6897,33 +6897,28 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, bool user_alloc) { int npages = memslot->npages; - int map_flags = MAP_PRIVATE | MAP_ANONYMOUS; - /* Prevent internal slot pages from being moved by fork()/COW. */ - if (memslot->id >= KVM_USER_MEM_SLOTS) - map_flags = MAP_SHARED | MAP_ANONYMOUS; - - /*To keep backward compatibility with older userspace, - *x86 needs to handle !user_alloc case. + /* + * Only private memory slots need to be mapped here since + * KVM_SET_MEMORY_REGION ioctl is no longer supported. 
*/ - if (!user_alloc) { - if (npages && !old.npages) { - unsigned long userspace_addr; + if ((memslot->id >= KVM_USER_MEM_SLOTS) && npages && !old.npages) { + unsigned long userspace_addr; - userspace_addr = vm_mmap(NULL, 0, - npages * PAGE_SIZE, - PROT_READ | PROT_WRITE, - map_flags, - 0); + /* + * MAP_SHARED to prevent internal slot pages from being moved + * by fork()/COW. + */ + userspace_addr = vm_mmap(NULL, 0, npages * PAGE_SIZE, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, 0); - if (IS_ERR((void *)userspace_addr)) - return PTR_ERR((void *)userspace_addr); + if (IS_ERR((void *)userspace_addr)) + return PTR_ERR((void *)userspace_addr); - memslot->userspace_addr = userspace_addr; - } + memslot->userspace_addr = userspace_addr; } - return 0; } @@ -6935,7 +6930,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT; - if (!user_alloc && !old.user_alloc && old.npages && !npages) { + if ((mem->slot >= KVM_USER_MEM_SLOTS) && old.npages && !npages) { int ret; ret = vm_munmap(old.userspace_addr, -- cgit v1.1 From 136867f517cbc3f8a91f035677911a6b503c3323 Mon Sep 17 00:00:00 2001 From: Shuah Khan Date: Tue, 5 Feb 2013 19:57:22 -0700 Subject: x86/kvm: Fix compile warning in kvm_register_steal_time() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following compile warning in kvm_register_steal_time(): CC arch/x86/kernel/kvm.o arch/x86/kernel/kvm.c: In function ‘kvm_register_steal_time’: arch/x86/kernel/kvm.c:302:3: warning: format ‘%lx’ expects argument of type ‘long unsigned int’, but argument 3 has type ‘phys_addr_t’ [-Wformat] Introduced via: 5dfd486c4750 x86, kvm: Fix kvm's use of __pa() on percpu areas d76565344512 x86, mm: Create slow_virt_to_phys() f3c4fbb68e93 x86, mm: Use new pagetable helpers in try_preserve_large_page() 4cbeb51b860c x86, mm: Pagetable level size/shift/mask helpers a25b9316841c x86, mm: Make DEBUG_VIRTUAL work earlier in boot Signed-off-by: Shuah Khan Acked-by: Gleb Natapov Cc: Marcelo Tosatti Cc: Dave Hansen Cc: Rik van Riel Cc: shuahkhan@gmail.com Cc: avi@redhat.com Cc: gleb@redhat.com Cc: mst@redhat.com Link: http://lkml.kernel.org/r/1360119442.8356.8.camel@lorien2 Signed-off-by: Ingo Molnar --- arch/x86/kernel/kvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index aa7e58b..9cec202 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -298,8 +298,8 @@ static void kvm_register_steal_time(void) memset(st, 0, sizeof(*st)); wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED)); - printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n", - cpu, slow_virt_to_phys(st)); + pr_info("kvm-stealtime: cpu %d, msr %llx\n", + cpu, (unsigned long long) slow_virt_to_phys(st)); } static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; -- cgit v1.1 From cb214ede7657db458fd0b2a25ea0b28dbf900ebc Mon Sep 17 00:00:00 2001 From: Stoney Wang Date: Thu, 7 Feb 2013 10:53:02 -0800 Subject: x86/apic: Work around boot failure on HP ProLiant DL980 G7 Server systems When a HP ProLiant DL980 G7 Server boots a regular kernel, there will be intermittent lost interrupts which could result in a hang or (in extreme cases) data loss. The reason is that this system only supports x2apic physical mode, while the kernel boots with a logical-cluster default setting. 
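(A brief aside on the kvm_register_steal_time() warning fixed above: phys_addr_t is 32 or 64 bits wide depending on configuration, so the portable printing idiom, the one the fix uses, is to cast to unsigned long long and print with %llx. A minimal stand-alone sketch:

    #include <stdio.h>
    #include <stdint.h>

    /* In the kernel, phys_addr_t is u32 or u64 depending on
     * CONFIG_PHYS_ADDR_T_64BIT; 64-bit is assumed here. */
    typedef uint64_t phys_addr_t;

    int main(void)
    {
        phys_addr_t pa = 0xfee00000;    /* arbitrary example address */

        /* Casting to the widest type keeps the format specifier
         * correct for either width of phys_addr_t. */
        printf("msr %llx\n", (unsigned long long)pa);
        return 0;
    }

End of aside; the x2apic discussion continues below.)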
This bug can be worked around by specifying the "x2apic_phys" or "nox2apic" boot option, but we want to handle this system without requiring manual workarounds. The BIOS sets ACPI_FADT_APIC_PHYSICAL in the FADT. Since all APIC IDs are smaller than 255, the BIOS must hand control to the OS in xAPIC mode, per the x2apic spec, chapter 2.9. The current code handles x2apic as follows when the BIOS hands over with xAPIC mode enabled, i.e., when the user specifies x2apic_phys or the FADT indicates PHYSICAL: 1. During the MADT OEM check, the apic driver is first set to the xapic logical or xapic phys driver. 2. enable_IR_x2apic() will enable x2apic_mode. 3. If the user specifies x2apic_phys on the boot line, x2apic_phys_probe() will install the correct x2apic phys driver and use x2apic phys mode. Otherwise it skips that driver and lets x2apic_cluster_probe() take over and install the (wrong) x2apic cluster driver, even though the FADT indicates PHYSICAL, because x2apic_phys_probe() does not check the FADT. Fix this by checking x2apic_fadt_phys() in x2apic_phys_probe(). Signed-off-by: Stoney Wang [ updated the changelog and simplified the code ] Signed-off-by: Yinghai Lu Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1360263182-16226-1-git-send-email-yinghai@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_phys.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index e03a1e1..562a76d 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg) } early_param("x2apic_phys", set_x2apic_phys_mode); -static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +static bool x2apic_fadt_phys(void) { - if (x2apic_phys) - return x2apic_enabled(); - else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && - (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) && - x2apic_enabled()) { + if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) && + (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { printk(KERN_DEBUG "System requires x2apic physical mode\n"); - return 1; + return true; } - else - return 0; + return false; +} + +static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys()); } static void @@ -82,7 +83,7 @@ static void init_x2apic_ldr(void) static int x2apic_phys_probe(void) { - if (x2apic_mode && x2apic_phys) + if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys())) return 1; return apic == &apic_x2apic_phys; -- cgit v1.1 From 60fe7be34d96c596d82e37cc0392e7bb546989d1 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 11 Feb 2013 13:45:09 -0600 Subject: x86, uv, uv3: Update MMR register definitions for SGI Ultraviolet System 3 (UV3) This patch updates the MMR register definitions for the SGI UV3 system. Note that because these definitions are automatically generated from the RTL, we cannot control the length of the names; therefore some lines exceed 80 characters. All the new MMR definitions are added in this patch. The patches that follow then update the references. The last patch is a "trim" patch which reduces the size of the MMR definitions file by about a third. This ordering keeps bisectability in place, as the intermediate patches would not compile correctly if the trimmed MMR defines were done first.
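The regenerated header groups definitions by hub architecture (UVH_, UVXH_, UV1H_, UV2H_, UV3H_), as described in the comment block within the diff below. To make that concrete before the (large) diff, here is a hypothetical sketch of the selection idiom the generated header uses: per-hub addresses behind a common UVH_ symbol. The is_uv1_hub()/is_uv2_hub() predicates (from asm/uv/uv_hub.h) and the per-hub prefixes mirror the real header; FOO is an invented register name.

    /* Hypothetical register FOO whose MMR address differs per hub
     * type; the common UVH_ symbol dispatches at run time. */
    #define UV1H_FOO    0x61000UL
    #define UV2H_FOO    0x62000UL
    #define UV3H_FOO    0x63000UL
    #define UVH_FOO     (is_uv1_hub() ? UV1H_FOO :  \
                        (is_uv2_hub() ? UV2H_FOO :  \
                                        UV3H_FOO))

Definitions shared by all eXtended hubs (UV2 and UV3) instead get a single UVXH_ prefix, which avoids duplicating identical UV2/UV3 entries for every register.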
Signed-off-by: Mike Travis Link: http://lkml.kernel.org/r/20130211194508.326204556@gulag1.americas.sgi.com Acked-by: Russ Anderson Reviewed-by: Dimitri Sivanich Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv_mmrs.h | 3526 +++++++++++++++++++++++++++++++++++-- 1 file changed, 3392 insertions(+), 134 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index cf1d736..e1fa870 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -5,16 +5,25 @@ * * SGI UV MMR definitions * - * Copyright (C) 2007-2011 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_MMRS_H #define _ASM_X86_UV_UV_MMRS_H /* - * This file contains MMR definitions for both UV1 & UV2 hubs. + * This file contains MMR definitions for all UV hubs types. * - * In general, MMR addresses and structures are identical on both hubs. + * To minimize coding differences between hub types, the symbols are + * grouped by architecture types. + * + * UVH - definitions common to all UV hub types. + * UVXH - definitions common to all UV eXtended hub types (currently 2 & 3). + * UV1H - definitions specific to UV type 1 hub. + * UV2H - definitions specific to UV type 2 hub. + * UV3H - definitions specific to UV type 3 hub. + * + * So in general, MMR addresses and structures are identical on all hubs types. * These MMRs are identified as: * #define UVH_xxx
* union uvh_xxx { @@ -23,24 +32,36 @@ * } s; * }; * - * If the MMR exists on both hub type but has different addresses or - * contents, the MMR definition is similar to: - * #define UV1H_xxx - * #define UV2H_xxx - * #define UVH_xxx (is_uv1_hub() ? UV1H_xxx : UV2H_xxx) + * If the MMR exists on all hub types but have different addresses: + * #define UV1Hxxx a + * #define UV2Hxxx b + * #define UV3Hxxx c + * #define UVHxxx (is_uv1_hub() ? UV1Hxxx : + * (is_uv2_hub() ? UV2Hxxx : + * UV3Hxxx)) + * + * If the MMR exists on all hub types > 1 but have different addresses: + * #define UV2Hxxx b + * #define UV3Hxxx c + * #define UVXHxxx (is_uv2_hub() ? UV2Hxxx : + * UV3Hxxx)) + * * union uvh_xxx { * unsigned long v; - * struct uv1h_int_cmpd_s { (Common fields only) + * struct uvh_xxx_s { # Common fields only * } s; - * struct uv1h_int_cmpd_s { (Full UV1 definition) + * struct uv1h_xxx_s { # Full UV1 definition (*) * } s1; - * struct uv2h_int_cmpd_s { (Full UV2 definition) + * struct uv2h_xxx_s { # Full UV2 definition (*) * } s2; + * struct uv3h_xxx_s { # Full UV3 definition (*) + * } s3; * }; + * (* - if present and different than the common struct) * - * Only essential difference are enumerated. For example, if the address is - * the same for both UV1 & UV2, only a single #define is generated. Likewise, - * if the contents is the same for both hubs, only the "s" structure is + * Only essential differences are enumerated. For example, if the address is + * the same for all UV's, only a single #define is generated. Likewise, + * if the contents is the same for all hubs, only the "s" structure is * generated. * * If the MMR exists on ONLY 1 type of hub, no generic definition is @@ -51,6 +72,8 @@ * struct uvh_int_cmpd_s { * } sn; * }; + * + * (GEN Flags: mflags_opt=c undefs=0 UV23=UVXH) */ #define UV_MMR_ENABLE (1UL << 63) @@ -58,32 +81,75 @@ #define UV1_HUB_PART_NUMBER 0x88a5 #define UV2_HUB_PART_NUMBER 0x8eb8 #define UV2_HUB_PART_NUMBER_X 0x1111 +#define UV3_HUB_PART_NUMBER 0x9578 +#define UV3_HUB_PART_NUMBER_X 0x4321 -/* Compat: if this #define is present, UV headers support UV2 */ +/* Compat: Indicate which UV Hubs are supported. 
*/ #define UV2_HUB_IS_SUPPORTED 1 +#define UV3_HUB_IS_SUPPORTED 1 /* ========================================================================= */ /* UVH_BAU_DATA_BROADCAST */ /* ========================================================================= */ -#define UVH_BAU_DATA_BROADCAST 0x61688UL -#define UVH_BAU_DATA_BROADCAST_32 0x440 +#define UVH_BAU_DATA_BROADCAST 0x61688UL +#define UV1H_BAU_DATA_BROADCAST 0x61688UL +#define UV2H_BAU_DATA_BROADCAST 0x61688UL +#define UV3H_BAU_DATA_BROADCAST 0x61688UL +#define UVH_BAU_DATA_BROADCAST_32 0x440 +#define UV1H_BAU_DATA_BROADCAST_32 0x61688UL +#define UV2H_BAU_DATA_BROADCAST_32 0x61688UL +#define UV3H_BAU_DATA_BROADCAST_32 0x61688UL #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL +#define UV1H_BAU_DATA_BROADCAST_ENABLE_SHFT 0 +#define UV1H_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL + +#define UVXH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 +#define UVXH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL + +#define UV2H_BAU_DATA_BROADCAST_ENABLE_SHFT 0 +#define UV2H_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL + +#define UV3H_BAU_DATA_BROADCAST_ENABLE_SHFT 0 +#define UV3H_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL + union uvh_bau_data_broadcast_u { unsigned long v; struct uvh_bau_data_broadcast_s { unsigned long enable:1; /* RW */ unsigned long rsvd_1_63:63; } s; + struct uv1h_bau_data_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s1; + struct uvxh_bau_data_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } sx; + struct uv2h_bau_data_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s2; + struct uv3h_bau_data_broadcast_s { + unsigned long enable:1; /* RW */ + unsigned long rsvd_1_63:63; + } s3; }; /* ========================================================================= */ /* UVH_BAU_DATA_CONFIG */ /* ========================================================================= */ -#define UVH_BAU_DATA_CONFIG 0x61680UL -#define UVH_BAU_DATA_CONFIG_32 0x438 +#define UVH_BAU_DATA_CONFIG 0x61680UL +#define UV1H_BAU_DATA_CONFIG 0x61680UL +#define UV2H_BAU_DATA_CONFIG 0x61680UL +#define UV3H_BAU_DATA_CONFIG 0x61680UL +#define UVH_BAU_DATA_CONFIG_32 0x438 +#define UV1H_BAU_DATA_CONFIG_32 0x61680UL +#define UV2H_BAU_DATA_CONFIG_32 0x61680UL +#define UV3H_BAU_DATA_CONFIG_32 0x61680UL #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 #define UVH_BAU_DATA_CONFIG_DM_SHFT 8 @@ -102,6 +168,74 @@ union uvh_bau_data_broadcast_u { #define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL #define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL +#define UV1H_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UV1H_BAU_DATA_CONFIG_DM_SHFT 8 +#define UV1H_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UV1H_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UV1H_BAU_DATA_CONFIG_P_SHFT 13 +#define UV1H_BAU_DATA_CONFIG_T_SHFT 15 +#define UV1H_BAU_DATA_CONFIG_M_SHFT 16 +#define UV1H_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UV1H_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV1H_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UV1H_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV1H_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV1H_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UV1H_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UV1H_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UV1H_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define 
UVXH_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UVXH_BAU_DATA_CONFIG_DM_SHFT 8 +#define UVXH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UVXH_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UVXH_BAU_DATA_CONFIG_P_SHFT 13 +#define UVXH_BAU_DATA_CONFIG_T_SHFT 15 +#define UVXH_BAU_DATA_CONFIG_M_SHFT 16 +#define UVXH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UVXH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV2H_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UV2H_BAU_DATA_CONFIG_DM_SHFT 8 +#define UV2H_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UV2H_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UV2H_BAU_DATA_CONFIG_P_SHFT 13 +#define UV2H_BAU_DATA_CONFIG_T_SHFT 15 +#define UV2H_BAU_DATA_CONFIG_M_SHFT 16 +#define UV2H_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UV2H_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV2H_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UV2H_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV2H_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV2H_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UV2H_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UV2H_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UV2H_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV3H_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UV3H_BAU_DATA_CONFIG_DM_SHFT 8 +#define UV3H_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UV3H_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UV3H_BAU_DATA_CONFIG_P_SHFT 13 +#define UV3H_BAU_DATA_CONFIG_T_SHFT 15 +#define UV3H_BAU_DATA_CONFIG_M_SHFT 16 +#define UV3H_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UV3H_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV3H_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UV3H_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV3H_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV3H_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UV3H_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UV3H_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UV3H_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_bau_data_config_u { unsigned long v; struct uvh_bau_data_config_s { @@ -116,13 +250,72 @@ union uvh_bau_data_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; + struct uv1h_bau_data_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s1; + struct uvxh_bau_data_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + struct uv2h_bau_data_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW 
*/ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; + struct uv3h_bau_data_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_EVENT_OCCURRED0 */ /* ========================================================================= */ -#define UVH_EVENT_OCCURRED0 0x70000UL -#define UVH_EVENT_OCCURRED0_32 0x5e8 +#define UVH_EVENT_OCCURRED0 0x70000UL +#define UV1H_EVENT_OCCURRED0 0x70000UL +#define UV2H_EVENT_OCCURRED0 0x70000UL +#define UV3H_EVENT_OCCURRED0 0x70000UL +#define UVH_EVENT_OCCURRED0_32 0x5e8 +#define UV1H_EVENT_OCCURRED0_32 0x70000UL +#define UV2H_EVENT_OCCURRED0_32 0x70000UL +#define UV3H_EVENT_OCCURRED0_32 0x70000UL + +#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL #define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 #define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 @@ -239,6 +432,125 @@ union uvh_bau_data_config_u { #define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +#define UVXH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UVXH_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2 +#define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 +#define UVXH_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 +#define UVXH_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 +#define UVXH_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 +#define UVXH_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 +#define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 +#define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 +#define UVXH_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UVXH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 +#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 +#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 +#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 +#define UVXH_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 +#define UVXH_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UVXH_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UVXH_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UVXH_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UVXH_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UVXH_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define 
UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UVXH_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UVXH_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UVXH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UVXH_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL +#define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL +#define UVXH_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL +#define UVXH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL +#define UVXH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL +#define UVXH_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL +#define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL +#define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL +#define UVXH_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UVXH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL +#define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL +#define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL +#define UVXH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL +#define UVXH_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL +#define UVXH_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UVXH_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UVXH_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UVXH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UVXH_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UVXH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UVXH_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UVXH_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UVXH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UVXH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UVXH_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UVXH_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UVXH_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UVXH_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UVXH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define 
UVXH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UVXH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UVXH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UVXH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UVXH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UVXH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UVXH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UVXH_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UVXH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UVXH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UVXH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UVXH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UVXH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + #define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 #define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 #define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 @@ -358,8 +670,133 @@ union uvh_bau_data_config_u { #define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL #define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL +#define UV3H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV3H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 +#define UV3H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 +#define UV3H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 +#define UV3H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 +#define UV3H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 +#define UV3H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 +#define UV3H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 +#define UV3H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV3H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UV3H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 +#define UV3H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 +#define UV3H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 +#define UV3H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 +#define UV3H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define 
UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV3H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL +#define UV3H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL +#define UV3H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL +#define UV3H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL +#define UV3H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL +#define UV3H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL +#define UV3H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL +#define UV3H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV3H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UV3H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL +#define UV3H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL +#define UV3H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL +#define UV3H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL +#define UV3H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define 
UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + union uvh_event_occurred0_u { unsigned long v; + struct uvh_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW, W1C */ + unsigned long rsvd_1_10:10; + unsigned long rh_aoerr0:1; /* RW, W1C */ + unsigned long rsvd_12_63:52; + } s; struct uv1h_event_occurred0_s { unsigned long lb_hcerr:1; /* RW, W1C */ unsigned long gr0_hcerr:1; /* RW, W1C */ @@ -420,6 +857,68 @@ union uvh_event_occurred0_u { unsigned long power_management_req:1; /* RW, W1C */ unsigned long rsvd_57_63:7; } s1; + struct uvxh_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long qp_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long qp_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rt_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long qp_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rt_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + 
unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + unsigned long rsvd_59_63:5; + } sx; struct uv2h_event_occurred0_s { unsigned long lb_hcerr:1; /* RW */ unsigned long qp_hcerr:1; /* RW */ @@ -482,18 +981,90 @@ union uvh_event_occurred0_u { unsigned long profile_int:1; /* RW */ unsigned long rsvd_59_63:5; } s2; + struct uv3h_event_occurred0_s { + unsigned long lb_hcerr:1; /* RW */ + unsigned long qp_hcerr:1; /* RW */ + unsigned long rh_hcerr:1; /* RW */ + unsigned long lh0_hcerr:1; /* RW */ + unsigned long lh1_hcerr:1; /* RW */ + unsigned long gr0_hcerr:1; /* RW */ + unsigned long gr1_hcerr:1; /* RW */ + unsigned long ni0_hcerr:1; /* RW */ + unsigned long ni1_hcerr:1; /* RW */ + unsigned long lb_aoerr0:1; /* RW */ + unsigned long qp_aoerr0:1; /* RW */ + unsigned long rh_aoerr0:1; /* RW */ + unsigned long lh0_aoerr0:1; /* RW */ + unsigned long lh1_aoerr0:1; /* RW */ + unsigned long gr0_aoerr0:1; /* RW */ + unsigned long gr1_aoerr0:1; /* RW */ + unsigned long xb_aoerr0:1; /* RW */ + unsigned long rt_aoerr0:1; /* RW */ + unsigned long ni0_aoerr0:1; /* RW */ + unsigned long ni1_aoerr0:1; /* RW */ + unsigned long lb_aoerr1:1; /* RW */ + unsigned long qp_aoerr1:1; /* RW */ + unsigned long rh_aoerr1:1; /* RW */ + unsigned long lh0_aoerr1:1; /* RW */ + unsigned long lh1_aoerr1:1; /* RW */ + unsigned long gr0_aoerr1:1; /* RW */ + unsigned long gr1_aoerr1:1; /* RW */ + unsigned long xb_aoerr1:1; /* RW */ + unsigned long rt_aoerr1:1; /* RW */ + unsigned long ni0_aoerr1:1; /* RW */ + unsigned long ni1_aoerr1:1; /* RW */ + unsigned long system_shutdown_int:1; /* RW */ + unsigned long lb_irq_int_0:1; /* RW */ + unsigned long lb_irq_int_1:1; /* RW */ + unsigned long lb_irq_int_2:1; /* RW */ + unsigned long lb_irq_int_3:1; /* RW */ + unsigned long lb_irq_int_4:1; /* RW */ + unsigned long lb_irq_int_5:1; /* RW */ + unsigned long lb_irq_int_6:1; /* RW */ + unsigned long lb_irq_int_7:1; /* RW */ + unsigned long lb_irq_int_8:1; /* RW */ + unsigned long lb_irq_int_9:1; /* RW */ + unsigned long lb_irq_int_10:1; /* RW */ + unsigned long lb_irq_int_11:1; /* RW */ + unsigned long lb_irq_int_12:1; /* RW */ + unsigned long lb_irq_int_13:1; /* RW */ + unsigned long lb_irq_int_14:1; /* RW */ + unsigned long lb_irq_int_15:1; /* RW */ + unsigned long l1_nmi_int:1; /* RW */ + unsigned long stop_clock:1; /* RW */ + unsigned long asic_to_l1:1; /* RW */ + unsigned long l1_to_asic:1; /* RW */ + unsigned long la_seq_trigger:1; /* RW */ + unsigned long ipi_int:1; /* RW */ + unsigned long extio_int0:1; /* RW */ + unsigned long extio_int1:1; /* RW */ + unsigned long extio_int2:1; /* RW */ + unsigned long extio_int3:1; /* RW */ + unsigned long profile_int:1; /* RW */ + 
unsigned long rsvd_59_63:5; + } s3; }; /* ========================================================================= */ /* UVH_EVENT_OCCURRED0_ALIAS */ /* ========================================================================= */ -#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL -#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 +#define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL +#define UV1H_EVENT_OCCURRED0_ALIAS 0x70008UL +#define UV2H_EVENT_OCCURRED0_ALIAS 0x70008UL +#define UV3H_EVENT_OCCURRED0_ALIAS 0x70008UL +#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 +#define UV1H_EVENT_OCCURRED0_ALIAS_32 0x70008UL +#define UV2H_EVENT_OCCURRED0_ALIAS_32 0x70008UL +#define UV3H_EVENT_OCCURRED0_ALIAS_32 0x70008UL + /* ========================================================================= */ /* UVH_GR0_TLB_INT0_CONFIG */ /* ========================================================================= */ -#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL +#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL +#define UV1H_GR0_TLB_INT0_CONFIG 0x61b00UL +#define UV2H_GR0_TLB_INT0_CONFIG 0x61b00UL +#define UV3H_GR0_TLB_INT0_CONFIG 0x61b00UL #define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 #define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 @@ -512,6 +1083,74 @@ union uvh_event_occurred0_u { #define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL +#define UV1H_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UV1H_GR0_TLB_INT0_CONFIG_DM_SHFT 8 +#define UV1H_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UV1H_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UV1H_GR0_TLB_INT0_CONFIG_P_SHFT 13 +#define UV1H_GR0_TLB_INT0_CONFIG_T_SHFT 15 +#define UV1H_GR0_TLB_INT0_CONFIG_M_SHFT 16 +#define UV1H_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UV1H_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV1H_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UV1H_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV1H_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV1H_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UV1H_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UV1H_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UV1H_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVXH_GR0_TLB_INT0_CONFIG_P_SHFT 13 +#define UVXH_GR0_TLB_INT0_CONFIG_T_SHFT 15 +#define UVXH_GR0_TLB_INT0_CONFIG_M_SHFT 16 +#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV2H_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UV2H_GR0_TLB_INT0_CONFIG_DM_SHFT 8 +#define UV2H_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UV2H_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UV2H_GR0_TLB_INT0_CONFIG_P_SHFT 13 +#define UV2H_GR0_TLB_INT0_CONFIG_T_SHFT 15 +#define UV2H_GR0_TLB_INT0_CONFIG_M_SHFT 16 +#define UV2H_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 
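/*
 * [Editor's note, illustrative sketch, not part of the original patch]
 * Every field gets a matching _SHFT/_MASK pair, so a read reduces to
 * mask-then-shift on the raw 64-bit register value.  A minimal sketch,
 * assuming the existing uv_read_local_mmr() accessor from
 * <asm/uv/uv_hub.h>:
 *
 *	unsigned long v, vector;
 *
 *	v = uv_read_local_mmr(UVH_GR0_TLB_INT0_CONFIG);
 *	vector = (v & UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK) >>
 *		 UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT;
 */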
+#define UV2H_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV2H_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UV2H_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV2H_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV2H_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UV2H_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UV2H_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UV2H_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV3H_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UV3H_GR0_TLB_INT0_CONFIG_DM_SHFT 8 +#define UV3H_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UV3H_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UV3H_GR0_TLB_INT0_CONFIG_P_SHFT 13 +#define UV3H_GR0_TLB_INT0_CONFIG_T_SHFT 15 +#define UV3H_GR0_TLB_INT0_CONFIG_M_SHFT 16 +#define UV3H_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UV3H_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV3H_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UV3H_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV3H_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV3H_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UV3H_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UV3H_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UV3H_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_gr0_tlb_int0_config_u { unsigned long v; struct uvh_gr0_tlb_int0_config_s { @@ -526,12 +1165,63 @@ union uvh_gr0_tlb_int0_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; + struct uv1h_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s1; + struct uvxh_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + struct uv2h_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; + struct uv3h_gr0_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_GR0_TLB_INT1_CONFIG */ /* ========================================================================= */ -#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL +#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL +#define UV1H_GR0_TLB_INT1_CONFIG 0x61b40UL +#define UV2H_GR0_TLB_INT1_CONFIG 0x61b40UL +#define UV3H_GR0_TLB_INT1_CONFIG 0x61b40UL #define 
UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 #define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 @@ -550,6 +1240,74 @@ union uvh_gr0_tlb_int0_config_u { #define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL +#define UV1H_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UV1H_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UV1H_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UV1H_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UV1H_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UV1H_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define UV1H_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UV1H_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UV1H_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV1H_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UV1H_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV1H_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV1H_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UV1H_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UV1H_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UV1H_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVXH_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UVXH_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define UVXH_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV2H_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UV2H_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UV2H_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UV2H_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UV2H_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UV2H_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define UV2H_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UV2H_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UV2H_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV2H_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UV2H_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV2H_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV2H_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UV2H_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UV2H_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UV2H_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV3H_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UV3H_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UV3H_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UV3H_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UV3H_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UV3H_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define UV3H_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UV3H_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UV3H_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV3H_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UV3H_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define 
UV3H_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV3H_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UV3H_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UV3H_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UV3H_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_gr0_tlb_int1_config_u { unsigned long v; struct uvh_gr0_tlb_int1_config_s { @@ -564,33 +1322,83 @@ union uvh_gr0_tlb_int1_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; -}; - -/* ========================================================================= */ -/* UVH_GR0_TLB_MMR_CONTROL */ -/* ========================================================================= */ -#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL -#define UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL -#define UVH_GR0_TLB_MMR_CONTROL (is_uv1_hub() ? \ - UV1H_GR0_TLB_MMR_CONTROL : \ - UV2H_GR0_TLB_MMR_CONTROL) - -#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 -#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 -#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL -#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL -#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL -#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL - -#define UV1H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 -#define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 -#define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 + struct uv1h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s1; + struct uvxh_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + struct uv2h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; + struct uv3h_gr0_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; +}; + +/* ========================================================================= */ +/* UVH_GR0_TLB_MMR_CONTROL */ +/* ========================================================================= */ +#define UV1H_GR0_TLB_MMR_CONTROL 0x401080UL +#define 
UV2H_GR0_TLB_MMR_CONTROL 0xc01080UL +#define UV3H_GR0_TLB_MMR_CONTROL 0xc01080UL +#define UVH_GR0_TLB_MMR_CONTROL \ + (is_uv1_hub() ? UV1H_GR0_TLB_MMR_CONTROL : \ + (is_uv2_hub() ? UV2H_GR0_TLB_MMR_CONTROL : \ + UV3H_GR0_TLB_MMR_CONTROL)) + +#define UVH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 +#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 +#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 +#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 +#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 +#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 +#define UVH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL +#define UVH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL +#define UVH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL +#define UVH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL +#define UVH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL +#define UVH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL + +#define UV1H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 +#define UV1H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 +#define UV1H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 #define UV1H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 #define UV1H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 #define UV1H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 @@ -611,6 +1419,21 @@ union uvh_gr0_tlb_int1_config_u { #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL #define UV1H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL +#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 +#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 +#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 +#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 +#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 +#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 +#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 +#define UVXH_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL +#define UVXH_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL +#define UVXH_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL +#define UVXH_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL +#define UVXH_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL +#define UVXH_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL +#define UVXH_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL + #define UV2H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 #define UV2H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 #define UV2H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 @@ -630,6 +1453,23 @@ union uvh_gr0_tlb_int1_config_u { #define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL #define UV2H_GR0_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL +#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_SHFT 0 +#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 +#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 +#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 +#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 +#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 +#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_SHFT 31 +#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 +#define UV3H_GR0_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL +#define UV3H_GR0_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL +#define UV3H_GR0_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL +#define UV3H_GR0_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL +#define UV3H_GR0_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL +#define UV3H_GR0_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL 
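/*
 * [Editor's note, not part of the original patch] Where an MMR address
 * moved between hub generations, the unprefixed UVH_ name stays usable
 * by expanding to a nested runtime check, as in the UVH_GR0_TLB_MMR_CONTROL
 * definition above:
 *
 *	unsigned long ctrl = UVH_GR0_TLB_MMR_CONTROL;
 *	// 0x401080 on UV1, 0xc01080 on UV2 and UV3 hubs
 *
 * is_uv1_hub()/is_uv2_hub() are the existing hub-type predicates from
 * <asm/uv/uv_hub.h>; the UV3 address falls out as the final else.
 */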
+#define UV3H_GR0_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL +#define UV3H_GR0_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL + union uvh_gr0_tlb_mmr_control_u { unsigned long v; struct uvh_gr0_tlb_mmr_control_s { @@ -642,7 +1482,9 @@ union uvh_gr0_tlb_mmr_control_u { unsigned long rsvd_21_29:9; unsigned long mmr_write:1; /* WP */ unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_63:32; + unsigned long rsvd_32_48:17; + unsigned long rsvd_49_51:3; + unsigned long rsvd_52_63:12; } s; struct uv1h_gr0_tlb_mmr_control_s { unsigned long index:12; /* RW */ @@ -666,6 +1508,23 @@ union uvh_gr0_tlb_mmr_control_u { unsigned long mmr_inj_tlblruv:1; /* RW */ unsigned long rsvd_61_63:3; } s1; + struct uvxh_gr0_tlb_mmr_control_s { + unsigned long index:12; /* RW */ + unsigned long mem_sel:2; /* RW */ + unsigned long rsvd_14_15:2; + unsigned long auto_valid_en:1; /* RW */ + unsigned long rsvd_17_19:3; + unsigned long mmr_hash_index_en:1; /* RW */ + unsigned long rsvd_21_29:9; + unsigned long mmr_write:1; /* WP */ + unsigned long mmr_read:1; /* WP */ + unsigned long mmr_op_done:1; /* RW */ + unsigned long rsvd_33_47:15; + unsigned long rsvd_48:1; + unsigned long rsvd_49_51:3; + unsigned long rsvd_52:1; + unsigned long rsvd_53_63:11; + } sx; struct uv2h_gr0_tlb_mmr_control_s { unsigned long index:12; /* RW */ unsigned long mem_sel:2; /* RW */ @@ -683,6 +1542,24 @@ union uvh_gr0_tlb_mmr_control_u { unsigned long mmr_inj_tlbram:1; /* RW */ unsigned long rsvd_53_63:11; } s2; + struct uv3h_gr0_tlb_mmr_control_s { + unsigned long index:12; /* RW */ + unsigned long mem_sel:2; /* RW */ + unsigned long rsvd_14_15:2; + unsigned long auto_valid_en:1; /* RW */ + unsigned long rsvd_17_19:3; + unsigned long mmr_hash_index_en:1; /* RW */ + unsigned long ecc_sel:1; /* RW */ + unsigned long rsvd_22_29:8; + unsigned long mmr_write:1; /* WP */ + unsigned long mmr_read:1; /* WP */ + unsigned long mmr_op_done:1; /* RW */ + unsigned long rsvd_33_47:15; + unsigned long undef_48:1; /* Undefined */ + unsigned long rsvd_49_51:3; + unsigned long undef_52:1; /* Undefined */ + unsigned long rsvd_53_63:11; + } s3; }; /* ========================================================================= */ @@ -690,9 +1567,11 @@ union uvh_gr0_tlb_mmr_control_u { /* ========================================================================= */ #define UV1H_GR0_TLB_MMR_READ_DATA_HI 0x4010a0UL #define UV2H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL -#define UVH_GR0_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \ - UV1H_GR0_TLB_MMR_READ_DATA_HI : \ - UV2H_GR0_TLB_MMR_READ_DATA_HI) +#define UV3H_GR0_TLB_MMR_READ_DATA_HI 0xc010a0UL +#define UVH_GR0_TLB_MMR_READ_DATA_HI \ + (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_HI : \ + (is_uv2_hub() ? 
UV2H_GR0_TLB_MMR_READ_DATA_HI : \ + UV3H_GR0_TLB_MMR_READ_DATA_HI)) #define UVH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 #define UVH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 @@ -703,6 +1582,46 @@ union uvh_gr0_tlb_mmr_control_u { #define UVH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL #define UVH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL +#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 +#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 +#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 +#define UV1H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL +#define UV1H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL +#define UV1H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL +#define UV1H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL + +#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 +#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 +#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 +#define UVXH_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL +#define UVXH_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL +#define UVXH_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL +#define UVXH_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL + +#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 +#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 +#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 +#define UV2H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL +#define UV2H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL +#define UV2H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL +#define UV2H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL + +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45 +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL +#define UV3H_GR0_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL + union uvh_gr0_tlb_mmr_read_data_hi_u { unsigned long v; struct uvh_gr0_tlb_mmr_read_data_hi_s { @@ -712,6 +1631,36 @@ union uvh_gr0_tlb_mmr_read_data_hi_u { unsigned long larger:1; /* RO */ unsigned long rsvd_45_63:19; } s; + struct uv1h_gr0_tlb_mmr_read_data_hi_s { + unsigned long pfn:41; /* RO */ + unsigned long gaa:2; /* RO */ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long rsvd_45_63:19; + } s1; + struct uvxh_gr0_tlb_mmr_read_data_hi_s { + unsigned long pfn:41; /* RO */ + unsigned long gaa:2; /* RO */ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long rsvd_45_63:19; + } sx; + struct uv2h_gr0_tlb_mmr_read_data_hi_s { + unsigned long pfn:41; /* RO */ + unsigned long gaa:2; /* RO */ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long rsvd_45_63:19; + } s2; + struct uv3h_gr0_tlb_mmr_read_data_hi_s { + unsigned long pfn:41; /* RO */ + unsigned long gaa:2; /* RO 
*/ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long aa_ext:1; /* RO */ + unsigned long undef_46_54:9; /* Undefined */ + unsigned long way_ecc:9; /* RO */ + } s3; }; /* ========================================================================= */ @@ -719,9 +1668,11 @@ union uvh_gr0_tlb_mmr_read_data_hi_u { /* ========================================================================= */ #define UV1H_GR0_TLB_MMR_READ_DATA_LO 0x4010a8UL #define UV2H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL -#define UVH_GR0_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \ - UV1H_GR0_TLB_MMR_READ_DATA_LO : \ - UV2H_GR0_TLB_MMR_READ_DATA_LO) +#define UV3H_GR0_TLB_MMR_READ_DATA_LO 0xc010a8UL +#define UVH_GR0_TLB_MMR_READ_DATA_LO \ + (is_uv1_hub() ? UV1H_GR0_TLB_MMR_READ_DATA_LO : \ + (is_uv2_hub() ? UV2H_GR0_TLB_MMR_READ_DATA_LO : \ + UV3H_GR0_TLB_MMR_READ_DATA_LO)) #define UVH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 #define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 @@ -730,6 +1681,34 @@ union uvh_gr0_tlb_mmr_read_data_hi_u { #define UVH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL #define UVH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL +#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UV1H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UV1H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + +#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UVXH_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UVXH_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + +#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UV2H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UV2H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + +#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UV3H_GR0_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UV3H_GR0_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + union uvh_gr0_tlb_mmr_read_data_lo_u { unsigned long v; struct uvh_gr0_tlb_mmr_read_data_lo_s { @@ -737,12 +1716,35 @@ union uvh_gr0_tlb_mmr_read_data_lo_u { unsigned long asid:24; /* RO */ unsigned long valid:1; /* RO */ } s; + struct uv1h_gr0_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } s1; + struct uvxh_gr0_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } sx; + struct uv2h_gr0_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } s2; + struct uv3h_gr0_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } s3; }; /* ========================================================================= */ 
/* UVH_GR1_TLB_INT0_CONFIG */ /* ========================================================================= */ -#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL +#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL +#define UV1H_GR1_TLB_INT0_CONFIG 0x61f00UL +#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL +#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL #define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 #define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 @@ -761,6 +1763,74 @@ union uvh_gr0_tlb_mmr_read_data_lo_u { #define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL +#define UV1H_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UV1H_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UV1H_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UV1H_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UV1H_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define UV1H_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UV1H_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UV1H_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UV1H_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV1H_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UV1H_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV1H_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV1H_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UV1H_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UV1H_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UV1H_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVXH_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define UVXH_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UVXH_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV2H_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UV2H_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UV2H_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UV2H_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UV2H_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define UV2H_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UV2H_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UV2H_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UV2H_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV2H_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UV2H_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV2H_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV2H_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UV2H_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UV2H_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UV2H_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV3H_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UV3H_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UV3H_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UV3H_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UV3H_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define 
UV3H_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UV3H_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UV3H_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UV3H_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV3H_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UV3H_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV3H_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV3H_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UV3H_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UV3H_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UV3H_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_gr1_tlb_int0_config_u { unsigned long v; struct uvh_gr1_tlb_int0_config_s { @@ -775,12 +1845,63 @@ union uvh_gr1_tlb_int0_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; + struct uv1h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s1; + struct uvxh_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + struct uv2h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; + struct uv3h_gr1_tlb_int0_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_GR1_TLB_INT1_CONFIG */ /* ========================================================================= */ -#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL +#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL +#define UV1H_GR1_TLB_INT1_CONFIG 0x61f40UL +#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL +#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL #define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 #define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 @@ -799,6 +1920,74 @@ union uvh_gr1_tlb_int0_config_u { #define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL +#define UV1H_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UV1H_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UV1H_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UV1H_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UV1H_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UV1H_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UV1H_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UV1H_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UV1H_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV1H_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define 
UV1H_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV1H_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV1H_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UV1H_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UV1H_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UV1H_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVXH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVXH_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UVXH_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UVXH_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV2H_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UV2H_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UV2H_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UV2H_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UV2H_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UV2H_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UV2H_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UV2H_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UV2H_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV2H_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UV2H_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV2H_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV2H_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UV2H_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UV2H_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UV2H_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV3H_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UV3H_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UV3H_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UV3H_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UV3H_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UV3H_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UV3H_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UV3H_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UV3H_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV3H_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UV3H_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV3H_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV3H_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UV3H_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UV3H_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UV3H_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_gr1_tlb_int1_config_u { unsigned long v; struct uvh_gr1_tlb_int1_config_s { @@ -813,6 +2002,54 @@ union uvh_gr1_tlb_int1_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; + struct uv1h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + 
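/*
 * [Editor's note, not part of the original patch] These *_TLB_INT*_CONFIG
 * layouts (vector, dm, destmode, status, p, t, m, apic_id) mirror the
 * standard x86 interrupt-message format, which is why the UV1/UVX/UV2/UV3
 * views here are bit-for-bit identical and differ only in name.
 */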
unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s1; + struct uvxh_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + struct uv2h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; + struct uv3h_gr1_tlb_int1_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; }; /* ========================================================================= */ @@ -820,9 +2057,11 @@ union uvh_gr1_tlb_int1_config_u { /* ========================================================================= */ #define UV1H_GR1_TLB_MMR_CONTROL 0x801080UL #define UV2H_GR1_TLB_MMR_CONTROL 0x1001080UL -#define UVH_GR1_TLB_MMR_CONTROL (is_uv1_hub() ? \ - UV1H_GR1_TLB_MMR_CONTROL : \ - UV2H_GR1_TLB_MMR_CONTROL) +#define UV3H_GR1_TLB_MMR_CONTROL 0x1001080UL +#define UVH_GR1_TLB_MMR_CONTROL \ + (is_uv1_hub() ? UV1H_GR1_TLB_MMR_CONTROL : \ + (is_uv2_hub() ? UV2H_GR1_TLB_MMR_CONTROL : \ + UV3H_GR1_TLB_MMR_CONTROL)) #define UVH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 #define UVH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 @@ -860,6 +2099,21 @@ union uvh_gr1_tlb_int1_config_u { #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRREG_MASK 0x0100000000000000UL #define UV1H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBLRUV_MASK 0x1000000000000000UL +#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 +#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 +#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 +#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 +#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 +#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 +#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 +#define UVXH_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL +#define UVXH_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL +#define UVXH_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL +#define UVXH_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL +#define UVXH_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL +#define UVXH_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL +#define UVXH_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL + #define UV2H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 #define UV2H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 #define UV2H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 @@ -879,6 +2133,23 @@ union uvh_gr1_tlb_int1_config_u { #define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_CON_MASK 0x0001000000000000UL #define UV2H_GR1_TLB_MMR_CONTROL_MMR_INJ_TLBRAM_MASK 0x0010000000000000UL +#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_SHFT 0 +#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_SHFT 12 +#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_SHFT 16 +#define 
UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_SHFT 20 +#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_SHFT 21 +#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_SHFT 30 +#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_SHFT 31 +#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_SHFT 32 +#define UV3H_GR1_TLB_MMR_CONTROL_INDEX_MASK 0x0000000000000fffUL +#define UV3H_GR1_TLB_MMR_CONTROL_MEM_SEL_MASK 0x0000000000003000UL +#define UV3H_GR1_TLB_MMR_CONTROL_AUTO_VALID_EN_MASK 0x0000000000010000UL +#define UV3H_GR1_TLB_MMR_CONTROL_MMR_HASH_INDEX_EN_MASK 0x0000000000100000UL +#define UV3H_GR1_TLB_MMR_CONTROL_ECC_SEL_MASK 0x0000000000200000UL +#define UV3H_GR1_TLB_MMR_CONTROL_MMR_WRITE_MASK 0x0000000040000000UL +#define UV3H_GR1_TLB_MMR_CONTROL_MMR_READ_MASK 0x0000000080000000UL +#define UV3H_GR1_TLB_MMR_CONTROL_MMR_OP_DONE_MASK 0x0000000100000000UL + union uvh_gr1_tlb_mmr_control_u { unsigned long v; struct uvh_gr1_tlb_mmr_control_s { @@ -891,7 +2162,9 @@ union uvh_gr1_tlb_mmr_control_u { unsigned long rsvd_21_29:9; unsigned long mmr_write:1; /* WP */ unsigned long mmr_read:1; /* WP */ - unsigned long rsvd_32_63:32; + unsigned long rsvd_32_48:17; + unsigned long rsvd_49_51:3; + unsigned long rsvd_52_63:12; } s; struct uv1h_gr1_tlb_mmr_control_s { unsigned long index:12; /* RW */ @@ -915,6 +2188,23 @@ union uvh_gr1_tlb_mmr_control_u { unsigned long mmr_inj_tlblruv:1; /* RW */ unsigned long rsvd_61_63:3; } s1; + struct uvxh_gr1_tlb_mmr_control_s { + unsigned long index:12; /* RW */ + unsigned long mem_sel:2; /* RW */ + unsigned long rsvd_14_15:2; + unsigned long auto_valid_en:1; /* RW */ + unsigned long rsvd_17_19:3; + unsigned long mmr_hash_index_en:1; /* RW */ + unsigned long rsvd_21_29:9; + unsigned long mmr_write:1; /* WP */ + unsigned long mmr_read:1; /* WP */ + unsigned long mmr_op_done:1; /* RW */ + unsigned long rsvd_33_47:15; + unsigned long rsvd_48:1; + unsigned long rsvd_49_51:3; + unsigned long rsvd_52:1; + unsigned long rsvd_53_63:11; + } sx; struct uv2h_gr1_tlb_mmr_control_s { unsigned long index:12; /* RW */ unsigned long mem_sel:2; /* RW */ @@ -932,6 +2222,24 @@ union uvh_gr1_tlb_mmr_control_u { unsigned long mmr_inj_tlbram:1; /* RW */ unsigned long rsvd_53_63:11; } s2; + struct uv3h_gr1_tlb_mmr_control_s { + unsigned long index:12; /* RW */ + unsigned long mem_sel:2; /* RW */ + unsigned long rsvd_14_15:2; + unsigned long auto_valid_en:1; /* RW */ + unsigned long rsvd_17_19:3; + unsigned long mmr_hash_index_en:1; /* RW */ + unsigned long ecc_sel:1; /* RW */ + unsigned long rsvd_22_29:8; + unsigned long mmr_write:1; /* WP */ + unsigned long mmr_read:1; /* WP */ + unsigned long mmr_op_done:1; /* RW */ + unsigned long rsvd_33_47:15; + unsigned long undef_48:1; /* Undefined */ + unsigned long rsvd_49_51:3; + unsigned long undef_52:1; /* Undefined */ + unsigned long rsvd_53_63:11; + } s3; }; /* ========================================================================= */ @@ -939,9 +2247,11 @@ union uvh_gr1_tlb_mmr_control_u { /* ========================================================================= */ #define UV1H_GR1_TLB_MMR_READ_DATA_HI 0x8010a0UL #define UV2H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL -#define UVH_GR1_TLB_MMR_READ_DATA_HI (is_uv1_hub() ? \ - UV1H_GR1_TLB_MMR_READ_DATA_HI : \ - UV2H_GR1_TLB_MMR_READ_DATA_HI) +#define UV3H_GR1_TLB_MMR_READ_DATA_HI 0x10010a0UL +#define UVH_GR1_TLB_MMR_READ_DATA_HI \ + (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_HI : \ + (is_uv2_hub() ? 
UV2H_GR1_TLB_MMR_READ_DATA_HI : \ + UV3H_GR1_TLB_MMR_READ_DATA_HI)) #define UVH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 #define UVH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 @@ -952,6 +2262,46 @@ union uvh_gr1_tlb_mmr_control_u { #define UVH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL #define UVH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL +#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 +#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 +#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 +#define UV1H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL +#define UV1H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL +#define UV1H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL +#define UV1H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL + +#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 +#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 +#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 +#define UVXH_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL +#define UVXH_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL +#define UVXH_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL +#define UVXH_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL + +#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 +#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 +#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 +#define UV2H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL +#define UV2H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL +#define UV2H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL +#define UV2H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL + +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_SHFT 0 +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_SHFT 41 +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_SHFT 43 +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_SHFT 44 +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_SHFT 45 +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_SHFT 55 +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_PFN_MASK 0x000001ffffffffffUL +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_GAA_MASK 0x0000060000000000UL +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_DIRTY_MASK 0x0000080000000000UL +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_LARGER_MASK 0x0000100000000000UL +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_AA_EXT_MASK 0x0000200000000000UL +#define UV3H_GR1_TLB_MMR_READ_DATA_HI_WAY_ECC_MASK 0xff80000000000000UL + union uvh_gr1_tlb_mmr_read_data_hi_u { unsigned long v; struct uvh_gr1_tlb_mmr_read_data_hi_s { @@ -961,6 +2311,36 @@ union uvh_gr1_tlb_mmr_read_data_hi_u { unsigned long larger:1; /* RO */ unsigned long rsvd_45_63:19; } s; + struct uv1h_gr1_tlb_mmr_read_data_hi_s { + unsigned long pfn:41; /* RO */ + unsigned long gaa:2; /* RO */ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long rsvd_45_63:19; + } s1; + struct uvxh_gr1_tlb_mmr_read_data_hi_s { + unsigned long pfn:41; /* RO */ + unsigned long gaa:2; /* RO */ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long rsvd_45_63:19; + } sx; + struct uv2h_gr1_tlb_mmr_read_data_hi_s { + unsigned long pfn:41; /* RO */ + unsigned long gaa:2; /* RO */ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long rsvd_45_63:19; + } s2; + struct uv3h_gr1_tlb_mmr_read_data_hi_s { + unsigned long pfn:41; /* RO */ + unsigned long gaa:2; /* RO 
*/ + unsigned long dirty:1; /* RO */ + unsigned long larger:1; /* RO */ + unsigned long aa_ext:1; /* RO */ + unsigned long undef_46_54:9; /* Undefined */ + unsigned long way_ecc:9; /* RO */ + } s3; }; /* ========================================================================= */ @@ -968,9 +2348,11 @@ union uvh_gr1_tlb_mmr_read_data_hi_u { /* ========================================================================= */ #define UV1H_GR1_TLB_MMR_READ_DATA_LO 0x8010a8UL #define UV2H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL -#define UVH_GR1_TLB_MMR_READ_DATA_LO (is_uv1_hub() ? \ - UV1H_GR1_TLB_MMR_READ_DATA_LO : \ - UV2H_GR1_TLB_MMR_READ_DATA_LO) +#define UV3H_GR1_TLB_MMR_READ_DATA_LO 0x10010a8UL +#define UVH_GR1_TLB_MMR_READ_DATA_LO \ + (is_uv1_hub() ? UV1H_GR1_TLB_MMR_READ_DATA_LO : \ + (is_uv2_hub() ? UV2H_GR1_TLB_MMR_READ_DATA_LO : \ + UV3H_GR1_TLB_MMR_READ_DATA_LO)) #define UVH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 #define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 @@ -979,6 +2361,34 @@ union uvh_gr1_tlb_mmr_read_data_hi_u { #define UVH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL #define UVH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL +#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UV1H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UV1H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + +#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UVXH_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UVXH_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + +#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UV2H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UV2H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + +#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_SHFT 0 +#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_SHFT 39 +#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_SHFT 63 +#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VPN_MASK 0x0000007fffffffffUL +#define UV3H_GR1_TLB_MMR_READ_DATA_LO_ASID_MASK 0x7fffff8000000000UL +#define UV3H_GR1_TLB_MMR_READ_DATA_LO_VALID_MASK 0x8000000000000000UL + union uvh_gr1_tlb_mmr_read_data_lo_u { unsigned long v; struct uvh_gr1_tlb_mmr_read_data_lo_s { @@ -986,31 +2396,94 @@ union uvh_gr1_tlb_mmr_read_data_lo_u { unsigned long asid:24; /* RO */ unsigned long valid:1; /* RO */ } s; + struct uv1h_gr1_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } s1; + struct uvxh_gr1_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } sx; + struct uv2h_gr1_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } s2; + struct uv3h_gr1_tlb_mmr_read_data_lo_s { + unsigned long vpn:39; /* RO */ + unsigned long asid:24; /* RO */ + unsigned long valid:1; /* RO */ + } s3; }; /* ========================================================================= 
*/ /* UVH_INT_CMPB */ /* ========================================================================= */ -#define UVH_INT_CMPB 0x22080UL +#define UVH_INT_CMPB 0x22080UL +#define UV1H_INT_CMPB 0x22080UL +#define UV2H_INT_CMPB 0x22080UL +#define UV3H_INT_CMPB 0x22080UL #define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 #define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL +#define UV1H_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UV1H_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + +#define UVXH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UVXH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + +#define UV2H_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UV2H_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + +#define UV3H_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UV3H_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + union uvh_int_cmpb_u { unsigned long v; struct uvh_int_cmpb_s { unsigned long real_time_cmpb:56; /* RW */ unsigned long rsvd_56_63:8; } s; + struct uv1h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s1; + struct uvxh_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } sx; + struct uv2h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s2; + struct uv3h_int_cmpb_s { + unsigned long real_time_cmpb:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; }; /* ========================================================================= */ /* UVH_INT_CMPC */ /* ========================================================================= */ -#define UVH_INT_CMPC 0x22100UL +#define UVH_INT_CMPC 0x22100UL +#define UV1H_INT_CMPC 0x22100UL +#define UV2H_INT_CMPC 0x22100UL +#define UV3H_INT_CMPC 0x22100UL + +#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL -#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 -#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL +#define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 +#define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL + +#define UV2H_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 +#define UV2H_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL + +#define UV3H_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 +#define UV3H_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL union uvh_int_cmpc_u { unsigned long v; @@ -1018,15 +2491,43 @@ union uvh_int_cmpc_u { unsigned long real_time_cmpc:56; /* RW */ unsigned long rsvd_56_63:8; } s; + struct uv1h_int_cmpc_s { + unsigned long real_time_cmpc:56; /* RW */ + unsigned long rsvd_56_63:8; + } s1; + struct uvxh_int_cmpc_s { + unsigned long real_time_cmpc:56; /* RW */ + unsigned long rsvd_56_63:8; + } sx; + struct uv2h_int_cmpc_s { + unsigned long real_time_cmpc:56; /* RW */ + unsigned long rsvd_56_63:8; + } s2; + struct uv3h_int_cmpc_s { + unsigned long real_time_cmpc:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; }; /* ========================================================================= */ /* UVH_INT_CMPD */ /* ========================================================================= */ -#define UVH_INT_CMPD 0x22180UL +#define UVH_INT_CMPD 0x22180UL +#define UV1H_INT_CMPD 0x22180UL +#define UV2H_INT_CMPD 0x22180UL +#define UV3H_INT_CMPD 0x22180UL + +#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL -#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 -#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL +#define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 +#define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 
0x00ffffffffffffffUL + +#define UV2H_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 +#define UV2H_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL + +#define UV3H_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 +#define UV3H_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL union uvh_int_cmpd_u { unsigned long v; @@ -1034,13 +2535,35 @@ union uvh_int_cmpd_u { unsigned long real_time_cmpd:56; /* RW */ unsigned long rsvd_56_63:8; } s; + struct uv1h_int_cmpd_s { + unsigned long real_time_cmpd:56; /* RW */ + unsigned long rsvd_56_63:8; + } s1; + struct uvxh_int_cmpd_s { + unsigned long real_time_cmpd:56; /* RW */ + unsigned long rsvd_56_63:8; + } sx; + struct uv2h_int_cmpd_s { + unsigned long real_time_cmpd:56; /* RW */ + unsigned long rsvd_56_63:8; + } s2; + struct uv3h_int_cmpd_s { + unsigned long real_time_cmpd:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; }; /* ========================================================================= */ /* UVH_IPI_INT */ /* ========================================================================= */ -#define UVH_IPI_INT 0x60500UL -#define UVH_IPI_INT_32 0x348 +#define UVH_IPI_INT 0x60500UL +#define UV1H_IPI_INT 0x60500UL +#define UV2H_IPI_INT 0x60500UL +#define UV3H_IPI_INT 0x60500UL +#define UVH_IPI_INT_32 0x348 +#define UV1H_IPI_INT_32 0x348 +#define UV2H_IPI_INT_32 0x348 +#define UV3H_IPI_INT_32 0x348 #define UVH_IPI_INT_VECTOR_SHFT 0 #define UVH_IPI_INT_DELIVERY_MODE_SHFT 8 @@ -1053,6 +2576,50 @@ union uvh_int_cmpd_u { #define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL #define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL +#define UV1H_IPI_INT_VECTOR_SHFT 0 +#define UV1H_IPI_INT_DELIVERY_MODE_SHFT 8 +#define UV1H_IPI_INT_DESTMODE_SHFT 11 +#define UV1H_IPI_INT_APIC_ID_SHFT 16 +#define UV1H_IPI_INT_SEND_SHFT 63 +#define UV1H_IPI_INT_VECTOR_MASK 0x00000000000000ffUL +#define UV1H_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL +#define UV1H_IPI_INT_DESTMODE_MASK 0x0000000000000800UL +#define UV1H_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL +#define UV1H_IPI_INT_SEND_MASK 0x8000000000000000UL + +#define UVXH_IPI_INT_VECTOR_SHFT 0 +#define UVXH_IPI_INT_DELIVERY_MODE_SHFT 8 +#define UVXH_IPI_INT_DESTMODE_SHFT 11 +#define UVXH_IPI_INT_APIC_ID_SHFT 16 +#define UVXH_IPI_INT_SEND_SHFT 63 +#define UVXH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL +#define UVXH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL +#define UVXH_IPI_INT_SEND_MASK 0x8000000000000000UL + +#define UV2H_IPI_INT_VECTOR_SHFT 0 +#define UV2H_IPI_INT_DELIVERY_MODE_SHFT 8 +#define UV2H_IPI_INT_DESTMODE_SHFT 11 +#define UV2H_IPI_INT_APIC_ID_SHFT 16 +#define UV2H_IPI_INT_SEND_SHFT 63 +#define UV2H_IPI_INT_VECTOR_MASK 0x00000000000000ffUL +#define UV2H_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL +#define UV2H_IPI_INT_DESTMODE_MASK 0x0000000000000800UL +#define UV2H_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL +#define UV2H_IPI_INT_SEND_MASK 0x8000000000000000UL + +#define UV3H_IPI_INT_VECTOR_SHFT 0 +#define UV3H_IPI_INT_DELIVERY_MODE_SHFT 8 +#define UV3H_IPI_INT_DESTMODE_SHFT 11 +#define UV3H_IPI_INT_APIC_ID_SHFT 16 +#define UV3H_IPI_INT_SEND_SHFT 63 +#define UV3H_IPI_INT_VECTOR_MASK 0x00000000000000ffUL +#define UV3H_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL +#define UV3H_IPI_INT_DESTMODE_MASK 0x0000000000000800UL +#define UV3H_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL +#define UV3H_IPI_INT_SEND_MASK 0x8000000000000000UL + union uvh_ipi_int_u { unsigned long v; struct uvh_ipi_int_s { @@ -1064,19
+2631,81 @@ union uvh_ipi_int_u { unsigned long rsvd_48_62:15; unsigned long send:1; /* WP */ } s; + struct uv1h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s1; + struct uvxh_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } sx; + struct uv2h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s2; + struct uv3h_ipi_int_s { + unsigned long vector_:8; /* RW */ + unsigned long delivery_mode:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long rsvd_12_15:4; + unsigned long apic_id:32; /* RW */ + unsigned long rsvd_48_62:15; + unsigned long send:1; /* WP */ + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + +#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 +#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 +#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL +#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL + union uvh_lb_bau_intd_payload_queue_first_u { unsigned long v; struct uvh_lb_bau_intd_payload_queue_first_s { @@ -1086,17 +2715,63 @@ union
uvh_lb_bau_intd_payload_queue_first_u { unsigned long node_id:14; /* RW */ unsigned long rsvd_63:1; } s; + struct uv1h_lb_bau_intd_payload_queue_first_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s1; + struct uvxh_lb_bau_intd_payload_queue_first_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } sx; + struct uv2h_lb_bau_intd_payload_queue_first_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s2; + struct uv3h_lb_bau_intd_payload_queue_first_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + +#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 +#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL + union uvh_lb_bau_intd_payload_queue_last_u { unsigned long v; struct uvh_lb_bau_intd_payload_queue_last_s { @@ -1104,17 +2779,55 @@ union uvh_lb_bau_intd_payload_queue_last_u { unsigned long address:39; /* RW */ unsigned long rsvd_43_63:21; } s; + struct uv1h_lb_bau_intd_payload_queue_last_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s1; + struct uvxh_lb_bau_intd_payload_queue_last_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } sx; + struct uv2h_lb_bau_intd_payload_queue_last_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s2; + struct uv3h_lb_bau_intd_payload_queue_last_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 +#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + +#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 +#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 +#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 +#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL + union uvh_lb_bau_intd_payload_queue_tail_u { unsigned long v; struct uvh_lb_bau_intd_payload_queue_tail_s { @@ -1122,13 +2835,39 @@ union uvh_lb_bau_intd_payload_queue_tail_u { unsigned long address:39; /* RW */ unsigned long rsvd_43_63:21; } s; + struct uv1h_lb_bau_intd_payload_queue_tail_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s1; + struct uvxh_lb_bau_intd_payload_queue_tail_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } sx; + struct uv2h_lb_bau_intd_payload_queue_tail_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s2; + struct uv3h_lb_bau_intd_payload_queue_tail_s { + unsigned long rsvd_0_3:4; + unsigned long address:39; /* RW */ + unsigned long rsvd_43_63:21; + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 @@ -1163,6 +2902,138 @@ union uvh_lb_bau_intd_payload_queue_tail_u { #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 +#define
UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 
0x0000000000000020UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL +#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 +#define 
UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + union uvh_lb_bau_intd_software_acknowledge_u { unsigned long v; struct uvh_lb_bau_intd_software_acknowledge_s { @@ -1183,20 +3054,109 @@ union uvh_lb_bau_intd_software_acknowledge_u { unsigned long timeout_6:1; /* RW, W1C */ unsigned long timeout_7:1; /* RW, W1C */ unsigned long rsvd_16_63:48; - } s; + } s; + struct uv1h_lb_bau_intd_software_acknowledge_s { + unsigned long pending_0:1; /* RW, W1C */ + unsigned long pending_1:1; /* RW, W1C */ + unsigned long pending_2:1; /* RW, W1C */ + unsigned long pending_3:1; /* RW, W1C */ + unsigned long pending_4:1; /* RW, W1C */ + unsigned long pending_5:1; /* RW, W1C */ + unsigned long pending_6:1; /* RW, W1C */ + unsigned long pending_7:1; /* RW, W1C */ + unsigned long timeout_0:1; /* RW, W1C */ + unsigned long timeout_1:1; /* RW, W1C */ + unsigned long timeout_2:1; /* RW, W1C */ + unsigned long timeout_3:1; /* RW, W1C */ + unsigned long timeout_4:1; /* RW, W1C */ + unsigned long timeout_5:1; /* RW, W1C */ + unsigned long timeout_6:1; /* RW, W1C */ + unsigned long timeout_7:1; /* RW, W1C */ + unsigned long rsvd_16_63:48; + } s1; + struct uvxh_lb_bau_intd_software_acknowledge_s { + unsigned long pending_0:1; /* RW */ + unsigned long pending_1:1; /* RW */ + unsigned long pending_2:1; /* RW */ + unsigned long pending_3:1; /* RW */ + unsigned long pending_4:1; /* RW */ + unsigned long pending_5:1; /* RW */ + unsigned long pending_6:1; /* RW */ + unsigned long pending_7:1; /* RW */ + unsigned long timeout_0:1; /* RW */ + unsigned long timeout_1:1; /* RW */ + 
unsigned long timeout_2:1; /* RW */ + unsigned long timeout_3:1; /* RW */ + unsigned long timeout_4:1; /* RW */ + unsigned long timeout_5:1; /* RW */ + unsigned long timeout_6:1; /* RW */ + unsigned long timeout_7:1; /* RW */ + unsigned long rsvd_16_63:48; + } sx; + struct uv2h_lb_bau_intd_software_acknowledge_s { + unsigned long pending_0:1; /* RW */ + unsigned long pending_1:1; /* RW */ + unsigned long pending_2:1; /* RW */ + unsigned long pending_3:1; /* RW */ + unsigned long pending_4:1; /* RW */ + unsigned long pending_5:1; /* RW */ + unsigned long pending_6:1; /* RW */ + unsigned long pending_7:1; /* RW */ + unsigned long timeout_0:1; /* RW */ + unsigned long timeout_1:1; /* RW */ + unsigned long timeout_2:1; /* RW */ + unsigned long timeout_3:1; /* RW */ + unsigned long timeout_4:1; /* RW */ + unsigned long timeout_5:1; /* RW */ + unsigned long timeout_6:1; /* RW */ + unsigned long timeout_7:1; /* RW */ + unsigned long rsvd_16_63:48; + } s2; + struct uv3h_lb_bau_intd_software_acknowledge_s { + unsigned long pending_0:1; /* RW */ + unsigned long pending_1:1; /* RW */ + unsigned long pending_2:1; /* RW */ + unsigned long pending_3:1; /* RW */ + unsigned long pending_4:1; /* RW */ + unsigned long pending_5:1; /* RW */ + unsigned long pending_6:1; /* RW */ + unsigned long pending_7:1; /* RW */ + unsigned long timeout_0:1; /* RW */ + unsigned long timeout_1:1; /* RW */ + unsigned long timeout_2:1; /* RW */ + unsigned long timeout_3:1; /* RW */ + unsigned long timeout_4:1; /* RW */ + unsigned long timeout_5:1; /* RW */ + unsigned long timeout_6:1; /* RW */ + unsigned long timeout_7:1; /* RW */ + unsigned long rsvd_16_63:48; + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ /* ========================================================================= */ -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 +#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 +#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 +#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 + /* ========================================================================= */ /* UVH_LB_BAU_MISC_CONTROL */ /* ========================================================================= */ -#define UVH_LB_BAU_MISC_CONTROL 0x320170UL -#define UVH_LB_BAU_MISC_CONTROL_32 0xa10 +#define UVH_LB_BAU_MISC_CONTROL 0x320170UL +#define UV1H_LB_BAU_MISC_CONTROL 0x320170UL +#define UV2H_LB_BAU_MISC_CONTROL 0x320170UL +#define UV3H_LB_BAU_MISC_CONTROL 0x320170UL +#define UVH_LB_BAU_MISC_CONTROL_32 0xa10 +#define UV1H_LB_BAU_MISC_CONTROL_32 0xa10 +#define UV2H_LB_BAU_MISC_CONTROL_32 0xa10 +#define UV3H_LB_BAU_MISC_CONTROL_32 0xa10 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 @@ -1213,6 +3173,7 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define
UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL #define UVH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL @@ -1228,6 +3189,7 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL #define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 #define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 @@ -1262,6 +3224,53 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL #define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL +#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 +#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 +#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 +#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 +#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 +#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 +#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 +#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 +#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 +#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 +#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 +#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 +#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 +#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 +#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 +#define UVXH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 +#define UVXH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL +#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL +#define UVXH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL +#define UVXH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL +#define UVXH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UVXH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL +#define UVXH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL +#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL +#define UVXH_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL +#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL +#define UVXH_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 
0x0000000007000000UL +#define UVXH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL +#define UVXH_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL +#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL +#define UVXH_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL +#define UVXH_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL +#define UVXH_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL +#define UVXH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL + #define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 #define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 #define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 @@ -1309,6 +3318,59 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL #define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL +#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 +#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 +#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 +#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 +#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 +#define UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 +#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 +#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 +#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 +#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 +#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 +#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 +#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 +#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 +#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 +#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_SHFT 36 +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_SHFT 37 +#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_SHFT 38 +#define UV3H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 +#define UV3H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL +#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL +#define UV3H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL +#define UV3H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL +#define UV3H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UV3H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL +#define 
UV3H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL +#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL +#define UV3H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL +#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL +#define UV3H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL +#define UV3H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL +#define UV3H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL +#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL +#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL +#define UV3H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL +#define UV3H_LB_BAU_MISC_CONTROL_SUPPRESS_QUIESCE_MSGS_TO_QPI_MASK 0x0000001000000000UL +#define UV3H_LB_BAU_MISC_CONTROL_ENABLE_INTD_PREFETCH_HINT_MASK 0x0000002000000000UL +#define UV3H_LB_BAU_MISC_CONTROL_THREAD_KILL_TIMEBASE_MASK 0x00003fc000000000UL +#define UV3H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL + union uvh_lb_bau_misc_control_u { unsigned long v; struct uvh_lb_bau_misc_control_s { @@ -1327,7 +3389,8 @@ union uvh_lb_bau_misc_control_u { unsigned long programmed_initial_priority:3; /* RW */ unsigned long use_incoming_priority:1; /* RW */ unsigned long enable_programmed_initial_priority:1;/* RW */ - unsigned long rsvd_29_63:35; + unsigned long rsvd_29_47:19; + unsigned long fun:16; /* RW */ } s; struct uv1h_lb_bau_misc_control_s { unsigned long rejection_delay:8; /* RW */ @@ -1348,6 +3411,32 @@ union uvh_lb_bau_misc_control_u { unsigned long rsvd_29_47:19; unsigned long fun:16; /* RW */ } s1; + struct uvxh_lb_bau_misc_control_s { + unsigned long rejection_delay:8; /* RW */ + unsigned long apic_mode:1; /* RW */ + unsigned long force_broadcast:1; /* RW */ + unsigned long force_lock_nop:1; /* RW */ + unsigned long qpi_agent_presence_vector:3; /* RW */ + unsigned long descriptor_fetch_mode:1; /* RW */ + unsigned long enable_intd_soft_ack_mode:1; /* RW */ + unsigned long intd_soft_ack_timeout_period:4; /* RW */ + unsigned long enable_dual_mapping_mode:1; /* RW */ + unsigned long vga_io_port_decode_enable:1; /* RW */ + unsigned long vga_io_port_16_bit_decode:1; /* RW */ + unsigned long suppress_dest_registration:1; /* RW */ + unsigned long programmed_initial_priority:3; /* RW */ + unsigned long use_incoming_priority:1; /* RW */ + unsigned long enable_programmed_initial_priority:1;/* RW */ + unsigned long enable_automatic_apic_mode_selection:1;/* RW */ + unsigned long apic_mode_status:1; /* RO */ + unsigned long suppress_interrupts_to_self:1; /* RW */ + unsigned long enable_lock_based_system_flush:1;/* RW */ + unsigned long enable_extended_sb_status:1; /* RW */ + unsigned long suppress_int_prio_udt_to_self:1;/* RW */ + unsigned long use_legacy_descriptor_formats:1;/* RW */ + unsigned long rsvd_36_47:12; + unsigned long fun:16; /* RW */ + } sx; struct uv2h_lb_bau_misc_control_s { unsigned long 
rejection_delay:8; /* RW */ unsigned long apic_mode:1; /* RW */ @@ -1374,13 +3463,48 @@ union uvh_lb_bau_misc_control_u { unsigned long rsvd_36_47:12; unsigned long fun:16; /* RW */ } s2; + struct uv3h_lb_bau_misc_control_s { + unsigned long rejection_delay:8; /* RW */ + unsigned long apic_mode:1; /* RW */ + unsigned long force_broadcast:1; /* RW */ + unsigned long force_lock_nop:1; /* RW */ + unsigned long qpi_agent_presence_vector:3; /* RW */ + unsigned long descriptor_fetch_mode:1; /* RW */ + unsigned long enable_intd_soft_ack_mode:1; /* RW */ + unsigned long intd_soft_ack_timeout_period:4; /* RW */ + unsigned long enable_dual_mapping_mode:1; /* RW */ + unsigned long vga_io_port_decode_enable:1; /* RW */ + unsigned long vga_io_port_16_bit_decode:1; /* RW */ + unsigned long suppress_dest_registration:1; /* RW */ + unsigned long programmed_initial_priority:3; /* RW */ + unsigned long use_incoming_priority:1; /* RW */ + unsigned long enable_programmed_initial_priority:1;/* RW */ + unsigned long enable_automatic_apic_mode_selection:1;/* RW */ + unsigned long apic_mode_status:1; /* RO */ + unsigned long suppress_interrupts_to_self:1; /* RW */ + unsigned long enable_lock_based_system_flush:1;/* RW */ + unsigned long enable_extended_sb_status:1; /* RW */ + unsigned long suppress_int_prio_udt_to_self:1;/* RW */ + unsigned long use_legacy_descriptor_formats:1;/* RW */ + unsigned long suppress_quiesce_msgs_to_qpi:1; /* RW */ + unsigned long enable_intd_prefetch_hint:1; /* RW */ + unsigned long thread_kill_timebase:8; /* RW */ + unsigned long rsvd_46_47:2; + unsigned long fun:16; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ /* ========================================================================= */ -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL +#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 @@ -1389,6 +3513,34 @@ union uvh_lb_bau_misc_control_u { #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL +#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL + +#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 +#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 +#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 +#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL +#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL +#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL + +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 +#define
UV2H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL +#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL + +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62 +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63 +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL +#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL + union uvh_lb_bau_sb_activation_control_u { unsigned long v; struct uvh_lb_bau_sb_activation_control_s { @@ -1397,51 +3549,161 @@ union uvh_lb_bau_sb_activation_control_u { unsigned long push:1; /* WP */ unsigned long init:1; /* WP */ } s; + struct uv1h_lb_bau_sb_activation_control_s { + unsigned long index:6; /* RW */ + unsigned long rsvd_6_61:56; + unsigned long push:1; /* WP */ + unsigned long init:1; /* WP */ + } s1; + struct uvxh_lb_bau_sb_activation_control_s { + unsigned long index:6; /* RW */ + unsigned long rsvd_6_61:56; + unsigned long push:1; /* WP */ + unsigned long init:1; /* WP */ + } sx; + struct uv2h_lb_bau_sb_activation_control_s { + unsigned long index:6; /* RW */ + unsigned long rsvd_6_61:56; + unsigned long push:1; /* WP */ + unsigned long init:1; /* WP */ + } s2; + struct uv3h_lb_bau_sb_activation_control_s { + unsigned long index:6; /* RW */ + unsigned long rsvd_6_61:56; + unsigned long push:1; /* WP */ + unsigned long init:1; /* WP */ + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ /* ========================================================================= */ -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL + +#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 +#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL + +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL + +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL + union uvh_lb_bau_sb_activation_status_0_u { unsigned long v; struct uvh_lb_bau_sb_activation_status_0_s { unsigned long status:64; /* RW */ } s; + struct uv1h_lb_bau_sb_activation_status_0_s { + unsigned long status:64; /* RW */ + } s1; + struct uvxh_lb_bau_sb_activation_status_0_s { + unsigned long status:64; /* RW */ + } sx; + struct uv2h_lb_bau_sb_activation_status_0_s { + unsigned long status:64; /* RW */ + } s2;
+ struct uv3h_lb_bau_sb_activation_status_0_s { + unsigned long status:64; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ /* ========================================================================= */ -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 +#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL + +#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 +#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL + +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL + +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL + union uvh_lb_bau_sb_activation_status_1_u { unsigned long v; struct uvh_lb_bau_sb_activation_status_1_s { unsigned long status:64; /* RW */ } s; + struct uv1h_lb_bau_sb_activation_status_1_s { + unsigned long status:64; /* RW */ + } s1; + struct uvxh_lb_bau_sb_activation_status_1_s { + unsigned long status:64; /* RW */ + } sx; + struct uv2h_lb_bau_sb_activation_status_1_s { + unsigned long status:64; /* RW */ + } s2; + struct uv3h_lb_bau_sb_activation_status_1_s { + unsigned long status:64; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ /* ========================================================================= */ -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL + +#define UVXH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 +#define UVXH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 +#define UVXH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UVXH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL +
+#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL + +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL +#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL + union uvh_lb_bau_sb_descriptor_base_u { unsigned long v; struct uvh_lb_bau_sb_descriptor_base_s { @@ -1451,12 +3713,43 @@ union uvh_lb_bau_sb_descriptor_base_u { unsigned long node_id:14; /* RW */ unsigned long rsvd_63:1; } s; + struct uv1h_lb_bau_sb_descriptor_base_s { + unsigned long rsvd_0_11:12; + unsigned long page_address:31; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s1; + struct uvxh_lb_bau_sb_descriptor_base_s { + unsigned long rsvd_0_11:12; + unsigned long page_address:31; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } sx; + struct uv2h_lb_bau_sb_descriptor_base_s { + unsigned long rsvd_0_11:12; + unsigned long page_address:31; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s2; + struct uv3h_lb_bau_sb_descriptor_base_s { + unsigned long rsvd_0_11:12; + unsigned long page_address:31; /* RW */ + unsigned long rsvd_43_48:6; + unsigned long node_id:14; /* RW */ + unsigned long rsvd_63:1; + } s3; }; /* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ -#define UVH_NODE_ID 0x0UL +#define UVH_NODE_ID 0x0UL +#define UV1H_NODE_ID 0x0UL +#define UV2H_NODE_ID 0x0UL +#define UV3H_NODE_ID 0x0UL #define UVH_NODE_ID_FORCE1_SHFT 0 #define UVH_NODE_ID_MANUFACTURER_SHFT 1 @@ -1484,6 +3777,21 @@ union uvh_lb_bau_sb_descriptor_base_u { #define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL #define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL +#define UVXH_NODE_ID_FORCE1_SHFT 0 +#define UVXH_NODE_ID_MANUFACTURER_SHFT 1 +#define UVXH_NODE_ID_PART_NUMBER_SHFT 12 +#define UVXH_NODE_ID_REVISION_SHFT 28 +#define UVXH_NODE_ID_NODE_ID_SHFT 32 +#define UVXH_NODE_ID_NODES_PER_BIT_SHFT 50 +#define UVXH_NODE_ID_NI_PORT_SHFT 57 +#define UVXH_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UVXH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UVXH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UVXH_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UVXH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UVXH_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL +#define UVXH_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL + #define UV2H_NODE_ID_FORCE1_SHFT 0 #define UV2H_NODE_ID_MANUFACTURER_SHFT 1 #define UV2H_NODE_ID_PART_NUMBER_SHFT 12 @@ -1499,6 +3807,25 @@ union uvh_lb_bau_sb_descriptor_base_u { #define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL #define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL +#define UV3H_NODE_ID_FORCE1_SHFT 0 +#define UV3H_NODE_ID_MANUFACTURER_SHFT 1 +#define UV3H_NODE_ID_PART_NUMBER_SHFT 12 +#define UV3H_NODE_ID_REVISION_SHFT 28 +#define UV3H_NODE_ID_NODE_ID_SHFT 32 +#define UV3H_NODE_ID_ROUTER_SELECT_SHFT 48 +#define UV3H_NODE_ID_RESERVED_2_SHFT 49 +#define UV3H_NODE_ID_NODES_PER_BIT_SHFT 50 +#define 
UV3H_NODE_ID_NI_PORT_SHFT 57 +#define UV3H_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UV3H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UV3H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UV3H_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UV3H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UV3H_NODE_ID_ROUTER_SELECT_MASK 0x0001000000000000UL +#define UV3H_NODE_ID_RESERVED_2_MASK 0x0002000000000000UL +#define UV3H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL +#define UV3H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL + union uvh_node_id_u { unsigned long v; struct uvh_node_id_s { @@ -1521,6 +3848,17 @@ union uvh_node_id_u { unsigned long ni_port:4; /* RO */ unsigned long rsvd_60_63:4; } s1; + struct uvxh_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47_49:3; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } sx; struct uv2h_node_id_s { unsigned long force1:1; /* RO */ unsigned long manufacturer:11; /* RO */ @@ -1532,28 +3870,74 @@ union uvh_node_id_u { unsigned long ni_port:5; /* RO */ unsigned long rsvd_62_63:2; } s2; + struct uv3h_node_id_s { + unsigned long force1:1; /* RO */ + unsigned long manufacturer:11; /* RO */ + unsigned long part_number:16; /* RO */ + unsigned long revision:4; /* RO */ + unsigned long node_id:15; /* RW */ + unsigned long rsvd_47:1; + unsigned long router_select:1; /* RO */ + unsigned long rsvd_49:1; + unsigned long nodes_per_bit:7; /* RO */ + unsigned long ni_port:5; /* RO */ + unsigned long rsvd_62_63:2; + } s3; }; /* ========================================================================= */ /* UVH_NODE_PRESENT_TABLE */ /* ========================================================================= */ -#define UVH_NODE_PRESENT_TABLE 0x1400UL -#define UVH_NODE_PRESENT_TABLE_DEPTH 16 +#define UVH_NODE_PRESENT_TABLE 0x1400UL +#define UV1H_NODE_PRESENT_TABLE 0x1400UL +#define UV2H_NODE_PRESENT_TABLE 0x1400UL +#define UV3H_NODE_PRESENT_TABLE 0x1400UL +#define UVH_NODE_PRESENT_TABLE_DEPTH 16 +#define UV1H_NODE_PRESENT_TABLE_DEPTH 16 +#define UV2H_NODE_PRESENT_TABLE_DEPTH 16 +#define UV3H_NODE_PRESENT_TABLE_DEPTH 16 #define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0 #define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL +#define UV1H_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UV1H_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + +#define UVXH_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UVXH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + +#define UV2H_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UV2H_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + +#define UV3H_NODE_PRESENT_TABLE_NODES_SHFT 0 +#define UV3H_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL + union uvh_node_present_table_u { unsigned long v; struct uvh_node_present_table_s { unsigned long nodes:64; /* RW */ } s; + struct uv1h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s1; + struct uvxh_node_present_table_s { + unsigned long nodes:64; /* RW */ + } sx; + struct uv2h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s2; + struct uv3h_node_present_table_s { + unsigned long nodes:64; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */ /*
========================================================================= */ -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 @@ -1562,6 +3946,34 @@ union uvh_node_present_table_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { @@ -1572,12 +3984,47 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long 
enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 @@ -1586,6 +4033,34 @@ union uvh_rh_gam_alias210_overlay_config_0_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_1_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { @@ -1596,12 +4071,47 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s { + 
unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 @@ -1610,6 +4120,34 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_overlay_config_2_mmr_s { @@ -1620,33 +4158,115 @@ 
union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; + struct uv1h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s1; + struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } sx; + struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s2; + struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long base:8; /* RW */ + unsigned long rsvd_32_47:16; + unsigned long m_alias:5; /* RW */ + unsigned long rsvd_53_62:10; + unsigned long enable:1; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_0_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { unsigned long rsvd_0_23:24; unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; - } s; + } s; + struct uv1h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ /* 
========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { @@ -1654,16 +4274,51 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; + struct uv1h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long v; struct 
uvh_rh_gam_alias210_redirect_config_2_mmr_s { @@ -1671,12 +4326,35 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; + struct uv1h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s1; + struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } sx; + struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s2; + struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23:24; + unsigned long dest_base:22; /* RW */ + unsigned long rsvd_46_63:18; + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_CONFIG_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL +#define UVH_RH_GAM_CONFIG_MMR 0x1600000UL +#define UV1H_RH_GAM_CONFIG_MMR 0x1600000UL +#define UV2H_RH_GAM_CONFIG_MMR 0x1600000UL +#define UV3H_RH_GAM_CONFIG_MMR 0x1600000UL #define UVH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 #define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 @@ -1690,11 +4368,21 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { #define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL #define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL +#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 +#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 +#define UVXH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL +#define UVXH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL + #define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 #define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 #define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL #define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL +#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 +#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 +#define UV3H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL +#define UV3H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL + union uvh_rh_gam_config_mmr_u { unsigned long v; struct uvh_rh_gam_config_mmr_s { @@ -1709,20 +4397,37 @@ union uvh_rh_gam_config_mmr_u { unsigned long mmiol_cfg:1; /* RW */ unsigned long rsvd_13_63:51; } s1; + struct uvxh_rh_gam_config_mmr_s { + unsigned long m_skt:6; /* RW */ + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } sx; struct uv2h_rh_gam_config_mmr_s { unsigned long m_skt:6; /* RW */ unsigned long n_skt:4; /* RW */ unsigned long rsvd_10_63:54; } s2; + struct uv3h_rh_gam_config_mmr_s { + unsigned long m_skt:6; /* RW */ + unsigned long n_skt:4; /* RW */ + unsigned long rsvd_10_63:54; + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 
0x00003ffff0000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 @@ -1733,6 +4438,13 @@ union uvh_rh_gam_config_mmr_u { #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL #define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UVXH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 @@ -1740,12 +4452,23 @@ union uvh_rh_gam_config_mmr_u { #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL #define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_SHFT 62 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_MODE_MASK 0x4000000000000000UL +#define UV3H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long v; struct uvh_rh_gam_gru_overlay_config_mmr_s { unsigned long rsvd_0_27:28; unsigned long base:18; /* RW */ - unsigned long rsvd_46_62:17; + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } s; struct uv1h_rh_gam_gru_overlay_config_mmr_s { @@ -1758,6 +4481,14 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } s1; + struct uvxh_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27:28; + unsigned long base:18; /* RW */ + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_62:7; + unsigned long enable:1; /* RW */ + } sx; struct uv2h_rh_gam_gru_overlay_config_mmr_s { unsigned long rsvd_0_27:28; unsigned long base:18; /* RW */ @@ -1766,12 +4497,25 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long rsvd_56_62:7; unsigned long enable:1; /* RW */ } s2; + struct uv3h_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27:28; + unsigned long base:18; /* RW */ + unsigned long rsvd_46_51:6; + unsigned long n_gru:4; /* RW */ + unsigned long rsvd_56_61:6; + unsigned long mode:1; /* RW */ + unsigned long enable:1; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL +#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR \ + (is_uv1_hub() ? 
UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ + UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR) #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 @@ -1814,10 +4558,15 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { /* ========================================================================= */ /* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ /* ========================================================================= */ -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL +#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 @@ -1826,11 +4575,21 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL #define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UVXH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL #define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + union uvh_rh_gam_mmr_overlay_config_mmr_u { unsigned long v; struct uvh_rh_gam_mmr_overlay_config_mmr_s { @@ -1846,34 +4605,80 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u { unsigned long rsvd_47_62:16; unsigned long enable:1; /* RW */ } s1; + struct uvxh_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long rsvd_46_62:17; + unsigned long enable:1; /* RW */ + } sx; struct uv2h_rh_gam_mmr_overlay_config_mmr_s { unsigned long rsvd_0_25:26; unsigned long base:20; /* RW */ unsigned long rsvd_46_62:17; unsigned long enable:1; /* RW */ } s2; + struct uv3h_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25:26; + unsigned long base:20; /* RW */ + unsigned long rsvd_46_62:17; + unsigned long enable:1; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_RTC */ /* ========================================================================= */ -#define UVH_RTC 0x340000UL +#define UVH_RTC 0x340000UL +#define UV1H_RTC 0x340000UL +#define UV2H_RTC 0x340000UL +#define UV3H_RTC 0x340000UL #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL +#define UV1H_RTC_REAL_TIME_CLOCK_SHFT 0 +#define UV1H_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL + +#define UVXH_RTC_REAL_TIME_CLOCK_SHFT 0 +#define UVXH_RTC_REAL_TIME_CLOCK_MASK 
0x00ffffffffffffffUL + +#define UV2H_RTC_REAL_TIME_CLOCK_SHFT 0 +#define UV2H_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL + +#define UV3H_RTC_REAL_TIME_CLOCK_SHFT 0 +#define UV3H_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL + union uvh_rtc_u { unsigned long v; struct uvh_rtc_s { unsigned long real_time_clock:56; /* RW */ unsigned long rsvd_56_63:8; } s; + struct uv1h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s1; + struct uvxh_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } sx; + struct uv2h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s2; + struct uv3h_rtc_s { + unsigned long real_time_clock:56; /* RW */ + unsigned long rsvd_56_63:8; + } s3; }; /* ========================================================================= */ /* UVH_RTC1_INT_CONFIG */ /* ========================================================================= */ -#define UVH_RTC1_INT_CONFIG 0x615c0UL +#define UVH_RTC1_INT_CONFIG 0x615c0UL +#define UV1H_RTC1_INT_CONFIG 0x615c0UL +#define UV2H_RTC1_INT_CONFIG 0x615c0UL +#define UV3H_RTC1_INT_CONFIG 0x615c0UL #define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 #define UVH_RTC1_INT_CONFIG_DM_SHFT 8 @@ -1892,6 +4697,74 @@ union uvh_rtc_u { #define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL #define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL +#define UV1H_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UV1H_RTC1_INT_CONFIG_DM_SHFT 8 +#define UV1H_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UV1H_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UV1H_RTC1_INT_CONFIG_P_SHFT 13 +#define UV1H_RTC1_INT_CONFIG_T_SHFT 15 +#define UV1H_RTC1_INT_CONFIG_M_SHFT 16 +#define UV1H_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UV1H_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV1H_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UV1H_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV1H_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV1H_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UV1H_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UV1H_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UV1H_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UVXH_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UVXH_RTC1_INT_CONFIG_DM_SHFT 8 +#define UVXH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UVXH_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UVXH_RTC1_INT_CONFIG_P_SHFT 13 +#define UVXH_RTC1_INT_CONFIG_T_SHFT 15 +#define UVXH_RTC1_INT_CONFIG_M_SHFT 16 +#define UVXH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UVXH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVXH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVXH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVXH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVXH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVXH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVXH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVXH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV2H_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UV2H_RTC1_INT_CONFIG_DM_SHFT 8 +#define UV2H_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UV2H_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UV2H_RTC1_INT_CONFIG_P_SHFT 13 +#define UV2H_RTC1_INT_CONFIG_T_SHFT 15 +#define UV2H_RTC1_INT_CONFIG_M_SHFT 16 +#define UV2H_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UV2H_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV2H_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define 
UV2H_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV2H_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV2H_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UV2H_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UV2H_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UV2H_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +#define UV3H_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UV3H_RTC1_INT_CONFIG_DM_SHFT 8 +#define UV3H_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UV3H_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UV3H_RTC1_INT_CONFIG_P_SHFT 13 +#define UV3H_RTC1_INT_CONFIG_T_SHFT 15 +#define UV3H_RTC1_INT_CONFIG_M_SHFT 16 +#define UV3H_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UV3H_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UV3H_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UV3H_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UV3H_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UV3H_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UV3H_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UV3H_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UV3H_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + union uvh_rtc1_int_config_u { unsigned long v; struct uvh_rtc1_int_config_s { @@ -1906,29 +4779,176 @@ union uvh_rtc1_int_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; + struct uv1h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s1; + struct uvxh_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } sx; + struct uv2h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s2; + struct uv3h_rtc1_int_config_s { + unsigned long vector_:8; /* RW */ + unsigned long dm:3; /* RW */ + unsigned long destmode:1; /* RW */ + unsigned long status:1; /* RO */ + unsigned long p:1; /* RO */ + unsigned long rsvd_14:1; + unsigned long t:1; /* RO */ + unsigned long m:1; /* RW */ + unsigned long rsvd_17_31:15; + unsigned long apic_id:32; /* RW */ + } s3; }; /* ========================================================================= */ /* UVH_SCRATCH5 */ /* ========================================================================= */ -#define UVH_SCRATCH5 0x2d0200UL -#define UVH_SCRATCH5_32 0x778 +#define UVH_SCRATCH5 0x2d0200UL +#define UV1H_SCRATCH5 0x2d0200UL +#define UV2H_SCRATCH5 0x2d0200UL +#define UV3H_SCRATCH5 0x2d0200UL +#define UVH_SCRATCH5_32 0x778 +#define UV1H_SCRATCH5_32 0x2d0200UL +#define UV2H_SCRATCH5_32 0x2d0200UL +#define UV3H_SCRATCH5_32 0x2d0200UL #define UVH_SCRATCH5_SCRATCH5_SHFT 0 #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL +#define UV1H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV1H_SCRATCH5_SCRATCH5_MASK 
0xffffffffffffffffUL + +#define UVXH_SCRATCH5_SCRATCH5_SHFT 0 +#define UVXH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + +#define UV2H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV2H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + +#define UV3H_SCRATCH5_SCRATCH5_SHFT 0 +#define UV3H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + union uvh_scratch5_u { unsigned long v; struct uvh_scratch5_s { unsigned long scratch5:64; /* RW, W1CS */ } s; + struct uv1h_scratch5_s { + unsigned long scratch5:64; /* RW, W1CS */ + } s1; + struct uvxh_scratch5_s { + unsigned long scratch5:64; /* RW */ + } sx; + struct uv2h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s2; + struct uv3h_scratch5_s { + unsigned long scratch5:64; /* RW */ + } s3; }; /* ========================================================================= */ -/* UV2H_EVENT_OCCURRED2 */ +/* UVXH_EVENT_OCCURRED2 */ /* ========================================================================= */ -#define UV2H_EVENT_OCCURRED2 0x70100UL -#define UV2H_EVENT_OCCURRED2_32 0xb68 +#define UVXH_EVENT_OCCURRED2 0x70100UL +#define UV2H_EVENT_OCCURRED2 0x70100UL +#define UV3H_EVENT_OCCURRED2 0x70100UL +#define UVXH_EVENT_OCCURRED2_32 0xb68 +#define UV2H_EVENT_OCCURRED2_32 0x70100UL +#define UV3H_EVENT_OCCURRED2_32 0x70100UL + +#define UVXH_EVENT_OCCURRED2_RTC_0_SHFT 0 +#define UVXH_EVENT_OCCURRED2_RTC_1_SHFT 1 +#define UVXH_EVENT_OCCURRED2_RTC_2_SHFT 2 +#define UVXH_EVENT_OCCURRED2_RTC_3_SHFT 3 +#define UVXH_EVENT_OCCURRED2_RTC_4_SHFT 4 +#define UVXH_EVENT_OCCURRED2_RTC_5_SHFT 5 +#define UVXH_EVENT_OCCURRED2_RTC_6_SHFT 6 +#define UVXH_EVENT_OCCURRED2_RTC_7_SHFT 7 +#define UVXH_EVENT_OCCURRED2_RTC_8_SHFT 8 +#define UVXH_EVENT_OCCURRED2_RTC_9_SHFT 9 +#define UVXH_EVENT_OCCURRED2_RTC_10_SHFT 10 +#define UVXH_EVENT_OCCURRED2_RTC_11_SHFT 11 +#define UVXH_EVENT_OCCURRED2_RTC_12_SHFT 12 +#define UVXH_EVENT_OCCURRED2_RTC_13_SHFT 13 +#define UVXH_EVENT_OCCURRED2_RTC_14_SHFT 14 +#define UVXH_EVENT_OCCURRED2_RTC_15_SHFT 15 +#define UVXH_EVENT_OCCURRED2_RTC_16_SHFT 16 +#define UVXH_EVENT_OCCURRED2_RTC_17_SHFT 17 +#define UVXH_EVENT_OCCURRED2_RTC_18_SHFT 18 +#define UVXH_EVENT_OCCURRED2_RTC_19_SHFT 19 +#define UVXH_EVENT_OCCURRED2_RTC_20_SHFT 20 +#define UVXH_EVENT_OCCURRED2_RTC_21_SHFT 21 +#define UVXH_EVENT_OCCURRED2_RTC_22_SHFT 22 +#define UVXH_EVENT_OCCURRED2_RTC_23_SHFT 23 +#define UVXH_EVENT_OCCURRED2_RTC_24_SHFT 24 +#define UVXH_EVENT_OCCURRED2_RTC_25_SHFT 25 +#define UVXH_EVENT_OCCURRED2_RTC_26_SHFT 26 +#define UVXH_EVENT_OCCURRED2_RTC_27_SHFT 27 +#define UVXH_EVENT_OCCURRED2_RTC_28_SHFT 28 +#define UVXH_EVENT_OCCURRED2_RTC_29_SHFT 29 +#define UVXH_EVENT_OCCURRED2_RTC_30_SHFT 30 +#define UVXH_EVENT_OCCURRED2_RTC_31_SHFT 31 +#define UVXH_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL +#define UVXH_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL +#define UVXH_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL +#define UVXH_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL +#define UVXH_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL +#define UVXH_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL +#define UVXH_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL +#define UVXH_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL +#define UVXH_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL +#define UVXH_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL +#define UVXH_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL +#define UVXH_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL +#define UVXH_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL +#define UVXH_EVENT_OCCURRED2_RTC_13_MASK 
0x0000000000002000UL +#define UVXH_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL +#define UVXH_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL +#define UVXH_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL +#define UVXH_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL +#define UVXH_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL +#define UVXH_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL +#define UVXH_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL +#define UVXH_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL +#define UVXH_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL +#define UVXH_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL +#define UVXH_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL +#define UVXH_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL +#define UVXH_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL +#define UVXH_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL +#define UVXH_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL +#define UVXH_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL +#define UVXH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL +#define UVXH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL #define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 #define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 @@ -1995,8 +5015,108 @@ union uvh_scratch5_u { #define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL #define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL -union uv2h_event_occurred2_u { +#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0 +#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1 +#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2 +#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3 +#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4 +#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5 +#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6 +#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7 +#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8 +#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9 +#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10 +#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11 +#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12 +#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13 +#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14 +#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15 +#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16 +#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17 +#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18 +#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19 +#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20 +#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21 +#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22 +#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23 +#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24 +#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25 +#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26 +#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27 +#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28 +#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29 +#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30 +#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31 +#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL +#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL +#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL +#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL +#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL +#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL +#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL +#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL +#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL +#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL +#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL +#define 
UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL +#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL +#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL +#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL +#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL +#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL +#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL +#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL +#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL +#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL +#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL +#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL +#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL +#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL +#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL +#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL +#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL +#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL +#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL +#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL +#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + +union uvxh_event_occurred2_u { unsigned long v; + struct uvxh_event_occurred2_s { + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_32_63:32; + } sx; struct uv2h_event_occurred2_s { unsigned long rtc_0:1; /* RW */ unsigned long rtc_1:1; /* RW */ @@ -2031,29 +5151,85 @@ union uv2h_event_occurred2_u { unsigned long rtc_30:1; /* RW */ unsigned long rtc_31:1; /* RW */ unsigned long rsvd_32_63:32; - } s1; + } s2; + struct uv3h_event_occurred2_s { + unsigned long rtc_0:1; /* RW */ + unsigned long rtc_1:1; /* RW */ + unsigned long rtc_2:1; /* RW */ + unsigned long rtc_3:1; /* RW */ + unsigned long rtc_4:1; /* RW */ + unsigned long rtc_5:1; /* RW */ + unsigned long rtc_6:1; /* RW */ + unsigned long rtc_7:1; /* RW */ + unsigned long rtc_8:1; /* RW */ + unsigned long rtc_9:1; /* RW */ + unsigned long rtc_10:1; /* RW */ + unsigned long rtc_11:1; /* RW */ + unsigned long rtc_12:1; /* RW */ + unsigned long rtc_13:1; /* RW */ + unsigned long rtc_14:1; /* RW */ + unsigned long rtc_15:1; /* RW */ + unsigned long rtc_16:1; /* RW */ + unsigned long rtc_17:1; /* RW */ + unsigned long rtc_18:1; /* RW */ + unsigned long rtc_19:1; /* RW */ + unsigned long 
rtc_20:1; /* RW */ + unsigned long rtc_21:1; /* RW */ + unsigned long rtc_22:1; /* RW */ + unsigned long rtc_23:1; /* RW */ + unsigned long rtc_24:1; /* RW */ + unsigned long rtc_25:1; /* RW */ + unsigned long rtc_26:1; /* RW */ + unsigned long rtc_27:1; /* RW */ + unsigned long rtc_28:1; /* RW */ + unsigned long rtc_29:1; /* RW */ + unsigned long rtc_30:1; /* RW */ + unsigned long rtc_31:1; /* RW */ + unsigned long rsvd_32_63:32; + } s3; }; /* ========================================================================= */ -/* UV2H_EVENT_OCCURRED2_ALIAS */ +/* UVXH_EVENT_OCCURRED2_ALIAS */ /* ========================================================================= */ -#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL -#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 +#define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL +#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL +#define UV3H_EVENT_OCCURRED2_ALIAS 0x70108UL +#define UVXH_EVENT_OCCURRED2_ALIAS_32 0xb70 +#define UV2H_EVENT_OCCURRED2_ALIAS_32 0x70108UL +#define UV3H_EVENT_OCCURRED2_ALIAS_32 0x70108UL + /* ========================================================================= */ -/* UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 */ +/* UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 */ /* ========================================================================= */ -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 +#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL +#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x320130UL + +#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 +#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 #define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL -union uv2h_lb_bau_sb_activation_status_2_u { +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 +#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL + +union uvxh_lb_bau_sb_activation_status_2_u { unsigned long v; + struct uvxh_lb_bau_sb_activation_status_2_s { + unsigned long aux_error:64; /* RW */ + } sx; struct uv2h_lb_bau_sb_activation_status_2_s { unsigned long aux_error:64; /* RW */ - } s1; + } s2; + struct uv3h_lb_bau_sb_activation_status_2_s { + unsigned long aux_error:64; /* RW */ + } s3; }; /* ========================================================================= */ @@ -2073,5 +5249,87 @@ union uv1h_lb_target_physical_apic_id_mask_u { } s1; }; +/* ========================================================================= */ +/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR */ +/* ========================================================================= */ +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR 0x1603000UL + +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT 26 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_SHFT 46 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_SHFT 63 +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_ENABLE_MASK 0x8000000000000000UL + +union uv3h_rh_gam_mmioh_overlay_config0_mmr_u { + unsigned long v; + struct uv3h_rh_gam_mmioh_overlay_config0_mmr_s { + unsigned long 
rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s3;
+};
+
+/* ========================================================================= */
+/* UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR */
+/* ========================================================================= */
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR		0x1604000UL
+
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_SHFT	26
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_SHFT	46
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_SHFT	63
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_BASE_MASK	0x00003ffffc000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_M_IO_MASK	0x000fc00000000000UL
+#define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR_ENABLE_MASK	0x8000000000000000UL
+
+union uv3h_rh_gam_mmioh_overlay_config1_mmr_u {
+	unsigned long	v;
+	struct uv3h_rh_gam_mmioh_overlay_config1_mmr_s {
+		unsigned long	rsvd_0_25:26;
+		unsigned long	base:20;			/* RW */
+		unsigned long	m_io:6;				/* RW */
+		unsigned long	n_io:4;
+		unsigned long	rsvd_56_62:7;
+		unsigned long	enable:1;			/* RW */
+	} s3;
+};
+
+/* ========================================================================= */
+/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR */
+/* ========================================================================= */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR		0x1603800UL
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH	128
+
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_SHFT	0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_NASID_MASK	0x0000000000007fffUL
+
+union uv3h_rh_gam_mmioh_redirect_config0_mmr_u {
+	unsigned long	v;
+	struct uv3h_rh_gam_mmioh_redirect_config0_mmr_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
+	} s3;
+};
+
+/* ========================================================================= */
+/* UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR */
+/* ========================================================================= */
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR		0x1604800UL
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_DEPTH	128
+
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_SHFT	0
+#define UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR_NASID_MASK	0x0000000000007fffUL
+
+union uv3h_rh_gam_mmioh_redirect_config1_mmr_u {
+	unsigned long	v;
+	struct uv3h_rh_gam_mmioh_redirect_config1_mmr_s {
+		unsigned long	nasid:15;			/* RW */
+		unsigned long	rsvd_15_63:49;
+	} s3;
+};
+
 #endif /* _ASM_X86_UV_UV_MMRS_H */
-- cgit v1.1

From 526018bc5eccfe3177780f03d2aaba0efee40720 Mon Sep 17 00:00:00 2001
From: Mike Travis
Date: Mon, 11 Feb 2013 13:45:10 -0600
Subject: x86, uv, uv3: Update ACPI Check to include SGI UV3

Add UV3 to the exclusion list. Instead of adding every new series of
SGI UV systems, just check that oem_id has the prefix "SGI".

Signed-off-by: Mike Travis
Link: http://lkml.kernel.org/r/20130211194508.457937455@gulag1.americas.sgi.com
Acked-by: Russ Anderson
Reviewed-by: Dimitri Sivanich
Cc: Jiang Liu
Cc: Bjorn Helgaas
Cc: Yinghai Lu
Cc: Greg Kroah-Hartman
Signed-off-by: H. Peter Anvin
---
 arch/x86/pci/mmconfig-shared.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index fb29968..082e881 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -548,8 +548,7 @@ static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg,
 	if (cfg->address < 0xFFFFFFFF)
 		return 0;
 
-	if (!strcmp(mcfg->header.oem_id, "SGI") ||
-	    !strcmp(mcfg->header.oem_id, "SGI2"))
+	if (!strncmp(mcfg->header.oem_id, "SGI", 3))
 		return 0;
 
 	if (mcfg->header.revision >= 1) {
-- cgit v1.1
From 6edbd4714edd8af64ec6a1bb8d89d0cb2bbe671e Mon Sep 17 00:00:00 2001
From: Mike Travis
Date: Mon, 11 Feb 2013 13:45:11 -0600
Subject: x86, uv, uv3: Update Hub Info for SGI UV3

This patch updates the UV hub info for UV3. The "is_uv3_hub" and
"is_uvx_hub" (UV2 or UV3) functions are added, as well as the addresses
and sizes of the MMR regions for UV3.

Signed-off-by: Mike Travis
Link: http://lkml.kernel.org/r/20130211194508.610723192@gulag1.americas.sgi.com
Acked-by: Russ Anderson
Reviewed-by: Dimitri Sivanich
Signed-off-by: H. Peter Anvin
---
 arch/x86/include/asm/uv/uv_hub.h | 44 ++++++++++++++++++++++++++++++++--------
 1 file changed, 36 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 21f7385..2c32df9 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -5,7 +5,7 @@
  *
  * SGI UV architectural definitions
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
  */
 
 #ifndef _ASM_X86_UV_UV_HUB_H
@@ -175,6 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
  */
 #define UV1_HUB_REVISION_BASE		1
 #define UV2_HUB_REVISION_BASE		3
+#define UV3_HUB_REVISION_BASE		5
 
 static inline int is_uv1_hub(void)
 {
@@ -183,6 +184,23 @@ static inline int is_uv1_hub(void)
 
 static inline int is_uv2_hub(void)
 {
+	return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
+		(uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
+}
+
+static inline int is_uv3_hub(void)
+{
+	return uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE;
+}
+
+static inline int is_uv_hub(void)
+{
+	return uv_hub_info->hub_revision;
+}
+
+/* code common to uv2 and uv3 only */
+static inline int is_uvx_hub(void)
+{
 	return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE;
 }
 
@@ -230,14 +248,23 @@ union uvh_apicid {
 #define UV2_LOCAL_MMR_SIZE		(32UL * 1024 * 1024)
 #define UV2_GLOBAL_MMR32_SIZE		(32UL * 1024 * 1024)
 
-#define UV_LOCAL_MMR_BASE	(is_uv1_hub() ? UV1_LOCAL_MMR_BASE \
-				: UV2_LOCAL_MMR_BASE)
-#define UV_GLOBAL_MMR32_BASE	(is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE \
-				: UV2_GLOBAL_MMR32_BASE)
-#define UV_LOCAL_MMR_SIZE	(is_uv1_hub() ? UV1_LOCAL_MMR_SIZE :	\
-				UV2_LOCAL_MMR_SIZE)
+#define UV3_LOCAL_MMR_BASE		0xfa000000UL
+#define UV3_GLOBAL_MMR32_BASE		0xfc000000UL
+#define UV3_LOCAL_MMR_SIZE		(32UL * 1024 * 1024)
+#define UV3_GLOBAL_MMR32_SIZE		(32UL * 1024 * 1024)
+
+#define UV_LOCAL_MMR_BASE	(is_uv1_hub() ? UV1_LOCAL_MMR_BASE :	\
+				(is_uv2_hub() ? UV2_LOCAL_MMR_BASE :	\
+				UV3_LOCAL_MMR_BASE))
+#define UV_GLOBAL_MMR32_BASE	(is_uv1_hub() ? UV1_GLOBAL_MMR32_BASE :\
+				(is_uv2_hub() ? UV2_GLOBAL_MMR32_BASE :\
+				UV3_GLOBAL_MMR32_BASE))
+#define UV_LOCAL_MMR_SIZE	(is_uv1_hub() ? UV1_LOCAL_MMR_SIZE :	\
+				(is_uv2_hub() ? UV2_LOCAL_MMR_SIZE :	\
+				UV3_LOCAL_MMR_SIZE))
 #define UV_GLOBAL_MMR32_SIZE	(is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\
-				UV2_GLOBAL_MMR32_SIZE)
+				(is_uv2_hub() ? UV2_GLOBAL_MMR32_SIZE :\
+				UV3_GLOBAL_MMR32_SIZE))
 #define UV_GLOBAL_MMR64_BASE	(uv_hub_info->global_mmr_base)
 
 #define UV_GLOBAL_GRU_MMR_BASE	0x4000000
@@ -599,6 +626,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
  * 1 - UV1 rev 1.0 initial silicon
  * 2 - UV1 rev 2.0 production silicon
  * 3 - UV2 rev 1.0 initial silicon
+ * 5 - UV3 rev 1.0 initial silicon
  */
 static inline int uv_get_min_hub_revision_id(void)
 {
-- cgit v1.1
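[Note: the hub-revision tests added in the patch above classify a hub by comparing
uv_hub_info->hub_revision against per-generation base values (UV1 = 1, UV2 = 3,
UV3 = 5), each generation owning a half-open range. Below is a minimal user-space
sketch of that classification; the static hub_revision variable and the main()
harness are illustrative stand-ins for the kernel's per-CPU hub info, and the
is_uv1_hub() body assumes the "revisions below UV2 base" test the range layout
implies. It is not part of these patches.]

	#include <stdio.h>

	/* revision bases from uv_hub.h; each generation owns a half-open range */
	#define UV1_HUB_REVISION_BASE	1
	#define UV2_HUB_REVISION_BASE	3
	#define UV3_HUB_REVISION_BASE	5

	static int hub_revision;	/* mock of uv_hub_info->hub_revision */

	static int is_uv1_hub(void)
	{
		return hub_revision < UV2_HUB_REVISION_BASE;
	}

	static int is_uv2_hub(void)
	{
		return hub_revision >= UV2_HUB_REVISION_BASE &&
		       hub_revision < UV3_HUB_REVISION_BASE;
	}

	static int is_uv3_hub(void)
	{
		return hub_revision >= UV3_HUB_REVISION_BASE;
	}

	/* common to UV2 and UV3 ("UVX") */
	static int is_uvx_hub(void)
	{
		return hub_revision >= UV2_HUB_REVISION_BASE;
	}

	int main(void)
	{
		for (hub_revision = 1; hub_revision <= 6; hub_revision++)
			printf("rev %d: uv1=%d uv2=%d uv3=%d uvx=%d\n",
			       hub_revision, is_uv1_hub(), is_uv2_hub(),
			       is_uv3_hub(), is_uvx_hub());
		return 0;
	}

Running it shows which predicates fire for each revision value, mirroring the
UV1/UV2/UV3/UVX split the rest of this series relies on.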
From b15cc4a12bed8026bc70f469f514862b027b0c75 Mon Sep 17 00:00:00 2001
From: Mike Travis
Date: Mon, 11 Feb 2013 13:45:12 -0600
Subject: x86, uv, uv3: Update x2apic Support for SGI UV3

This patch adds support for the SGI UV3 hub to the common x2apic
functions. The primary changes account for the similarities between UV2
and UV3, which are encompassed within the "UVX" nomenclature. One
significant difference in UV3 is the handling of the MMIOH regions,
which are redirected to the target blade (the one with the device) in a
different manner. UV3 also has two MMIOH regions, for small and for
large BARs. This helps limit the amount of physical address space that
is removed from real memory for I/O in the maximum configuration of
64TB.

Signed-off-by: Mike Travis
Link: http://lkml.kernel.org/r/20130211194508.752924185@gulag1.americas.sgi.com
Acked-by: Russ Anderson
Reviewed-by: Dimitri Sivanich
Cc: Alexander Gordeev
Cc: Suresh Siddha
Cc: Michael S. Tsirkin
Cc: Steffen Persvold
Signed-off-by: H. Peter Anvin
---
 arch/x86/kernel/apic/x2apic_uv_x.c | 206 ++++++++++++++++++++++++++++++-------
 1 file changed, 171 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8cfade9..794f6eb 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -5,7 +5,7 @@
  *
  * SGI UV APIC functions (note: not an Intel compatible APIC)
  *
- * Copyright (C) 2007-2010 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2007-2013 Silicon Graphics, Inc. All rights reserved.
  */
 #include
 #include
@@ -91,10 +91,16 @@ static int __init early_get_pnodeid(void)
 	m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
 	uv_min_hub_revision_id = node_id.s.revision;
 
-	if (node_id.s.part_number == UV2_HUB_PART_NUMBER)
-		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
-	if (node_id.s.part_number == UV2_HUB_PART_NUMBER_X)
+	switch (node_id.s.part_number) {
+	case UV2_HUB_PART_NUMBER:
+	case UV2_HUB_PART_NUMBER_X:
 		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+		break;
+	case UV3_HUB_PART_NUMBER:
+	case UV3_HUB_PART_NUMBER_X:
+		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE - 1;
+		break;
+	}
 
 	uv_hub_info->hub_revision = uv_min_hub_revision_id;
 	pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
@@ -130,13 +136,16 @@ static void __init uv_set_apicid_hibit(void)
 
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-	int pnodeid, is_uv1, is_uv2;
+	int pnodeid, is_uv1, is_uv2, is_uv3;
 
 	is_uv1 = !strcmp(oem_id, "SGI");
 	is_uv2 = !strcmp(oem_id, "SGI2");
-	if (is_uv1 || is_uv2) {
+	is_uv3 = !strncmp(oem_id, "SGI3", 4);	/* there are varieties of UV3 */
+	if (is_uv1 || is_uv2 || is_uv3) {
 		uv_hub_info->hub_revision =
-			is_uv1 ? UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE;
+			(is_uv1 ? UV1_HUB_REVISION_BASE :
+			(is_uv2 ? UV2_HUB_REVISION_BASE :
+			UV3_HUB_REVISION_BASE));
 		pnodeid = early_get_pnodeid();
 		early_get_apic_pnode_shift();
 		x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
@@ -450,14 +459,17 @@ static __init void map_high(char *id, unsigned long base, int pshift,
 	paddr = base << pshift;
 	bytes = (1UL << bshift) * (max_pnode + 1);
-	printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
-			paddr + bytes);
+	if (!paddr) {
+		pr_info("UV: Map %s_HI base address NULL\n", id);
+		return;
+	}
+	pr_info("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
 
 	if (map_type == map_uc)
 		init_extra_mapping_uc(paddr, bytes);
 	else
 		init_extra_mapping_wb(paddr, bytes);
-
 }
+
 static __init void map_gru_high(int max_pnode)
 {
 	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
@@ -468,7 +480,8 @@ static __init void map_gru_high(int max_pnode)
 		map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
 		gru_start_paddr = ((u64)gru.s.base << shift);
 		gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
-
+	} else {
+		pr_info("UV: GRU disabled\n");
 	}
 }
@@ -480,23 +493,146 @@ static __init void map_mmr_high(int max_pnode)
 	mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
 	if (mmr.s.enable)
 		map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
+	else
+		pr_info("UV: MMR disabled\n");
+}
+
+/*
+ * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY
+ * and REDIRECT MMR regs are exactly the same on UV3.
+ */
+struct mmioh_config {
+	unsigned long overlay;
+	unsigned long redirect;
+	char *id;
+};
+
+static __initdata struct mmioh_config mmiohs[] = {
+	{
+		UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR,
+		UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR,
+		"MMIOH0"
+	},
+	{
+		UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR,
+		UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR,
+		"MMIOH1"
+	},
+};
+
+static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
+{
+	union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
+	unsigned long mmr;
+	unsigned long base;
+	int i, n, shift, m_io, max_io;
+	int nasid, lnasid, fi, li;
+	char *id;
+
+	id = mmiohs[index].id;
+	overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
+	pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n",
+		id, overlay.v, overlay.s3.base, overlay.s3.m_io);
+	if (!overlay.s3.enable) {
+		pr_info("UV: %s disabled\n", id);
+		return;
+	}
+
+	shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
+	base = (unsigned long)overlay.s3.base;
+	m_io = overlay.s3.m_io;
+	mmr = mmiohs[index].redirect;
+	n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
+	min_pnode *= 2;				/* convert to NASID */
+	max_pnode *= 2;
+	max_io = lnasid = fi = li = -1;
+
+	for (i = 0; i < n; i++) {
+		union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect;
+
+		redirect.v = uv_read_local_mmr(mmr + i * 8);
+		nasid = redirect.s3.nasid;
+		if (nasid < min_pnode || max_pnode < nasid)
+			nasid = -1;		/* invalid NASID */
+
+		if (nasid == lnasid) {
+			li = i;
+			if (i != n-1)		/* last entry check */
+				continue;
+		}
+
+		/* check if we have a cached (or last) redirect to print */
+		if (lnasid != -1 || (i == n-1 && nasid != -1)) {
+			unsigned long addr1, addr2;
+			int f, l;
+
+			if (lnasid == -1) {
+				f = l = i;
+				lnasid = nasid;
+			} else {
+				f = fi;
+				l = li;
+			}
+			addr1 = (base << shift) +
+				f * (unsigned long)(1 << m_io);
+			addr2 = (base << shift) +
+				(l + 1) * (unsigned long)(1 << m_io);
+			pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
+				id, fi, li, lnasid, addr1, addr2);
+			if (max_io < l)
+				max_io = l;
+		}
+		fi = li = i;
+		lnasid = nasid;
+	}
+
+	pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n",
+		id, base, shift, m_io, max_io);
+
+	if (max_io >= 0)
+		map_high(id, base, shift, m_io, max_io, map_uc);
 }
 
-static __init void map_mmioh_high(int max_pnode)
+static __init void map_mmioh_high(int min_pnode, int max_pnode)
 {
 	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
-	int shift;
+	unsigned long mmr, base;
+	int shift, enable, m_io, n_io;
 
-	mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
-	if (is_uv1_hub() && mmioh.s1.enable) {
-		shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
-		map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io,
-			max_pnode, map_uc);
+	if (is_uv3_hub()) {
+		/* Map both MMIOH Regions */
+		map_mmioh_high_uv3(0, min_pnode, max_pnode);
+		map_mmioh_high_uv3(1, min_pnode, max_pnode);
+		return;
 	}
-	if (is_uv2_hub() && mmioh.s2.enable) {
+
+	if (is_uv1_hub()) {
+		mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
+		shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+		mmioh.v = uv_read_local_mmr(mmr);
+		enable = !!mmioh.s1.enable;
+		base = mmioh.s1.base;
+		m_io = mmioh.s1.m_io;
+		n_io = mmioh.s1.n_io;
+	} else if (is_uv2_hub()) {
+		mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
 		shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
-		map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io,
-			max_pnode, map_uc);
+		mmioh.v = uv_read_local_mmr(mmr);
+		enable = !!mmioh.s2.enable;
+		base = mmioh.s2.base;
+		m_io = mmioh.s2.m_io;
+		n_io = mmioh.s2.n_io;
+	} else
+		return;
+
+	if (enable) {
+		max_pnode &= (1 << n_io) - 1;
+		pr_info(
+			"UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
+			base, shift, m_io, n_io, max_pnode);
+		map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
+	} else {
+		pr_info("UV: MMIOH disabled\n");
 	}
 }
@@ -724,42 +860,41 @@ void uv_nmi_init(void)
 void __init uv_system_init(void)
 {
 	union uvh_rh_gam_config_mmr_u m_n_config;
-	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
 	union uvh_node_id_u node_id;
 	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
-	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
-	int gnode_extra, max_pnode = 0;
+	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+	int gnode_extra, min_pnode = 999999, max_pnode = -1;
 	unsigned long mmr_base, present, paddr;
-	unsigned short pnode_mask, pnode_io_mask;
+	unsigned short pnode_mask;
+	char *hub = (is_uv1_hub() ? "UV1" :
+		    (is_uv2_hub() ? "UV2" :
+		    "UV3"));
 
-	printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2");
+	pr_info("UV: Found %s hub\n", hub);
 	map_low_mmrs();
 
 	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
 	m_val = m_n_config.s.m_skt;
 	n_val = m_n_config.s.n_skt;
-	mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
-	n_io = is_uv1_hub() ? mmioh.s1.n_io : mmioh.s2.n_io;
+	pnode_mask = (1 << n_val) - 1;
 	mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
 			~UV_MMR_ENABLE;
-	pnode_mask = (1 << n_val) - 1;
-	pnode_io_mask = (1 << n_io) - 1;
 
 	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
 	gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
 	gnode_upper = ((unsigned long)gnode_extra << m_val);
-	printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
-			n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
+	pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x\n",
+			n_val, m_val, pnode_mask, gnode_upper, gnode_extra);
 
-	printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
+	pr_info("UV: global MMR base 0x%lx\n", mmr_base);
 
 	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
 		uv_possible_blades +=
 		  hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
 
 	/* uv_num_possible_blades() is really the hub count */
-	printk(KERN_INFO "UV: Found %d blades, %d hubs\n",
+	pr_info("UV: Found %d blades, %d hubs\n",
 		is_uv1_hub() ? uv_num_possible_blades() :
 		(uv_num_possible_blades() + 1) / 2,
 		uv_num_possible_blades());
@@ -794,6 +929,7 @@ void __init uv_system_init(void)
 		uv_blade_info[blade].nr_possible_cpus = 0;
 		uv_blade_info[blade].nr_online_cpus = 0;
 		spin_lock_init(&uv_blade_info[blade].nmi_lock);
+		min_pnode = min(pnode, min_pnode);
 		max_pnode = max(pnode, max_pnode);
 		blade++;
 	}
@@ -856,7 +992,7 @@ void __init uv_system_init(void)
 
 	map_gru_high(max_pnode);
 	map_mmr_high(max_pnode);
-	map_mmioh_high(max_pnode & pnode_io_mask);
+	map_mmioh_high(min_pnode, max_pnode);
 
 	uv_cpu_init();
 	uv_scir_register_cpu_notifier();
-- cgit v1.1
mmioh.s1.n_io : mmioh.s2.n_io; + pnode_mask = (1 << n_val) - 1; mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; - pnode_mask = (1 << n_val) - 1; - pnode_io_mask = (1 << n_io) - 1; node_id.v = uv_read_local_mmr(UVH_NODE_ID); gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; gnode_upper = ((unsigned long)gnode_extra << m_val); - printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n", - n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask); + pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x\n", + n_val, m_val, pnode_mask, gnode_upper, gnode_extra); - printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); + pr_info("UV: global MMR base 0x%lx\n", mmr_base); for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) uv_possible_blades += hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); /* uv_num_possible_blades() is really the hub count */ - printk(KERN_INFO "UV: Found %d blades, %d hubs\n", + pr_info("UV: Found %d blades, %d hubs\n", is_uv1_hub() ? uv_num_possible_blades() : (uv_num_possible_blades() + 1) / 2, uv_num_possible_blades()); @@ -794,6 +929,7 @@ void __init uv_system_init(void) uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; spin_lock_init(&uv_blade_info[blade].nmi_lock); + min_pnode = min(pnode, min_pnode); max_pnode = max(pnode, max_pnode); blade++; } @@ -856,7 +992,7 @@ void __init uv_system_init(void) map_gru_high(max_pnode); map_mmr_high(max_pnode); - map_mmioh_high(max_pnode & pnode_io_mask); + map_mmioh_high(min_pnode, max_pnode); uv_cpu_init(); uv_scir_register_cpu_notifier(); -- cgit v1.1 From 0af6352045a3bf359a2d5f55965266d67d262b47 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 11 Feb 2013 13:45:13 -0600 Subject: x86, uv, uv3: Update Time Support for SGI UV3 This patch updates time support for the SGI UV3 hub. Since the UV2 and UV3 time support is identical, "is_uvx_hub" is used instead of having both "is_uv2_hub" and "is_uv3_hub". Signed-off-by: Mike Travis Link: http://lkml.kernel.org/r/20130211194508.893907185@gulag1.americas.sgi.com Acked-by: Russ Anderson Reviewed-by: Dimitri Sivanich Signed-off-by: H. Peter Anvin --- arch/x86/platform/uv/uv_time.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 5032e0d..98718f6 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -15,7 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2009-2013 Silicon Graphics, Inc. All Rights Reserved. * Copyright (c) Dimitri Sivanich */ #include @@ -102,9 +102,10 @@ static int uv_intr_pending(int pnode) if (is_uv1_hub()) return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & UV1H_EVENT_OCCURRED0_RTC1_MASK; - else - return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) & - UV2H_EVENT_OCCURRED2_RTC_1_MASK; + else if (is_uvx_hub()) + return uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) & + UVXH_EVENT_OCCURRED2_RTC_1_MASK; + return 0; } /* Setup interrupt and return non-zero if early expiration occurred. 
*/ @@ -122,8 +123,8 @@ static int uv_setup_intr(int cpu, u64 expires) uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, UV1H_EVENT_OCCURRED0_RTC1_MASK); else - uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS, - UV2H_EVENT_OCCURRED2_RTC_1_MASK); + uv_write_global_mmr64(pnode, UVXH_EVENT_OCCURRED2_ALIAS, + UVXH_EVENT_OCCURRED2_RTC_1_MASK); val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); -- cgit v1.1 From d924f947a44684796eee6fa488a9fe7876923c3b Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Mon, 11 Feb 2013 13:45:15 -0600 Subject: x86, uv, uv3: Trim MMR register definitions after code changes for SGI UV3 This patch trims the MMR register definitions after the updates for the SGI UV3 system have been applied. Note that because these definitions are automatically generated from the RTL we cannot control the length of the names. Therefore there are lines that exceed 80 characters. Signed-off-by: Mike Travis Link: http://lkml.kernel.org/r/20130211194509.173026880@gulag1.americas.sgi.com Acked-by: Russ Anderson Reviewed-by: Dimitri Sivanich Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uv/uv_mmrs.h | 2532 +------------------------------------ 1 file changed, 27 insertions(+), 2505 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index e1fa870..bd5f80e 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -73,7 +73,7 @@ * } sn; * }; * - * (GEN Flags: mflags_opt=c undefs=0 UV23=UVXH) + * (GEN Flags: mflags_opt= undefs=0 UV23=UVXH) */ #define UV_MMR_ENABLE (1UL << 63) @@ -92,64 +92,24 @@ /* UVH_BAU_DATA_BROADCAST */ /* ========================================================================= */ #define UVH_BAU_DATA_BROADCAST 0x61688UL -#define UV1H_BAU_DATA_BROADCAST 0x61688UL -#define UV2H_BAU_DATA_BROADCAST 0x61688UL -#define UV3H_BAU_DATA_BROADCAST 0x61688UL #define UVH_BAU_DATA_BROADCAST_32 0x440 -#define UV1H_BAU_DATA_BROADCAST_32 0x61688UL -#define UV2H_BAU_DATA_BROADCAST_32 0x61688UL -#define UV3H_BAU_DATA_BROADCAST_32 0x61688UL #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL -#define UV1H_BAU_DATA_BROADCAST_ENABLE_SHFT 0 -#define UV1H_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL - -#define UVXH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 -#define UVXH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL - -#define UV2H_BAU_DATA_BROADCAST_ENABLE_SHFT 0 -#define UV2H_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL - -#define UV3H_BAU_DATA_BROADCAST_ENABLE_SHFT 0 -#define UV3H_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL - union uvh_bau_data_broadcast_u { unsigned long v; struct uvh_bau_data_broadcast_s { unsigned long enable:1; /* RW */ unsigned long rsvd_1_63:63; } s; - struct uv1h_bau_data_broadcast_s { - unsigned long enable:1; /* RW */ - unsigned long rsvd_1_63:63; - } s1; - struct uvxh_bau_data_broadcast_s { - unsigned long enable:1; /* RW */ - unsigned long rsvd_1_63:63; - } sx; - struct uv2h_bau_data_broadcast_s { - unsigned long enable:1; /* RW */ - unsigned long rsvd_1_63:63; - } s2; - struct uv3h_bau_data_broadcast_s { - unsigned long enable:1; /* RW */ - unsigned long rsvd_1_63:63; - } s3; }; /* ========================================================================= */ /* UVH_BAU_DATA_CONFIG */ /* ========================================================================= */ #define UVH_BAU_DATA_CONFIG 0x61680UL 
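As a rough illustration of how the surviving common definitions are consumed (a sketch, not part of the patch; it assumes uv_read_local_mmr() as used in the hunks above, and a UVH_BAU_DATA_CONFIG_APIC_ID_SHFT of 32 alongside the mask shown here), a field can be pulled out of a raw MMR value either with the SHFT/MASK pair or through the union's bitfield view, and the two extractions must agree:

static unsigned long bau_data_config_apic_id(void)
{
	union uvh_bau_data_config_u cfg;

	cfg.v = uv_read_local_mmr(UVH_BAU_DATA_CONFIG);

	/* mask+shift extraction, equivalent to the bitfield view below */
	if (((cfg.v & UVH_BAU_DATA_CONFIG_APIC_ID_MASK) >>
	     UVH_BAU_DATA_CONFIG_APIC_ID_SHFT) != cfg.s.apic_id)
		pr_warn("UV: BAU_DATA_CONFIG field extraction mismatch\n");

	return cfg.s.apic_id;	/* bitfield view of bits 63:32 */
}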
-#define UV1H_BAU_DATA_CONFIG 0x61680UL -#define UV2H_BAU_DATA_CONFIG 0x61680UL -#define UV3H_BAU_DATA_CONFIG 0x61680UL #define UVH_BAU_DATA_CONFIG_32 0x438 -#define UV1H_BAU_DATA_CONFIG_32 0x61680UL -#define UV2H_BAU_DATA_CONFIG_32 0x61680UL -#define UV3H_BAU_DATA_CONFIG_32 0x61680UL #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 #define UVH_BAU_DATA_CONFIG_DM_SHFT 8 @@ -168,74 +128,6 @@ union uvh_bau_data_broadcast_u { #define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL #define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL -#define UV1H_BAU_DATA_CONFIG_VECTOR_SHFT 0 -#define UV1H_BAU_DATA_CONFIG_DM_SHFT 8 -#define UV1H_BAU_DATA_CONFIG_DESTMODE_SHFT 11 -#define UV1H_BAU_DATA_CONFIG_STATUS_SHFT 12 -#define UV1H_BAU_DATA_CONFIG_P_SHFT 13 -#define UV1H_BAU_DATA_CONFIG_T_SHFT 15 -#define UV1H_BAU_DATA_CONFIG_M_SHFT 16 -#define UV1H_BAU_DATA_CONFIG_APIC_ID_SHFT 32 -#define UV1H_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV1H_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL -#define UV1H_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV1H_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV1H_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL -#define UV1H_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL -#define UV1H_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL -#define UV1H_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UVXH_BAU_DATA_CONFIG_VECTOR_SHFT 0 -#define UVXH_BAU_DATA_CONFIG_DM_SHFT 8 -#define UVXH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 -#define UVXH_BAU_DATA_CONFIG_STATUS_SHFT 12 -#define UVXH_BAU_DATA_CONFIG_P_SHFT 13 -#define UVXH_BAU_DATA_CONFIG_T_SHFT 15 -#define UVXH_BAU_DATA_CONFIG_M_SHFT 16 -#define UVXH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 -#define UVXH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVXH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL -#define UVXH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVXH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVXH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL -#define UVXH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL -#define UVXH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL -#define UVXH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV2H_BAU_DATA_CONFIG_VECTOR_SHFT 0 -#define UV2H_BAU_DATA_CONFIG_DM_SHFT 8 -#define UV2H_BAU_DATA_CONFIG_DESTMODE_SHFT 11 -#define UV2H_BAU_DATA_CONFIG_STATUS_SHFT 12 -#define UV2H_BAU_DATA_CONFIG_P_SHFT 13 -#define UV2H_BAU_DATA_CONFIG_T_SHFT 15 -#define UV2H_BAU_DATA_CONFIG_M_SHFT 16 -#define UV2H_BAU_DATA_CONFIG_APIC_ID_SHFT 32 -#define UV2H_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV2H_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL -#define UV2H_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV2H_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV2H_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL -#define UV2H_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL -#define UV2H_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL -#define UV2H_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV3H_BAU_DATA_CONFIG_VECTOR_SHFT 0 -#define UV3H_BAU_DATA_CONFIG_DM_SHFT 8 -#define UV3H_BAU_DATA_CONFIG_DESTMODE_SHFT 11 -#define UV3H_BAU_DATA_CONFIG_STATUS_SHFT 12 -#define UV3H_BAU_DATA_CONFIG_P_SHFT 13 -#define UV3H_BAU_DATA_CONFIG_T_SHFT 15 -#define UV3H_BAU_DATA_CONFIG_M_SHFT 16 -#define UV3H_BAU_DATA_CONFIG_APIC_ID_SHFT 32 -#define UV3H_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV3H_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL -#define UV3H_BAU_DATA_CONFIG_DESTMODE_MASK 
0x0000000000000800UL -#define UV3H_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV3H_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL -#define UV3H_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL -#define UV3H_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL -#define UV3H_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - union uvh_bau_data_config_u { unsigned long v; struct uvh_bau_data_config_s { @@ -250,74 +142,19 @@ union uvh_bau_data_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; - struct uv1h_bau_data_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s1; - struct uvxh_bau_data_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } sx; - struct uv2h_bau_data_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s2; - struct uv3h_bau_data_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s3; }; /* ========================================================================= */ /* UVH_EVENT_OCCURRED0 */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0 0x70000UL -#define UV1H_EVENT_OCCURRED0 0x70000UL -#define UV2H_EVENT_OCCURRED0 0x70000UL -#define UV3H_EVENT_OCCURRED0 0x70000UL #define UVH_EVENT_OCCURRED0_32 0x5e8 -#define UV1H_EVENT_OCCURRED0_32 0x70000UL -#define UV2H_EVENT_OCCURRED0_32 0x70000UL -#define UV3H_EVENT_OCCURRED0_32 0x70000UL #define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 #define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 #define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL #define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL -#define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 #define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 #define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 #define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 @@ -328,7 +165,6 @@ union uvh_bau_data_config_u { #define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 #define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 #define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 -#define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 #define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 #define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 #define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 @@ -374,7 +210,6 @@ union uvh_bau_data_config_u { #define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 #define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 -#define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL #define 
UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL #define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL #define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL @@ -385,7 +220,6 @@ union uvh_bau_data_config_u { #define UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL #define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL #define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL -#define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL #define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL #define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL #define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL @@ -432,7 +266,6 @@ union uvh_bau_data_config_u { #define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL #define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL -#define UVXH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 #define UVXH_EVENT_OCCURRED0_QP_HCERR_SHFT 1 #define UVXH_EVENT_OCCURRED0_RH_HCERR_SHFT 2 #define UVXH_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 @@ -443,7 +276,6 @@ union uvh_bau_data_config_u { #define UVXH_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 #define UVXH_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 #define UVXH_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 -#define UVXH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 @@ -491,7 +323,6 @@ union uvh_bau_data_config_u { #define UVXH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 #define UVXH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 #define UVXH_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 -#define UVXH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL #define UVXH_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL #define UVXH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL #define UVXH_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL @@ -502,7 +333,6 @@ union uvh_bau_data_config_u { #define UVXH_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL #define UVXH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL #define UVXH_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL -#define UVXH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL #define UVXH_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL #define UVXH_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL #define UVXH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL @@ -551,244 +381,6 @@ union uvh_bau_data_config_u { #define UVXH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL #define UVXH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL -#define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 -#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 -#define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 -#define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 -#define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 -#define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 -#define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 -#define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 -#define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 -#define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 -#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 -#define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 -#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 -#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 -#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 -#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 -#define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 -#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 -#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 -#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 
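The payoff of the UVXH layer is visible back in the uv_time.c hunk: where UV2 and UV3 agree, a single UVXH test replaces the per-hub pair that these UV2H/UV3H definitions would otherwise require. A minimal sketch of that dispatch pattern, reusing only the accessors and masks quoted earlier in this series (illustrative, not part of the patch):

static int rtc1_event_pending(int pnode)
{
	/* UV1 keeps RTC1 in EVENT_OCCURRED0; UV2 and UV3 share EVENT_OCCURRED2 */
	if (is_uv1_hub())
		return !!(uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) &
			  UV1H_EVENT_OCCURRED0_RTC1_MASK);
	if (is_uvx_hub())
		return !!(uv_read_global_mmr64(pnode, UVXH_EVENT_OCCURRED2) &
			  UVXH_EVENT_OCCURRED2_RTC_1_MASK);
	return 0;
}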
-#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 -#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 -#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 -#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 -#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 -#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 -#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 -#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 -#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 -#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 -#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 -#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 -#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 -#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 -#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 -#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 -#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 -#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 -#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 -#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 -#define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL -#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL -#define UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL -#define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL -#define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL -#define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL -#define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL -#define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL -#define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL -#define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL -#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL -#define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL -#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL -#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL -#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL -#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL -#define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL -#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL -#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL -#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL -#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL -#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL -#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL -#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL -#define 
UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL -#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL -#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL -#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL -#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL -#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL -#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL -#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL -#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL -#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL -#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL -#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL -#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL -#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL -#define UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL -#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL -#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL - -#define UV3H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 -#define UV3H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 -#define UV3H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 -#define UV3H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 -#define UV3H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 -#define UV3H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 -#define UV3H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 -#define UV3H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 -#define UV3H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 -#define UV3H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 -#define UV3H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 -#define UV3H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 -#define UV3H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 -#define UV3H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 -#define UV3H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 -#define UV3H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 -#define UV3H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 -#define UV3H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 -#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 -#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 -#define UV3H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 -#define UV3H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 -#define UV3H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 -#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 -#define 
UV3H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 -#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 -#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 -#define UV3H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 -#define UV3H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 -#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 -#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 -#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 -#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 -#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 -#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 -#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 -#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 -#define UV3H_EVENT_OCCURRED0_IPI_INT_SHFT 53 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 -#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 -#define UV3H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 -#define UV3H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL -#define UV3H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL -#define UV3H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL -#define UV3H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL -#define UV3H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL -#define UV3H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL -#define UV3H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL -#define UV3H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL -#define UV3H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL -#define UV3H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL -#define UV3H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL -#define UV3H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL -#define UV3H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL -#define UV3H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL -#define UV3H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL -#define UV3H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL -#define UV3H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL -#define UV3H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL -#define UV3H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL -#define UV3H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL -#define UV3H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL -#define UV3H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL -#define UV3H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL -#define UV3H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL -#define UV3H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL -#define UV3H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL -#define UV3H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL -#define 
UV3H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL -#define UV3H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL -#define UV3H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL -#define UV3H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL -#define UV3H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL -#define UV3H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL -#define UV3H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL -#define UV3H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL -#define UV3H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL -#define UV3H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL -#define UV3H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL -#define UV3H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL -#define UV3H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL -#define UV3H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL - union uvh_event_occurred0_u { unsigned long v; struct uvh_event_occurred0_s { @@ -797,66 +389,6 @@ union uvh_event_occurred0_u { unsigned long rh_aoerr0:1; /* RW, W1C */ unsigned long rsvd_12_63:52; } s; - struct uv1h_event_occurred0_s { - unsigned long lb_hcerr:1; /* RW, W1C */ - unsigned long gr0_hcerr:1; /* RW, W1C */ - unsigned long gr1_hcerr:1; /* RW, W1C */ - unsigned long lh_hcerr:1; /* RW, W1C */ - unsigned long rh_hcerr:1; /* RW, W1C */ - unsigned long xn_hcerr:1; /* RW, W1C */ - unsigned long si_hcerr:1; /* RW, W1C */ - unsigned long lb_aoerr0:1; /* RW, W1C */ - unsigned long gr0_aoerr0:1; /* RW, W1C */ - unsigned long gr1_aoerr0:1; /* RW, W1C */ - unsigned long lh_aoerr0:1; /* RW, W1C */ - unsigned long rh_aoerr0:1; /* RW, W1C */ - unsigned long xn_aoerr0:1; /* RW, W1C */ - unsigned long si_aoerr0:1; /* RW, W1C */ - unsigned long lb_aoerr1:1; /* RW, W1C */ - unsigned long gr0_aoerr1:1; /* RW, W1C */ - unsigned long gr1_aoerr1:1; /* RW, W1C */ - unsigned long lh_aoerr1:1; /* RW, W1C */ - unsigned long rh_aoerr1:1; /* RW, W1C */ - unsigned long xn_aoerr1:1; /* RW, W1C */ - unsigned long si_aoerr1:1; /* RW, W1C */ - unsigned long rh_vpi_int:1; /* RW, W1C */ - unsigned long system_shutdown_int:1; /* RW, W1C */ - unsigned long lb_irq_int_0:1; /* RW, W1C */ - unsigned long lb_irq_int_1:1; /* RW, W1C */ - unsigned long lb_irq_int_2:1; /* RW, 
W1C */ - unsigned long lb_irq_int_3:1; /* RW, W1C */ - unsigned long lb_irq_int_4:1; /* RW, W1C */ - unsigned long lb_irq_int_5:1; /* RW, W1C */ - unsigned long lb_irq_int_6:1; /* RW, W1C */ - unsigned long lb_irq_int_7:1; /* RW, W1C */ - unsigned long lb_irq_int_8:1; /* RW, W1C */ - unsigned long lb_irq_int_9:1; /* RW, W1C */ - unsigned long lb_irq_int_10:1; /* RW, W1C */ - unsigned long lb_irq_int_11:1; /* RW, W1C */ - unsigned long lb_irq_int_12:1; /* RW, W1C */ - unsigned long lb_irq_int_13:1; /* RW, W1C */ - unsigned long lb_irq_int_14:1; /* RW, W1C */ - unsigned long lb_irq_int_15:1; /* RW, W1C */ - unsigned long l1_nmi_int:1; /* RW, W1C */ - unsigned long stop_clock:1; /* RW, W1C */ - unsigned long asic_to_l1:1; /* RW, W1C */ - unsigned long l1_to_asic:1; /* RW, W1C */ - unsigned long ltc_int:1; /* RW, W1C */ - unsigned long la_seq_trigger:1; /* RW, W1C */ - unsigned long ipi_int:1; /* RW, W1C */ - unsigned long extio_int0:1; /* RW, W1C */ - unsigned long extio_int1:1; /* RW, W1C */ - unsigned long extio_int2:1; /* RW, W1C */ - unsigned long extio_int3:1; /* RW, W1C */ - unsigned long profile_int:1; /* RW, W1C */ - unsigned long rtc0:1; /* RW, W1C */ - unsigned long rtc1:1; /* RW, W1C */ - unsigned long rtc2:1; /* RW, W1C */ - unsigned long rtc3:1; /* RW, W1C */ - unsigned long bau_data:1; /* RW, W1C */ - unsigned long power_management_req:1; /* RW, W1C */ - unsigned long rsvd_57_63:7; - } s1; struct uvxh_event_occurred0_s { unsigned long lb_hcerr:1; /* RW */ unsigned long qp_hcerr:1; /* RW */ @@ -919,152 +451,19 @@ union uvh_event_occurred0_u { unsigned long profile_int:1; /* RW */ unsigned long rsvd_59_63:5; } sx; - struct uv2h_event_occurred0_s { - unsigned long lb_hcerr:1; /* RW */ - unsigned long qp_hcerr:1; /* RW */ - unsigned long rh_hcerr:1; /* RW */ - unsigned long lh0_hcerr:1; /* RW */ - unsigned long lh1_hcerr:1; /* RW */ - unsigned long gr0_hcerr:1; /* RW */ - unsigned long gr1_hcerr:1; /* RW */ - unsigned long ni0_hcerr:1; /* RW */ - unsigned long ni1_hcerr:1; /* RW */ - unsigned long lb_aoerr0:1; /* RW */ - unsigned long qp_aoerr0:1; /* RW */ - unsigned long rh_aoerr0:1; /* RW */ - unsigned long lh0_aoerr0:1; /* RW */ - unsigned long lh1_aoerr0:1; /* RW */ - unsigned long gr0_aoerr0:1; /* RW */ - unsigned long gr1_aoerr0:1; /* RW */ - unsigned long xb_aoerr0:1; /* RW */ - unsigned long rt_aoerr0:1; /* RW */ - unsigned long ni0_aoerr0:1; /* RW */ - unsigned long ni1_aoerr0:1; /* RW */ - unsigned long lb_aoerr1:1; /* RW */ - unsigned long qp_aoerr1:1; /* RW */ - unsigned long rh_aoerr1:1; /* RW */ - unsigned long lh0_aoerr1:1; /* RW */ - unsigned long lh1_aoerr1:1; /* RW */ - unsigned long gr0_aoerr1:1; /* RW */ - unsigned long gr1_aoerr1:1; /* RW */ - unsigned long xb_aoerr1:1; /* RW */ - unsigned long rt_aoerr1:1; /* RW */ - unsigned long ni0_aoerr1:1; /* RW */ - unsigned long ni1_aoerr1:1; /* RW */ - unsigned long system_shutdown_int:1; /* RW */ - unsigned long lb_irq_int_0:1; /* RW */ - unsigned long lb_irq_int_1:1; /* RW */ - unsigned long lb_irq_int_2:1; /* RW */ - unsigned long lb_irq_int_3:1; /* RW */ - unsigned long lb_irq_int_4:1; /* RW */ - unsigned long lb_irq_int_5:1; /* RW */ - unsigned long lb_irq_int_6:1; /* RW */ - unsigned long lb_irq_int_7:1; /* RW */ - unsigned long lb_irq_int_8:1; /* RW */ - unsigned long lb_irq_int_9:1; /* RW */ - unsigned long lb_irq_int_10:1; /* RW */ - unsigned long lb_irq_int_11:1; /* RW */ - unsigned long lb_irq_int_12:1; /* RW */ - unsigned long lb_irq_int_13:1; /* RW */ - unsigned long lb_irq_int_14:1; /* RW */ - unsigned 
long lb_irq_int_15:1; /* RW */ - unsigned long l1_nmi_int:1; /* RW */ - unsigned long stop_clock:1; /* RW */ - unsigned long asic_to_l1:1; /* RW */ - unsigned long l1_to_asic:1; /* RW */ - unsigned long la_seq_trigger:1; /* RW */ - unsigned long ipi_int:1; /* RW */ - unsigned long extio_int0:1; /* RW */ - unsigned long extio_int1:1; /* RW */ - unsigned long extio_int2:1; /* RW */ - unsigned long extio_int3:1; /* RW */ - unsigned long profile_int:1; /* RW */ - unsigned long rsvd_59_63:5; - } s2; - struct uv3h_event_occurred0_s { - unsigned long lb_hcerr:1; /* RW */ - unsigned long qp_hcerr:1; /* RW */ - unsigned long rh_hcerr:1; /* RW */ - unsigned long lh0_hcerr:1; /* RW */ - unsigned long lh1_hcerr:1; /* RW */ - unsigned long gr0_hcerr:1; /* RW */ - unsigned long gr1_hcerr:1; /* RW */ - unsigned long ni0_hcerr:1; /* RW */ - unsigned long ni1_hcerr:1; /* RW */ - unsigned long lb_aoerr0:1; /* RW */ - unsigned long qp_aoerr0:1; /* RW */ - unsigned long rh_aoerr0:1; /* RW */ - unsigned long lh0_aoerr0:1; /* RW */ - unsigned long lh1_aoerr0:1; /* RW */ - unsigned long gr0_aoerr0:1; /* RW */ - unsigned long gr1_aoerr0:1; /* RW */ - unsigned long xb_aoerr0:1; /* RW */ - unsigned long rt_aoerr0:1; /* RW */ - unsigned long ni0_aoerr0:1; /* RW */ - unsigned long ni1_aoerr0:1; /* RW */ - unsigned long lb_aoerr1:1; /* RW */ - unsigned long qp_aoerr1:1; /* RW */ - unsigned long rh_aoerr1:1; /* RW */ - unsigned long lh0_aoerr1:1; /* RW */ - unsigned long lh1_aoerr1:1; /* RW */ - unsigned long gr0_aoerr1:1; /* RW */ - unsigned long gr1_aoerr1:1; /* RW */ - unsigned long xb_aoerr1:1; /* RW */ - unsigned long rt_aoerr1:1; /* RW */ - unsigned long ni0_aoerr1:1; /* RW */ - unsigned long ni1_aoerr1:1; /* RW */ - unsigned long system_shutdown_int:1; /* RW */ - unsigned long lb_irq_int_0:1; /* RW */ - unsigned long lb_irq_int_1:1; /* RW */ - unsigned long lb_irq_int_2:1; /* RW */ - unsigned long lb_irq_int_3:1; /* RW */ - unsigned long lb_irq_int_4:1; /* RW */ - unsigned long lb_irq_int_5:1; /* RW */ - unsigned long lb_irq_int_6:1; /* RW */ - unsigned long lb_irq_int_7:1; /* RW */ - unsigned long lb_irq_int_8:1; /* RW */ - unsigned long lb_irq_int_9:1; /* RW */ - unsigned long lb_irq_int_10:1; /* RW */ - unsigned long lb_irq_int_11:1; /* RW */ - unsigned long lb_irq_int_12:1; /* RW */ - unsigned long lb_irq_int_13:1; /* RW */ - unsigned long lb_irq_int_14:1; /* RW */ - unsigned long lb_irq_int_15:1; /* RW */ - unsigned long l1_nmi_int:1; /* RW */ - unsigned long stop_clock:1; /* RW */ - unsigned long asic_to_l1:1; /* RW */ - unsigned long l1_to_asic:1; /* RW */ - unsigned long la_seq_trigger:1; /* RW */ - unsigned long ipi_int:1; /* RW */ - unsigned long extio_int0:1; /* RW */ - unsigned long extio_int1:1; /* RW */ - unsigned long extio_int2:1; /* RW */ - unsigned long extio_int3:1; /* RW */ - unsigned long profile_int:1; /* RW */ - unsigned long rsvd_59_63:5; - } s3; }; /* ========================================================================= */ /* UVH_EVENT_OCCURRED0_ALIAS */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0_ALIAS 0x70008UL -#define UV1H_EVENT_OCCURRED0_ALIAS 0x70008UL -#define UV2H_EVENT_OCCURRED0_ALIAS 0x70008UL -#define UV3H_EVENT_OCCURRED0_ALIAS 0x70008UL #define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 -#define UV1H_EVENT_OCCURRED0_ALIAS_32 0x70008UL -#define UV2H_EVENT_OCCURRED0_ALIAS_32 0x70008UL -#define UV3H_EVENT_OCCURRED0_ALIAS_32 0x70008UL /* ========================================================================= */ /* 
UVH_GR0_TLB_INT0_CONFIG */ /* ========================================================================= */ #define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL -#define UV1H_GR0_TLB_INT0_CONFIG 0x61b00UL -#define UV2H_GR0_TLB_INT0_CONFIG 0x61b00UL -#define UV3H_GR0_TLB_INT0_CONFIG 0x61b00UL #define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 #define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 @@ -1083,74 +482,6 @@ union uvh_event_occurred0_u { #define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL -#define UV1H_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UV1H_GR0_TLB_INT0_CONFIG_DM_SHFT 8 -#define UV1H_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UV1H_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UV1H_GR0_TLB_INT0_CONFIG_P_SHFT 13 -#define UV1H_GR0_TLB_INT0_CONFIG_T_SHFT 15 -#define UV1H_GR0_TLB_INT0_CONFIG_M_SHFT 16 -#define UV1H_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UV1H_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV1H_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UV1H_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV1H_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV1H_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UV1H_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UV1H_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UV1H_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UVXH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 -#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UVXH_GR0_TLB_INT0_CONFIG_P_SHFT 13 -#define UVXH_GR0_TLB_INT0_CONFIG_T_SHFT 15 -#define UVXH_GR0_TLB_INT0_CONFIG_M_SHFT 16 -#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UVXH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVXH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UVXH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVXH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVXH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UVXH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UVXH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UVXH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV2H_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UV2H_GR0_TLB_INT0_CONFIG_DM_SHFT 8 -#define UV2H_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UV2H_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UV2H_GR0_TLB_INT0_CONFIG_P_SHFT 13 -#define UV2H_GR0_TLB_INT0_CONFIG_T_SHFT 15 -#define UV2H_GR0_TLB_INT0_CONFIG_M_SHFT 16 -#define UV2H_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UV2H_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV2H_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UV2H_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV2H_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV2H_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UV2H_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UV2H_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UV2H_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV3H_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UV3H_GR0_TLB_INT0_CONFIG_DM_SHFT 8 -#define UV3H_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UV3H_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UV3H_GR0_TLB_INT0_CONFIG_P_SHFT 13 -#define UV3H_GR0_TLB_INT0_CONFIG_T_SHFT 15 -#define 
UV3H_GR0_TLB_INT0_CONFIG_M_SHFT 16 -#define UV3H_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UV3H_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV3H_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UV3H_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV3H_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV3H_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UV3H_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UV3H_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UV3H_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - union uvh_gr0_tlb_int0_config_u { unsigned long v; struct uvh_gr0_tlb_int0_config_s { @@ -1165,63 +496,12 @@ union uvh_gr0_tlb_int0_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; - struct uv1h_gr0_tlb_int0_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s1; - struct uvxh_gr0_tlb_int0_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } sx; - struct uv2h_gr0_tlb_int0_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s2; - struct uv3h_gr0_tlb_int0_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s3; }; /* ========================================================================= */ /* UVH_GR0_TLB_INT1_CONFIG */ /* ========================================================================= */ #define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL -#define UV1H_GR0_TLB_INT1_CONFIG 0x61b40UL -#define UV2H_GR0_TLB_INT1_CONFIG 0x61b40UL -#define UV3H_GR0_TLB_INT1_CONFIG 0x61b40UL #define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 #define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 @@ -1240,74 +520,6 @@ union uvh_gr0_tlb_int0_config_u { #define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL -#define UV1H_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 -#define UV1H_GR0_TLB_INT1_CONFIG_DM_SHFT 8 -#define UV1H_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 -#define UV1H_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 -#define UV1H_GR0_TLB_INT1_CONFIG_P_SHFT 13 -#define UV1H_GR0_TLB_INT1_CONFIG_T_SHFT 15 -#define UV1H_GR0_TLB_INT1_CONFIG_M_SHFT 16 -#define UV1H_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UV1H_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV1H_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UV1H_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV1H_GR0_TLB_INT1_CONFIG_STATUS_MASK 
0x0000000000001000UL -#define UV1H_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UV1H_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UV1H_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UV1H_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 -#define UVXH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 -#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 -#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 -#define UVXH_GR0_TLB_INT1_CONFIG_P_SHFT 13 -#define UVXH_GR0_TLB_INT1_CONFIG_T_SHFT 15 -#define UVXH_GR0_TLB_INT1_CONFIG_M_SHFT 16 -#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UVXH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVXH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UVXH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVXH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVXH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UVXH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UVXH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UVXH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV2H_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 -#define UV2H_GR0_TLB_INT1_CONFIG_DM_SHFT 8 -#define UV2H_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 -#define UV2H_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 -#define UV2H_GR0_TLB_INT1_CONFIG_P_SHFT 13 -#define UV2H_GR0_TLB_INT1_CONFIG_T_SHFT 15 -#define UV2H_GR0_TLB_INT1_CONFIG_M_SHFT 16 -#define UV2H_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UV2H_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV2H_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UV2H_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV2H_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV2H_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UV2H_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UV2H_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UV2H_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV3H_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 -#define UV3H_GR0_TLB_INT1_CONFIG_DM_SHFT 8 -#define UV3H_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 -#define UV3H_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 -#define UV3H_GR0_TLB_INT1_CONFIG_P_SHFT 13 -#define UV3H_GR0_TLB_INT1_CONFIG_T_SHFT 15 -#define UV3H_GR0_TLB_INT1_CONFIG_M_SHFT 16 -#define UV3H_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UV3H_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV3H_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UV3H_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV3H_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV3H_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UV3H_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UV3H_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UV3H_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - union uvh_gr0_tlb_int1_config_u { unsigned long v; struct uvh_gr0_tlb_int1_config_s { @@ -1322,54 +534,6 @@ union uvh_gr0_tlb_int1_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; - struct uv1h_gr0_tlb_int1_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s1; - struct 
uvxh_gr0_tlb_int1_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } sx; - struct uv2h_gr0_tlb_int1_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s2; - struct uv3h_gr0_tlb_int1_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s3; }; /* ========================================================================= */ @@ -1742,9 +906,6 @@ union uvh_gr0_tlb_mmr_read_data_lo_u { /* UVH_GR1_TLB_INT0_CONFIG */ /* ========================================================================= */ #define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV1H_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV2H_GR1_TLB_INT0_CONFIG 0x61f00UL -#define UV3H_GR1_TLB_INT0_CONFIG 0x61f00UL #define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 #define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 @@ -1763,74 +924,6 @@ union uvh_gr0_tlb_mmr_read_data_lo_u { #define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL #define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL -#define UV1H_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UV1H_GR1_TLB_INT0_CONFIG_DM_SHFT 8 -#define UV1H_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UV1H_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UV1H_GR1_TLB_INT0_CONFIG_P_SHFT 13 -#define UV1H_GR1_TLB_INT0_CONFIG_T_SHFT 15 -#define UV1H_GR1_TLB_INT0_CONFIG_M_SHFT 16 -#define UV1H_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UV1H_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV1H_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UV1H_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV1H_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV1H_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UV1H_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UV1H_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UV1H_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UVXH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 -#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UVXH_GR1_TLB_INT0_CONFIG_P_SHFT 13 -#define UVXH_GR1_TLB_INT0_CONFIG_T_SHFT 15 -#define UVXH_GR1_TLB_INT0_CONFIG_M_SHFT 16 -#define UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UVXH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVXH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UVXH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVXH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVXH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UVXH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UVXH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define 
UVXH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV2H_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UV2H_GR1_TLB_INT0_CONFIG_DM_SHFT 8 -#define UV2H_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UV2H_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UV2H_GR1_TLB_INT0_CONFIG_P_SHFT 13 -#define UV2H_GR1_TLB_INT0_CONFIG_T_SHFT 15 -#define UV2H_GR1_TLB_INT0_CONFIG_M_SHFT 16 -#define UV2H_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UV2H_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV2H_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UV2H_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV2H_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV2H_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UV2H_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UV2H_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UV2H_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV3H_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 -#define UV3H_GR1_TLB_INT0_CONFIG_DM_SHFT 8 -#define UV3H_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 -#define UV3H_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 -#define UV3H_GR1_TLB_INT0_CONFIG_P_SHFT 13 -#define UV3H_GR1_TLB_INT0_CONFIG_T_SHFT 15 -#define UV3H_GR1_TLB_INT0_CONFIG_M_SHFT 16 -#define UV3H_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 -#define UV3H_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV3H_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL -#define UV3H_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV3H_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV3H_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL -#define UV3H_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL -#define UV3H_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL -#define UV3H_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - union uvh_gr1_tlb_int0_config_u { unsigned long v; struct uvh_gr1_tlb_int0_config_s { @@ -1845,63 +938,12 @@ union uvh_gr1_tlb_int0_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; - struct uv1h_gr1_tlb_int0_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s1; - struct uvxh_gr1_tlb_int0_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } sx; - struct uv2h_gr1_tlb_int0_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s2; - struct uv3h_gr1_tlb_int0_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s3; }; /* 
========================================================================= */
/* UVH_GR1_TLB_INT1_CONFIG */
/* ========================================================================= */
#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV1H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV2H_GR1_TLB_INT1_CONFIG 0x61f40UL
-#define UV3H_GR1_TLB_INT1_CONFIG 0x61f40UL

#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
@@ -1920,74 +962,6 @@ union uvh_gr1_tlb_int0_config_u {
#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL

-#define UV1H_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UV1H_GR1_TLB_INT1_CONFIG_DM_SHFT 8
-#define UV1H_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UV1H_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UV1H_GR1_TLB_INT1_CONFIG_P_SHFT 13
-#define UV1H_GR1_TLB_INT1_CONFIG_T_SHFT 15
-#define UV1H_GR1_TLB_INT1_CONFIG_M_SHFT 16
-#define UV1H_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UV1H_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UV1H_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
-#define UV1H_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UV1H_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UV1H_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
-#define UV1H_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
-#define UV1H_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
-#define UV1H_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
-
-#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UVXH_GR1_TLB_INT1_CONFIG_DM_SHFT 8
-#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UVXH_GR1_TLB_INT1_CONFIG_P_SHFT 13
-#define UVXH_GR1_TLB_INT1_CONFIG_T_SHFT 15
-#define UVXH_GR1_TLB_INT1_CONFIG_M_SHFT 16
-#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UVXH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UVXH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
-#define UVXH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UVXH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UVXH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
-#define UVXH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
-#define UVXH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
-#define UVXH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
-
-#define UV2H_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UV2H_GR1_TLB_INT1_CONFIG_DM_SHFT 8
-#define UV2H_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UV2H_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UV2H_GR1_TLB_INT1_CONFIG_P_SHFT 13
-#define UV2H_GR1_TLB_INT1_CONFIG_T_SHFT 15
-#define UV2H_GR1_TLB_INT1_CONFIG_M_SHFT 16
-#define UV2H_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32
-#define UV2H_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL
-#define UV2H_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL
-#define UV2H_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL
-#define UV2H_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL
-#define UV2H_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL
-#define UV2H_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL
-#define UV2H_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL
-#define UV2H_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL
-
-#define UV3H_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0
-#define UV3H_GR1_TLB_INT1_CONFIG_DM_SHFT 8
-#define UV3H_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11
-#define UV3H_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12
-#define UV3H_GR1_TLB_INT1_CONFIG_P_SHFT 13
-#define UV3H_GR1_TLB_INT1_CONFIG_T_SHFT 15 -#define UV3H_GR1_TLB_INT1_CONFIG_M_SHFT 16 -#define UV3H_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 -#define UV3H_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV3H_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL -#define UV3H_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV3H_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV3H_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL -#define UV3H_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL -#define UV3H_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL -#define UV3H_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - union uvh_gr1_tlb_int1_config_u { unsigned long v; struct uvh_gr1_tlb_int1_config_s { @@ -2002,54 +976,6 @@ union uvh_gr1_tlb_int1_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; - struct uv1h_gr1_tlb_int1_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s1; - struct uvxh_gr1_tlb_int1_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } sx; - struct uv2h_gr1_tlb_int1_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s2; - struct uv3h_gr1_tlb_int1_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s3; }; /* ========================================================================= */ @@ -2422,56 +1348,22 @@ union uvh_gr1_tlb_mmr_read_data_lo_u { /* UVH_INT_CMPB */ /* ========================================================================= */ #define UVH_INT_CMPB 0x22080UL -#define UV1H_INT_CMPB 0x22080UL -#define UV2H_INT_CMPB 0x22080UL -#define UV3H_INT_CMPB 0x22080UL #define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 #define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL -#define UV1H_INT_CMPB_REAL_TIME_CMPB_SHFT 0 -#define UV1H_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL - -#define UVXH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 -#define UVXH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL - -#define UV2H_INT_CMPB_REAL_TIME_CMPB_SHFT 0 -#define UV2H_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL - -#define UV3H_INT_CMPB_REAL_TIME_CMPB_SHFT 0 -#define UV3H_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL - union uvh_int_cmpb_u { unsigned long v; struct uvh_int_cmpb_s { unsigned long real_time_cmpb:56; /* RW */ unsigned long rsvd_56_63:8; } s; - struct uv1h_int_cmpb_s { - unsigned long real_time_cmpb:56; /* RW */ - unsigned long rsvd_56_63:8; - } s1; - struct uvxh_int_cmpb_s { - 
unsigned long real_time_cmpb:56; /* RW */ - unsigned long rsvd_56_63:8; - } sx; - struct uv2h_int_cmpb_s { - unsigned long real_time_cmpb:56; /* RW */ - unsigned long rsvd_56_63:8; - } s2; - struct uv3h_int_cmpb_s { - unsigned long real_time_cmpb:56; /* RW */ - unsigned long rsvd_56_63:8; - } s3; }; /* ========================================================================= */ /* UVH_INT_CMPC */ /* ========================================================================= */ #define UVH_INT_CMPC 0x22100UL -#define UV1H_INT_CMPC 0x22100UL -#define UV2H_INT_CMPC 0x22100UL -#define UV3H_INT_CMPC 0x22100UL #define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 #define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL @@ -2479,43 +1371,18 @@ union uvh_int_cmpb_u { #define UVXH_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 #define UVXH_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL -#define UV2H_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 -#define UV2H_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL - -#define UV3H_INT_CMPC_REAL_TIME_CMP_2_SHFT 0 -#define UV3H_INT_CMPC_REAL_TIME_CMP_2_MASK 0x00ffffffffffffffUL - union uvh_int_cmpc_u { unsigned long v; struct uvh_int_cmpc_s { unsigned long real_time_cmpc:56; /* RW */ unsigned long rsvd_56_63:8; } s; - struct uv1h_int_cmpc_s { - unsigned long real_time_cmpc:56; /* RW */ - unsigned long rsvd_56_63:8; - } s1; - struct uvxh_int_cmpc_s { - unsigned long real_time_cmpc:56; /* RW */ - unsigned long rsvd_56_63:8; - } sx; - struct uv2h_int_cmpc_s { - unsigned long real_time_cmpc:56; /* RW */ - unsigned long rsvd_56_63:8; - } s2; - struct uv3h_int_cmpc_s { - unsigned long real_time_cmpc:56; /* RW */ - unsigned long rsvd_56_63:8; - } s3; }; /* ========================================================================= */ /* UVH_INT_CMPD */ /* ========================================================================= */ #define UVH_INT_CMPD 0x22180UL -#define UV1H_INT_CMPD 0x22180UL -#define UV2H_INT_CMPD 0x22180UL -#define UV3H_INT_CMPD 0x22180UL #define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 #define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL @@ -2523,47 +1390,19 @@ union uvh_int_cmpc_u { #define UVXH_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 #define UVXH_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL -#define UV2H_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 -#define UV2H_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL - -#define UV3H_INT_CMPD_REAL_TIME_CMP_3_SHFT 0 -#define UV3H_INT_CMPD_REAL_TIME_CMP_3_MASK 0x00ffffffffffffffUL - union uvh_int_cmpd_u { unsigned long v; struct uvh_int_cmpd_s { unsigned long real_time_cmpd:56; /* RW */ unsigned long rsvd_56_63:8; } s; - struct uv1h_int_cmpd_s { - unsigned long real_time_cmpd:56; /* RW */ - unsigned long rsvd_56_63:8; - } s1; - struct uvxh_int_cmpd_s { - unsigned long real_time_cmpd:56; /* RW */ - unsigned long rsvd_56_63:8; - } sx; - struct uv2h_int_cmpd_s { - unsigned long real_time_cmpd:56; /* RW */ - unsigned long rsvd_56_63:8; - } s2; - struct uv3h_int_cmpd_s { - unsigned long real_time_cmpd:56; /* RW */ - unsigned long rsvd_56_63:8; - } s3; }; /* ========================================================================= */ /* UVH_IPI_INT */ /* ========================================================================= */ #define UVH_IPI_INT 0x60500UL -#define UV1H_IPI_INT 0x60500UL -#define UV2H_IPI_INT 0x60500UL -#define UV3H_IPI_INT 0x60500UL #define UVH_IPI_INT_32 0x348 -#define UV1H_IPI_INT_32 0x60500UL -#define UV2H_IPI_INT_32 0x60500UL -#define UV3H_IPI_INT_32 0x60500UL #define UVH_IPI_INT_VECTOR_SHFT 0 #define 
UVH_IPI_INT_DELIVERY_MODE_SHFT 8 @@ -2576,50 +1415,6 @@ union uvh_int_cmpd_u { #define UVH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL #define UVH_IPI_INT_SEND_MASK 0x8000000000000000UL -#define UV1H_IPI_INT_VECTOR_SHFT 0 -#define UV1H_IPI_INT_DELIVERY_MODE_SHFT 8 -#define UV1H_IPI_INT_DESTMODE_SHFT 11 -#define UV1H_IPI_INT_APIC_ID_SHFT 16 -#define UV1H_IPI_INT_SEND_SHFT 63 -#define UV1H_IPI_INT_VECTOR_MASK 0x00000000000000ffUL -#define UV1H_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL -#define UV1H_IPI_INT_DESTMODE_MASK 0x0000000000000800UL -#define UV1H_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL -#define UV1H_IPI_INT_SEND_MASK 0x8000000000000000UL - -#define UVXH_IPI_INT_VECTOR_SHFT 0 -#define UVXH_IPI_INT_DELIVERY_MODE_SHFT 8 -#define UVXH_IPI_INT_DESTMODE_SHFT 11 -#define UVXH_IPI_INT_APIC_ID_SHFT 16 -#define UVXH_IPI_INT_SEND_SHFT 63 -#define UVXH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL -#define UVXH_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL -#define UVXH_IPI_INT_DESTMODE_MASK 0x0000000000000800UL -#define UVXH_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL -#define UVXH_IPI_INT_SEND_MASK 0x8000000000000000UL - -#define UV2H_IPI_INT_VECTOR_SHFT 0 -#define UV2H_IPI_INT_DELIVERY_MODE_SHFT 8 -#define UV2H_IPI_INT_DESTMODE_SHFT 11 -#define UV2H_IPI_INT_APIC_ID_SHFT 16 -#define UV2H_IPI_INT_SEND_SHFT 63 -#define UV2H_IPI_INT_VECTOR_MASK 0x00000000000000ffUL -#define UV2H_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL -#define UV2H_IPI_INT_DESTMODE_MASK 0x0000000000000800UL -#define UV2H_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL -#define UV2H_IPI_INT_SEND_MASK 0x8000000000000000UL - -#define UV3H_IPI_INT_VECTOR_SHFT 0 -#define UV3H_IPI_INT_DELIVERY_MODE_SHFT 8 -#define UV3H_IPI_INT_DESTMODE_SHFT 11 -#define UV3H_IPI_INT_APIC_ID_SHFT 16 -#define UV3H_IPI_INT_SEND_SHFT 63 -#define UV3H_IPI_INT_VECTOR_MASK 0x00000000000000ffUL -#define UV3H_IPI_INT_DELIVERY_MODE_MASK 0x0000000000000700UL -#define UV3H_IPI_INT_DESTMODE_MASK 0x0000000000000800UL -#define UV3H_IPI_INT_APIC_ID_MASK 0x0000ffffffff0000UL -#define UV3H_IPI_INT_SEND_MASK 0x8000000000000000UL - union uvh_ipi_int_u { unsigned long v; struct uvh_ipi_int_s { @@ -2631,81 +1426,19 @@ union uvh_ipi_int_u { unsigned long rsvd_48_62:15; unsigned long send:1; /* WP */ } s; - struct uv1h_ipi_int_s { - unsigned long vector_:8; /* RW */ - unsigned long delivery_mode:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long rsvd_12_15:4; - unsigned long apic_id:32; /* RW */ - unsigned long rsvd_48_62:15; - unsigned long send:1; /* WP */ - } s1; - struct uvxh_ipi_int_s { - unsigned long vector_:8; /* RW */ - unsigned long delivery_mode:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long rsvd_12_15:4; - unsigned long apic_id:32; /* RW */ - unsigned long rsvd_48_62:15; - unsigned long send:1; /* WP */ - } sx; - struct uv2h_ipi_int_s { - unsigned long vector_:8; /* RW */ - unsigned long delivery_mode:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long rsvd_12_15:4; - unsigned long apic_id:32; /* RW */ - unsigned long rsvd_48_62:15; - unsigned long send:1; /* WP */ - } s2; - struct uv3h_ipi_int_s { - unsigned long vector_:8; /* RW */ - unsigned long delivery_mode:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long rsvd_12_15:4; - unsigned long apic_id:32; /* RW */ - unsigned long rsvd_48_62:15; - unsigned long send:1; /* WP */ - } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ /* 
========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x320050UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x320050UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x320050UL #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL - -#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_SHFT 49 -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_NODE_ID_MASK 0x7ffe000000000000UL - union uvh_lb_bau_intd_payload_queue_first_u { unsigned long v; struct uvh_lb_bau_intd_payload_queue_first_s { @@ -2715,63 +1448,17 @@ union uvh_lb_bau_intd_payload_queue_first_u { unsigned long node_id:14; /* RW */ unsigned long rsvd_63:1; } s; - struct uv1h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s1; - struct uvxh_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } sx; - struct uv2h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s2; - struct uv3h_lb_bau_intd_payload_queue_first_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 -#define 
UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x320060UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x320060UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x320060UL

#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL

-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL
-
union uvh_lb_bau_intd_payload_queue_last_u {
unsigned long v;
struct uvh_lb_bau_intd_payload_queue_last_s {
@@ -2779,55 +1466,17 @@ union uvh_lb_bau_intd_payload_queue_last_u {
unsigned long address:39; /* RW */
unsigned long rsvd_43_63:21;
} s;
- struct uv1h_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s1;
- struct uvxh_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } sx;
- struct uv2h_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s2;
- struct uv3h_lb_bau_intd_payload_queue_last_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s3;
};

/* ========================================================================= */
/* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */
/* ========================================================================= */
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL

#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x320070UL
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x320070UL
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x320070UL

#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL

-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV1H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UVXH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV2H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
-
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4
-#define UV3H_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL
-
union uvh_lb_bau_intd_payload_queue_tail_u {
unsigned long v;
struct uvh_lb_bau_intd_payload_queue_tail_s {
@@ -2835,39 +1484,13 @@ union uvh_lb_bau_intd_payload_queue_tail_u {
unsigned long address:39; /* RW */
unsigned long rsvd_43_63:21;
} s;
- struct uv1h_lb_bau_intd_payload_queue_tail_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */
- unsigned long rsvd_43_63:21;
- } s1;
- struct uvxh_lb_bau_intd_payload_queue_tail_s {
- unsigned long rsvd_0_3:4;
- unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } sx; - struct uv2h_lb_bau_intd_payload_queue_tail_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s2; - struct uv3h_lb_bau_intd_payload_queue_tail_s { - unsigned long rsvd_0_3:4; - unsigned long address:39; /* RW */ - unsigned long rsvd_43_63:21; - } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x320080UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x320080UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x320080UL #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 @@ -2902,138 +1525,6 @@ union uvh_lb_bau_intd_payload_queue_tail_u { #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define 
UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UVXH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define 
UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 0x0000000000001000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_SHFT 1 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_SHFT 2 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_SHFT 3 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_SHFT 4 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_SHFT 5 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_SHFT 6 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_SHFT 7 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_SHFT 8 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_SHFT 9 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_SHFT 10 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_SHFT 11 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_SHFT 12 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_SHFT 13 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_SHFT 14 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_1_MASK 0x0000000000000002UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_2_MASK 0x0000000000000004UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_3_MASK 0x0000000000000008UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_4_MASK 0x0000000000000010UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_5_MASK 0x0000000000000020UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_6_MASK 0x0000000000000040UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_7_MASK 0x0000000000000080UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_0_MASK 0x0000000000000100UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_1_MASK 0x0000000000000200UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_2_MASK 0x0000000000000400UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_3_MASK 0x0000000000000800UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_4_MASK 
0x0000000000001000UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_5_MASK 0x0000000000002000UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL -#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL - union uvh_lb_bau_intd_software_acknowledge_u { unsigned long v; struct uvh_lb_bau_intd_software_acknowledge_s { @@ -3055,95 +1546,13 @@ union uvh_lb_bau_intd_software_acknowledge_u { unsigned long timeout_7:1; /* RW, W1C */ unsigned long rsvd_16_63:48; } s; - struct uv1h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW, W1C */ - unsigned long pending_1:1; /* RW, W1C */ - unsigned long pending_2:1; /* RW, W1C */ - unsigned long pending_3:1; /* RW, W1C */ - unsigned long pending_4:1; /* RW, W1C */ - unsigned long pending_5:1; /* RW, W1C */ - unsigned long pending_6:1; /* RW, W1C */ - unsigned long pending_7:1; /* RW, W1C */ - unsigned long timeout_0:1; /* RW, W1C */ - unsigned long timeout_1:1; /* RW, W1C */ - unsigned long timeout_2:1; /* RW, W1C */ - unsigned long timeout_3:1; /* RW, W1C */ - unsigned long timeout_4:1; /* RW, W1C */ - unsigned long timeout_5:1; /* RW, W1C */ - unsigned long timeout_6:1; /* RW, W1C */ - unsigned long timeout_7:1; /* RW, W1C */ - unsigned long rsvd_16_63:48; - } s1; - struct uvxh_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW */ - unsigned long pending_1:1; /* RW */ - unsigned long pending_2:1; /* RW */ - unsigned long pending_3:1; /* RW */ - unsigned long pending_4:1; /* RW */ - unsigned long pending_5:1; /* RW */ - unsigned long pending_6:1; /* RW */ - unsigned long pending_7:1; /* RW */ - unsigned long timeout_0:1; /* RW */ - unsigned long timeout_1:1; /* RW */ - unsigned long timeout_2:1; /* RW */ - unsigned long timeout_3:1; /* RW */ - unsigned long timeout_4:1; /* RW */ - unsigned long timeout_5:1; /* RW */ - unsigned long timeout_6:1; /* RW */ - unsigned long timeout_7:1; /* RW */ - unsigned long rsvd_16_63:48; - } sx; - struct uv2h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW */ - unsigned long pending_1:1; /* RW */ - unsigned long pending_2:1; /* RW */ - unsigned long pending_3:1; /* RW */ - unsigned long pending_4:1; /* RW */ - unsigned long pending_5:1; /* RW */ - unsigned long pending_6:1; /* RW */ - unsigned long pending_7:1; /* RW */ - unsigned long timeout_0:1; /* RW */ - unsigned long timeout_1:1; /* RW */ - unsigned long timeout_2:1; /* RW */ - unsigned long timeout_3:1; /* RW */ - unsigned long timeout_4:1; /* RW */ - unsigned long timeout_5:1; /* RW */ - unsigned long timeout_6:1; /* RW */ - unsigned long timeout_7:1; /* RW */ - unsigned long rsvd_16_63:48; - } s2; - struct uv3h_lb_bau_intd_software_acknowledge_s { - unsigned long pending_0:1; /* RW */ - unsigned long pending_1:1; /* RW */ - unsigned long pending_2:1; /* RW */ - unsigned long pending_3:1; /* RW */ - unsigned long pending_4:1; /* RW */ - unsigned long pending_5:1; /* RW */ - unsigned long pending_6:1; /* RW */ - unsigned long pending_7:1; /* RW */ - unsigned long timeout_0:1; /* RW */ - unsigned long timeout_1:1; /* RW */ - unsigned long timeout_2:1; /* RW */ - unsigned long timeout_3:1; /* RW */ - unsigned long timeout_4:1; /* RW */ - unsigned long timeout_5:1; /* RW */ - unsigned long timeout_6:1; /* RW */ - unsigned long timeout_7:1; /* RW */ - unsigned long rsvd_16_63:48; - } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ /* 
========================================================================= */
#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x320088UL

#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70
-#define UV1H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x320088UL
-#define UV2H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x320088UL
-#define UV3H_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x320088UL

/* ========================================================================= */
@@ -3498,13 +1907,7 @@ union uvh_lb_bau_misc_control_u {
/* UVH_LB_BAU_SB_ACTIVATION_CONTROL */
/* ========================================================================= */
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL

#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x320020UL
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x320020UL
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_32 0x320020UL

#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
@@ -3513,34 +1916,6 @@ union uvh_lb_bau_misc_control_u {
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL
#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL

-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL
-#define UV1H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL
-
-#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
-#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
-#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63
-#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
-#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL
-#define UVXH_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL
-
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL
-#define UV2H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL
-
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT 62
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_SHFT 63
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_MASK 0x4000000000000000UL
-#define UV3H_LB_BAU_SB_ACTIVATION_CONTROL_INIT_MASK 0x8000000000000000UL
-
union uvh_lb_bau_sb_activation_control_u {
unsigned long v;
struct uvh_lb_bau_sb_activation_control_s {
@@ -3549,161 +1924,51 @@ union uvh_lb_bau_sb_activation_control_u {
unsigned long push:1; /* WP */
unsigned long init:1; /* WP */
} s;
- struct uv1h_lb_bau_sb_activation_control_s {
- unsigned long index:6; /* RW */
- unsigned long rsvd_6_61:56;
- unsigned long push:1; /* WP */
-
unsigned long init:1; /* WP */ - } s1; - struct uvxh_lb_bau_sb_activation_control_s { - unsigned long index:6; /* RW */ - unsigned long rsvd_6_61:56; - unsigned long push:1; /* WP */ - unsigned long init:1; /* WP */ - } sx; - struct uv2h_lb_bau_sb_activation_control_s { - unsigned long index:6; /* RW */ - unsigned long rsvd_6_61:56; - unsigned long push:1; /* WP */ - unsigned long init:1; /* WP */ - } s2; - struct uv3h_lb_bau_sb_activation_control_s { - unsigned long index:6; /* RW */ - unsigned long rsvd_6_61:56; - unsigned long push:1; /* WP */ - unsigned long init:1; /* WP */ - } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x320030UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x320030UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x320030UL #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL - -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL - -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL - -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL - union uvh_lb_bau_sb_activation_status_0_u { unsigned long v; struct uvh_lb_bau_sb_activation_status_0_s { unsigned long status:64; /* RW */ } s; - struct uv1h_lb_bau_sb_activation_status_0_s { - unsigned long status:64; /* RW */ - } s1; - struct uvxh_lb_bau_sb_activation_status_0_s { - unsigned long status:64; /* RW */ - } sx; - struct uv2h_lb_bau_sb_activation_status_0_s { - unsigned long status:64; /* RW */ - } s2; - struct uv3h_lb_bau_sb_activation_status_0_s { - unsigned long status:64; /* RW */ - } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x320040UL -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x320040UL -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x320040UL #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 -#define UV1H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL - -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 -#define UVXH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL - -#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 -#define 
UV2H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL - -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 -#define UV3H_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL - union uvh_lb_bau_sb_activation_status_1_u { unsigned long v; struct uvh_lb_bau_sb_activation_status_1_s { unsigned long status:64; /* RW */ } s; - struct uv1h_lb_bau_sb_activation_status_1_s { - unsigned long status:64; /* RW */ - } s1; - struct uvxh_lb_bau_sb_activation_status_1_s { - unsigned long status:64; /* RW */ - } sx; - struct uv2h_lb_bau_sb_activation_status_1_s { - unsigned long status:64; /* RW */ - } s2; - struct uv3h_lb_bau_sb_activation_status_1_s { - unsigned long status:64; /* RW */ - } s3; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ /* ========================================================================= */ #define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x320010UL -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x320010UL -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_32 0x320010UL #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV1H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UVXH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 -#define UVXH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UVXH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UVXH_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV2H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_SHFT 49 -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL -#define UV3H_LB_BAU_SB_DESCRIPTOR_BASE_NODE_ID_MASK 0x7ffe000000000000UL - union uvh_lb_bau_sb_descriptor_base_u { unsigned long v; struct uvh_lb_bau_sb_descriptor_base_s { @@ -3713,34 +1978,6 @@ union uvh_lb_bau_sb_descriptor_base_u { unsigned long node_id:14; /* RW */ unsigned long rsvd_63:1; } s; - struct uv1h_lb_bau_sb_descriptor_base_s { - unsigned long rsvd_0_11:12; - unsigned long page_address:31; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } s1; - struct uvxh_lb_bau_sb_descriptor_base_s { - unsigned long rsvd_0_11:12; - unsigned long page_address:31; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - } sx; - struct uv2h_lb_bau_sb_descriptor_base_s { - unsigned long rsvd_0_11:12; - unsigned long page_address:31; /* RW */ - unsigned long rsvd_43_48:6; - unsigned long node_id:14; /* RW */ - unsigned long rsvd_63:1; - 
} s2;
- struct uv3h_lb_bau_sb_descriptor_base_s {
- unsigned long rsvd_0_11:12;
- unsigned long page_address:31; /* RW */
- unsigned long rsvd_43_48:6;
- unsigned long node_id:14; /* RW */
- unsigned long rsvd_63:1;
- } s3;
};

/* ========================================================================= */
@@ -3889,229 +2126,70 @@ union uvh_node_id_u {
/* UVH_NODE_PRESENT_TABLE */
/* ========================================================================= */
#define UVH_NODE_PRESENT_TABLE 0x1400UL
-#define UV1H_NODE_PRESENT_TABLE 0x1400UL
-#define UV2H_NODE_PRESENT_TABLE 0x1400UL
-#define UV3H_NODE_PRESENT_TABLE 0x1400UL

#define UVH_NODE_PRESENT_TABLE_DEPTH 16
-#define UV1H_NODE_PRESENT_TABLE_DEPTH 0x1400UL
-#define UV2H_NODE_PRESENT_TABLE_DEPTH 0x1400UL
-#define UV3H_NODE_PRESENT_TABLE_DEPTH 0x1400UL

#define UVH_NODE_PRESENT_TABLE_NODES_SHFT 0
#define UVH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL

-#define UV1H_NODE_PRESENT_TABLE_NODES_SHFT 0
-#define UV1H_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
-
-#define UVXH_NODE_PRESENT_TABLE_NODES_SHFT 0
-#define UVXH_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
-
-#define UV2H_NODE_PRESENT_TABLE_NODES_SHFT 0
-#define UV2H_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
-
-#define UV3H_NODE_PRESENT_TABLE_NODES_SHFT 0
-#define UV3H_NODE_PRESENT_TABLE_NODES_MASK 0xffffffffffffffffUL
-
union uvh_node_present_table_u {
unsigned long v;
struct uvh_node_present_table_s {
unsigned long nodes:64; /* RW */
} s;
- struct uv1h_node_present_table_s {
- unsigned long nodes:64; /* RW */
- } s1;
- struct uvxh_node_present_table_s {
- unsigned long nodes:64; /* RW */
- } sx;
- struct uv2h_node_present_table_s {
- unsigned long nodes:64; /* RW */
- } s2;
- struct uv3h_node_present_table_s {
- unsigned long nodes:64; /* RW */
- } s3;
};

/* ========================================================================= */
/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR */
/* ========================================================================= */
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL
-#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR 0x16000c8UL

#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL
-
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL
-#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL
-#define
UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL - -union uvh_rh_gam_alias210_overlay_config_0_mmr_u { - unsigned long v; - struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s; - struct uv1h_rh_gam_alias210_overlay_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } sx; - struct uv2h_rh_gam_alias210_overlay_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s3; -}; - -/* ========================================================================= */ -/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ -/* ========================================================================= */ -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL - -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define 
UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL - -union uvh_rh_gam_alias210_overlay_config_1_mmr_u { - unsigned long v; - struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s; - struct uv1h_rh_gam_alias210_overlay_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } sx; - struct uv2h_rh_gam_alias210_overlay_config_1_mmr_s { +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_BASE_MASK 0x00000000ff000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_alias210_overlay_config_0_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_overlay_config_0_mmr_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_1_mmr_s { + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR 0x16000d8UL + +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_SHFT 48 +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_SHFT 63 +#define 
UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_BASE_MASK 0x00000000ff000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_alias210_overlay_config_1_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_overlay_config_1_mmr_s { unsigned long rsvd_0_23:24; unsigned long base:8; /* RW */ unsigned long rsvd_32_47:16; unsigned long m_alias:5; /* RW */ unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ - } s3; + } s; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR */ /* ========================================================================= */ #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR 0x16000e8UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 @@ -4120,34 +2198,6 @@ union uvh_rh_gam_alias210_overlay_config_1_mmr_u { #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL #define UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV1H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UVXH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV2H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_SHFT 48 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_SHFT 63 -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_BASE_MASK 0x00000000ff000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_M_ALIAS_MASK 0x001f000000000000UL -#define UV3H_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR_ENABLE_MASK 0x8000000000000000UL - union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_overlay_config_2_mmr_s { @@ -4158,63 +2208,16 @@ union uvh_rh_gam_alias210_overlay_config_2_mmr_u { unsigned long rsvd_53_62:10; unsigned long enable:1; /* RW */ } s; - struct uv1h_rh_gam_alias210_overlay_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long 
base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s1; - struct uvxh_rh_gam_alias210_overlay_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } sx; - struct uv2h_rh_gam_alias210_overlay_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s2; - struct uv3h_rh_gam_alias210_overlay_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long base:8; /* RW */ - unsigned long rsvd_32_47:16; - unsigned long m_alias:5; /* RW */ - unsigned long rsvd_53_62:10; - unsigned long enable:1; /* RW */ - } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ /* ========================================================================= */ #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL - union uvh_rh_gam_alias210_redirect_config_0_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { @@ -4222,51 +2225,16 @@ union uvh_rh_gam_alias210_redirect_config_0_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_redirect_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } sx; - struct uv2h_rh_gam_alias210_redirect_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_0_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ /* ========================================================================= */ #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL -#define 
UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL - union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { @@ -4274,51 +2242,16 @@ union uvh_rh_gam_alias210_redirect_config_1_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_redirect_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } sx; - struct uv2h_rh_gam_alias210_redirect_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_1_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s3; }; /* ========================================================================= */ /* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ /* ========================================================================= */ #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 #define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV1H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UVXH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV2H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 -#define UV3H_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL - union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long v; struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { @@ -4326,26 +2259,6 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { unsigned long dest_base:22; /* RW */ unsigned long rsvd_46_63:18; } s; - struct uv1h_rh_gam_alias210_redirect_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s1; - struct uvxh_rh_gam_alias210_redirect_config_2_mmr_s { - unsigned long rsvd_0_23:24; - 
unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } sx; - struct uv2h_rh_gam_alias210_redirect_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s2; - struct uv3h_rh_gam_alias210_redirect_config_2_mmr_s { - unsigned long rsvd_0_23:24; - unsigned long dest_base:22; /* RW */ - unsigned long rsvd_46_63:18; - } s3; }; /* ========================================================================= */ @@ -4513,9 +2426,6 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { /* ========================================================================= */ #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR \ - (is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR : \ - UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR) #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 @@ -4629,56 +2539,22 @@ union uvh_rh_gam_mmr_overlay_config_mmr_u { /* UVH_RTC */ /* ========================================================================= */ #define UVH_RTC 0x340000UL -#define UV1H_RTC 0x340000UL -#define UV2H_RTC 0x340000UL -#define UV3H_RTC 0x340000UL #define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 #define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL -#define UV1H_RTC_REAL_TIME_CLOCK_SHFT 0 -#define UV1H_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL - -#define UVXH_RTC_REAL_TIME_CLOCK_SHFT 0 -#define UVXH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL - -#define UV2H_RTC_REAL_TIME_CLOCK_SHFT 0 -#define UV2H_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL - -#define UV3H_RTC_REAL_TIME_CLOCK_SHFT 0 -#define UV3H_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL - union uvh_rtc_u { unsigned long v; struct uvh_rtc_s { unsigned long real_time_clock:56; /* RW */ unsigned long rsvd_56_63:8; } s; - struct uv1h_rtc_s { - unsigned long real_time_clock:56; /* RW */ - unsigned long rsvd_56_63:8; - } s1; - struct uvxh_rtc_s { - unsigned long real_time_clock:56; /* RW */ - unsigned long rsvd_56_63:8; - } sx; - struct uv2h_rtc_s { - unsigned long real_time_clock:56; /* RW */ - unsigned long rsvd_56_63:8; - } s2; - struct uv3h_rtc_s { - unsigned long real_time_clock:56; /* RW */ - unsigned long rsvd_56_63:8; - } s3; }; /* ========================================================================= */ /* UVH_RTC1_INT_CONFIG */ /* ========================================================================= */ #define UVH_RTC1_INT_CONFIG 0x615c0UL -#define UV1H_RTC1_INT_CONFIG 0x615c0UL -#define UV2H_RTC1_INT_CONFIG 0x615c0UL -#define UV3H_RTC1_INT_CONFIG 0x615c0UL #define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 #define UVH_RTC1_INT_CONFIG_DM_SHFT 8 @@ -4697,74 +2573,6 @@ union uvh_rtc_u { #define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL #define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL -#define UV1H_RTC1_INT_CONFIG_VECTOR_SHFT 0 -#define UV1H_RTC1_INT_CONFIG_DM_SHFT 8 -#define UV1H_RTC1_INT_CONFIG_DESTMODE_SHFT 11 -#define UV1H_RTC1_INT_CONFIG_STATUS_SHFT 12 -#define UV1H_RTC1_INT_CONFIG_P_SHFT 13 -#define UV1H_RTC1_INT_CONFIG_T_SHFT 15 -#define UV1H_RTC1_INT_CONFIG_M_SHFT 16 -#define UV1H_RTC1_INT_CONFIG_APIC_ID_SHFT 32 -#define UV1H_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV1H_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UV1H_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV1H_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define 
UV1H_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UV1H_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UV1H_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UV1H_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UVXH_RTC1_INT_CONFIG_VECTOR_SHFT 0 -#define UVXH_RTC1_INT_CONFIG_DM_SHFT 8 -#define UVXH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 -#define UVXH_RTC1_INT_CONFIG_STATUS_SHFT 12 -#define UVXH_RTC1_INT_CONFIG_P_SHFT 13 -#define UVXH_RTC1_INT_CONFIG_T_SHFT 15 -#define UVXH_RTC1_INT_CONFIG_M_SHFT 16 -#define UVXH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 -#define UVXH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UVXH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UVXH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UVXH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UVXH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UVXH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UVXH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UVXH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV2H_RTC1_INT_CONFIG_VECTOR_SHFT 0 -#define UV2H_RTC1_INT_CONFIG_DM_SHFT 8 -#define UV2H_RTC1_INT_CONFIG_DESTMODE_SHFT 11 -#define UV2H_RTC1_INT_CONFIG_STATUS_SHFT 12 -#define UV2H_RTC1_INT_CONFIG_P_SHFT 13 -#define UV2H_RTC1_INT_CONFIG_T_SHFT 15 -#define UV2H_RTC1_INT_CONFIG_M_SHFT 16 -#define UV2H_RTC1_INT_CONFIG_APIC_ID_SHFT 32 -#define UV2H_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV2H_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UV2H_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV2H_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV2H_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UV2H_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UV2H_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UV2H_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - -#define UV3H_RTC1_INT_CONFIG_VECTOR_SHFT 0 -#define UV3H_RTC1_INT_CONFIG_DM_SHFT 8 -#define UV3H_RTC1_INT_CONFIG_DESTMODE_SHFT 11 -#define UV3H_RTC1_INT_CONFIG_STATUS_SHFT 12 -#define UV3H_RTC1_INT_CONFIG_P_SHFT 13 -#define UV3H_RTC1_INT_CONFIG_T_SHFT 15 -#define UV3H_RTC1_INT_CONFIG_M_SHFT 16 -#define UV3H_RTC1_INT_CONFIG_APIC_ID_SHFT 32 -#define UV3H_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL -#define UV3H_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL -#define UV3H_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL -#define UV3H_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL -#define UV3H_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL -#define UV3H_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL -#define UV3H_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL -#define UV3H_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL - union uvh_rtc1_int_config_u { unsigned long v; struct uvh_rtc1_int_config_s { @@ -4779,111 +2587,29 @@ union uvh_rtc1_int_config_u { unsigned long rsvd_17_31:15; unsigned long apic_id:32; /* RW */ } s; - struct uv1h_rtc1_int_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s1; - struct uvxh_rtc1_int_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ 
- unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } sx; - struct uv2h_rtc1_int_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s2; - struct uv3h_rtc1_int_config_s { - unsigned long vector_:8; /* RW */ - unsigned long dm:3; /* RW */ - unsigned long destmode:1; /* RW */ - unsigned long status:1; /* RO */ - unsigned long p:1; /* RO */ - unsigned long rsvd_14:1; - unsigned long t:1; /* RO */ - unsigned long m:1; /* RW */ - unsigned long rsvd_17_31:15; - unsigned long apic_id:32; /* RW */ - } s3; }; /* ========================================================================= */ /* UVH_SCRATCH5 */ /* ========================================================================= */ #define UVH_SCRATCH5 0x2d0200UL -#define UV1H_SCRATCH5 0x2d0200UL -#define UV2H_SCRATCH5 0x2d0200UL -#define UV3H_SCRATCH5 0x2d0200UL #define UVH_SCRATCH5_32 0x778 -#define UV1H_SCRATCH5_32 0x2d0200UL -#define UV2H_SCRATCH5_32 0x2d0200UL -#define UV3H_SCRATCH5_32 0x2d0200UL #define UVH_SCRATCH5_SCRATCH5_SHFT 0 #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL -#define UV1H_SCRATCH5_SCRATCH5_SHFT 0 -#define UV1H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL - -#define UVXH_SCRATCH5_SCRATCH5_SHFT 0 -#define UVXH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL - -#define UV2H_SCRATCH5_SCRATCH5_SHFT 0 -#define UV2H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL - -#define UV3H_SCRATCH5_SCRATCH5_SHFT 0 -#define UV3H_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL - union uvh_scratch5_u { unsigned long v; struct uvh_scratch5_s { unsigned long scratch5:64; /* RW, W1CS */ } s; - struct uv1h_scratch5_s { - unsigned long scratch5:64; /* RW, W1CS */ - } s1; - struct uvxh_scratch5_s { - unsigned long scratch5:64; /* RW */ - } sx; - struct uv2h_scratch5_s { - unsigned long scratch5:64; /* RW */ - } s2; - struct uv3h_scratch5_s { - unsigned long scratch5:64; /* RW */ - } s3; }; /* ========================================================================= */ /* UVXH_EVENT_OCCURRED2 */ /* ========================================================================= */ #define UVXH_EVENT_OCCURRED2 0x70100UL -#define UV2H_EVENT_OCCURRED2 0x70100UL -#define UV3H_EVENT_OCCURRED2 0x70100UL #define UVXH_EVENT_OCCURRED2_32 0xb68 -#define UV2H_EVENT_OCCURRED2_32 0x70100UL -#define UV3H_EVENT_OCCURRED2_32 0x70100UL #define UVXH_EVENT_OCCURRED2_RTC_0_SHFT 0 #define UVXH_EVENT_OCCURRED2_RTC_1_SHFT 1 @@ -4950,136 +2676,6 @@ union uvh_scratch5_u { #define UVXH_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL #define UVXH_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL -#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 -#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 -#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 -#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 -#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 -#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 -#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 -#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 -#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 -#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 -#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 -#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 -#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 -#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 -#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 -#define 
UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 -#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 -#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 -#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 -#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 -#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 -#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 -#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 -#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 -#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 -#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 -#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 -#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 -#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 -#define UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 -#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 -#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 -#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL -#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL -#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL -#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL -#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL -#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL -#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL -#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL -#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL -#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL -#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL -#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL -#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL -#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL -#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL -#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL -#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL -#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL -#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL -#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL -#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL -#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL -#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL -#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL -#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL -#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL -#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL -#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL -#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL -#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL -#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL -#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL - -#define UV3H_EVENT_OCCURRED2_RTC_0_SHFT 0 -#define UV3H_EVENT_OCCURRED2_RTC_1_SHFT 1 -#define UV3H_EVENT_OCCURRED2_RTC_2_SHFT 2 -#define UV3H_EVENT_OCCURRED2_RTC_3_SHFT 3 -#define UV3H_EVENT_OCCURRED2_RTC_4_SHFT 4 -#define UV3H_EVENT_OCCURRED2_RTC_5_SHFT 5 -#define UV3H_EVENT_OCCURRED2_RTC_6_SHFT 6 -#define UV3H_EVENT_OCCURRED2_RTC_7_SHFT 7 -#define UV3H_EVENT_OCCURRED2_RTC_8_SHFT 8 -#define UV3H_EVENT_OCCURRED2_RTC_9_SHFT 9 -#define UV3H_EVENT_OCCURRED2_RTC_10_SHFT 10 -#define UV3H_EVENT_OCCURRED2_RTC_11_SHFT 11 -#define UV3H_EVENT_OCCURRED2_RTC_12_SHFT 12 -#define UV3H_EVENT_OCCURRED2_RTC_13_SHFT 13 -#define UV3H_EVENT_OCCURRED2_RTC_14_SHFT 14 -#define UV3H_EVENT_OCCURRED2_RTC_15_SHFT 15 -#define UV3H_EVENT_OCCURRED2_RTC_16_SHFT 16 -#define UV3H_EVENT_OCCURRED2_RTC_17_SHFT 17 
-#define UV3H_EVENT_OCCURRED2_RTC_18_SHFT 18 -#define UV3H_EVENT_OCCURRED2_RTC_19_SHFT 19 -#define UV3H_EVENT_OCCURRED2_RTC_20_SHFT 20 -#define UV3H_EVENT_OCCURRED2_RTC_21_SHFT 21 -#define UV3H_EVENT_OCCURRED2_RTC_22_SHFT 22 -#define UV3H_EVENT_OCCURRED2_RTC_23_SHFT 23 -#define UV3H_EVENT_OCCURRED2_RTC_24_SHFT 24 -#define UV3H_EVENT_OCCURRED2_RTC_25_SHFT 25 -#define UV3H_EVENT_OCCURRED2_RTC_26_SHFT 26 -#define UV3H_EVENT_OCCURRED2_RTC_27_SHFT 27 -#define UV3H_EVENT_OCCURRED2_RTC_28_SHFT 28 -#define UV3H_EVENT_OCCURRED2_RTC_29_SHFT 29 -#define UV3H_EVENT_OCCURRED2_RTC_30_SHFT 30 -#define UV3H_EVENT_OCCURRED2_RTC_31_SHFT 31 -#define UV3H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL -#define UV3H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL -#define UV3H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL -#define UV3H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL -#define UV3H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL -#define UV3H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL -#define UV3H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL -#define UV3H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL -#define UV3H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL -#define UV3H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL -#define UV3H_EVENT_OCCURRED2_RTC_10_MASK 0x0000000000000400UL -#define UV3H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL -#define UV3H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL -#define UV3H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL -#define UV3H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL -#define UV3H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL -#define UV3H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL -#define UV3H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL -#define UV3H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL -#define UV3H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL -#define UV3H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL -#define UV3H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL -#define UV3H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL -#define UV3H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL -#define UV3H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL -#define UV3H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL -#define UV3H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL -#define UV3H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL -#define UV3H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL -#define UV3H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL -#define UV3H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL -#define UV3H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL - union uvxh_event_occurred2_u { unsigned long v; struct uvxh_event_occurred2_s { @@ -5117,87 +2713,13 @@ union uvxh_event_occurred2_u { unsigned long rtc_31:1; /* RW */ unsigned long rsvd_32_63:32; } sx; - struct uv2h_event_occurred2_s { - unsigned long rtc_0:1; /* RW */ - unsigned long rtc_1:1; /* RW */ - unsigned long rtc_2:1; /* RW */ - unsigned long rtc_3:1; /* RW */ - unsigned long rtc_4:1; /* RW */ - unsigned long rtc_5:1; /* RW */ - unsigned long rtc_6:1; /* RW */ - unsigned long rtc_7:1; /* RW */ - unsigned long rtc_8:1; /* RW */ - unsigned long rtc_9:1; /* RW */ - unsigned long rtc_10:1; /* RW */ - unsigned long rtc_11:1; /* RW */ - unsigned long rtc_12:1; /* RW */ - unsigned long rtc_13:1; /* RW */ - unsigned long rtc_14:1; /* RW */ - unsigned long rtc_15:1; /* RW */ - unsigned long rtc_16:1; /* RW */ - unsigned long rtc_17:1; /* RW */ - unsigned long rtc_18:1; /* RW */ - unsigned long 
rtc_19:1; /* RW */ - unsigned long rtc_20:1; /* RW */ - unsigned long rtc_21:1; /* RW */ - unsigned long rtc_22:1; /* RW */ - unsigned long rtc_23:1; /* RW */ - unsigned long rtc_24:1; /* RW */ - unsigned long rtc_25:1; /* RW */ - unsigned long rtc_26:1; /* RW */ - unsigned long rtc_27:1; /* RW */ - unsigned long rtc_28:1; /* RW */ - unsigned long rtc_29:1; /* RW */ - unsigned long rtc_30:1; /* RW */ - unsigned long rtc_31:1; /* RW */ - unsigned long rsvd_32_63:32; - } s2; - struct uv3h_event_occurred2_s { - unsigned long rtc_0:1; /* RW */ - unsigned long rtc_1:1; /* RW */ - unsigned long rtc_2:1; /* RW */ - unsigned long rtc_3:1; /* RW */ - unsigned long rtc_4:1; /* RW */ - unsigned long rtc_5:1; /* RW */ - unsigned long rtc_6:1; /* RW */ - unsigned long rtc_7:1; /* RW */ - unsigned long rtc_8:1; /* RW */ - unsigned long rtc_9:1; /* RW */ - unsigned long rtc_10:1; /* RW */ - unsigned long rtc_11:1; /* RW */ - unsigned long rtc_12:1; /* RW */ - unsigned long rtc_13:1; /* RW */ - unsigned long rtc_14:1; /* RW */ - unsigned long rtc_15:1; /* RW */ - unsigned long rtc_16:1; /* RW */ - unsigned long rtc_17:1; /* RW */ - unsigned long rtc_18:1; /* RW */ - unsigned long rtc_19:1; /* RW */ - unsigned long rtc_20:1; /* RW */ - unsigned long rtc_21:1; /* RW */ - unsigned long rtc_22:1; /* RW */ - unsigned long rtc_23:1; /* RW */ - unsigned long rtc_24:1; /* RW */ - unsigned long rtc_25:1; /* RW */ - unsigned long rtc_26:1; /* RW */ - unsigned long rtc_27:1; /* RW */ - unsigned long rtc_28:1; /* RW */ - unsigned long rtc_29:1; /* RW */ - unsigned long rtc_30:1; /* RW */ - unsigned long rtc_31:1; /* RW */ - unsigned long rsvd_32_63:32; - } s3; }; /* ========================================================================= */ /* UVXH_EVENT_OCCURRED2_ALIAS */ /* ========================================================================= */ #define UVXH_EVENT_OCCURRED2_ALIAS 0x70108UL -#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL -#define UV3H_EVENT_OCCURRED2_ALIAS 0x70108UL #define UVXH_EVENT_OCCURRED2_ALIAS_32 0xb70 -#define UV2H_EVENT_OCCURRED2_ALIAS_32 0x70108UL -#define UV3H_EVENT_OCCURRED2_ALIAS_32 0x70108UL /* ========================================================================= */ -- cgit v1.1 From b390784dc1649f6e6c5e66e5f53c21e715ccf39b Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 11 Feb 2013 16:27:28 -0800 Subject: x86, mm: Use a bitfield to mask nuisance get_user() warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Even though it is never executed, gcc wants to warn for casting from a large integer to a pointer. Furthermore, using a variable with __typeof__() doesn't work because __typeof__ retains storage specifiers (const, restrict, volatile). However, we can declare a bitfield using sizeof(), which is legal because sizeof() is a constant expression. 
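As a stand-alone illustration (a minimal user-space sketch with invented names, not the kernel code), the sizeof()-sized bitfield trick looks roughly like this:

	#include <stdio.h>

	/* The field is exactly 8*sizeof(type) bits wide, which is legal
	 * because sizeof() is a constant expression; the value round-trips
	 * through it without any large-integer-to-pointer cast for gcc to
	 * warn about. */
	#define ROUND_TRIP(type, val)						\
	({									\
		struct { unsigned long long n : 8 * sizeof(type); } f;		\
		f.n = (val);	/* store through the sized field */		\
		(type)f.n;	/* read back at the original width */		\
	})

	int main(void)
	{
		printf("%d\n", ROUND_TRIP(int, -1));		/* prints -1 */
		printf("%lld\n", ROUND_TRIP(long long, -1LL));	/* prints -1 */
		return 0;
	}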
This quiets the warning, although the code generated isn't 100% identical to the baseline before 96477b4 x86-32: Add support for 64bit get_user():
[x86-mb is baseline, x86-mm is this commit]
      text      data       bss  filename
 113716147  15858380  35037184  tip.x86-mb/o.i386-allconfig/vmlinux
 113716145  15858380  35037184  tip.x86-mm/o.i386-allconfig/vmlinux
  12989837   3597944  12255232  tip.x86-mb/o.i386-modconfig/vmlinux
  12989831   3597944  12255232  tip.x86-mm/o.i386-modconfig/vmlinux
   1462784    237608   1401988  tip.x86-mb/o.i386-noconfig/vmlinux
   1462837    237608   1401964  tip.x86-mm/o.i386-noconfig/vmlinux
   7938994    553688   7639040  tip.x86-mb/o.i386-pae/vmlinux
   7943136    557784   7639040  tip.x86-mm/o.i386-pae/vmlinux
   7186126    510572   6574080  tip.x86-mb/o.i386/vmlinux
   7186124    510572   6574080  tip.x86-mm/o.i386/vmlinux
 103747269  33578856  65888256  tip.x86-mb/o.x86_64-allconfig/vmlinux
 103746949  33578856  65888256  tip.x86-mm/o.x86_64-allconfig/vmlinux
  12116695  11035832  20160512  tip.x86-mb/o.x86_64-modconfig/vmlinux
  12116567  11035832  20160512  tip.x86-mm/o.x86_64-modconfig/vmlinux
   1700790    380524    511808  tip.x86-mb/o.x86_64-noconfig/vmlinux
   1700790    380524    511808  tip.x86-mm/o.x86_64-noconfig/vmlinux
  12413612   1133376   1101824  tip.x86-mb/o.x86_64/vmlinux
  12413484   1133376   1101824  tip.x86-mm/o.x86_64/vmlinux
Cc: Jamie Lokier Cc: Ville Syrjälä Cc: Borislav Petkov Cc: Russell King Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20130209110031.GA17833@n2100.arm.linux.org.uk Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uaccess.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 1e96326..a8d1265 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -168,31 +168,29 @@ do { \ #define get_user(x, ptr) \ ({ \ int __ret_gu; \ - unsigned long __val_gu; \ - unsigned long long __val_gu8; \ + struct { \ + unsigned long long __val_n : 8*sizeof(*(ptr)); \ + } __val_gu; \ __chk_user_ptr(ptr); \ might_fault(); \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_user_x(1, __ret_gu, __val_gu, ptr); \ + __get_user_x(1, __ret_gu, __val_gu.__val_n, ptr); \ break; \ case 2: \ - __get_user_x(2, __ret_gu, __val_gu, ptr); \ + __get_user_x(2, __ret_gu, __val_gu.__val_n, ptr); \ break; \ case 4: \ - __get_user_x(4, __ret_gu, __val_gu, ptr); \ + __get_user_x(4, __ret_gu, __val_gu.__val_n, ptr); \ break; \ case 8: \ - __get_user_8(__ret_gu, __val_gu8, ptr); \ + __get_user_8(__ret_gu, __val_gu.__val_n, ptr); \ break; \ default: \ - __get_user_x(X, __ret_gu, __val_gu, ptr); \ + __get_user_x(X, __ret_gu, __val_gu.__val_n, ptr); \ break; \ } \ - if (sizeof(*(ptr)) == 8) \ - (x) = (__typeof__(*(ptr)))__val_gu8; \ - else \ - (x) = (__typeof__(*(ptr)))__val_gu; \ + (x) = (__typeof__(*(ptr)))__val_gu.__val_n; \ __ret_gu; \ }) -- cgit v1.1 From 16640165c9079e2cf36fdcfca093f29663a716f7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 11 Feb 2013 23:14:48 -0800 Subject: x86: Be consistent with data size in getuser.S Consistently use the data register by name and use a sized assembly instruction in getuser.S. There is never any reason to macroize it, and being inconsistent in the same file is just annoying. No actual code change. Signed-off-by: H. Peter Anvin
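For readers less used to AT&T mnemonics, a stand-alone C sketch (hypothetical helper, x86 with gcc assumed, not part of the patch) of what the sized movzbl form makes explicit:

	#include <stdio.h>

	/* movzbl names both operand sizes: load a byte, zero-extend into a
	 * 32-bit register. The unsized "movzb" leaves the destination width
	 * to be inferred from the register operand. */
	static unsigned int load_byte_zero_extended(const unsigned char *p)
	{
		unsigned int val;

		asm("movzbl %1, %0" : "=r" (val) : "m" (*p));
		return val;
	}

	int main(void)
	{
		unsigned char b = 0xfe;

		printf("%#x\n", load_byte_zero_extended(&b));	/* 0xfe */
		return 0;
	}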
--- arch/x86/lib/getuser.S | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index d3bf9f9..a451235 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -41,7 +41,7 @@ ENTRY(__get_user_1) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC -1: movzb (%_ASM_AX),%edx +1: movzbl (%_ASM_AX),%edx xor %eax,%eax ASM_CLAC ret @@ -71,7 +71,7 @@ ENTRY(__get_user_4) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC -3: mov -3(%_ASM_AX),%edx +3: movl -3(%_ASM_AX),%edx xor %eax,%eax ASM_CLAC ret @@ -87,7 +87,7 @@ ENTRY(__get_user_8) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user ASM_STAC -4: movq -7(%_ASM_AX),%_ASM_DX +4: movq -7(%_ASM_AX),%rdx xor %eax,%eax ASM_CLAC ret @@ -98,8 +98,8 @@ ENTRY(__get_user_8) cmp TI_addr_limit(%_ASM_DX),%_ASM_AX jae bad_get_user_8 ASM_STAC -4: mov -7(%_ASM_AX),%edx -5: mov -3(%_ASM_AX),%ecx +4: movl -7(%_ASM_AX),%edx +5: movl -3(%_ASM_AX),%ecx xor %eax,%eax ASM_CLAC ret -- cgit v1.1 From 3578baaed4613a9fc09bab9f79f6ce2ac682e8a3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 12 Feb 2013 11:47:31 -0800 Subject: x86, mm: Redesign get_user with a __builtin_choose_expr hack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of using a bitfield, use an odd little trick using typeof, __builtin_choose_expr, and sizeof. __builtin_choose_expr is explicitly defined to not convert its type (its argument is required to be a constant expression) so this should be well-defined. The code is still not 100% perturbation-free versus the baseline before 64-bit get_user(), but the differences seem to be very small, mostly related to padding and to gcc deciding when to spill registers. Cc: Jamie Lokier Cc: Ville Syrjälä Cc: Borislav Petkov Cc: Russell King Cc: Linus Torvalds Cc: H. J. Lu Link: http://lkml.kernel.org/r/511A8922.6050908@zytor.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uaccess.h | 57 +++++++++++------------------------------- 1 file changed, 14 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index a8d1265..d710a25 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -125,13 +125,12 @@ extern int __get_user_4(void); extern int __get_user_8(void); extern int __get_user_bad(void); -#define __get_user_x(size, ret, x, ptr) \ - asm volatile("call __get_user_" #size \ - : "=a" (ret), "=d" (x) \ - : "0" (ptr)) \ - -/* Careful: we have to cast the result to the type of the pointer - * for sign reasons */ +/* + * This is a type: either unsigned long, if the argument fits into + * that type, or otherwise unsigned long long. + */ +#define __inttype(x) \ +__typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) /** * get_user: - Get a simple variable from user space. @@ -149,48 +148,20 @@ extern int __get_user_bad(void); * * Returns zero on success, or -EFAULT on error. * On error, the variable @x is set to zero. + * + * Careful: we have to cast the result to the type of the pointer + * for sign reasons.
*/ -#ifdef CONFIG_X86_32 -#define __get_user_8(ret, x, ptr) \ -do { \ - register unsigned long long __xx asm("%edx"); \ - asm volatile("call __get_user_8" \ - : "=a" (ret), "=r" (__xx) \ - : "0" (ptr)); \ - (x) = __xx; \ -} while (0) - -#else -#define __get_user_8(__ret_gu, __val_gu, ptr) \ - __get_user_x(8, __ret_gu, __val_gu, ptr) -#endif - #define get_user(x, ptr) \ ({ \ int __ret_gu; \ - struct { \ - unsigned long long __val_n : 8*sizeof(*(ptr)); \ - } __val_gu; \ + register __inttype(*(ptr)) __val_gu asm("%edx"); \ __chk_user_ptr(ptr); \ might_fault(); \ - switch (sizeof(*(ptr))) { \ - case 1: \ - __get_user_x(1, __ret_gu, __val_gu.__val_n, ptr); \ - break; \ - case 2: \ - __get_user_x(2, __ret_gu, __val_gu.__val_n, ptr); \ - break; \ - case 4: \ - __get_user_x(4, __ret_gu, __val_gu.__val_n, ptr); \ - break; \ - case 8: \ - __get_user_8(__ret_gu, __val_gu.__val_n, ptr); \ - break; \ - default: \ - __get_user_x(X, __ret_gu, __val_gu.__val_n, ptr); \ - break; \ - } \ - (x) = (__typeof__(*(ptr)))__val_gu.__val_n; \ + asm volatile("call __get_user_%P3" \ + : "=a" (__ret_gu), "=r" (__val_gu) \ + : "0" (ptr), "i" (sizeof(*(ptr)))); \ + (x) = (__typeof__(*(ptr))) __val_gu; \ __ret_gu; \ }) -- cgit v1.1 From f431b634f24d099872e78acc356c7fd35913b36b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 12 Feb 2013 16:18:59 -0500 Subject: tracing/syscalls: Allow archs to ignore tracing compat syscalls The tracing of ia32 compat system calls has been a bit of a pain as they use different system call numbers than the 64bit equivalents. I wrote a simple 'lls' program that lists files. I compiled it as a i686 ELF binary and ran it under a x86_64 box. This is the result: echo 0 > /debug/tracing/tracing_on echo 1 > /debug/tracing/events/syscalls/enable echo 1 > /debug/tracing/tracing_on ; ./lls ; echo 0 > /debug/tracing/tracing_on grep lls /debug/tracing/trace [.. skipping calls before TS_COMPAT is set ...] lls-1127 [005] d... 936.409188: sys_recvfrom(fd: 0, ubuf: 4d560fc4, size: 0, flags: 8048034, addr: 8, addr_len: f7700420) lls-1127 [005] d... 936.409190: sys_recvfrom -> 0x8a77000 lls-1127 [005] d... 936.409211: sys_lgetxattr(pathname: 0, name: 1000, value: 3, size: 22) lls-1127 [005] d... 936.409215: sys_lgetxattr -> 0xf76ff000 lls-1127 [005] d... 936.409223: sys_dup2(oldfd: 4d55ae9b, newfd: 4) lls-1127 [005] d... 936.409228: sys_dup2 -> 0xfffffffffffffffe lls-1127 [005] d... 936.409236: sys_newfstat(fd: 4d55b085, statbuf: 80000) lls-1127 [005] d... 936.409242: sys_newfstat -> 0x3 lls-1127 [005] d... 936.409243: sys_removexattr(pathname: 3, name: ffcd0060) lls-1127 [005] d... 936.409244: sys_removexattr -> 0x0 lls-1127 [005] d... 936.409245: sys_lgetxattr(pathname: 0, name: 19614, value: 1, size: 2) lls-1127 [005] d... 936.409248: sys_lgetxattr -> 0xf76e5000 lls-1127 [005] d... 936.409248: sys_newlstat(filename: 3, statbuf: 19614) lls-1127 [005] d... 936.409249: sys_newlstat -> 0x0 lls-1127 [005] d... 936.409262: sys_newfstat(fd: f76fb588, statbuf: 80000) lls-1127 [005] d... 936.409279: sys_newfstat -> 0x3 lls-1127 [005] d... 936.409279: sys_close(fd: 3) lls-1127 [005] d... 936.421550: sys_close -> 0x200 lls-1127 [005] d... 936.421558: sys_removexattr(pathname: 3, name: ffcd00d0) lls-1127 [005] d... 936.421560: sys_removexattr -> 0x0 lls-1127 [005] d... 936.421569: sys_lgetxattr(pathname: 4d564000, name: 1b1abc, value: 5, size: 802) lls-1127 [005] d... 936.421574: sys_lgetxattr -> 0x4d564000 lls-1127 [005] d... 936.421575: sys_capget(header: 4d70f000, dataptr: 1000) lls-1127 [005] d... 
936.421580: sys_capget -> 0x0 lls-1127 [005] d... 936.421580: sys_lgetxattr(pathname: 4d710000, name: 3000, value: 3, size: 812) lls-1127 [005] d... 936.421589: sys_lgetxattr -> 0x4d710000 lls-1127 [005] d... 936.426130: sys_lgetxattr(pathname: 4d713000, name: 2abc, value: 3, size: 32) lls-1127 [005] d... 936.426141: sys_lgetxattr -> 0x4d713000 lls-1127 [005] d... 936.426145: sys_newlstat(filename: 3, statbuf: f76ff3f0) lls-1127 [005] d... 936.426146: sys_newlstat -> 0x0 lls-1127 [005] d... 936.431748: sys_lgetxattr(pathname: 0, name: 1000, value: 3, size: 22) Obviously I'm not calling newfstat with a fd of 4d55b085. The calls are obviously incorrect, and confusing. Other efforts have been made to fix this: https://lkml.org/lkml/2012/3/26/367 But the real solution is to rewrite the syscall internals and come up with a fixed solution. One that doesn't require all the kluge that the current solution has. Thus for now, instead of outputting incorrect data, simply ignore them. With this patch the changes now have: #> grep lls /debug/tracing/trace #> Compat system calls simply are not traced. If users need compat syscalls, then they should just use the raw syscall tracepoints. For an architecture to make their compat syscalls ignored, it must define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS (done in asm/ftrace.h) and also define an arch_trace_is_compat_syscall() function that will return true if the current task should ignore tracing the syscall. I want to stress that this change does not affect actual syscalls in any way, shape or form. It is only used within the tracing system and doesn't interfere with the syscall logic at all. The changes are consolidated nicely into trace_syscalls.c and asm/ftrace.h. I had to make one small modification to asm/thread_info.h and that was to remove the include of asm/ftrace.h. As asm/ftrace.h required the current_thread_info() it was causing include hell. That include was added back in 2008 when the function graph tracer was added: commit caf4b323 "tracing, x86: add low level support for ftrace return tracing" It does not need to be included there. Link: http://lkml.kernel.org/r/1360703939.21867.99.camel@gandalf.local.home Acked-by: H. Peter Anvin Signed-off-by: Steven Rostedt --- arch/x86/include/asm/ftrace.h | 24 ++++++++++++++++++++++++ arch/x86/include/asm/thread_info.h | 1 - 2 files changed, 24 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 86cb51e..0525a8b 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -72,4 +72,28 @@ int ftrace_int3_handler(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ + +#if !defined(__ASSEMBLY__) && !defined(COMPILE_OFFSETS) + +#if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION) +#include + +/* + * Because ia32 syscalls do not map to x86_64 syscall numbers + * this screws up the trace output when tracing a ia32 task. + * Instead of reporting bogus syscalls, just do not trace them. + * + * If the user realy wants these, then they should use the + * raw syscall tracepoints with filtering. 
+ */ +#define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS 1 +static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) +{ + if (is_compat_task()) + return true; + return false; +} +#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */ +#endif /* !__ASSEMBLY__ && !COMPILE_OFFSETS */ + #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2d946e6..2cd056e 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -20,7 +20,6 @@ struct task_struct; struct exec_domain; #include -#include #include struct thread_info { -- cgit v1.1 From ff52c3b02b3f73178bfe0c219cd22abdcb0e46c3 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 12 Feb 2013 15:37:02 -0800 Subject: x86, doc: Clarify the use of asm("%edx") in uaccess.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Put in a comment that explains that the use of asm("%edx") in uaccess.h doesn't actually necessarily mean %edx alone. Cc: Jamie Lokier Cc: Ville Syrjälä Cc: Borislav Petkov Cc: Russell King Cc: Linus Torvalds Cc: H. J. Lu Link: http://lkml.kernel.org/r/511ACDFB.1050707@zytor.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/uaccess.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index d710a25..5ee2687 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -148,9 +148,16 @@ __typeof__(__builtin_choose_expr(sizeof(x) > sizeof(0UL), 0ULL, 0UL)) * * Returns zero on success, or -EFAULT on error. * On error, the variable @x is set to zero. - * + */ +/* * Careful: we have to cast the result to the type of the pointer * for sign reasons. + * + * The use of %edx as the register specifier is a bit of a + * simplification, as gcc only cares about it as the starting point + * and not size: for a 64-bit value it will use %ecx:%edx on 32 bits + * (%ecx being the next register in gcc's x86 register sequence), and + * %rdx on 64 bits. */ #define get_user(x, ptr) \ ({ \ -- cgit v1.1 From 166df91daf38f619d4ca90b58ff90983de6e40d2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 11 Feb 2013 15:22:15 +0100 Subject: x86, head_32: Remove i386 pieces Remove code fragments detecting a 386 CPU since we don't support those anymore. Also, do not do alignment checks because they're done only at CPL3. Also, no need to preserve EFLAGS. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1360592538-10643-2-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index c8932c7..a9c5cc85 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -394,30 +394,21 @@ default_entry: jz 1f # Did we do this already? call *%eax 1: - -/* check if it is 486 or 386. */ + /* - * XXX - this does a lot of unnecessary setup. Alignment checks don't - * apply at our cpl of 0 and the stack ought to be aligned already, and - * we don't need to preserve eflags. 
+ * Check if it is 486 */ movl $-1,X86_CPUID # -1 for no CPUID initially - movb $3,X86 # at least 386 + movb $4,X86 # at least 486 pushfl # push EFLAGS popl %eax # get EFLAGS movl %eax,%ecx # save original EFLAGS - xorl $0x240000,%eax # flip AC and ID bits in EFLAGS + xorl $0x200000,%eax # flip ID bit in EFLAGS pushl %eax # copy to EFLAGS popfl # set EFLAGS pushfl # get new EFLAGS popl %eax # put it in eax xorl %ecx,%eax # change in flags - pushl %ecx # restore original EFLAGS - popfl - testl $0x40000,%eax # check if AC bit changed - je is386 - - movb $4,X86 # at least 486 testl $0x200000,%eax # check if ID bit changed je is486 @@ -445,10 +436,7 @@ default_entry: movl %edx,X86_CAPABILITY is486: movl $0x50022,%ecx # set AM, WP, NE and MP - jmp 2f - -is386: movl $2,%ecx # set MP -2: movl %cr0,%eax + movl %cr0,%eax andl $0x80000011,%eax # Save PG,PE,ET orl %ecx,%eax movl %eax,%cr0 -- cgit v1.1 From 9efb58de919efa8312861d454be014094f6f0ffc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 11 Feb 2013 15:22:16 +0100 Subject: x86: Detect CPUID support early at boot We detect CPUID function support on each CPU and save it for later use, obviating the need to play the toggle EFLAGS.ID game every time. C code is looking at ->cpuid_level anyway. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1360592538-10643-3-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 50 +++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index a9c5cc85..e3725a0 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -307,30 +307,39 @@ default_entry: movl %eax,%cr0 /* - * New page tables may be in 4Mbyte page mode and may - * be using the global pages. + * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave + * bits like NT set. This would confuse the debugger if this code is traced. So + * initialize them properly now before switching to protected mode. That means + * DF in particular (even though we have cleared it earlier after copying the + * command line) because GCC expects it. + */ + pushl $0 + popfl + +/* + * New page tables may be in 4Mbyte page mode and may be using the global pages. * - * NOTE! If we are on a 486 we may have no cr4 at all! - * Specifically, cr4 exists if and only if CPUID exists - * and has flags other than the FPU flag set. + * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists + * if and only if CPUID exists and has flags other than the FPU flag set. */ + movl $-1,pa(X86_CPUID) # preset CPUID level movl $X86_EFLAGS_ID,%ecx pushl %ecx - popfl - pushfl - popl %eax - pushl $0 - popfl + popfl # set EFLAGS=ID pushfl - popl %edx - xorl %edx,%eax - testl %ecx,%eax - jz 6f # No ID flag = no CPUID = no CR4 + popl %eax # get EFLAGS + testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remained set? + jz 6f # hw disallowed setting of ID bit + # which means no CPUID and no CR4 + + xorl %eax,%eax + cpuid + movl %eax,pa(X86_CPUID) # save largest std CPUID function movl $1,%eax cpuid - andl $~1,%edx # Ignore CPUID.FPU - jz 6f # No flags or only CPUID.FPU = no CR4 + andl $~1,%edx # Ignore CPUID.FPU + jz 6f # No flags or only CPUID.FPU = no CR4 movl pa(mmu_cr4_features),%eax movl %eax,%cr4 @@ -378,14 +387,6 @@ default_entry: addl $__PAGE_OFFSET, %esp /* - * Initialize eflags. Some BIOS's leave bits like NT set. 
This would - * confuse the debugger if this code is traced. - * XXX - best to initialize before switching to protected mode. - */ - pushl $0 - popfl - -/* * start system 32-bit setup. We need to re-do some of the things done * in 16-bit mode for the "real" operations. */ @@ -461,7 +462,6 @@ is486: movl $0x50022,%ecx # set AM, WP, NE and MP xorl %eax,%eax # Clear LDT lldt %ax - cld # gcc2 wants the direction flag cleared at all times pushl $0 # fake return address for unwinder jmp *(initial_code) -- cgit v1.1 From c3a22a26d07d928e2b74b58e2f9d2436958620f0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 11 Feb 2013 15:22:17 +0100 Subject: x86, head_32: Remove second CPUID detection from default_entry We do that once earlier now and cache it into new_cpu_data.cpuid_level so no need for the EFLAGS.ID toggling dance anymore. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1360592538-10643-4-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index e3725a0..2e8532e 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -399,18 +399,7 @@ default_entry: /* * Check if it is 486 */ - movl $-1,X86_CPUID # -1 for no CPUID initially - movb $4,X86 # at least 486 - pushfl # push EFLAGS - popl %eax # get EFLAGS - movl %eax,%ecx # save original EFLAGS - xorl $0x200000,%eax # flip ID bit in EFLAGS - pushl %eax # copy to EFLAGS - popfl # set EFLAGS - pushfl # get new EFLAGS - popl %eax # put it in eax - xorl %ecx,%eax # change in flags - testl $0x200000,%eax # check if ID bit changed + cmpl $-1,X86_CPUID je is486 /* get vendor info */ @@ -436,7 +425,9 @@ default_entry: movb %cl,X86_MASK movl %edx,X86_CAPABILITY -is486: movl $0x50022,%ecx # set AM, WP, NE and MP +is486: + movb $4,X86 + movl $0x50022,%ecx # set AM, WP, NE and MP movl %cr0,%eax andl $0x80000011,%eax # Save PG,PE,ET orl %ecx,%eax -- cgit v1.1 From 5e2a044daf0c6f897eb69de931e3b29020e874a9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 11 Feb 2013 15:22:18 +0100 Subject: x86, head_32: Give the 6 label a real name Jumping here we are about to enable paging so rename the label accordingly. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1360592538-10643-5-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 2e8532e..3c3f58a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -329,7 +329,7 @@ default_entry: pushfl popl %eax # get EFLAGS testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remained set? 
- jz 6f # hw disallowed setting of ID bit + jz enable_paging # hw disallowed setting of ID bit # which means no CPUID and no CR4 xorl %eax,%eax @@ -339,13 +339,13 @@ default_entry: movl $1,%eax cpuid andl $~1,%edx # Ignore CPUID.FPU - jz 6f # No flags or only CPUID.FPU = no CR4 + jz enable_paging # No flags or only CPUID.FPU = no CR4 movl pa(mmu_cr4_features),%eax movl %eax,%cr4 testb $X86_CR4_PAE, %al # check if PAE is enabled - jz 6f + jz enable_paging /* Check if extended functions are implemented */ movl $0x80000000, %eax @@ -353,7 +353,7 @@ default_entry: /* Value must be in the range 0x80000001 to 0x8000ffff */ subl $0x80000001, %eax cmpl $(0x8000ffff-0x80000001), %eax - ja 6f + ja enable_paging /* Clear bogus XD_DISABLE bits */ call verify_cpu @@ -362,7 +362,7 @@ default_entry: cpuid /* Execute Disable bit supported? */ btl $(X86_FEATURE_NX & 31), %edx - jnc 6f + jnc enable_paging /* Setup EFER (Extended Feature Enable Register) */ movl $MSR_EFER, %ecx @@ -372,7 +372,7 @@ default_entry: /* Make changes effective */ wrmsr -6: +enable_paging: /* * Enable paging -- cgit v1.1 From 32068f6527b8f1822a30671dedaf59c567325026 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Sun, 3 Feb 2013 17:22:37 -0800 Subject: x86: Hyper-V: register clocksource only if it's advertised Enable hyperv_clocksource only if it's advertised as a feature. XenServer 6 returns the signature which is checked in ms_hyperv_platform(), but it does not offer all features. Currently the clocksource is enabled unconditionally in ms_hyperv_init_platform(), and the result is a hanging guest. Hyper-V spec Bit 1 indicates the availability of Partition Reference Counter. Register the clocksource only if this bit is set. The guest in question prints this in dmesg: [ 0.000000] Hypervisor detected: Microsoft HyperV [ 0.000000] HyperV: features 0x70, hints 0x0 This bug can be reproduced easily by setting 'viridian=1' in a HVM domU .cfg file. A workaround without this patch is to boot the HVM guest with 'clocksource=jiffies'. Signed-off-by: Olaf Hering Link: http://lkml.kernel.org/r/1359940959-32168-1-git-send-email-kys@microsoft.com Signed-off-by: K. Y. Srinivasan Cc: Cc: Greg KH Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/mshyperv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 0a630dd..646d192 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -68,7 +68,8 @@ static void __init ms_hyperv_init_platform(void) printk(KERN_INFO "HyperV: features 0x%x, hints 0x%x\n", ms_hyperv.features, ms_hyperv.hints); - clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); + if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) + clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { -- cgit v1.1 From db34bbb767bdfa1ebed7214b876fe01c5b7ee457 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 3 Feb 2013 17:22:38 -0800 Subject: X86: Add a check to catch Xen emulation of Hyper-V Xen emulates Hyper-V to host enlightened Windows. Looks like this emulation may be turned on by default even for Linux guests. Check and fail Hyper-V detection if we are on Xen. [ hpa: the problem here is that Xen doesn't emulate Hyper-V well enough, and if the Xen support isn't compiled in, we end up stumbling over the Hyper-V emulation and try to activate it -- and it fails. ] Signed-off-by: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1359940959-32168-2-git-send-email-kys@microsoft.com Signed-off-by: H. Peter Anvin
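The xen_cpuid_base() test added below scans the hypervisor CPUID leaf range for Xen's signature. A user-space approximation (function name invented; the leaf spacing and "XenVMMXenVMM" signature follow the Xen ABI, but this is a sketch, not the kernel's implementation):

	#include <cpuid.h>
	#include <stdint.h>
	#include <string.h>

	static uint32_t probe_xen_cpuid_base(void)
	{
		uint32_t base, eax, sig[3];

		/* Xen may expose its leaves at 0x40000000 + N*0x100 when
		 * another interface (such as the Hyper-V shim) occupies
		 * the first slot, so scan the whole window. */
		for (base = 0x40000000; base < 0x40010000; base += 0x100) {
			__cpuid(base, eax, sig[0], sig[1], sig[2]);
			if (!memcmp("XenVMMXenVMM", sig, 12) &&
			    eax - base >= 2)
				return base;	/* running on Xen */
		}
		return 0;			/* not Xen */
	}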
--- arch/x86/kernel/cpu/mshyperv.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 646d192..4dab317 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -30,6 +30,13 @@ static bool __init ms_hyperv_platform(void) if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) return false; + /* + * Xen emulates Hyper-V to support enlightened Windows. + * Check to see first if we are on a Xen Hypervisor. + */ + if (xen_cpuid_base()) + return false; + cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS, &eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]); -- cgit v1.1 From bc2b0331e077f576369a2b6c75d15ed4de4ef91f Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 3 Feb 2013 17:22:39 -0800 Subject: X86: Handle Hyper-V vmbus interrupts as special hypervisor interrupts Starting with win8, vmbus interrupts can be delivered on any VCPU in the guest and furthermore can be concurrently active on multiple VCPUs. Support this interrupt delivery model by setting up a separate IDT entry for Hyper-V vmbus interrupts. I would like to thank Jan Beulich and Thomas Gleixner, for their help. In this version of the patch, based on the feedback, I have merged the IDT vector for Xen and Hyper-V and made the necessary adjustments. Furthermore, based on Jan's feedback I have added the necessary compilation switches. Signed-off-by: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1359940959-32168-3-git-send-email-kys@microsoft.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq_vectors.h | 4 ++-- arch/x86/include/asm/mshyperv.h | 4 ++++ arch/x86/kernel/cpu/mshyperv.c | 44 ++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/entry_32.S | 9 +++++++- arch/x86/kernel/entry_64.S | 7 +++++- 5 files changed, 64 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 1508e51..aac5fa6 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -109,8 +109,8 @@ #define UV_BAU_MESSAGE 0xf5 -/* Xen vector callback to receive events in a HVM domain */ -#define XEN_HVM_EVTCHN_CALLBACK 0xf3 +/* Vector on which hypervisor callbacks will be delivered */ +#define HYPERVISOR_CALLBACK_VECTOR 0xf3 /* * Local APIC timer IRQ vector is on a different priority level, diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 79ce568..c2934be 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -11,4 +11,8 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; +void hyperv_callback_vector(void); +void hyperv_vector_handler(struct pt_regs *regs); +void hv_register_vmbus_handler(int irq, irq_handler_t handler); + #endif diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4dab317..a7d26d8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -14,10 +14,15 @@ #include #include #include +#include +#include #include #include #include #include +#include +#include +#include struct ms_hyperv_info ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); @@ -77,6 +82,12 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); +#if
IS_ENABLED(CONFIG_HYPERV) + /* + * Setup the IDT for hypervisor callback. + */ + alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); +#endif } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { @@ -85,3 +96,36 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init_platform = ms_hyperv_init_platform, }; EXPORT_SYMBOL(x86_hyper_ms_hyperv); + +#if IS_ENABLED(CONFIG_HYPERV) +static int vmbus_irq = -1; +static irq_handler_t vmbus_isr; + +void hv_register_vmbus_handler(int irq, irq_handler_t handler) +{ + vmbus_irq = irq; + vmbus_isr = handler; +} + +void hyperv_vector_handler(struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; + + irq_enter(); + exit_idle(); + + desc = irq_to_desc(vmbus_irq); + + if (desc) + generic_handle_irq_desc(vmbus_irq, desc); + + irq_exit(); + set_irq_regs(old_regs); +} +#else +void hv_register_vmbus_handler(int irq, irq_handler_t handler) +{ +} +#endif +EXPORT_SYMBOL_GPL(hv_register_vmbus_handler); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6ed91d9..8831176 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1091,11 +1091,18 @@ ENTRY(xen_failsafe_callback) _ASM_EXTABLE(4b,9b) ENDPROC(xen_failsafe_callback) -BUILD_INTERRUPT3(xen_hvm_callback_vector, XEN_HVM_EVTCHN_CALLBACK, +BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR, xen_evtchn_do_upcall) #endif /* CONFIG_XEN */ +#if IS_ENABLED(CONFIG_HYPERV) + +BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR, + hyperv_vector_handler) + +#endif /* CONFIG_HYPERV */ + #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index cb3c591..048f224 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1454,11 +1454,16 @@ ENTRY(xen_failsafe_callback) CFI_ENDPROC END(xen_failsafe_callback) -apicinterrupt XEN_HVM_EVTCHN_CALLBACK \ +apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ xen_hvm_callback_vector xen_evtchn_do_upcall #endif /* CONFIG_XEN */ +#if IS_ENABLED(CONFIG_HYPERV) +apicinterrupt HYPERVISOR_CALLBACK_VECTOR \ + hyperv_callback_vector hyperv_vector_handler +#endif /* CONFIG_HYPERV */ + /* * Some functions should be protected against kprobes */ -- cgit v1.1 From 0ee364eb316348ddf3e0dfcd986f5f13f528f821 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 11 Feb 2013 14:52:36 +0000 Subject: x86/mm: Check if PUD is large when validating a kernel address A user reported the following oops when a backup process reads /proc/kcore: BUG: unable to handle kernel paging request at ffffbb00ff33b000 IP: [] kern_addr_valid+0xbe/0x110 [...] Call Trace: [] read_kcore+0x17a/0x370 [] proc_reg_read+0x77/0xc0 [] vfs_read+0xc7/0x130 [] sys_read+0x53/0xa0 [] system_call_fastpath+0x16/0x1b Investigation determined that the bug triggered when reading system RAM at the 4G mark. On this system, that was the first address using 1G pages for the virt->phys direct mapping so the PUD is pointing to a physical address, not a PMD page. The problem is that the page table walker in kern_addr_valid() is not checking pud_large() and treats the physical address as if it was a PMD. If it happens to look like pmd_none then it'll silently fail, probably returning zeros instead of real data. If the data happens to look like a present PMD though, it will be walked resulting in the oops above. This patch adds the necessary pud_large() check. 
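For illustration, the essence of the fix in isolation (a sketch only; the authoritative hunks are in the diff below):

	if (pud_none(*pud))
		return 0;
	/* A large (1G) PUD maps physical memory directly; there is no PMD
	 * page beneath it to descend into, so validate the PFN instead. */
	if (pud_large(*pud))
		return pfn_valid(pud_pfn(*pud));
	pmd = pmd_offset(pud, addr);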
Unfortunately the problem was not readily reproducible, and the reporter is now running the backup program without accessing /proc/kcore, so the patch has not been validated, but I think it makes sense. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel Reviewed-by: Michal Hocko Acked-by: Johannes Weiner Cc: stable@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20130211145236.GX21389@suse.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable.h | 5 +++++ arch/x86/mm/init_64.c | 3 +++ 2 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5199db2..1c1a955 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; } +static inline unsigned long pud_pfn(pud_t pud) +{ + return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 2ead3c8..75c9a6a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -831,6 +831,9 @@ int kern_addr_valid(unsigned long addr) if (pud_none(*pud)) return 0; + if (pud_large(*pud)) + return pfn_valid(pud_pfn(*pud)); + pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) return 0; -- cgit v1.1 From f583c29b7913fa32b0b1b7f43038d6a7d9f71b6f Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 13 Feb 2013 17:50:39 +0200 Subject: x86 emulator: fix parity calculation for AAD instruction Reported-by: Paolo Bonzini Suggested-by: Paolo Bonzini Reviewed-by: Paolo Bonzini Signed-off-by: Gleb Natapov --- arch/x86/kvm/emulate.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2b11318..a335cc6 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2995,14 +2995,11 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) ctxt->dst.val = (ctxt->dst.val & 0xffff0000) | al; - ctxt->eflags &= ~(X86_EFLAGS_PF | X86_EFLAGS_SF | X86_EFLAGS_ZF); - - if (!al) - ctxt->eflags |= X86_EFLAGS_ZF; - if (!(al & 1)) - ctxt->eflags |= X86_EFLAGS_PF; - if (al & 0x80) - ctxt->eflags |= X86_EFLAGS_SF; + /* Set PF, ZF, SF */ + ctxt->src.type = OP_IMM; + ctxt->src.val = 0; + ctxt->src.bytes = 1; + fastop(ctxt, em_or); return X86EMUL_CONTINUE; } -- cgit v1.1 From 13d2b4d11d69a92574a55bfd985cfb0ca77aebdc Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 24 Jan 2013 13:11:10 +0000 Subject: x86/xen: don't assume %ds is usable in xen_iret for 32-bit PVOPS.
This fixes CVE-2013-0228 / XSA-42 Drew Jones, while working on CVE-2013-0190, found that an unprivileged guest user in a 32-bit PV guest can crash the guest with a panic like this: ------------- general protection fault: 0000 [#1] SMP last sysfs file: /sys/devices/vbd-51712/block/xvda/dev Modules linked in: sunrpc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables ipv6 xen_netfront ext4 mbcache jbd2 xen_blkfront dm_mirror dm_region_hash dm_log dm_mod [last unloaded: scsi_wait_scan] Pid: 1250, comm: r Not tainted 2.6.32-356.el6.i686 #1 EIP: 0061:[] EFLAGS: 00010086 CPU: 0 EIP is at xen_iret+0x12/0x2b EAX: eb8d0000 EBX: 00000001 ECX: 08049860 EDX: 00000010 ESI: 00000000 EDI: 003d0f00 EBP: b77f8388 ESP: eb8d1fe0 DS: 0000 ES: 007b FS: 0000 GS: 00e0 SS: 0069 Process r (pid: 1250, ti=eb8d0000 task=c2953550 task.ti=eb8d0000) Stack: 00000000 0027f416 00000073 00000206 b77f8364 0000007b 00000000 00000000 Call Trace: Code: c3 8b 44 24 18 81 4c 24 38 00 02 00 00 8d 64 24 30 e9 03 00 00 00 8d 76 00 f7 44 24 08 00 00 02 80 75 33 50 b8 00 e0 ff ff 21 e0 <8b> 40 10 8b 04 85 a0 f6 ab c0 8b 80 0c b0 b3 c0 f6 44 24 0d 02 EIP: [] xen_iret+0x12/0x2b SS:ESP 0069:eb8d1fe0 general protection fault: 0000 [#2] ---[ end trace ab0d29a492dcd330 ]--- Kernel panic - not syncing: Fatal exception Pid: 1250, comm: r Tainted: G D --------------- 2.6.32-356.el6.i686 #1 Call Trace: [] ? panic+0x6e/0x122 [] ? oops_end+0xbc/0xd0 [] ? do_general_protection+0x0/0x210 [] ? error_code+0x73/ ------------- Petr says: " I've analysed the bug and I think that xen_iret() cannot cope with mangled DS, in this case zeroed out (null selector/descriptor) by either xen_failsafe_callback() or RESTORE_REGS because the corresponding LDT entry was invalidated by the reproducer. " Jan took a look at the preliminary patch and came up with a fix that solves this problem: "This code gets called after all registers other than those handled by IRET got already restored, hence a null selector in %ds or a non-null one that got loaded from a code or read-only data descriptor would cause a kernel mode fault (with the potential of crashing the kernel as a whole, if panic_on_oops is set)." The way to fix this is to realize that we can only rely on the registers that IRET restores. The two that are guaranteed are %cs and %ss, as they are always fixed GDT selectors. They are also inaccessible from user mode, so they cannot be altered. This is the approach taken in this patch. An alternative option suggested by Jan would be to rely on the subtle realization that %ebp- or %esp-relative references use the %ss segment. In that case we could switch from using %eax to %ebp and would not need the %ss overrides. That would also require one extra instruction to compensate for the one place where the register is used as a scaled index. However, Andrew pointed out that this is too subtle, and if further work were done in this code path it could escape folks' attention and lead to accidents.
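To make the register constraint concrete, here is an illustrative fragment (AT&T syntax; a sketch of the idea, not the exact kernel code):

	movl xen_vcpu, %eax        # implicitly %ds:xen_vcpu -> faults if %ds holds a null selector
	movl %ss:xen_vcpu, %eax    # safe: IRET guarantees a fixed, kernel-controlled %ss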
Reviewed-by: Petr Matousek Reported-by: Petr Matousek Reviewed-by: Andrew Cooper Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/xen-asm_32.S | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S index f9643fc..33ca6e4 100644 --- a/arch/x86/xen/xen-asm_32.S +++ b/arch/x86/xen/xen-asm_32.S @@ -89,11 +89,11 @@ ENTRY(xen_iret) */ #ifdef CONFIG_SMP GET_THREAD_INFO(%eax) - movl TI_cpu(%eax), %eax - movl __per_cpu_offset(,%eax,4), %eax - mov xen_vcpu(%eax), %eax + movl %ss:TI_cpu(%eax), %eax + movl %ss:__per_cpu_offset(,%eax,4), %eax + mov %ss:xen_vcpu(%eax), %eax #else - movl xen_vcpu, %eax + movl %ss:xen_vcpu, %eax #endif /* check IF state we're restoring */ @@ -106,11 +106,11 @@ ENTRY(xen_iret) * resuming the code, so we don't have to be worried about * being preempted to another CPU. */ - setz XEN_vcpu_info_mask(%eax) + setz %ss:XEN_vcpu_info_mask(%eax) xen_iret_start_crit: /* check for unmasked and pending */ - cmpw $0x0001, XEN_vcpu_info_pending(%eax) + cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax) /* * If there's something pending, mask events again so we can @@ -118,7 +118,7 @@ xen_iret_start_crit: * touch XEN_vcpu_info_mask. */ jne 1f - movb $1, XEN_vcpu_info_mask(%eax) + movb $1, %ss:XEN_vcpu_info_mask(%eax) 1: popl %eax -- cgit v1.1 From 1ed51011af7450991780f9a7fd916554be19d2a3 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sun, 10 Feb 2013 17:19:24 -0500 Subject: tools/power turbostat: display SMI count by default The SMI counter is popular -- so display it by default rather than requiring an option. What the heck, we've blown the 80 column budget on many systems already... Note that the value displayed is the delta during the measurement interval. The absolute value of the counter can still be seen with the generic 32-bit MSR option, i.e. -m 0x34 Signed-off-by: Len Brown --- arch/x86/include/uapi/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 7bdaf7c..8d013f5 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -274,6 +274,7 @@ #define MSR_IA32_PLATFORM_ID 0x00000017 #define MSR_IA32_EBL_CR_POWERON 0x0000002a #define MSR_EBC_FREQUENCY_ID 0x0000002c +#define MSR_SMI_COUNT 0x00000034 #define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_IA32_TSC_ADJUST 0x0000003b -- cgit v1.1 From 1de63d60cd5b0d33a812efa455d5933bf1564a51 Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Thu, 14 Feb 2013 09:12:52 +0900 Subject: efi: Clear EFI_RUNTIME_SERVICES rather than EFI_BOOT by "noefi" boot parameter There was a serious problem with samsung-laptop: its platform driver is designed to run under BIOS, and running it under EFI can brick the machine or cause Machine Check Exceptions. Discussion about this problem: https://bugs.launchpad.net/ubuntu-cdimage/+bug/1040557 https://bugzilla.kernel.org/show_bug.cgi?id=47121 The patches to fix this problem: efi: Make 'efi_enabled' a function to query EFI facilities 83e68189745ad931c2afd45d8ee3303929233e7f samsung-laptop: Disable on EFI hardware e0094244e41c4d0c7ad69920681972fc45d8ce34 Unfortunately this problem comes back again if users specify the "noefi" option. This parameter clears EFI_BOOT, so that driver continues to run even when running under EFI. According to the documentation, this parameter should clear EFI_RUNTIME_SERVICES instead.
Documentation/kernel-parameters.txt: =============================================================================== ... noefi [X86] Disable EFI runtime services support. ... =============================================================================== Documentation/x86/x86_64/uefi.txt: =============================================================================== ... - If some or all EFI runtime services don't work, you can try following kernel command line parameters to turn off some or all EFI runtime services. noefi turn off all EFI runtime services ... =============================================================================== Signed-off-by: Satoru Takeuchi Link: http://lkml.kernel.org/r/511C2C04.2070108@jp.fujitsu.com Cc: Matt Fleming Cc: Signed-off-by: H. Peter Anvin --- arch/x86/platform/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 77cf009..928bf83 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -87,7 +87,7 @@ EXPORT_SYMBOL(efi_enabled); static int __init setup_noefi(char *arg) { - clear_bit(EFI_BOOT, &x86_efi_facility); + clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); return 0; } early_param("noefi", setup_noefi); -- cgit v1.1 From cbd29cb6e38af6119df2cdac0c58acf0e85c177e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 11 Feb 2013 12:19:28 +0100 Subject: KVM: nVMX: Remove redundant get_vmcs12 from nested_vmx_exit_handled_msr We already pass vmcs12 as argument. Signed-off-by: Jan Kiszka Signed-off-by: Gleb Natapov --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c794478..6667042 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5920,7 +5920,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX]; gpa_t bitmap; - if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS)) + if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) return 1; /* -- cgit v1.1 From d64008a8f30e0b381b292788ec6f3ee509b3bb40 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 25 Nov 2012 23:12:10 -0500 Subject: burying unused conditionals __ARCH_WANT_SYS_RT_SIGACTION, __ARCH_WANT_SYS_RT_SIGSUSPEND, __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND, __ARCH_WANT_COMPAT_SYS_SCHED_RR_GET_INTERVAL - not used anymore CONFIG_GENERIC_{SIGALTSTACK,COMPAT_RT_SIG{ACTION,QUEUEINFO,PENDING,PROCMASK}} - can be assumed always set. 
--- arch/x86/Kconfig | 4 ---- arch/x86/include/asm/unistd.h | 2 -- arch/x86/um/Kconfig | 1 - 3 files changed, 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 87d0917..49fb44e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -113,10 +113,6 @@ config X86 select MODULES_USE_ELF_REL if X86_32 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 - select GENERIC_SIGALTSTACK - select GENERIC_COMPAT_RT_SIGACTION - select GENERIC_COMPAT_RT_SIGQUEUEINFO - select GENERIC_COMPAT_RT_SIGPENDING select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION select OLD_SIGACTION if X86_32 select COMPAT_OLD_SIGACTION if IA32_EMULATION diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index a0790e0..3d5df1c 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h @@ -38,8 +38,6 @@ # define __ARCH_WANT_SYS_OLD_GETRLIMIT # define __ARCH_WANT_SYS_OLD_UNAME # define __ARCH_WANT_SYS_PAUSE -# define __ARCH_WANT_SYS_RT_SIGACTION -# define __ARCH_WANT_SYS_RT_SIGSUSPEND # define __ARCH_WANT_SYS_SGETMASK # define __ARCH_WANT_SYS_SIGNAL # define __ARCH_WANT_SYS_SIGPENDING diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index cf0f273..fafc941 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -13,7 +13,6 @@ endmenu config UML_X86 def_bool y select GENERIC_FIND_FIRST_BIT - select GENERIC_SIGALTSTACK config 64BIT bool "64-bit kernel" if SUBARCH = "x86" -- cgit v1.1 From 235b80226b986dabcbba844968f7807866bd0bfe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 9 Nov 2012 23:51:47 -0500 Subject: x86: convert to ksignal Signed-off-by: Al Viro --- arch/x86/ia32/ia32_signal.c | 37 ++++++------ arch/x86/include/asm/fpu-internal.h | 5 +- arch/x86/kernel/signal.c | 117 +++++++++++++++++------------------- 3 files changed, 74 insertions(+), 85 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index b0460cd..cf1a471 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -309,7 +309,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, /* * Determine which stack to use.. */ -static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, +static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { @@ -319,16 +319,13 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, sp = regs->sp; /* This is the X/Open sanctioned signal stack switching. */ - if (ka->sa.sa_flags & SA_ONSTACK) { - if (sas_ss_flags(sp) == 0) - sp = current->sas_ss_sp + current->sas_ss_size; - } - + if (ksig->ka.sa.sa_flags & SA_ONSTACK) + sp = sigsp(sp, ksig); /* This is the legacy signal stack switching. 
*/ else if ((regs->ss & 0xffff) != __USER32_DS && - !(ka->sa.sa_flags & SA_RESTORER) && - ka->sa.sa_restorer) - sp = (unsigned long) ka->sa.sa_restorer; + !(ksig->ka.sa.sa_flags & SA_RESTORER) && + ksig->ka.sa.sa_restorer) + sp = (unsigned long) ksig->ka.sa.sa_restorer; if (used_math()) { unsigned long fx_aligned, math_size; @@ -347,7 +344,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, return (void __user *) sp; } -int ia32_setup_frame(int sig, struct k_sigaction *ka, +int ia32_setup_frame(int sig, struct ksignal *ksig, compat_sigset_t *set, struct pt_regs *regs) { struct sigframe_ia32 __user *frame; @@ -366,7 +363,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, 0x80cd, /* int $0x80 */ }; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; @@ -383,8 +380,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, return -EFAULT; } - if (ka->sa.sa_flags & SA_RESTORER) { - restorer = ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) { + restorer = ksig->ka.sa.sa_restorer; } else { /* Return stub is in 32bit vsyscall page */ if (current->mm->context.vdso) @@ -409,7 +406,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; + regs->ip = (unsigned long) ksig->ka.sa.sa_handler; /* Make -mregparm=3 work */ regs->ax = sig; @@ -425,7 +422,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, return 0; } -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +int ia32_setup_rt_frame(int sig, struct ksignal *ksig, compat_sigset_t *set, struct pt_regs *regs) { struct rt_sigframe_ia32 __user *frame; @@ -446,7 +443,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 0, }; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(ksig, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; @@ -464,8 +461,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(0, &frame->uc.uc_link); err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) + restorer = ksig->ka.sa.sa_restorer; else restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); @@ -478,7 +475,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(*((u64 *)&code), (u64 __user *)frame->retcode); } put_user_catch(err); - err |= copy_siginfo_to_user32(&frame->info, info); + err |= copy_siginfo_to_user32(&frame->info, &ksig->info); err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -488,7 +485,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; + regs->ip = (unsigned long) ksig->ka.sa.sa_handler; /* Make -mregparm=3 work */ regs->ax = sig; diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 41ab26e..e25cc33 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -26,9 +26,10 @@ #ifdef 
CONFIG_X86_64 # include # include -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +struct ksignal; +int ia32_setup_rt_frame(int sig, struct ksignal *ksig, compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, +int ia32_setup_frame(int sig, struct ksignal *ksig, compat_sigset_t *set, struct pt_regs *regs); #else # define user_i387_ia32_struct user_i387_struct diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index d5b1f8a..6956299 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -278,7 +278,7 @@ static const struct { }; static int -__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, +__setup_frame(int sig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct sigframe __user *frame; @@ -286,7 +286,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, int err = 0; void __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; @@ -307,8 +307,8 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, restorer = VDSO32_SYMBOL(current->mm->context.vdso, sigreturn); else restorer = &frame->retcode; - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) + restorer = ksig->ka.sa.sa_restorer; /* Set up to return from userspace. */ err |= __put_user(restorer, &frame->pretcode); @@ -327,7 +327,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, /* Set up registers for signal handler */ regs->sp = (unsigned long)frame; - regs->ip = (unsigned long)ka->sa.sa_handler; + regs->ip = (unsigned long)ksig->ka.sa.sa_handler; regs->ax = (unsigned long)sig; regs->dx = 0; regs->cx = 0; @@ -340,7 +340,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, return 0; } -static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int __setup_rt_frame(int sig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; @@ -348,7 +348,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, int err = 0; void __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; @@ -368,8 +368,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. 
*/ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); - if (ka->sa.sa_flags & SA_RESTORER) - restorer = ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) + restorer = ksig->ka.sa.sa_restorer; put_user_ex(restorer, &frame->pretcode); /* @@ -382,7 +382,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); } put_user_catch(err); - err |= copy_siginfo_to_user(&frame->info, info); + err |= copy_siginfo_to_user(&frame->info, &ksig->info); err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, regs, set->sig[0]); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -392,7 +392,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->sp = (unsigned long)frame; - regs->ip = (unsigned long)ka->sa.sa_handler; + regs->ip = (unsigned long)ksig->ka.sa.sa_handler; regs->ax = (unsigned long)sig; regs->dx = (unsigned long)&frame->info; regs->cx = (unsigned long)&frame->uc; @@ -405,20 +405,20 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return 0; } #else /* !CONFIG_X86_32 */ -static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int __setup_rt_frame(int sig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; void __user *fp = NULL; int err = 0; - frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); + frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - if (ka->sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user(&frame->info, info)) + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + if (copy_siginfo_to_user(&frame->info, &ksig->info)) return -EFAULT; } @@ -434,8 +434,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up to return from userspace. If provided, use a stub already in userspace. */ /* x86-64 should always use SA_RESTORER. */ - if (ka->sa.sa_flags & SA_RESTORER) { - put_user_ex(ka->sa.sa_restorer, &frame->pretcode); + if (ksig->ka.sa.sa_flags & SA_RESTORER) { + put_user_ex(ksig->ka.sa.sa_restorer, &frame->pretcode); } else { /* could use a vstub here */ err |= -EFAULT; @@ -457,7 +457,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, next argument after the signal number on the stack. 
*/ regs->si = (unsigned long)&frame->info; regs->dx = (unsigned long)&frame->uc; - regs->ip = (unsigned long) ka->sa.sa_handler; + regs->ip = (unsigned long) ksig->ka.sa.sa_handler; regs->sp = (unsigned long)frame; @@ -469,8 +469,8 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, } #endif /* CONFIG_X86_32 */ -static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, - siginfo_t *info, compat_sigset_t *set, +static int x32_setup_rt_frame(struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs) { #ifdef CONFIG_X86_X32_ABI @@ -479,13 +479,13 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, int err = 0; void __user *fpstate = NULL; - frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); + frame = get_sigframe(&ksig->ka, regs, sizeof(*frame), &fpstate); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return -EFAULT; - if (ka->sa.sa_flags & SA_SIGINFO) { - if (copy_siginfo_to_user32(&frame->info, info)) + if (ksig->ka.sa.sa_flags & SA_SIGINFO) { + if (copy_siginfo_to_user32(&frame->info, &ksig->info)) return -EFAULT; } @@ -499,8 +499,8 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); - if (ka->sa.sa_flags & SA_RESTORER) { - restorer = ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) { + restorer = ksig->ka.sa.sa_restorer; } else { /* could use a vstub here */ restorer = NULL; @@ -518,10 +518,10 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka, /* Set up registers for signal handler */ regs->sp = (unsigned long) frame; - regs->ip = (unsigned long) ka->sa.sa_handler; + regs->ip = (unsigned long) ksig->ka.sa.sa_handler; /* We use the x32 calling convention here... */ - regs->di = sig; + regs->di = ksig->sig; regs->si = (unsigned long) &frame->info; regs->dx = (unsigned long) &frame->uc; @@ -611,30 +611,29 @@ static int signr_convert(int sig) } static int -setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - struct pt_regs *regs) +setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) { - int usig = signr_convert(sig); + int usig = signr_convert(ksig->sig); sigset_t *set = sigmask_to_save(); compat_sigset_t *cset = (compat_sigset_t *) set; /* Set up the stack frame */ if (is_ia32_frame()) { - if (ka->sa.sa_flags & SA_SIGINFO) - return ia32_setup_rt_frame(usig, ka, info, cset, regs); + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + return ia32_setup_rt_frame(usig, ksig, cset, regs); else - return ia32_setup_frame(usig, ka, cset, regs); + return ia32_setup_frame(usig, ksig, cset, regs); } else if (is_x32_frame()) { - return x32_setup_rt_frame(usig, ka, info, cset, regs); + return x32_setup_rt_frame(ksig, cset, regs); } else { - return __setup_rt_frame(sig, ka, info, set, regs); + return __setup_rt_frame(ksig->sig, ksig, set, regs); } } static void -handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, - struct pt_regs *regs) +handle_signal(struct ksignal *ksig, struct pt_regs *regs) { + bool failed; /* Are we from a system call? */ if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. 
*/ @@ -645,7 +644,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, break; case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { + if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { regs->ax = -EINTR; break; } @@ -665,26 +664,21 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, likely(test_and_clear_thread_flag(TIF_FORCED_TF))) regs->flags &= ~X86_EFLAGS_TF; - if (setup_rt_frame(sig, ka, info, regs) < 0) { - force_sigsegv(sig, current); - return; + failed = (setup_rt_frame(ksig, regs) < 0); + if (!failed) { + /* + * Clear the direction flag as per the ABI for function entry. + */ + regs->flags &= ~X86_EFLAGS_DF; + /* + * Clear TF when entering the signal handler, but + * notify any tracer that was single-stepping it. + * The tracer may want to single-step inside the + * handler too. + */ + regs->flags &= ~X86_EFLAGS_TF; } - - /* - * Clear the direction flag as per the ABI for function entry. - */ - regs->flags &= ~X86_EFLAGS_DF; - - /* - * Clear TF when entering the signal handler, but - * notify any tracer that was single-stepping it. - * The tracer may want to single-step inside the - * handler too. - */ - regs->flags &= ~X86_EFLAGS_TF; - - signal_delivered(sig, info, ka, regs, - test_thread_flag(TIF_SINGLESTEP)); + signal_setup_done(failed, ksig, test_thread_flag(TIF_SINGLESTEP)); } #ifdef CONFIG_X86_32 @@ -701,14 +695,11 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, */ static void do_signal(struct pt_regs *regs) { - struct k_sigaction ka; - siginfo_t info; - int signr; + struct ksignal ksig; - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - if (signr > 0) { + if (get_signal(&ksig)) { /* Whee! Actually deliver the signal. */ - handle_signal(signr, &info, &ka, regs); + handle_signal(&ksig, regs); return; } -- cgit v1.1 From 19348e749e9515c429f5d561d2f2c724862a4bee Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Thu, 14 Feb 2013 15:14:02 -0500 Subject: x86: ptrace.c only needs export.h and not the full module.h Commit cb57a2b4cff7edf2a4e32c0163200e9434807e0a ("x86-32: Export kernel_stack_pointer() for modules") added an include of the module.h header in conjunction with adding an EXPORT_SYMBOL_GPL of kernel_stack_pointer. But module.h should be avoided for simple exports, since it in turn includes the world. Swap the module.h for export.h instead. Cc: Jiri Kosina Signed-off-by: Paul Gortmaker Link: http://lkml.kernel.org/r/1360872842-28417-1-git-send-email-paul.gortmaker@windriver.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b629bbe..29a8120 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.1 From 95c9608478d639dcffc14ea47b31bff021a99ed1 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 14 Feb 2013 14:02:52 -0800 Subject: x86, mm: Move reserving low memory later in initialization Move the reservation of low memory, except for the 4K which actually does belong to the BIOS, later in the initialization; in particular, after we have already reserved the trampoline. The current code locates the trampoline as high as possible, so by deferring the allocation we will still be able to reserve as much memory as is possible. This allows us to run with reservelow=640k without getting a crash on system startup. 
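A rough sketch of the resulting ordering in setup_arch() (illustrative only; the hunks below are authoritative):

	setup_real_mode();              /* trampoline is still allocated as high as possible */
	trim_platform_memory_ranges();
	trim_low_memory_range();        /* only now reserve [0, reserve_low), page-aligned */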
Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/n/tip-0y9dqmmsousf69wutxwl3kkf@git.kernel.org --- arch/x86/kernel/setup.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 8354399..0aebd77 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -608,8 +608,6 @@ static __init void reserve_ibft_region(void) memblock_reserve(addr, size); } -static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; - static bool __init snb_gfx_workaround_needed(void) { #ifdef CONFIG_PCI @@ -698,8 +696,7 @@ static void __init trim_bios_range(void) * since some BIOSes are known to corrupt low memory. See the * Kconfig help text for X86_RESERVE_LOW. */ - e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE), - E820_RAM, E820_RESERVED); + e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); /* * special case: Some BIOSen report the PC BIOS @@ -711,6 +708,8 @@ static void __init trim_bios_range(void) sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); } +static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10; + static int __init parse_reservelow(char *p) { unsigned long long size; @@ -733,6 +732,11 @@ static int __init parse_reservelow(char *p) early_param("reservelow", parse_reservelow); +static void __init trim_low_memory_range(void) +{ + memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); +} + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -987,6 +991,7 @@ void __init setup_arch(char **cmdline_p) setup_real_mode(); trim_platform_memory_ranges(); + trim_low_memory_range(); init_gbpages(); -- cgit v1.1 From cb8081cb6bfbdb867d17cafaaf3509ee31140f7f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 14 Feb 2013 10:55:06 -0800 Subject: lguest: select CONFIG_TTY to build properly. Fix kconfig warning for LGUEST_GUEST config by selecting TTY: warning: (KVMTOOL_TEST_ENABLE && LGUEST_GUEST) selects VIRTIO_CONSOLE which has unmet direct dependencies (VIRTIO && TTY) Signed-off-by: Randy Dunlap Cc: Stephen Rothwell Cc: Joe Millenbach Signed-off-by: Greg Kroah-Hartman --- arch/x86/lguest/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 7872a33..29043d2 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -2,6 +2,7 @@ config LGUEST_GUEST bool "Lguest guest support" select PARAVIRT depends on X86_32 + select TTY select VIRTUALIZATION select VIRTIO select VIRTIO_CONSOLE -- cgit v1.1 From 5eb65be2d9a1f7c5e2b95aede16e7eab1cdb67e2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Feb 2013 21:29:27 -0500 Subject: Revert "xen/PVonHVM: fix compile warning in init_hvm_pv_info" This reverts commit a7be94ac8d69c037d08f0fd94b45a593f1d45176. 
Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 138e566..5fb3ec1 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1579,7 +1579,7 @@ static void __init xen_hvm_init_shared_info(void) static void __init init_hvm_pv_info(void) { - uint32_t ecx, edx, pages, msr, base; + uint32_t eax, ebx, ecx, edx, pages, msr, base; u64 pfn; base = xen_cpuid_base(); -- cgit v1.1 From e9daff24a266307943457086533041bd971d0ef9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 14 Feb 2013 21:29:31 -0500 Subject: Revert "xen PVonHVM: use E820_Reserved area for shared_info" This reverts commit 9d02b43dee0d7fb18dfb13a00915550b1a3daa9f. We are doing this because on 32-bit PVonHVM with older hypervisors (Xen 4.1) it ends up botching up the start_info. This is bad because we use it for timekeeping, and the timekeeping code loops forever, as the version field never changes. Olaf says to revert it, so let's do that. Acked-by: Olaf Hering Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 75 ++++++++++++++---------------------------------- arch/x86/xen/suspend.c | 2 +- arch/x86/xen/xen-ops.h | 2 +- 3 files changed, 24 insertions(+), 55 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5fb3ec1..e0140923 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1517,72 +1517,51 @@ asmlinkage void __init xen_start_kernel(void) #endif } -#ifdef CONFIG_XEN_PVHVM -#define HVM_SHARED_INFO_ADDR 0xFE700000UL -static struct shared_info *xen_hvm_shared_info; -static unsigned long xen_hvm_sip_phys; -static int xen_major, xen_minor; - -static void xen_hvm_connect_shared_info(unsigned long pfn) +void __ref xen_hvm_init_shared_info(void) { + int cpu; struct xen_add_to_physmap xatp; + static struct shared_info *shared_info_page = 0; + if (!shared_info_page) + shared_info_page = (struct shared_info *) + extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = pfn; + xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); -} -static void __init xen_hvm_set_shared_info(struct shared_info *sip) -{ - int cpu; - - HYPERVISOR_shared_info = sip; + HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info * page, we use it in the event channel upcall and in some pvclock * related functions. We don't need the vcpu_info placement * optimizations because we don't use any pv_mmu or pv_irq op on - * HVM. */ - for_each_online_cpu(cpu) + * HVM. + * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is + * online but xen_hvm_init_shared_info is run at resume time too and + * in that case multiple vcpus might be online. */ + for_each_online_cpu(cpu) { per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; -} - -/* Reconnect the shared_info pfn to a (new) mfn */ -void xen_hvm_resume_shared_info(void) -{ - xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); -} - -/* Xen tools prior to Xen 4 do not provide a E820_Reserved area for guest usage. * On these old tools the shared info page will be placed in E820_Ram. * Xen 4 provides a E820_Reserved area at 0xFC000000, and this code expects * that nothing is mapped up to HVM_SHARED_INFO_ADDR.
- * Xen 4.3+ provides an explicit 1MB area at HVM_SHARED_INFO_ADDR which is used - * here for the shared info page. */ -static void __init xen_hvm_init_shared_info(void) -{ - if (xen_major < 4) { - xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); - xen_hvm_sip_phys = __pa(xen_hvm_shared_info); - } else { - xen_hvm_sip_phys = HVM_SHARED_INFO_ADDR; - set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_hvm_sip_phys); - xen_hvm_shared_info = - (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); } - xen_hvm_connect_shared_info(xen_hvm_sip_phys >> PAGE_SHIFT); - xen_hvm_set_shared_info(xen_hvm_shared_info); } +#ifdef CONFIG_XEN_PVHVM static void __init init_hvm_pv_info(void) { + int major, minor; uint32_t eax, ebx, ecx, edx, pages, msr, base; u64 pfn; base = xen_cpuid_base(); + cpuid(base + 1, &eax, &ebx, &ecx, &edx); + + major = eax >> 16; + minor = eax & 0xffff; + printk(KERN_INFO "Xen version %d.%d.\n", major, minor); + cpuid(base + 2, &pages, &msr, &ecx, &edx); pfn = __pa(hypercall_page); @@ -1633,22 +1612,12 @@ static void __init xen_hvm_guest_init(void) static bool __init xen_hvm_platform(void) { - uint32_t eax, ebx, ecx, edx, base; - if (xen_pv_domain()) return false; - base = xen_cpuid_base(); - if (!base) + if (!xen_cpuid_base()) return false; - cpuid(base + 1, &eax, &ebx, &ecx, &edx); - - xen_major = eax >> 16; - xen_minor = eax & 0xffff; - - printk(KERN_INFO "Xen version %d.%d.\n", xen_major, xen_minor); - return true; } diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index ae8a00c..45329c8 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -30,7 +30,7 @@ void xen_arch_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_resume_shared_info(); + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d2e73d1..a95b417 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -40,7 +40,7 @@ void xen_enable_syscall(void); void xen_vcpu_restore(void); void xen_callback_vector(void); -void xen_hvm_resume_shared_info(void); +void xen_hvm_init_shared_info(void); void xen_unplug_emulated_devices(void); void __init xen_build_dynamic_phys_to_machine(void); -- cgit v1.1 From 36dfbbf136db0d645bacfd42ce7d9d6928ea532d Mon Sep 17 00:00:00 2001 From: Satoru Takeuchi Date: Fri, 15 Feb 2013 16:58:14 +0900 Subject: timers/x86/hpet: Use HPET_COUNTER to specify the hpet counter in vread_hpet() vread_hpet() uses "0xf0" as the offset of the hpet counter. To clarify the meaning of this code, it should use symbolic name, HPET_COUNTER, instead. Signed-off-by: Satoru Takeuchi Cc: H. Peter Anvin Cc: "H. 
Peter Anvin" Signed-off-by: Ingo Molnar --- arch/x86/vdso/vclock_gettime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 205ad32..c74436e 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -60,7 +60,7 @@ notrace static cycle_t vread_tsc(void) static notrace cycle_t vread_hpet(void) { - return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0); + return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); } #ifdef CONFIG_PARAVIRT_CLOCK -- cgit v1.1 From e259514eef764a5286873618e34c560ecb6cff13 Mon Sep 17 00:00:00 2001 From: Jacob Shin Date: Wed, 6 Feb 2013 11:26:29 -0600 Subject: perf/x86/amd: Enable northbridge performance counters on AMD family 15h On AMD family 15h processors, there are 4 new performance counters (in addition to 6 core performance counters) that can be used for counting northbridge events (i.e. DRAM accesses). Their bit fields are almost identical to the core performance counters. However, unlike the core performance counters, these MSRs are shared between multiple cores (that share the same northbridge). We will reuse the same code path as existing family 10h northbridge event constraints handler logic to enforce this sharing. Signed-off-by: Jacob Shin Acked-by: Stephane Eranian Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jacob Shin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1360171589-6381-7-git-send-email-jacob.shin@amd.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeature.h | 2 + arch/x86/include/asm/perf_event.h | 9 ++ arch/x86/include/uapi/asm/msr-index.h | 2 + arch/x86/kernel/cpu/perf_event_amd.c | 171 ++++++++++++++++++++++++++++++---- 4 files changed, 164 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 2d9075e..93fe929 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -167,6 +167,7 @@ #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ #define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -309,6 +310,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) #define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) +#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB) #define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8) #define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16) #define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU) diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 2234eaaec..57cb634 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,9 +29,14 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) +#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37 +#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \ + (0xFULL << 
AMD64_EVENTSEL_INT_CORE_SEL_SHIFT) + #define AMD64_EVENTSEL_EVENT \ (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) #define INTEL_ARCH_EVENT_MASK \ @@ -46,8 +51,12 @@ #define AMD64_RAW_EVENT_MASK \ (X86_RAW_EVENT_MASK | \ AMD64_EVENTSEL_EVENT) +#define AMD64_RAW_EVENT_MASK_NB \ + (AMD64_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK) #define AMD64_NUM_COUNTERS 4 #define AMD64_NUM_COUNTERS_CORE 6 +#define AMD64_NUM_COUNTERS_NB 4 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 433a59f..075a402 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -194,6 +194,8 @@ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 #define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_NB_PERF_CTL 0xc0010240 +#define MSR_F15H_NB_PERF_CTR 0xc0010241 /* Fam 10h MSRs */ #define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 05462f0..dfdab42 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -132,11 +132,14 @@ static u64 amd_pmu_event_map(int hw_event) return amd_perfmon_event_map[hw_event]; } +static struct event_constraint *amd_nb_event_constraint; + /* * Previously calculated offsets */ static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; +static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; /* * Legacy CPUs: @@ -144,10 +147,14 @@ static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; * * CPUs with core performance counter extensions: * 6 counters starting at 0xc0010200 each offset by 2 + * + * CPUs with north bridge performance counter extensions: + * 4 additional counters starting at 0xc0010240 each offset by 2 + * (indexed right above either one of the above core counters) */ static inline int amd_pmu_addr_offset(int index, bool eventsel) { - int offset; + int offset, first, base; if (!index) return index; @@ -160,7 +167,23 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) if (offset) return offset; - if (!cpu_has_perfctr_core) + if (amd_nb_event_constraint && + test_bit(index, amd_nb_event_constraint->idxmsk)) { + /* + * calculate the offset of NB counters with respect to + * base eventsel or perfctr + */ + + first = find_first_bit(amd_nb_event_constraint->idxmsk, + X86_PMC_IDX_MAX); + + if (eventsel) + base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel; + else + base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr; + + offset = base + ((index - first) << 1); + } else if (!cpu_has_perfctr_core) offset = index; else offset = index << 1; @@ -175,24 +198,36 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) static inline int amd_pmu_rdpmc_index(int index) { - return index; -} + int ret, first; -static int amd_pmu_hw_config(struct perf_event *event) -{ - int ret; + if (!index) + return index; - /* pass precise event sampling to ibs: */ - if (event->attr.precise_ip && get_ibs_caps()) - return -ENOENT; + ret = rdpmc_indexes[index]; - ret = x86_pmu_hw_config(event); if (ret) return ret; - if (has_branch_stack(event)) - return -EOPNOTSUPP; + if (amd_nb_event_constraint && + test_bit(index, amd_nb_event_constraint->idxmsk)) { + /* + * according to the mnual, ECX value of the NB counters is + * the index of the NB counter (0, 1, 2 or 3) plus 6 + */ + + first = 
find_first_bit(amd_nb_event_constraint->idxmsk, + X86_PMC_IDX_MAX); + ret = index - first + 6; + } else + ret = index; + + rdpmc_indexes[index] = ret; + return ret; +} + +static int amd_core_hw_config(struct perf_event *event) +{ if (event->attr.exclude_host && event->attr.exclude_guest) /* * When HO == GO == 1 the hardware treats that as GO == HO == 0 @@ -206,10 +241,33 @@ static int amd_pmu_hw_config(struct perf_event *event) else if (event->attr.exclude_guest) event->hw.config |= AMD64_EVENTSEL_HOSTONLY; - if (event->attr.type != PERF_TYPE_RAW) - return 0; + return 0; +} - event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; +/* + * NB counters do not support the following event select bits: + * Host/Guest only + * Counter mask + * Invert counter mask + * Edge detect + * OS/User mode + */ +static int amd_nb_hw_config(struct perf_event *event) +{ + /* for NB, we only allow system wide counting mode */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; + + event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | + ARCH_PERFMON_EVENTSEL_OS); + + if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB | + ARCH_PERFMON_EVENTSEL_INT)) + return -EINVAL; return 0; } @@ -227,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) return (hwc->config & 0xe0) == 0xe0; } +static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc) +{ + return amd_nb_event_constraint && amd_is_nb_event(hwc); +} + static inline int amd_has_nb(struct cpu_hw_events *cpuc) { struct amd_nb *nb = cpuc->amd_nb; @@ -234,6 +297,30 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc) return nb && nb->nb_id != -1; } +static int amd_pmu_hw_config(struct perf_event *event) +{ + int ret; + + /* pass precise event sampling to ibs: */ + if (event->attr.precise_ip && get_ibs_caps()) + return -ENOENT; + + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + if (event->attr.type == PERF_TYPE_RAW) + event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; + + if (amd_is_perfctr_nb_event(&event->hw)) + return amd_nb_hw_config(event); + + return amd_core_hw_config(event); +} + static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -254,6 +341,19 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, } } +static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc) +{ + int core_id = cpu_data(smp_processor_id()).cpu_core_id; + + /* deliver interrupts only to this core */ + if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) { + hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE; + hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK; + hwc->config |= (u64)(core_id) << + AMD64_EVENTSEL_INT_CORE_SEL_SHIFT; + } +} + /* * AMD64 NorthBridge events need special treatment because * counter access needs to be synchronized across all cores @@ -299,6 +399,12 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev struct perf_event *old; int idx, new = -1; + if (!c) + c = &unconstrained; + + if (cpuc->is_fake) + return c; + /* * detect if already present, if so reuse * @@ -335,6 +441,9 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev if (new == -1) return &emptyconstraint; + if (amd_is_perfctr_nb_event(hwc)) + amd_nb_interrupt_hw_config(hwc); + return 
&nb->event_constraints[new]; } @@ -434,7 +543,8 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) return &unconstrained; - return __amd_get_nb_event_constraints(cpuc, event, &unconstrained); + return __amd_get_nb_event_constraints(cpuc, event, + amd_nb_event_constraint); } static void amd_put_event_constraints(struct cpu_hw_events *cpuc, @@ -533,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); +static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0); +static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0); + static struct event_constraint * amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -598,8 +711,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* not yet implemented */ - return &emptyconstraint; + return __amd_get_nb_event_constraints(cpuc, event, + amd_nb_event_constraint); default: return &emptyconstraint; } @@ -647,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = { static int setup_event_constraints(void) { - if (boot_cpu_data.x86 >= 0x15) + if (boot_cpu_data.x86 == 0x15) x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; return 0; } @@ -677,6 +790,23 @@ static int setup_perfctr_core(void) return 0; } +static int setup_perfctr_nb(void) +{ + if (!cpu_has_perfctr_nb) + return -ENODEV; + + x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB; + + if (cpu_has_perfctr_core) + amd_nb_event_constraint = &amd_NBPMC96; + else + amd_nb_event_constraint = &amd_NBPMC74; + + printk(KERN_INFO "perf: AMD northbridge performance counters detected\n"); + + return 0; +} + __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ @@ -687,6 +817,7 @@ __init int amd_pmu_init(void) setup_event_constraints(); setup_perfctr_core(); + setup_perfctr_nb(); /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, -- cgit v1.1 From dd8af076262cc1ff85a8d5e0c5b1a4716d19fe25 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 9 Feb 2013 21:10:04 -0500 Subject: APM idle: register apm_cpu_idle via cpuidle Update APM to register its local idle routine with cpuidle. This allows us to stop exporting pm_idle to modules on x86. The Kconfig sub-option, APM_CPU_IDLE, now depends on CPU_IDLE. Compile-tested only. Signed-off-by: Len Brown Reviewed-by: Daniel Lezcano Cc: Jiri Kosina --- arch/x86/Kconfig | 1 + arch/x86/kernel/apm_32.c | 57 ++++++++++++++++++++++++++++------------------- arch/x86/kernel/process.c | 3 --- 3 files changed, 35 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 225543b..1b63586 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1912,6 +1912,7 @@ config APM_DO_ENABLE this feature. config APM_CPU_IDLE + depends on CPU_IDLE bool "Make CPU Idle calls when idle" ---help--- Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index d65464e..9f4bc6a 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -232,6 +232,7 @@ #include #include #include +#include #include #include @@ -360,13 +361,35 @@ struct apm_user { * idle percentage above which bios idle calls are done */ #ifdef CONFIG_APM_CPU_IDLE -#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012 #define DEFAULT_IDLE_THRESHOLD 95 #else #define DEFAULT_IDLE_THRESHOLD 100 #endif #define DEFAULT_IDLE_PERIOD (100 / 3) +static int apm_cpu_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index); + +static struct cpuidle_driver apm_idle_driver = { + .name = "apm_idle", + .owner = THIS_MODULE, + .en_core_tk_irqen = 1, + .states = { + { /* entry 0 is for polling */ }, + { /* entry 1 is for APM idle */ + .name = "APM", + .desc = "APM idle", + .flags = CPUIDLE_FLAG_TIME_VALID, + .exit_latency = 250, /* WAG */ + .target_residency = 500, /* WAG */ + .enter = &apm_cpu_idle + }, + }, + .state_count = 2, +}; + +static struct cpuidle_device apm_cpuidle_device; + /* * Local variables */ @@ -377,7 +400,6 @@ static struct { static int clock_slowed; static int idle_threshold __read_mostly = DEFAULT_IDLE_THRESHOLD; static int idle_period __read_mostly = DEFAULT_IDLE_PERIOD; -static int set_pm_idle; static int suspends_pending; static int standbys_pending; static int ignore_sys_suspend; @@ -884,8 +906,6 @@ static void apm_do_busy(void) #define IDLE_CALC_LIMIT (HZ * 100) #define IDLE_LEAKY_MAX 16 -static void (*original_pm_idle)(void) __read_mostly; - /** * apm_cpu_idle - cpu idling for APM capable Linux * @@ -894,7 +914,8 @@ static void (*original_pm_idle)(void) __read_mostly; * Furthermore it calls the system default idle routine. */ -static void apm_cpu_idle(void) +static int apm_cpu_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { static int use_apm_idle; /* = 0 */ static unsigned int last_jiffies; /* = 0 */ @@ -904,7 +925,6 @@ static void apm_cpu_idle(void) unsigned int jiffies_since_last_check = jiffies - last_jiffies; unsigned int bucket; - WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; @@ -950,10 +970,7 @@ recalc: break; } } - if (original_pm_idle) - original_pm_idle(); - else - default_idle(); + default_idle(); local_irq_disable(); jiffies_since_last_check = jiffies - last_jiffies; if (jiffies_since_last_check > idle_period) @@ -963,7 +980,7 @@ recalc: if (apm_idle_done) apm_do_busy(); - local_irq_enable(); + return index; } /** @@ -2381,9 +2398,9 @@ static int __init apm_init(void) if (HZ != 100) idle_period = (idle_period * HZ) / 100; if (idle_threshold < 100) { - original_pm_idle = pm_idle; - pm_idle = apm_cpu_idle; - set_pm_idle = 1; + if (!cpuidle_register_driver(&apm_idle_driver)) + if (cpuidle_register_device(&apm_cpuidle_device)) + cpuidle_unregister_driver(&apm_idle_driver); } return 0; @@ -2393,15 +2410,9 @@ static void __exit apm_exit(void) { int error; - if (set_pm_idle) { - pm_idle = original_pm_idle; - /* - * We are about to unload the current idle thread pm callback - * (pm_idle), Wait for all processors to update cached/local - * copies of pm_idle before proceeding. 
- */ - kick_all_cpus_sync(); - } + cpuidle_unregister_device(&apm_cpuidle_device); + cpuidle_unregister_driver(&apm_idle_driver); + if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0) && (apm_info.connection_version > 0x0100)) { error = apm_engage_power_management(APM_DEVICE_ALL, 0); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2ed787f..f571a6e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -272,9 +272,6 @@ EXPORT_SYMBOL(boot_option_idle_override); * Powermanagement idle function, if any.. */ void (*pm_idle)(void); -#ifdef CONFIG_APM_MODULE -EXPORT_SYMBOL(pm_idle); -#endif #ifndef CONFIG_SMP static inline void play_dead(void) -- cgit v1.1 From a476bda30baf7efa7f305793a340aae07b6e5780 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 9 Feb 2013 21:45:03 -0500 Subject: x86 idle: rename global pm_idle to static x86_idle (pm_idle)() is being removed from linux/pm.h because Linux does not have such a cross-architecture concept. x86 uses an idle function pointer in its architecture specific code as a backup to cpuidle. So we re-name x86 use of pm_idle to x86_idle, and make it static to x86. Signed-off-by: Len Brown Cc: x86@kernel.org --- arch/x86/kernel/process.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index f571a6e..ceb05db 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -268,10 +268,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); -/* - * Powermanagement idle function, if any.. - */ -void (*pm_idle)(void); +static void (*x86_idle)(void); #ifndef CONFIG_SMP static inline void play_dead(void) @@ -348,7 +345,7 @@ void cpu_idle(void) rcu_idle_enter(); if (cpuidle_idle_call()) - pm_idle(); + x86_idle(); rcu_idle_exit(); start_critical_timings(); @@ -395,9 +392,9 @@ EXPORT_SYMBOL(default_idle); bool set_pm_idle_to_default(void) { - bool ret = !!pm_idle; + bool ret = !!x86_idle; - pm_idle = default_idle; + x86_idle = default_idle; return ret; } @@ -564,11 +561,10 @@ static void amd_e400_idle(void) void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP - if (pm_idle == poll_idle && smp_num_siblings > 1) { + if (x86_idle == poll_idle && smp_num_siblings > 1) pr_warn_once("WARNING: polling idle and HT enabled, performance may degrade\n"); - } #endif - if (pm_idle) + if (x86_idle) return; if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { @@ -576,19 +572,19 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) * One CPU supports mwait => All CPUs supports mwait */ pr_info("using mwait in idle threads\n"); - pm_idle = mwait_idle; + x86_idle = mwait_idle; } else if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ pr_info("using AMD E400 aware idle routine\n"); - pm_idle = amd_e400_idle; + x86_idle = amd_e400_idle; } else - pm_idle = default_idle; + x86_idle = default_idle; } void __init init_amd_e400_c1e_mask(void) { /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. 
*/ - if (pm_idle == amd_e400_idle) + if (x86_idle == amd_e400_idle) zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); } @@ -599,7 +595,7 @@ static int __init idle_setup(char *str) if (!strcmp(str, "poll")) { pr_info("using polling idle threads\n"); - pm_idle = poll_idle; + x86_idle = poll_idle; boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { boot_option_idle_override = IDLE_FORCE_MWAIT; @@ -612,7 +608,7 @@ static int __init idle_setup(char *str) * To continue to load the CPU idle driver, don't touch * the boot_option_idle_override. */ - pm_idle = default_idle; + x86_idle = default_idle; boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { /* -- cgit v1.1 From 20bf062c6575e162ede00308ca3a5714ca112009 Mon Sep 17 00:00:00 2001 From: Alexander Holler Date: Sat, 16 Feb 2013 16:38:17 +0100 Subject: x86/memtest: Shorten time for tests By just reversing the order memtest is using the test patterns, an additional round to zero the memory is not necessary. This might save up to a second or even more for setups which are doing tests on every boot. Signed-off-by: Alexander Holler Cc: Yinghai Lu Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1361029097-8308-1-git-send-email-holler@ahsoftware.de Signed-off-by: Ingo Molnar --- arch/x86/mm/memtest.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index c80b9fb..8dabbed 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c @@ -9,6 +9,7 @@ #include static u64 patterns[] __initdata = { + /* The first entry has to be 0 to leave memtest with zeroed memory */ 0, 0xffffffffffffffffULL, 0x5555555555555555ULL, @@ -110,15 +111,8 @@ void __init early_memtest(unsigned long start, unsigned long end) return; printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); - for (i = 0; i < memtest_pattern; i++) { + for (i = memtest_pattern-1; i < UINT_MAX; --i) { idx = i % ARRAY_SIZE(patterns); do_one_pass(patterns[idx], start, end); } - - if (idx > 0) { - printk(KERN_INFO "early_memtest: wipe out " - "test pattern from memory\n"); - /* additional test with pattern 0 will do this */ - do_one_pass(0, start, end); - } } -- cgit v1.1 From ed55705dd5008b408c48a8459b8b34b01f3de985 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 18 Feb 2013 22:58:14 -0300 Subject: x86: pvclock kvm: align allocation size to page size To match whats mapped via vsyscalls to userspace. 
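The fix is pure size rounding. Assuming 4 KiB pages, PAGE_ALIGN() behaves as sketched below, so the memblock allocation and the later vsyscall mapping now agree on a whole number of pages:

	/* PAGE_ALIGN(x) rounds x up to the next page boundary:
	 *   PAGE_ALIGN(1)    -> 4096
	 *   PAGE_ALIGN(4096) -> 4096
	 *   PAGE_ALIGN(4097) -> 8192
	 * so allocation and mapping both cover whole pages: */
	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS);
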
Reported-by: Peter Hurley Signed-off-by: Marcelo Tosatti --- arch/x86/kernel/kvmclock.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 220a360..5bedbdd 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -218,6 +218,9 @@ static void kvm_shutdown(void) void __init kvmclock_init(void) { unsigned long mem; + int size; + + size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); if (!kvm_para_available()) return; @@ -231,16 +234,14 @@ void __init kvmclock_init(void) printk(KERN_INFO "kvm-clock: Using msrs %x and %x", msr_kvm_system_time, msr_kvm_wall_clock); - mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS, - PAGE_SIZE); + mem = memblock_alloc(size, PAGE_SIZE); if (!mem) return; hv_clock = __va(mem); if (kvm_register_clock("boot clock")) { hv_clock = NULL; - memblock_free(mem, - sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); + memblock_free(mem, size); return; } pv_time_ops.sched_clock = kvm_clock_read; @@ -275,7 +276,7 @@ int __init kvm_setup_vsyscall_timeinfo(void) struct pvclock_vcpu_time_info *vcpu_time; unsigned int size; - size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS; + size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); preempt_disable(); cpu = smp_processor_id(); -- cgit v1.1 From 52d3d06e706bdde3d6c5c386deb065c3b4c51618 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 19 Feb 2013 19:33:12 +0100 Subject: x86, cpu, amd: Fix WC+ workaround for older virtual hosts The WC+ workaround for F10h introduces a new MSR and kvm host #GPs on accesses to unknown MSRs if paravirt is not compiled in. Use the exception-handling MSR accessors so as not to break 3.8 and later guests booting on older hosts. Remove a redundant family check while at it. Cc: Gleb Natapov Cc: Boris Ostrovsky Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1361298793-31834-1-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 721ef32..163af4a 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -723,12 +723,14 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) * performance degradation for certain nested-paging guests. * Prevent this conversion by clearing bit 24 in * MSR_AMD64_BU_CFG2. + * + * NOTE: we want to use the _safe accessors so as not to #GP kvm + * guests on older kvm hosts. */ - if (c->x86 == 0x10) { - rdmsrl(MSR_AMD64_BU_CFG2, value); - value &= ~(1ULL << 24); - wrmsrl(MSR_AMD64_BU_CFG2, value); - } + + rdmsrl_safe(MSR_AMD64_BU_CFG2, &value); + value &= ~(1ULL << 24); + wrmsrl_safe(MSR_AMD64_BU_CFG2, value); } rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy); -- cgit v1.1 From 2e32b7190641a184b8510d3e342400473ff1ab60 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 19 Feb 2013 19:33:13 +0100 Subject: x86, kvm: Add MSR_AMD64_BU_CFG2 to the list of ignored MSRs The "x86, AMD: Enable WC+ memory type on family 10 processors" patch currently in -tip added a workaround for AMD F10h CPUs which #GPs my guest when booted in kvm. This is because it accesses MSR_AMD64_BU_CFG2 which is not currently ignored by kvm. Do that because this MSR is only baremetal-relevant anyway. 
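Concretely, "ignored" means the MSR joins the grouped case labels at the top of kvm_set_msr_common() that fall through to a bare break, so the guest's write is accepted and discarded instead of injecting #GP. In sketch form (abridged from the diff below):

	switch (msr) {
	case MSR_AMD64_NB_CFG:
	case MSR_AMD64_BU_CFG2:		/* the MSR added here */
		break;			/* silently swallow the write */
	case MSR_EFER:
		return set_efer(vcpu, data);
	/* ... */
	}
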
While at it, move the ignored MSRs at the beginning of kvm_set_msr_common so that we exit then and there. Acked-by: Gleb Natapov Cc: Boris Ostrovsky Cc: Andre Przywara Cc: Marcelo Tosatti Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1361298793-31834-2-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin --- arch/x86/kvm/x86.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c243b81..3704007 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1881,6 +1881,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) u64 data = msr_info->data; switch (msr) { + case MSR_AMD64_NB_CFG: + case MSR_IA32_UCODE_REV: + case MSR_IA32_UCODE_WRITE: + case MSR_VM_HSAVE_PA: + case MSR_AMD64_PATCH_LOADER: + case MSR_AMD64_BU_CFG2: + break; + case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -1900,8 +1908,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; } break; - case MSR_AMD64_NB_CFG: - break; case MSR_IA32_DEBUGCTLMSR: if (!data) { /* We support the non-activated case already */ @@ -1914,11 +1920,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n", __func__, data); break; - case MSR_IA32_UCODE_REV: - case MSR_IA32_UCODE_WRITE: - case MSR_VM_HSAVE_PA: - case MSR_AMD64_PATCH_LOADER: - break; case 0x200 ... 0x2ff: return set_msr_mtrr(vcpu, msr, data); case MSR_IA32_APICBASE: @@ -2253,6 +2254,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case MSR_K8_INT_PENDING_MSG: case MSR_AMD64_NB_CFG: case MSR_FAM10H_MMIO_CONF_BASE: + case MSR_AMD64_BU_CFG2: data = 0; break; case MSR_P6_PERFCTR0: -- cgit v1.1 From dacd45f4e793e46e8299c9a580e400866ffe0770 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 22 Oct 2012 11:35:16 -0400 Subject: xen/smp: Move the common CPU init code a bit to prep for PVH patch. The PV and PVH code CPU init code share some functionality. The PVH code ("xen/pvh: Extend vcpu_guest_context, p2m, event, and XenBus") sets some of these up, but not all. To make it easier to read, this patch removes the PV specific out of the generic way. No functional change - just code movement. 
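The otherwise-pointless bare braces in the diff below mark the PV-only section; in the follow-on PVH series they presumably become a guard along these lines (hypothetical shape, not part of this patch):

	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
		/* PV only: trap table, GDT frames, ring-1 eflags,
		 * hypervisor/failsafe callbacks, ... */
	}
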
Acked-by: Stefano Stabellini [v2: Fixed compile errors noticed by Fengguang Wu build system] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 34bc4ce..09ea61d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -300,8 +300,6 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) gdt = get_cpu_gdt_table(cpu); ctxt->flags = VGCF_IN_KERNEL; - ctxt->user_regs.ds = __USER_DS; - ctxt->user_regs.es = __USER_DS; ctxt->user_regs.ss = __KERNEL_DS; #ifdef CONFIG_X86_32 ctxt->user_regs.fs = __KERNEL_PERCPU; @@ -310,35 +308,41 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; - ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); - xen_copy_trap_info(ctxt->trap_ctxt); + { + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ + ctxt->user_regs.ds = __USER_DS; + ctxt->user_regs.es = __USER_DS; - ctxt->ldt_ents = 0; + xen_copy_trap_info(ctxt->trap_ctxt); - BUG_ON((unsigned long)gdt & ~PAGE_MASK); + ctxt->ldt_ents = 0; - gdt_mfn = arbitrary_virt_to_mfn(gdt); - make_lowmem_page_readonly(gdt); - make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); + BUG_ON((unsigned long)gdt & ~PAGE_MASK); - ctxt->gdt_frames[0] = gdt_mfn; - ctxt->gdt_ents = GDT_ENTRIES; + gdt_mfn = arbitrary_virt_to_mfn(gdt); + make_lowmem_page_readonly(gdt); + make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); - ctxt->user_regs.cs = __KERNEL_CS; - ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); + ctxt->gdt_frames[0] = gdt_mfn; + ctxt->gdt_ents = GDT_ENTRIES; - ctxt->kernel_ss = __KERNEL_DS; - ctxt->kernel_sp = idle->thread.sp0; + ctxt->kernel_ss = __KERNEL_DS; + ctxt->kernel_sp = idle->thread.sp0; #ifdef CONFIG_X86_32 - ctxt->event_callback_cs = __KERNEL_CS; - ctxt->failsafe_callback_cs = __KERNEL_CS; + ctxt->event_callback_cs = __KERNEL_CS; + ctxt->failsafe_callback_cs = __KERNEL_CS; #endif - ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; - ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; + ctxt->event_callback_eip = + (unsigned long)xen_hypervisor_callback; + ctxt->failsafe_callback_eip = + (unsigned long)xen_failsafe_callback; + } + ctxt->user_regs.cs = __KERNEL_CS; + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); -- cgit v1.1 From 3216dceb31c08be08ea98814a9ca5775fa680389 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Tue, 19 Feb 2013 13:59:19 +0000 Subject: xen: introduce xen_remap, use it instead of ioremap ioremap can't be used to map ring pages on ARM because it uses device memory caching attributes (MT_DEVICE*). Introduce a Xen specific abstraction to map ring pages, called xen_remap, that is defined as ioremap on x86 (no behavioral changes). On ARM it explicitly calls __arm_ioremap with the right caching attributes: MT_MEMORY. 
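The x86 definition added below is a straight ioremap; per the description above, the ARM counterpart would be roughly:

	/* ARM side (sketch; lives outside this arch/x86 excerpt): */
	#define xen_remap(cookie, size) __arm_ioremap((cookie), (size), MT_MEMORY)
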
Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 472b9b7..6aef9fb 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -212,4 +212,6 @@ unsigned long arbitrary_virt_to_mfn(void *vaddr); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); +#define xen_remap(cookie, size) ioremap((cookie), (size)); + #endif /* _ASM_X86_XEN_PAGE_H */ -- cgit v1.1 From 76eaca031f0af2bb303e405986f637811956a422 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Fri, 15 Feb 2013 09:48:52 +0100 Subject: xen: Send spinlock IPI to all waiters There is a loophole between Xen's current implementation of pv-spinlocks and the scheduler. This was triggerable through a testcase until v3.6 changed the TLB flushing code. The problem potentially is still there just not observable in the same way. What could happen was (is): 1. CPU n tries to schedule task x away and goes into a slow wait for the runq lock of CPU n-# (must be one with a lower number). 2. CPU n-#, while processing softirqs, tries to balance domains and goes into a slow wait for its own runq lock (for updating some records). Since this is a spin_lock_irqsave in softirq context, interrupts will be re-enabled for the duration of the poll_irq hypercall used by Xen. 3. Before the runq lock of CPU n-# is unlocked, CPU n-1 receives an interrupt (e.g. endio) and when processing the interrupt, tries to wake up task x. But that is in schedule and still on_cpu, so try_to_wake_up goes into a tight loop. 4. The runq lock of CPU n-# gets unlocked, but the message only gets sent to the first waiter, which is CPU n-# and that is busily stuck. 5. CPU n-# never returns from the nested interruption to take and release the lock because the scheduler uses a busy wait. And CPU n never finishes the task migration because the unlock notification only went to CPU n-#. To avoid this and since the unlocking code has no real sense of which waiter is best suited to grab the lock, just send the IPI to all of them. This causes the waiters to return from the hyper- call (those not interrupted at least) and do active spinlocking. BugLink: http://bugs.launchpad.net/bugs/1011792 Acked-by: Jan Beulich Signed-off-by: Stefan Bader Cc: stable@vger.kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/spinlock.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 83e866d..f7a080e 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -328,7 +328,6 @@ static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) if (per_cpu(lock_spinners, cpu) == xl) { ADD_STATS(released_slow_kicked, 1); xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); - break; } } } -- cgit v1.1 From 69943182bb9e19e4b60ea5033f683ec1af1703a9 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 20 Feb 2013 11:15:12 +0100 Subject: perf/x86: Add Intel IvyBridge event scheduling constraints Intel IvyBridge processor has different constraints compared to SandyBridge. Therefore it needs its own contraint table. This patch adds the constraint table. Without this patch, the events listed in the patch may not be scheduled correctly and bogus counts may be collected. 
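For readers decoding the new table below: the first macro argument is the event select/umask code and the second is a bitmask of the generic counters the event may be scheduled on. For example:

	/* Reading the constraint table:
	 *   INTEL_UEVENT_CONSTRAINT(0x0148, 0x4)  - counter 2 only (mask bit 2)
	 *   INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2)  - counter 1 only (mask bit 1)
	 *   INTEL_EVENT_CONSTRAINT(0xd0, 0xf)     - any of counters 0-3
	 * The scheduler only places a matching event on a counter whose
	 * bit is set in the mask. */
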
Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: acme@redhat.com Cc: jolsa@redhat.com Cc: namhyung.kim@lge.com Link: http://lkml.kernel.org/r/1361355312-3323-1-git-send-email-eranian@google.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 4914e94..529c893 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -107,6 +107,27 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_ivb_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */ + INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */ + INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + static struct extra_reg intel_westmere_extra_regs[] __read_mostly = { INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0), @@ -2095,7 +2116,7 @@ __init int intel_pmu_init(void) intel_pmu_lbr_init_snb(); - x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.event_constraints = intel_ivb_event_constraints; x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints; x86_pmu.pebs_aliases = intel_pebs_aliases_snb; x86_pmu.extra_regs = intel_snb_extra_regs; -- cgit v1.1 From 27cf929845b10043f2257693c7d179a9e0b1980e Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Tue, 19 Feb 2013 20:47:07 +0100 Subject: x86/apic: Fix parsing of the 'lapic' cmdline option Including " lapic " in the kernel cmdline on an x86-64 kernel makes it panic while parsing early params -- e.g. with no user visible output. Fix this bug by ensuring arg is non-NULL before passing it to strncmp(). 
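The underlying rule: an early_param() callback receives NULL when the option is given bare ("lapic") rather than with a value ("lapic=notscdeadline"), so the argument must be checked before any dereference. A generic sketch of the guarded pattern (option name, handler, and flag are made up):

	static int foo_mode_bar;	/* hypothetical flag */

	static int __init parse_foo(char *arg)
	{
		if (arg && !strncmp(arg, "bar", 3))	/* guard against bare "foo" */
			foo_mode_bar = 1;
		return 0;
	}
	early_param("foo", parse_foo);
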
Reported-by: PaX Team Signed-off-by: Mathias Krause Acked-by: David Rientjes Cc: Suresh Siddha Link: http://lkml.kernel.org/r/1361303227-13174-1-git-send-email-minipli@googlemail.com Cc: stable@vger.kernel.org # v3.8 Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a5b4dce..904611b 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -131,7 +131,7 @@ static int __init parse_lapic(char *arg) { if (config_enabled(CONFIG_X86_32) && !arg) force_enable_local_apic = 1; - else if (!strncmp(arg, "notscdeadline", 13)) + else if (arg && !strncmp(arg, "notscdeadline", 13)) setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER); return 0; } -- cgit v1.1 From c81611c4e96f595a80d8be9367c385d2c116428b Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 20 Feb 2013 11:48:06 +0000 Subject: xen: event channel arrays are xen_ulong_t and not unsigned long On ARM we want these to be the same size on 32- and 64-bit. This is an ABI change on ARM. X86 does not change. Signed-off-by: Ian Campbell Cc: Jan Beulich Cc: Keir (Xen.org) Cc: Tim Deegan Cc: Stefano Stabellini Cc: linux-arm-kernel@lists.infradead.org Cc: xen-devel@lists.xen.org Cc: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/events.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/events.h b/arch/x86/include/asm/xen/events.h index cc146d5..ca842f2 100644 --- a/arch/x86/include/asm/xen/events.h +++ b/arch/x86/include/asm/xen/events.h @@ -16,4 +16,7 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return raw_irqs_disabled_flags(regs->flags); } +/* No need for a barrier -- XCHG is a barrier on x86. */ +#define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) + #endif /* _ASM_X86_XEN_EVENTS_H */ -- cgit v1.1 From fb834c7acc5e140cf4f9e86da93a66de8c0514da Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 20 Feb 2013 20:36:12 +0000 Subject: x86, efi: Make "noefi" really disable EFI runtime serivces commit 1de63d60cd5b ("efi: Clear EFI_RUNTIME_SERVICES rather than EFI_BOOT by "noefi" boot parameter") attempted to make "noefi" true to its documentation and disable EFI runtime services to prevent the bricking bug described in commit e0094244e41c ("samsung-laptop: Disable on EFI hardware"). However, it's not possible to clear EFI_RUNTIME_SERVICES from an early param function because EFI_RUNTIME_SERVICES is set in efi_init() *after* parse_early_param(). This resulted in "noefi" effectively becoming a no-op and no longer providing users with a way to disable EFI, which is bad for those users that have buggy machines. Reported-by: Walt Nelson Jr Cc: Satoru Takeuchi Cc: Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/1361392572-25657-1-git-send-email-matt@console-pimps.org Signed-off-by: H. 
Peter Anvin --- arch/x86/platform/efi/efi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 928bf83..e2cd38f 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -85,9 +85,10 @@ int efi_enabled(int facility) } EXPORT_SYMBOL(efi_enabled); +static bool disable_runtime = false; static int __init setup_noefi(char *arg) { - clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); + disable_runtime = true; return 0; } early_param("noefi", setup_noefi); @@ -734,7 +735,7 @@ void __init efi_init(void) if (!efi_is_native()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); else { - if (efi_runtime_init()) + if (disable_runtime || efi_runtime_init()) return; set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); } -- cgit v1.1 From 6b73a96065e89dc9fa75ba4f78b1aa3a3bbd0470 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 20 Feb 2013 18:52:02 -0300 Subject: Revert "KVM: MMU: lazily drop large spte" This reverts commit caf6900f2d8aaebe404c976753f6813ccd31d95e. It is causing migration failures, reference https://bugzilla.kernel.org/show_bug.cgi?id=54061. Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1cda1f3..4ed3edb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1105,7 +1105,8 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) /* * Write-protect on the specified @sptep, @pt_protect indicates whether - * spte write-protection is caused by protecting shadow page table. + * spte writ-protection is caused by protecting shadow page table. + * @flush indicates whether tlb need be flushed. * * Note: write protection is difference between drity logging and spte * protection: @@ -1114,9 +1115,10 @@ static void drop_large_spte(struct kvm_vcpu *vcpu, u64 *sptep) * - for spte protection, the spte can be writable only after unsync-ing * shadow page. * - * Return true if tlb need be flushed. + * Return true if the spte is dropped. 
*/ -static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect) +static bool +spte_write_protect(struct kvm *kvm, u64 *sptep, bool *flush, bool pt_protect) { u64 spte = *sptep; @@ -1126,11 +1128,17 @@ static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect) rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); + if (__drop_large_spte(kvm, sptep)) { + *flush |= true; + return true; + } + if (pt_protect) spte &= ~SPTE_MMU_WRITEABLE; spte = spte & ~PT_WRITABLE_MASK; - return mmu_spte_update(sptep, spte); + *flush |= mmu_spte_update(sptep, spte); + return false; } static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, @@ -1142,8 +1150,11 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); + if (spte_write_protect(kvm, sptep, &flush, pt_protect)) { + sptep = rmap_get_first(*rmapp, &iter); + continue; + } - flush |= spte_write_protect(kvm, sptep, pt_protect); sptep = rmap_get_next(&iter); } @@ -2581,8 +2592,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, break; } - drop_large_spte(vcpu, iterator.sptep); - if (!is_shadow_present_pte(*iterator.sptep)) { u64 base_addr = iterator.addr; -- cgit v1.1 From ac630dd98a47b60b27d716758d5f4276cb974662 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 22 Feb 2013 13:09:51 -0800 Subject: x86-64: don't set the early IDT to point directly to 'early_idt_handler' The code requires the use of the proper per-exception-vector stub functions (set up as the early_idt_handlers[] array - note the 's') that make sure to set up the error vector number. This is true regardless of whether CONFIG_EARLY_PRINTK is set or not. Why? The stack offset for the comparison of __KERNEL_CS won't be right otherwise, nor will the new check (from commit 8170e6bed465: "x86, 64bit: Use a #PF handler to materialize early mappings on demand") for the page fault exception vector. Acked-by: H. 
Peter Anvin Signed-off-by: Linus Torvalds --- arch/x86/include/asm/proto.h | 2 -- arch/x86/kernel/head64.c | 7 +------ arch/x86/kernel/head_64.S | 2 ++ 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 6f414ed..6fd3fd7 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -5,8 +5,6 @@ /* misc architecture specific prototypes */ -void early_idt_handler(void); - void system_call(void); void syscall_init(void); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 57334f4c..2590025 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -159,13 +159,8 @@ void __init x86_64_start_kernel(char * real_mode_data) /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); - for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) { -#ifdef CONFIG_EARLY_PRINTK + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, &early_idt_handlers[i]); -#else - set_intr_gate(i, early_idt_handler); -#endif - } load_idt((const struct desc_ptr *)&idt_descr); copy_bootdata(__va(real_mode_data)); diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d94f6d6..b7de3b2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -336,6 +336,7 @@ early_idt_handlers: i = i + 1 .endr +/* This is global to keep gas from relaxing the jumps */ ENTRY(early_idt_handler) cld @@ -404,6 +405,7 @@ ENTRY(early_idt_handler) addq $16,%rsp # drop vector number and error code decl early_recursion_flag(%rip) INTERRUPT_RETURN +ENDPROC(early_idt_handler) __INITDATA -- cgit v1.1 From 0cc9129d75ef8993702d97ab0e49542c15ac6ab9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Feb 2013 17:35:13 -0800 Subject: x86-64, xen, mmu: Provide an early version of write_cr3. With commit 8170e6bed465 ("x86, 64bit: Use a #PF handler to materialize early mappings on demand") we started hitting an early bootup crash where the Xen hypervisor would inform us that: (XEN) d7:v0: unhandled page fault (ec=0000) (XEN) Pagetable walk from ffffea000005b2d0: (XEN) L4[0x1d4] = 0000000000000000 ffffffffffffffff (XEN) domain_crash_sync called from entry.S (XEN) Domain 7 (vcpu#0) crashed on cpu#3: (XEN) ----[ Xen-4.2.0 x86_64 debug=n Not tainted ]---- .. that Xen was unable to context switch back to dom0. Looking at the calling stack we find: [] xen_get_user_pgd+0x5a <-- [] xen_get_user_pgd+0x5a [] xen_write_cr3+0x77 [] init_mem_mapping+0x1f9 [] setup_arch+0x742 [] printk+0x48 We are trying to figure out whether we need to up-date the user PGD as well. Please keep in mind that under 64-bit PV guests we have a limited amount of rings: 0 for the Hypervisor, and 1 for both the Linux kernel and user-space. As such the Linux pvops'fied version of write_cr3 checks if it has to update the user-space cr3 as well. That clearly is not needed during early bootup. The recent changes (see above git commit) streamline the x86 page table allocation to be much simpler (And also incidentally the #PF handler ends up in spirit being similar to how the Xen toolstack sets up the initial page-tables). The fix is to have an early-bootup version of cr3 that just loads the kernel %cr3. The later version - which also handles user-page modifications will be used after the initial page tables have been setup. [ hpa: removed a redundant #ifdef and made the new function __init. Also note that x86-32 already has such an early xen_write_cr3. ] Tested-by: "H. 
Peter Anvin" Reported-by: Konrad Rzeszutek Wilk Signed-off-by: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1361579812-23709-1-git-send-email-konrad.wilk@oracle.com Signed-off-by: H. Peter Anvin Signed-off-by: Linus Torvalds --- arch/x86/xen/mmu.c | 44 +++++++++++++++++++++++++++++++++++++++----- 1 file changed, 39 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f5e86ee..e8e3493 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1408,7 +1408,6 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3) xen_mc_callback(set_current_cr3, (void *)cr3); } } - static void xen_write_cr3(unsigned long cr3) { BUG_ON(preemptible()); @@ -1434,6 +1433,45 @@ static void xen_write_cr3(unsigned long cr3) xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ } +#ifdef CONFIG_X86_64 +/* + * At the start of the day - when Xen launches a guest, it has already + * built pagetables for the guest. We diligently look over them + * in xen_setup_kernel_pagetable and graft as appropiate them in the + * init_level4_pgt and its friends. Then when we are happy we load + * the new init_level4_pgt - and continue on. + * + * The generic code starts (start_kernel) and 'init_mem_mapping' sets + * up the rest of the pagetables. When it has completed it loads the cr3. + * N.B. that baremetal would start at 'start_kernel' (and the early + * #PF handler would create bootstrap pagetables) - so we are running + * with the same assumptions as what to do when write_cr3 is executed + * at this point. + * + * Since there are no user-page tables at all, we have two variants + * of xen_write_cr3 - the early bootup (this one), and the late one + * (xen_write_cr3). The reason we have to do that is that in 64-bit + * the Linux kernel and user-space are both in ring 3 while the + * hypervisor is in ring 0. 
+ */ +static void __init xen_write_cr3_init(unsigned long cr3) +{ + BUG_ON(preemptible()); + + xen_mc_batch(); /* disables interrupts */ + + /* Update while interrupts are disabled, so its atomic with + respect to ipis */ + this_cpu_write(xen_cr3, cr3); + + __xen_write_cr3(true, cr3); + + xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ + + pv_mmu_ops.write_cr3 = &xen_write_cr3; +} +#endif + static int xen_pgd_alloc(struct mm_struct *mm) { pgd_t *pgd = mm->pgd; @@ -2102,11 +2140,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { .write_cr2 = xen_write_cr2, .read_cr3 = xen_read_cr3, -#ifdef CONFIG_X86_32 .write_cr3 = xen_write_cr3_init, -#else - .write_cr3 = xen_write_cr3, -#endif .flush_tlb_user = xen_flush_tlb, .flush_tlb_kernel = xen_flush_tlb, -- cgit v1.1 From 496ad9aa8ef448058e36ca7a787c61f2e63f0f54 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Jan 2013 17:07:38 -0500 Subject: new helper: file_inode(file) Signed-off-by: Al Viro --- arch/x86/ia32/ia32_aout.c | 6 ++---- arch/x86/kernel/cpuid.c | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index a703af1..03abf9b 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -271,7 +271,7 @@ static int load_aout_binary(struct linux_binprm *bprm) if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC && N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || - i_size_read(bprm->file->f_path.dentry->d_inode) < + i_size_read(file_inode(bprm->file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { return -ENOEXEC; } @@ -425,12 +425,10 @@ beyond_if: static int load_aout_library(struct file *file) { - struct inode *inode; unsigned long bss, start_addr, len, error; int retval; struct exec ex; - inode = file->f_path.dentry->d_inode; retval = -ENOEXEC; error = kernel_read(file, 0, (char *) &ex, sizeof(ex)); @@ -440,7 +438,7 @@ static int load_aout_library(struct file *file) /* We come in here for the regular a.out style of shared libraries */ if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) || N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) || - i_size_read(inode) < + i_size_read(file_inode(file)) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) { goto out; } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 60c7891..1e4dbcf 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -85,7 +85,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, { char __user *tmp = buf; struct cpuid_regs cmd; - int cpu = iminor(file->f_path.dentry->d_inode); + int cpu = iminor(file_inode(file)); u64 pos = *ppos; ssize_t bytes = 0; int err = 0; @@ -116,7 +116,7 @@ static int cpuid_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; - cpu = iminor(file->f_path.dentry->d_inode); + cpu = iminor(file_inode(file)); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ -- cgit v1.1 From 24d335ca3606b610ec69c66a1e42760c96d89470 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:32:58 -0800 Subject: memory-hotplug: introduce new arch_remove_memory() for removing page table For removing memory, we need to remove page tables. But it depends on architecture. So the patch introduce arch_remove_memory() for removing page table. Now it only calls __remove_pages(). 
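The hook's minimal shape, as the x86 versions in the diff below show, simply converts the range to pfns and hands it to the core:

	int arch_remove_memory(u64 start, u64 size)
	{
		unsigned long start_pfn = start >> PAGE_SHIFT;
		unsigned long nr_pages = size >> PAGE_SHIFT;
		struct zone *zone = page_zone(pfn_to_page(start_pfn));

		return __remove_pages(zone, start_pfn, nr_pages);
	}
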
Note: __remove_pages() for some archtecuture is not implemented (I don't know how to implement it for s390). Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Acked-by: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_32.c | 12 ++++++++++++ arch/x86/mm/init_64.c | 15 +++++++++++++++ 2 files changed, 27 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index b299724..2d19001 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -862,6 +862,18 @@ int arch_add_memory(int nid, u64 start, u64 size) return __add_pages(nid, zone, start_pfn, nr_pages); } + +#ifdef CONFIG_MEMORY_HOTREMOVE +int arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + + zone = page_zone(pfn_to_page(start_pfn)); + return __remove_pages(zone, start_pfn, nr_pages); +} +#endif #endif /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3eba7f4..b6dd1c4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -707,6 +707,21 @@ int arch_add_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(arch_add_memory); +#ifdef CONFIG_MEMORY_HOTREMOVE +int __ref arch_remove_memory(u64 start, u64 size) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + struct zone *zone; + int ret; + + zone = page_zone(pfn_to_page(start_pfn)); + ret = __remove_pages(zone, start_pfn, nr_pages); + WARN_ON_ONCE(ret); + + return ret; +} +#endif #endif /* CONFIG_MEMORY_HOTPLUG */ static struct kcore_list kcore_vsyscall; -- cgit v1.1 From 46723bfa540f0a1e494476a1734d03626a0bd1e0 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Fri, 22 Feb 2013 16:33:00 -0800 Subject: memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap For removing memmap region of sparse-vmemmap which is allocated bootmem, memmap region of sparse-vmemmap needs to be registered by get_page_bootmem(). So the patch searches pages of virtual mapping and registers the pages by get_page_bootmem(). NOTE: register_page_bootmem_memmap() is not implemented for ia64, ppc, s390, and sparc. So introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node() when platform doesn't support it. It's implemented by adding a new Kconfig option named CONFIG_HAVE_BOOTMEM_INFO_NODE, which will be automatically selected by memory-hotplug feature fully supported archs(currently only on x86_64). Since we have 2 config options called MEMORY_HOTPLUG and MEMORY_HOTREMOVE used for memory hot-add and hot-remove separately, and codes in function register_page_bootmem_info_node() are only used for collecting infomation for hot-remove, so reside it under MEMORY_HOTREMOVE. Besides page_isolation.c selected by MEMORY_ISOLATION under MEMORY_HOTPLUG is also such case, move it too. 
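The "revert when unsupported" part boils down to compiling the registration helper only when the new option is selected; a sketch of that arrangement (exact header placement assumed):

	#ifdef CONFIG_HAVE_BOOTMEM_INFO_NODE
	extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
	#else
	static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
	{
		/* arch cannot hot-remove: nothing to record */
	}
	#endif
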
[mhocko@suse.cz: put register_page_bootmem_memmap inside CONFIG_MEMORY_HOTPLUG_SPARSE] [linfeng@cn.fujitsu.com: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node()] [mhocko@suse.cz: remove the arch specific functions without any implementation] [linfeng@cn.fujitsu.com: mm/Kconfig: move auto selects from MEMORY_HOTPLUG to MEMORY_HOTREMOVE as needed] [rientjes@google.com: fix defined but not used warning] Signed-off-by: Wen Congyang Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Tang Chen Reviewed-by: Wu Jianguo Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Michal Hocko Signed-off-by: Lin Feng Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b6dd1c4..f17aa76 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1034,6 +1034,66 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) return 0; } +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE) +void register_page_bootmem_memmap(unsigned long section_nr, + struct page *start_page, unsigned long size) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned int nr_pages; + struct page *page; + + for (; addr < end; addr = next) { + pte_t *pte = NULL; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + continue; + } + get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO); + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + continue; + } + get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO); + + if (!cpu_has_pse) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + get_page_bootmem(section_nr, pmd_page(*pmd), + MIX_SECTION_INFO); + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + continue; + get_page_bootmem(section_nr, pte_page(*pte), + SECTION_INFO); + } else { + next = pmd_addr_end(addr, end); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + continue; + + nr_pages = 1 << (get_order(PMD_SIZE)); + page = pmd_page(*pmd); + while (nr_pages--) + get_page_bootmem(section_nr, page++, + SECTION_INFO); + } + } +} +#endif + void __meminit vmemmap_populate_print_last(void) { if (p_start) { -- cgit v1.1 From ae9aae9eda2db71bf4b592f15618b0160eb07731 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:33:04 -0800 Subject: memory-hotplug: common APIs to support page tables hot-remove When memory is removed, the corresponding pagetables should alse be removed. This patch introduces some common APIs to support vmemmap pagetable and x86_64 architecture direct mapping pagetable removing. All pages of virtual mapping in removed memory cannot be freed if some pages used as PGD/PUD include not only removed memory but also other memory. So this patch uses the following way to check whether a page can be freed or not. 1) When removing memory, the page structs of the removed memory are filled with 0FD. 2) All page structs are filled with 0xFD on PT/PMD, PT/PMD can be cleared. 
In this case, the page used as PT/PMD can be freed. For direct mapping pages, update direct_pages_count[level] when we freed their pagetables. And do not free the pages again because they were freed when offlining. For vmemmap pages, free the pages and their pagetables. For larger pages, do not split them into smaller ones because there is no way to know if the larger page has been split. As a result, there is no way to decide when to split. We deal the larger pages in the following way: 1) For direct mapped pages, all the pages were freed when they were offlined. And since menmory offline is done section by section, all the memory ranges being removed are aligned to PAGE_SIZE. So only need to deal with unaligned pages when freeing vmemmap pages. 2) For vmemmap pages being used to store page_struct, if part of the larger page is still in use, just fill the unused part with 0xFD. And when the whole page is fulfilled with 0xFD, then free the larger page. [akpm@linux-foundation.org: fix typo in comment] [tangchen@cn.fujitsu.com: do not calculate direct mapping pages when freeing vmemmap pagetables] [tangchen@cn.fujitsu.com: do not free direct mapping pages twice] [tangchen@cn.fujitsu.com: do not free page split from hugepage one by one] [tangchen@cn.fujitsu.com: do not split pages when freeing pagetable pages] [akpm@linux-foundation.org: use pmd_page_vaddr()] [akpm@linux-foundation.org: fix used-uninitialised bug] Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Jianguo Wu Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable_types.h | 1 + arch/x86/mm/init_64.c | 304 +++++++++++++++++++++++++++++++++++ arch/x86/mm/pageattr.c | 47 +++--- 3 files changed, 330 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index e642300..567b5d0 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -351,6 +351,7 @@ static inline void update_page_count(int level, unsigned long pages) { } * as a pte too. */ extern pte_t *lookup_address(unsigned long address, unsigned int *level); +extern int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase); extern phys_addr_t slow_virt_to_phys(void *__address); #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index f17aa76..ca6cd40 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -707,6 +707,310 @@ int arch_add_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(arch_add_memory); +#define PAGE_INUSE 0xFD + +static void __meminit free_pagetable(struct page *page, int order) +{ + struct zone *zone; + bool bootmem = false; + unsigned long magic; + unsigned int nr_pages = 1 << order; + + /* bootmem page has reserved flag */ + if (PageReserved(page)) { + __ClearPageReserved(page); + bootmem = true; + + magic = (unsigned long)page->lru.next; + if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { + while (nr_pages--) + put_page_bootmem(page++); + } else + __free_pages_bootmem(page, order); + } else + free_pages((unsigned long)page_address(page), order); + + /* + * SECTION_INFO pages and MIX_SECTION_INFO pages + * are all allocated by bootmem. 
+ */ + if (bootmem) { + zone = page_zone(page); + zone_span_writelock(zone); + zone->present_pages += nr_pages; + zone_span_writeunlock(zone); + totalram_pages += nr_pages; + } +} + +static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) +{ + pte_t *pte; + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) { + pte = pte_start + i; + if (pte_val(*pte)) + return; + } + + /* free a pte talbe */ + free_pagetable(pmd_page(*pmd), 0); + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); +} + +static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) +{ + pmd_t *pmd; + int i; + + for (i = 0; i < PTRS_PER_PMD; i++) { + pmd = pmd_start + i; + if (pmd_val(*pmd)) + return; + } + + /* free a pmd talbe */ + free_pagetable(pud_page(*pud), 0); + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); +} + +/* Return true if pgd is changed, otherwise return false. */ +static bool __meminit free_pud_table(pud_t *pud_start, pgd_t *pgd) +{ + pud_t *pud; + int i; + + for (i = 0; i < PTRS_PER_PUD; i++) { + pud = pud_start + i; + if (pud_val(*pud)) + return false; + } + + /* free a pud table */ + free_pagetable(pgd_page(*pgd), 0); + spin_lock(&init_mm.page_table_lock); + pgd_clear(pgd); + spin_unlock(&init_mm.page_table_lock); + + return true; +} + +static void __meminit +remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, + bool direct) +{ + unsigned long next, pages = 0; + pte_t *pte; + void *page_addr; + phys_addr_t phys_addr; + + pte = pte_start + pte_index(addr); + for (; addr < end; addr = next, pte++) { + next = (addr + PAGE_SIZE) & PAGE_MASK; + if (next > end) + next = end; + + if (!pte_present(*pte)) + continue; + + /* + * We mapped [0,1G) memory as identity mapping when + * initializing, in arch/x86/kernel/head_64.S. These + * pagetables cannot be removed. + */ + phys_addr = pte_val(*pte) + (addr & PAGE_MASK); + if (phys_addr < (phys_addr_t)0x40000000) + return; + + if (IS_ALIGNED(addr, PAGE_SIZE) && + IS_ALIGNED(next, PAGE_SIZE)) { + /* + * Do not free direct mapping pages since they were + * freed when offlining, or simplely not in use. + */ + if (!direct) + free_pagetable(pte_page(*pte), 0); + + spin_lock(&init_mm.page_table_lock); + pte_clear(&init_mm, addr, pte); + spin_unlock(&init_mm.page_table_lock); + + /* For non-direct mapping, pages means nothing. */ + pages++; + } else { + /* + * If we are here, we are freeing vmemmap pages since + * direct mapped memory ranges to be freed are aligned. + * + * If we are not removing the whole page, it means + * other page structs in this page are being used and + * we canot remove them. So fill the unused page_structs + * with 0xFD, and remove the page when it is wholly + * filled with 0xFD. + */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pte_page(*pte)); + if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { + free_pagetable(pte_page(*pte), 0); + + spin_lock(&init_mm.page_table_lock); + pte_clear(&init_mm, addr, pte); + spin_unlock(&init_mm.page_table_lock); + } + } + } + + /* Call free_pte_table() in remove_pmd_table(). 
*/ + flush_tlb_all(); + if (direct) + update_page_count(PG_LEVEL_4K, -pages); +} + +static void __meminit +remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, + bool direct) +{ + unsigned long next, pages = 0; + pte_t *pte_base; + pmd_t *pmd; + void *page_addr; + + pmd = pmd_start + pmd_index(addr); + for (; addr < end; addr = next, pmd++) { + next = pmd_addr_end(addr, end); + + if (!pmd_present(*pmd)) + continue; + + if (pmd_large(*pmd)) { + if (IS_ALIGNED(addr, PMD_SIZE) && + IS_ALIGNED(next, PMD_SIZE)) { + if (!direct) + free_pagetable(pmd_page(*pmd), + get_order(PMD_SIZE)); + + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); + pages++; + } else { + /* If here, we are freeing vmemmap pages. */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pmd_page(*pmd)); + if (!memchr_inv(page_addr, PAGE_INUSE, + PMD_SIZE)) { + free_pagetable(pmd_page(*pmd), + get_order(PMD_SIZE)); + + spin_lock(&init_mm.page_table_lock); + pmd_clear(pmd); + spin_unlock(&init_mm.page_table_lock); + } + } + + continue; + } + + pte_base = (pte_t *)pmd_page_vaddr(*pmd); + remove_pte_table(pte_base, addr, next, direct); + free_pte_table(pte_base, pmd); + } + + /* Call free_pmd_table() in remove_pud_table(). */ + if (direct) + update_page_count(PG_LEVEL_2M, -pages); +} + +static void __meminit +remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, + bool direct) +{ + unsigned long next, pages = 0; + pmd_t *pmd_base; + pud_t *pud; + void *page_addr; + + pud = pud_start + pud_index(addr); + for (; addr < end; addr = next, pud++) { + next = pud_addr_end(addr, end); + + if (!pud_present(*pud)) + continue; + + if (pud_large(*pud)) { + if (IS_ALIGNED(addr, PUD_SIZE) && + IS_ALIGNED(next, PUD_SIZE)) { + if (!direct) + free_pagetable(pud_page(*pud), + get_order(PUD_SIZE)); + + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); + pages++; + } else { + /* If here, we are freeing vmemmap pages. */ + memset((void *)addr, PAGE_INUSE, next - addr); + + page_addr = page_address(pud_page(*pud)); + if (!memchr_inv(page_addr, PAGE_INUSE, + PUD_SIZE)) { + free_pagetable(pud_page(*pud), + get_order(PUD_SIZE)); + + spin_lock(&init_mm.page_table_lock); + pud_clear(pud); + spin_unlock(&init_mm.page_table_lock); + } + } + + continue; + } + + pmd_base = (pmd_t *)pud_page_vaddr(*pud); + remove_pmd_table(pmd_base, addr, next, direct); + free_pmd_table(pmd_base, pud); + } + + if (direct) + update_page_count(PG_LEVEL_1G, -pages); +} + +/* start and end are both virtual address. 
*/ +static void __meminit +remove_pagetable(unsigned long start, unsigned long end, bool direct) +{ + unsigned long next; + pgd_t *pgd; + pud_t *pud; + bool pgd_changed = false; + + for (; start < end; start = next) { + next = pgd_addr_end(start, end); + + pgd = pgd_offset_k(start); + if (!pgd_present(*pgd)) + continue; + + pud = (pud_t *)pgd_page_vaddr(*pgd); + remove_pud_table(pud, start, next, direct); + if (free_pud_table(pud, pgd)) + pgd_changed = true; + } + + if (pgd_changed) + sync_global_pgds(start, end - 1); + + flush_tlb_all(); +} + #ifdef CONFIG_MEMORY_HOTREMOVE int __ref arch_remove_memory(u64 start, u64 size) { diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a1b1c88..ca1f1c2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -529,21 +529,13 @@ out_unlock: return do_split; } -static int split_large_page(pte_t *kpte, unsigned long address) +int __split_large_page(pte_t *kpte, unsigned long address, pte_t *pbase) { unsigned long pfn, pfninc = 1; unsigned int i, level; - pte_t *pbase, *tmp; + pte_t *tmp; pgprot_t ref_prot; - struct page *base; - - if (!debug_pagealloc) - spin_unlock(&cpa_lock); - base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); - if (!debug_pagealloc) - spin_lock(&cpa_lock); - if (!base) - return -ENOMEM; + struct page *base = virt_to_page(pbase); spin_lock(&pgd_lock); /* @@ -551,10 +543,11 @@ static int split_large_page(pte_t *kpte, unsigned long address) * up for us already: */ tmp = lookup_address(address, &level); - if (tmp != kpte) - goto out_unlock; + if (tmp != kpte) { + spin_unlock(&pgd_lock); + return 1; + } - pbase = (pte_t *)page_address(base); paravirt_alloc_pte(&init_mm, page_to_pfn(base)); ref_prot = pte_pgprot(pte_clrhuge(*kpte)); /* @@ -601,17 +594,27 @@ static int split_large_page(pte_t *kpte, unsigned long address) * going on. */ __flush_tlb_all(); + spin_unlock(&pgd_lock); - base = NULL; + return 0; +} -out_unlock: - /* - * If we dropped out via the lookup_address check under - * pgd_lock then stick the page back into the pool: - */ - if (base) +static int split_large_page(pte_t *kpte, unsigned long address) +{ + pte_t *pbase; + struct page *base; + + if (!debug_pagealloc) + spin_unlock(&cpa_lock); + base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); + if (!debug_pagealloc) + spin_lock(&cpa_lock); + if (!base) + return -ENOMEM; + + pbase = (pte_t *)page_address(base); + if (__split_large_page(kpte, address, pbase)) __free_page(base); - spin_unlock(&pgd_lock); return 0; } -- cgit v1.1 From bbcab8789d4a5b942773aa7496794ceebe2d3f78 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:06 -0800 Subject: memory-hotplug: remove page table of x86_64 architecture Search a page table about the removed memory, and clear page table for x86_64 architecture. [akpm@linux-foundation.org: make kernel_physical_mapping_remove() static] Signed-off-by: Wen Congyang Signed-off-by: Jianguo Wu Signed-off-by: Jiang Liu Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index ca6cd40..4e58b83 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1011,6 +1011,15 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) flush_tlb_all(); } +static void __meminit +kernel_physical_mapping_remove(unsigned long start, unsigned long end) +{ + start = (unsigned long)__va(start); + end = (unsigned long)__va(end); + + remove_pagetable(start, end, true); +} + #ifdef CONFIG_MEMORY_HOTREMOVE int __ref arch_remove_memory(u64 start, u64 size) { @@ -1020,6 +1029,7 @@ int __ref arch_remove_memory(u64 start, u64 size) int ret; zone = page_zone(pfn_to_page(start_pfn)); + kernel_physical_mapping_remove(start, start + size); ret = __remove_pages(zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); -- cgit v1.1 From 0197518cd3672029618a16a57597946a094ac7a8 Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:08 -0800 Subject: memory-hotplug: remove memmap of sparse-vmemmap Introduce a new API vmemmap_free() to free and remove vmemmap pagetables. Since pagetable implements are different, each architecture has to provide its own version of vmemmap_free(), just like vmemmap_populate(). Note: vmemmap_free() is not implemented for ia64, ppc, s390, and sparc. [mhocko@suse.cz: fix implicit declaration of remove_pagetable] Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Jianguo Wu Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Signed-off-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4e58b83..474e28f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1011,6 +1011,14 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct) flush_tlb_all(); } +void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages) +{ + unsigned long start = (unsigned long)memmap; + unsigned long end = (unsigned long)(memmap + nr_pages); + + remove_pagetable(start, end, false); +} + static void __meminit kernel_physical_mapping_remove(unsigned long start, unsigned long end) { -- cgit v1.1 From c4c605246452d0e578945ea95a8e72877e97e8c6 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:33:24 -0800 Subject: cpu_hotplug: clear apicid to node when the cpu is hotremoved When a cpu is hotpluged, we call acpi_map_cpu2node() in _acpi_map_lsapic() to store the cpu's node and apicid's node. But we don't clear the cpu's node in acpi_unmap_lsapic() when this cpu is hotremoved. If the node is also hotremoved, we will get the following messages: kernel BUG at include/linux/gfp.h:329! 
invalid opcode: 0000 [#1] SMP Modules linked in: ebtable_nat ebtables ipt_MASQUERADE iptable_nat nf_nat xt_CHECKSUM iptable_mangle bridge stp llc sunrpc ipt_REJECT nf_conntrack_ipv4 nf_defrag_ipv4 iptable_filter ip_tables ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables binfmt_misc dm_mirror dm_region_hash dm_log dm_mod vhost_net macvtap macvlan tun uinput iTCO_wdt iTCO_vendor_support coretemp kvm_intel kvm crc32c_intel microcode pcspkr i2c_i801 i2c_core lpc_ich mfd_core ioatdma e1000e i7core_edac edac_core sg acpi_memhotplug igb dca sd_mod crc_t10dif megaraid_sas mptsas mptscsih mptbase scsi_transport_sas scsi_mod Pid: 3126, comm: init Not tainted 3.6.0-rc3-tangchen-hostbridge+ #13 FUJITSU-SV PRIMEQUEST 1800E/SB RIP: 0010:[] [] allocate_slab+0x28d/0x300 RSP: 0018:ffff88078a049cf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000001 RDI: 0000000000000246 RBP: ffff88078a049d38 R08: 00000000000040d0 R09: 0000000000000001 R10: 0000000000000000 R11: 0000000000000b5f R12: 00000000000052d0 R13: ffff8807c1417300 R14: 0000000000030038 R15: 0000000000000003 FS: 00007fa9b1b44700(0000) GS:ffff8807c3800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007fa9b09acca0 CR3: 000000078b855000 CR4: 00000000000007e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process init (pid: 3126, threadinfo ffff88078a048000, task ffff8807bb6f2650) Call Trace: new_slab+0x30/0x1b0 __slab_alloc+0x358/0x4c0 kmem_cache_alloc_node_trace+0xb4/0x1e0 alloc_fair_sched_group+0xd0/0x1b0 sched_create_group+0x3e/0x110 sched_autogroup_create_attach+0x4d/0x180 sys_setsid+0xd4/0xf0 system_call_fastpath+0x16/0x1b Code: 89 c4 e9 73 fe ff ff 31 c0 89 de 48 c7 c7 45 de 9e 81 44 89 45 c8 e8 22 05 4b 00 85 db 44 8b 45 c8 0f 89 4f ff ff ff 0f 0b eb fe <0f> 0b 90 eb fd 0f 0b eb fe 89 de 48 c7 c7 45 de 9e 81 31 c0 44 RIP [] allocate_slab+0x28d/0x300 RSP ---[ end trace adf84c90f3fea3e5 ]--- The reason is that the cpu's node is not NUMA_NO_NODE, we will call alloc_pages_exact_node() to alloc memory on the node, but the node is offlined. If the node is onlined, we still need cpu's node. For example: a task on the cpu is sleeped when the cpu is hotremoved. We will choose another cpu to run this task when it is waked up. If we know the cpu's node, we will choose the cpu on the same node first. So we should clear cpu-to-node mapping when the node is offlined. This patch only clears apicid-to-node mapping when the cpu is hotremoved. [akpm@linux-foundation.org: fix section error] Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: Yasuaki Ishimatsu Cc: David Rientjes Cc: Jiang Liu Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/acpi/boot.c | 4 ++++ arch/x86/mm/numa.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index cfc755d..230c8ea 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -696,6 +696,10 @@ EXPORT_SYMBOL(acpi_map_lsapic); int acpi_unmap_lsapic(int cpu) { +#ifdef CONFIG_ACPI_NUMA + set_apicid_to_node(per_cpu(x86_cpu_to_apicid, cpu), NUMA_NO_NODE); +#endif + per_cpu(x86_cpu_to_apicid, cpu) = -1; set_cpu_present(cpu, false); num_processors--; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8504f36..f22680b 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -56,7 +56,7 @@ early_param("numa", numa_setup); /* * apicid, cpu, node mappings */ -s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { +s16 __apicid_to_node[MAX_LOCAL_APIC] = { [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE }; -- cgit v1.1 From e13fe8695c57fed678877a9f3f8e99fc637ff4fb Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 16:33:31 -0800 Subject: cpu-hotplug,memory-hotplug: clear cpu_to_node() when offlining the node When the node is offlined, there is no memory/cpu on the node. If a sleep task runs on a cpu of this node, it will be migrated to the cpu on the other node. So we can clear cpu-to-node mapping. [akpm@linux-foundation.org: numa_clear_node() and numa_set_node() can no longer be __cpuinit] Signed-off-by: Wen Congyang Signed-off-by: Tang Chen Cc: Yasuaki Ishimatsu Cc: David Rientjes Cc: Jiang Liu Cc: Minchan Kim Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/numa.h | 4 ++-- arch/x86/mm/numa.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h index 52560a2..1b99ee5 100644 --- a/arch/x86/include/asm/numa.h +++ b/arch/x86/include/asm/numa.h @@ -57,8 +57,8 @@ static inline int numa_cpu_node(int cpu) #endif #ifdef CONFIG_NUMA -extern void __cpuinit numa_set_node(int cpu, int node); -extern void __cpuinit numa_clear_node(int cpu); +extern void numa_set_node(int cpu, int node); +extern void numa_clear_node(int cpu); extern void __init init_cpu_to_node(void); extern void __cpuinit numa_add_cpu(int cpu); extern void __cpuinit numa_remove_cpu(int cpu); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index f22680b..a713d08 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -78,7 +78,7 @@ EXPORT_SYMBOL(node_to_cpumask_map); DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); -void __cpuinit numa_set_node(int cpu, int node) +void numa_set_node(int cpu, int node) { int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); @@ -101,7 +101,7 @@ void __cpuinit numa_set_node(int cpu, int node) set_cpu_numa_node(cpu, node); } -void __cpuinit numa_clear_node(int cpu) +void numa_clear_node(int cpu) { numa_set_node(cpu, NUMA_NO_NODE); } -- cgit v1.1 From 4d59a75125d5a4717e57e9fc62c64b3d346e603e Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Fri, 22 Feb 2013 16:33:35 -0800 Subject: x86: get pg_data_t's memory from other node During the implementation of SRAT support, we met a problem. 
In setup_arch(), we have the following call series: 1) memblock is ready; 2) some functions use memblock to allocate memory; 3) parse ACPI tables, such as SRAT. Before 3), we don't know which memory is hotpluggable, and as a result, we cannot prevent memblock from allocating hotpluggable memory. So, in 2), there could be some hotpluggable memory allocated by memblock. Now, we are trying to parse SRAT earlier, before memblock is ready. But I think we need more investigation on this topic. So in this v5, I dropped all the SRAT support; v5 is just the same as v3, and it is based on 3.8-rc3. As we planned, we will eventually support getting info from SRAT without users' participation, and we will post another patch-set to do so. Also, I think that for now we can add this boot option as the first step of supporting movable nodes. Since Linux cannot migrate direct-mapped pages, the only way for now is to limit the whole node to containing only movable memory. Using SRAT is one way. But even if we can use SRAT, users still need an interface to enable/disable this functionality if they don't want to lose NUMA performance. So I think a user interface is always needed. For now, users can disable this functionality by not specifying the boot option. Later, we will post SRAT support, and add another option value "movablecore_map=acpi" to use SRAT. This patch: If the system can create a movable node in which all of the node's memory is allocated as ZONE_MOVABLE, setup_node_data() cannot allocate memory for the node's pg_data_t. So, use memblock_alloc_try_nid() instead of memblock_alloc_nid() to retry on any node when the node-local allocation fails. Signed-off-by: Yasuaki Ishimatsu Signed-off-by: Lai Jiangshan Signed-off-by: Tang Chen Signed-off-by: Jiang Liu Cc: Wu Jianguo Cc: Wen Congyang Cc: Mel Gorman Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/numa.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index a713d08..e3963f5 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -213,10 +213,9 @@ static void __init setup_node_data(int nid, u64 start, u64 end) * Allocate node data. Try node-local memory and then any node. * Never allocate in DMA zone. */ - nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); + nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); if (!nd_pa) { - pr_err("Cannot find %zu bytes in node %d\n", - nd_size, nid); + pr_err("Cannot find %zu bytes in any node\n", nd_size); return; } nd = __va(nd_pa); -- cgit v1.1 From e8d1955258091e4c92d5a975ebd7fd8a98f5d30f Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:44 -0800 Subject: acpi, memory-hotplug: parse SRAT before memblock is ready On Linux, pages used by the kernel cannot be migrated. As a result, if a memory range is used by the kernel, it cannot be hot-removed. So if we want to hot-remove memory, we should prevent the kernel from using it. The way currently used to prevent this is to specify a memory range via the movablemem_map boot option and set it as ZONE_MOVABLE. But while the system is booting, memblock will allocate memory and reserve it for the kernel. Before we parse SRAT and know the node memory ranges, memblock is already working, and it may allocate memory in ranges that are to be set as ZONE_MOVABLE. This memory can then be used by the kernel and never freed. So, let's parse SRAT before memblock makes its first allocation. And it is early enough.
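The ordering dependency can be pictured with a small, entirely hypothetical sketch; parse_hotplug_info() and early_alloc() are stand-ins for early_parse_srat() and the early memblock allocations, not kernel APIs:

#include <stdbool.h>
#include <stdio.h>

static bool hotplug_ranges_known;

static void parse_hotplug_info(void) /* stand-in for early_parse_srat() */
{
        hotplug_ranges_known = true;
}

static void early_alloc(const char *caller) /* stand-in for memblock */
{
        if (!hotplug_ranges_known)
                printf("%s: blind allocation, may grab hotpluggable memory\n",
                       caller);
        else
                printf("%s: allocation can avoid hotpluggable ranges\n",
                       caller);
}

int main(void)
{
        early_alloc("setup_real_mode"); /* old order: SRAT not parsed yet */
        parse_hotplug_info();           /* new order parses SRAT first... */
        early_alloc("setup_real_mode"); /* ...so allocations become safe */
        return 0;
}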
The first call of memblock_find_in_range_node() is in: setup_arch() |-->setup_real_mode() so, this patch adds a function early_parse_srat() to parse SRAT, and calls it before setup_real_mode() is called. NOTE: 1) early_parse_srat() is called before numa_init(), and has initialized numa_meminfo. So DO NOT clear numa_nodes_parsed in numa_init() and DO NOT zero numa_meminfo in numa_init(), otherwise we will lose the memory NUMA info. 2) I don't know why the count of memory affinities parsed from SRAT was used as the return value in the original acpi_numa_init(). So I added a static variable srat_mem_cnt to remember this count and use it as the return value of the new acpi_numa_init(). [mhocko@suse.cz: parse SRAT before memblock is ready fix] Signed-off-by: Tang Chen Reviewed-by: Wen Congyang Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Len Brown Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 13 +++++++++---- arch/x86/mm/numa.c | 6 ++++-- 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 915f5ef..9c857f0 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1056,6 +1056,15 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif + /* + * In the memory hotplug case, the kernel needs info from SRAT to + * determine which memory is hotpluggable before allocating memory + * using memblock. + */ + acpi_boot_table_init(); + early_acpi_boot_init(); + early_parse_srat(); + #ifdef CONFIG_X86_32 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", (max_pfn_mapped< Date: Fri, 22 Feb 2013 16:33:46 -0800 Subject: acpi, memory-hotplug: extend movablemem_map ranges to the end of node When implementing the movablemem_map boot option, we introduced an array movablemem_map.map[] to store the memory ranges to be set as ZONE_MOVABLE. Since ZONE_MOVABLE is the last zone of a node, if the user didn't specify the whole node memory range, we need to extend it to the node end so that we can use it to prevent memblock from allocating memory in the ranges the user didn't specify. We now implement the movablemem_map boot option like this: /* * For movablemem_map=nn[KMG]@ss[KMG]: * * SRAT: |_____| |_____| |_________| |_________| ...... * node id: 0 1 1 2 * user specified: |__| |___| * movablemem_map: |___| |_________| |______| ...... * * Using movablemem_map, we can prevent memblock from allocating memory * on ZONE_MOVABLE at boot time. * * NOTE: In this case, SRAT info will be ingored. */ [akpm@linux-foundation.org: clean up code, fix build warning] Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H.
Peter Anvin" Cc: Len Brown Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/srat.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 61 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index cdd0da9..3e90039 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -141,11 +141,65 @@ static inline int save_add_info(void) {return 1;} static inline int save_add_info(void) {return 0;} #endif +#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP +static void __init handle_movablemem(int node, u64 start, u64 end) +{ + int overlap; + unsigned long start_pfn, end_pfn; + + start_pfn = PFN_DOWN(start); + end_pfn = PFN_UP(end); + + /* + * For movablecore_map=nn[KMG]@ss[KMG]: + * + * SRAT: |_____| |_____| |_________| |_________| ...... + * node id: 0 1 1 2 + * user specified: |__| |___| + * movablemem_map: |___| |_________| |______| ...... + * + * Using movablemem_map, we can prevent memblock from allocating memory + * on ZONE_MOVABLE at boot time. + */ + overlap = movablemem_map_overlap(start_pfn, end_pfn); + if (overlap >= 0) { + /* + * If part of this range is in movablemem_map, we need to + * add the range after it to extend the range to the end + * of the node, because from the min address specified to + * the end of the node will be ZONE_MOVABLE. + */ + start_pfn = max(start_pfn, + movablemem_map.map[overlap].start_pfn); + insert_movablemem_map(start_pfn, end_pfn); + + /* + * Set the nodemask, so that if the address range on one node + * is not continuse, we can add the subsequent ranges on the + * same node into movablemem_map. + */ + node_set(node, movablemem_map.numa_nodes_hotplug); + } else { + if (node_isset(node, movablemem_map.numa_nodes_hotplug)) + /* + * Insert the range if we already have movable ranges + * on the same node. + */ + insert_movablemem_map(start_pfn, end_pfn); + } +} +#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ +static inline void handle_movablemem(int node, u64 start, u64 end) +{ +} +#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ + /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ int __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { u64 start, end; + u32 hotpluggable; int node, pxm; if (srat_disabled()) @@ -154,7 +208,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) goto out_err_bad_srat; if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) goto out_err; - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) + hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; + if (hotpluggable && !save_add_info()) goto out_err; start = ma->base_address; @@ -174,9 +229,12 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n", node, pxm, - (unsigned long long) start, (unsigned long long) end - 1); + (unsigned long long) start, (unsigned long long) end - 1, + hotpluggable ? "Hot Pluggable": ""); + + handle_movablemem(node, start, end); return 0; out_err_bad_srat: -- cgit v1.1 From 01a178a94e8eaec351b29ee49fbb3d1c124cb7fb Mon Sep 17 00:00:00 2001 From: Tang Chen Date: Fri, 22 Feb 2013 16:33:49 -0800 Subject: acpi, memory-hotplug: support getting hotplug info from SRAT We now provide an option for users who don't want to specify physical memory address in kernel commandline. 
/* * For movablemem_map=acpi: * * SRAT: |_____| |_____| |_________| |_________| ...... * node id: 0 1 1 2 * hotpluggable: n y y n * movablemem_map: |_____| |_________| * * Using movablemem_map, we can prevent memblock from allocating memory * on ZONE_MOVABLE at boot time. */ So the user just specifies movablemem_map=acpi, and the kernel will use the hotpluggable info in SRAT to determine which memory ranges should be set as ZONE_MOVABLE. If all the memory ranges in SRAT are hotpluggable, then no memory can be used by the kernel. But before parsing SRAT, memblock has already reserved some memory ranges for other purposes, such as the kernel image. We cannot prevent the kernel from using this memory, so we need to exclude these ranges even if the memory is hotpluggable. Furthermore, there could be several memory ranges in the single node in which the kernel resides. We may skip one range that has memory reserved by memblock, but if the rest of the memory is too small, the kernel will fail to boot. So, make the whole node in which the kernel resides un-hotpluggable. Then the kernel has enough memory to use. NOTE: Doing it this way will hurt NUMA performance, because the whole node will be set as ZONE_MOVABLE and the kernel cannot use memory on it. If users don't want to lose NUMA performance, they should just not use this option. [akpm@linux-foundation.org: fix warning] [akpm@linux-foundation.org: use strcmp()] Signed-off-by: Tang Chen Cc: KOSAKI Motohiro Cc: Jiang Liu Cc: Jianguo Wu Cc: Kamezawa Hiroyuki Cc: Lai Jiangshan Cc: Wu Jianguo Cc: Yasuaki Ishimatsu Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Len Brown Cc: "Brown, Len" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/srat.c | 71 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 3e90039..79836d0 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -142,16 +142,72 @@ static inline int save_add_info(void) {return 0;} #endif #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP -static void __init handle_movablemem(int node, u64 start, u64 end) +static void __init +handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) { - int overlap; + int overlap, i; unsigned long start_pfn, end_pfn; start_pfn = PFN_DOWN(start); end_pfn = PFN_UP(end); /* - * For movablecore_map=nn[KMG]@ss[KMG]: + * For movablemem_map=acpi: + * + * SRAT: |_____| |_____| |_________| |_________| ...... + * node id: 0 1 1 2 + * hotpluggable: n y y n + * movablemem_map: |_____| |_________| + * + * Using movablemem_map, we can prevent memblock from allocating memory + * on ZONE_MOVABLE at boot time. + * + * Before parsing SRAT, memblock has already reserve some memory ranges + * for other purposes, such as for kernel image. We cannot prevent + * kernel from using these memory, so we need to exclude these memory + * even if it is hotpluggable. + * Furthermore, to ensure the kernel has enough memory to boot, we make + * all the memory on the node which the kernel resides in + * un-hotpluggable. + */ + if (hotpluggable && movablemem_map.acpi) { + /* Exclude ranges reserved by memblock. */ + struct memblock_type *rgn = &memblock.reserved; + + for (i = 0; i < rgn->cnt; i++) { + if (end <= rgn->regions[i].base || + start >= rgn->regions[i].base + + rgn->regions[i].size) + continue; + + /* + * If the memory range overlaps the memory reserved by + * memblock, then the kernel resides in this node.
+ */ + node_set(node, movablemem_map.numa_nodes_kernel); + + goto out; + } + + /* + * If the kernel resides in this node, then the whole node + * should not be hotpluggable. + */ + if (node_isset(node, movablemem_map.numa_nodes_kernel)) + goto out; + + insert_movablemem_map(start_pfn, end_pfn); + + /* + * numa_nodes_hotplug nodemask represents which nodes are put + * into movablemem_map.map[]. + */ + node_set(node, movablemem_map.numa_nodes_hotplug); + goto out; + } + + /* + * For movablemem_map=nn[KMG]@ss[KMG]: * * SRAT: |_____| |_____| |_________| |_________| ...... * node id: 0 1 1 2 @@ -160,6 +216,8 @@ static void __init handle_movablemem(int node, u64 start, u64 end) * * Using movablemem_map, we can prevent memblock from allocating memory * on ZONE_MOVABLE at boot time. + * + * NOTE: In this case, SRAT info will be ingored. */ overlap = movablemem_map_overlap(start_pfn, end_pfn); if (overlap >= 0) { /* * If part of this range is in movablemem_map, we need to * add the range after it to extend the range to the end * of the node, because from the min address specified to * the end of the node will be ZONE_MOVABLE. @@ -187,9 +245,12 @@ static void __init handle_movablemem(int node, u64 start, u64 end) */ insert_movablemem_map(start_pfn, end_pfn); } +out: + return; } #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -static inline void handle_movablemem(int node, u64 start, u64 end) +static inline void +handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) { } #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ @@ -234,7 +295,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) (unsigned long long) start, (unsigned long long) end - 1, hotpluggable ? "Hot Pluggable": ""); - handle_movablemem(node, start, end); + handle_movablemem(node, start, end, hotpluggable); return 0; out_err_bad_srat: -- cgit v1.1 From 942670d0dc41b5fe9b735c31ca9234d80729bf7e Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 Feb 2013 15:11:47 -0800 Subject: x86/mm/numa: Don't check if node is NUMA_NO_NODE If we aren't debugging per_cpu maps, the cpu's node is stored in the per_cpu variable numa_node. If `node' is NUMA_NO_NODE, it means the caller wants to clear the cpu's node. So we should also call set_cpu_numa_node() in this case. Signed-off-by: Wen Congyang Cc: Len Brown Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: "H. Peter Anvin" Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/mm/numa.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 2d125be..21d02f0 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -97,8 +97,7 @@ void __cpuinit numa_set_node(int cpu, int node) #endif per_cpu(x86_cpu_to_node_map, cpu) = node; - if (node != NUMA_NO_NODE) - set_cpu_numa_node(cpu, node); + set_cpu_numa_node(cpu, node); } void __cpuinit numa_clear_node(int cpu) -- cgit v1.1 From 954f857187033ee3d3704a8206715cf354c38898 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 22 Feb 2013 15:11:49 -0800 Subject: Revert "x86, mm: Make spurious_fault check explicitly check the PRESENT bit" I got a report for a minor regression introduced by commit 027ef6c87853b ("mm: thp: fix pmd_present for split_huge_page and PROT_NONE with THP"). So the problem is, pageattr creates kernel pagetables (pte and pmds) that break pte_present/pmd_present, and the patch above exposed this invariant breakage for pmd_present.
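The invariant breakage can be reproduced in miniature: on x86, _PAGE_PROTNONE shares bit 8 with _PAGE_GLOBAL, so a kernel pte whose PRESENT bit was cleared but whose GLOBAL bit was left set still satisfies pte_present(). A userspace sketch, assuming only those flag positions (the pte is reduced to a plain integer):

#include <stdbool.h>
#include <stdio.h>

#define _PAGE_PRESENT  (1UL << 0)
#define _PAGE_GLOBAL   (1UL << 8)
#define _PAGE_PROTNONE _PAGE_GLOBAL /* the alias at the heart of the bug */

static bool pte_present(unsigned long pte)
{
        return pte & (_PAGE_PRESENT | _PAGE_PROTNONE);
}

int main(void)
{
        /* pageattr cleared PRESENT but used to leave GLOBAL set: */
        unsigned long kpte = _PAGE_GLOBAL;

        printf("non-present kernel pte reported present: %d\n",
               pte_present(kpte));
        return 0;
}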
The same problem already existed for the pte and pte_present and it was fixed by commit 660a293ea9be709 ("x86, mm: Make spurious_fault check explicitly check the PRESENT bit") (if it wasn't for that commit, it wouldn't even be a regression). That fix avoids the pagefault to use pte_present. I could follow through by stopping using pmd_present/pmd_huge too. However I think it's more robust to fix pageattr and to clear the PSE/GLOBAL bitflags too in addition to the present bitflag. So the kernel page fault can keep using the regular pte_present/pmd_present/pmd_huge. The confusion arises because _PAGE_GLOBAL and _PAGE_PROTNONE are sharing the same bit, and in the pmd case we pretend _PAGE_PSE to be set only in present pmds (to facilitate split_huge_page final tlb flush). Signed-off-by: Andrea Arcangeli Cc: Andi Kleen Cc: Shaohua Li Cc: "H. Peter Anvin" Cc: Mel Gorman Cc: Hugh Dickins Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index fb674fd..2b97525 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -939,14 +939,8 @@ spurious_fault(unsigned long error_code, unsigned long address) if (pmd_large(*pmd)) return spurious_fault_check(error_code, (pte_t *) pmd); - /* - * Note: don't use pte_present() here, since it returns true - * if the _PAGE_PROTNONE bit is set. However, this aliases the - * _PAGE_GLOBAL bit, which for kernel pages give false positives - * when CONFIG_DEBUG_PAGEALLOC is used. - */ pte = pte_offset_kernel(pmd, address); - if (!(pte_flags(*pte) & _PAGE_PRESENT)) + if (!pte_present(*pte)) return 0; ret = spurious_fault_check(error_code, pte); -- cgit v1.1 From a8aed3e0752b4beb2e37cbed6df69faae88268da Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Fri, 22 Feb 2013 15:11:51 -0800 Subject: x86/mm/pageattr: Prevent PSE and GLOABL leftovers to confuse pmd/pte_present and pmd_huge Without this patch any kernel code that reads kernel memory in non present kernel pte/pmds (as set by pageattr.c) will crash. With this kernel code: static struct page *crash_page; static unsigned long *crash_address; [..] crash_page = alloc_pages(GFP_KERNEL, 9); crash_address = page_address(crash_page); if (set_memory_np((unsigned long)crash_address, 1)) printk("set_memory_np failure\n"); [..] The kernel will crash if inside the "crash tool" one would try to read the memory at the not present address. crash> p crash_address crash_address = $8 = (long unsigned int *) 0xffff88023c000000 crash> rd 0xffff88023c000000 [ *lockup* ] The lockup happens because _PAGE_GLOBAL and _PAGE_PROTNONE shares the same bit, and pageattr leaves _PAGE_GLOBAL set on a kernel pte which is then mistaken as _PAGE_PROTNONE (so pte_present returns true by mistake and the kernel fault then gets confused and loops). With THP the same can happen after we taught pmd_present to check _PAGE_PROTNONE and _PAGE_PSE in commit 027ef6c87853b0a9df5317 ("mm: thp: fix pmd_present for split_huge_page and PROT_NONE with THP"). THP has the same problem with _PAGE_GLOBAL as the 4k pages, but it also has a problem with _PAGE_PSE, which must be cleared too. After the patch is applied copy_user correctly returns -EFAULT and doesn't lockup anymore. 
crash> p crash_address crash_address = $9 = (long unsigned int *) 0xffff88023c000000 crash> rd 0xffff88023c000000 rd: read error: kernel virtual address: ffff88023c000000 type: "64-bit KVADDR" Signed-off-by: Andrea Arcangeli Cc: Andi Kleen Cc: Shaohua Li Cc: "H. Peter Anvin" Cc: Mel Gorman Cc: Hugh Dickins Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/mm/pageattr.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index a718e0d..2713be4 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -445,6 +445,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); /* + * Set the PSE and GLOBAL flags only if the PRESENT flag is + * set otherwise pmd_present/pmd_huge will return true even on + * a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL + * for the ancient hardware that doesn't support it. + */ + if (pgprot_val(new_prot) & _PAGE_PRESENT) + pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL; + else + pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL); + + new_prot = canon_pgprot(new_prot); + + /* * old_pte points to the large page base address. So we need * to add the offset of the virtual address: */ @@ -489,7 +502,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, * The address is aligned and the number of pages * covers the full page. */ - new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); + new_pte = pfn_pte(pte_pfn(old_pte), new_prot); __set_pmd_pte(kpte, address, new_pte); cpa->flags |= CPA_FLUSHTLB; do_split = 0; @@ -540,16 +553,35 @@ static int split_large_page(pte_t *kpte, unsigned long address) #ifdef CONFIG_X86_64 if (level == PG_LEVEL_1G) { pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT; - pgprot_val(ref_prot) |= _PAGE_PSE; + /* + * Set the PSE flags only if the PRESENT flag is set + * otherwise pmd_present/pmd_huge will return true + * even on a non present pmd. + */ + if (pgprot_val(ref_prot) & _PAGE_PRESENT) + pgprot_val(ref_prot) |= _PAGE_PSE; + else + pgprot_val(ref_prot) &= ~_PAGE_PSE; } #endif /* + * Set the GLOBAL flags only if the PRESENT flag is set + * otherwise pmd/pte_present will return true even on a non + * present pmd/pte. The canon_pgprot will clear _PAGE_GLOBAL + * for the ancient hardware that doesn't support it. + */ + if (pgprot_val(ref_prot) & _PAGE_PRESENT) + pgprot_val(ref_prot) |= _PAGE_GLOBAL; + else + pgprot_val(ref_prot) &= ~_PAGE_GLOBAL; + + /* * Get the target pfn from the original entry: */ pfn = pte_pfn(*kpte); for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc) - set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); + set_pte(&pbase[i], pfn_pte(pfn, canon_pgprot(ref_prot))); if (address >= (unsigned long)__va(0) && address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) @@ -660,6 +692,18 @@ repeat: new_prot = static_protections(new_prot, address, pfn); /* + * Set the GLOBAL flags only if the PRESENT flag is + * set otherwise pte_present will return true even on + * a non present pte. The canon_pgprot will clear + * _PAGE_GLOBAL for the ancient hardware that doesn't + * support it. 
+ */ + if (pgprot_val(new_prot) & _PAGE_PRESENT) + pgprot_val(new_prot) |= _PAGE_GLOBAL; + else + pgprot_val(new_prot) &= ~_PAGE_GLOBAL; + + /* * We need to keep the pfn from the existing PTE, * after all we're only going to change it's attributes * not the memory it points to -- cgit v1.1 From 561c6731978fa128f29342495f47fc3365898b3d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Feb 2013 10:52:26 -0500 Subject: switch lseek to COMPAT_SYSCALL_DEFINE Signed-off-by: Al Viro --- arch/x86/ia32/sys_ia32.c | 5 ----- arch/x86/include/asm/sys_ia32.h | 1 - arch/x86/syscalls/syscall_32.tbl | 2 +- 3 files changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 592f5a9..ad7a20c 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -218,11 +218,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, * Some system calls that need sign extended arguments. This could be * done by a generic wrapper. */ -long sys32_lseek(unsigned int fd, int offset, unsigned int whence) -{ - return sys_lseek(fd, offset, whence); -} - long sys32_kill(int pid, int sig) { return sys_kill(pid, sig); diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 0218d91..8459efc 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -43,7 +43,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -long sys32_lseek(unsigned int, int, unsigned int); long sys32_kill(int, int); long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); long sys32_vm86_warning(void); diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index f2fe78f..f51810b 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -25,7 +25,7 @@ 16 i386 lchown sys_lchown16 17 i386 break 18 i386 oldstat sys_stat -19 i386 lseek sys_lseek sys32_lseek +19 i386 lseek sys_lseek compat_sys_lseek 20 i386 getpid sys_getpid 21 i386 mount sys_mount compat_sys_mount 22 i386 umount sys_oldumount -- cgit v1.1 From 3f6d078d4accfff8b114f968259a060bfdc7c682 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Feb 2013 13:49:08 -0500 Subject: fix compat truncate/ftruncate Signed-off-by: Al Viro --- arch/x86/syscalls/syscall_32.tbl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index f51810b..e6d55f0 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -98,8 +98,8 @@ 89 i386 readdir sys_old_readdir compat_sys_old_readdir 90 i386 mmap sys_old_mmap sys32_mmap 91 i386 munmap sys_munmap -92 i386 truncate sys_truncate -93 i386 ftruncate sys_ftruncate +92 i386 truncate sys_truncate compat_sys_truncate +93 i386 ftruncate sys_ftruncate compat_sys_ftruncate 94 i386 fchmod sys_fchmod 95 i386 fchown sys_fchown16 96 i386 getpriority sys_getpriority -- cgit v1.1 From 1256276c98dbcfb009ac8e0687df9a1e291fd149 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 25 Feb 2013 15:54:10 -0500 Subject: x86, doc: Fix incorrect comment about 64-bit code segment descriptors The AMD64 Architecture Programmer's Manual Volume 2, on page 89 mentions: "If the processor is running in 64-bit mode (L=1), the only valid setting of the D bit is 0." This matches with what the code does. 
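The L=1/D=0 rule the corrected comments state can be checked mechanically. A minimal userspace sketch, assuming the classic x86-64 kernel code segment descriptor value and the SDM/APM bit numbering (L = bit 53, D = bit 54 of the 8-byte descriptor):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DESC_L (1ULL << 53) /* 64-bit (long mode) code segment */
#define DESC_D (1ULL << 54) /* default operand size; must be 0 when L=1 */

int main(void)
{
        uint64_t kernel_cs = 0x00af9a000000ffffULL; /* long-mode code seg */

        assert(kernel_cs & DESC_L);    /* CS.L = 1 */
        assert(!(kernel_cs & DESC_D)); /* CS.D = 0, per the manual */
        printf("CS.L=1 CS.D=0: valid 64-bit code segment\n");
        return 0;
}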
Signed-off-by: Konrad Rzeszutek Wilk Link: http://lkml.kernel.org/r/1361825650-14031-4-git-send-email-konrad.wilk@oracle.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 980053c..37f5304 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -49,7 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) startup_64: /* - * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, + * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded an identity mapped page table * for us. These identity mapped page tables map all of the * kernel pages and possibly all of memory. @@ -146,7 +146,7 @@ ident_complete: jmp secondary_startup_64 ENTRY(secondary_startup_64) /* - * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, + * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. * * %esi holds a physical pointer to real_mode_data. -- cgit v1.1 From 058e7b5814534461b0e2468fce5a8f8d2f43c38f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Fri, 22 Feb 2013 15:03:47 +0000 Subject: x86, efi: Mark disable_runtime as __initdata disable_runtime is only referenced from __init functions, so mark it as __initdata. Reported-by: Yinghai Lu Reviewed-by: Satoru Takeuchi Signed-off-by: Matt Fleming Link: http://lkml.kernel.org/r/1361545427-26393-1-git-send-email-matt@console-pimps.org Signed-off-by: H. Peter Anvin --- arch/x86/platform/efi/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index e2cd38f..ec9f325 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -85,7 +85,7 @@ int efi_enabled(int facility) } EXPORT_SYMBOL(efi_enabled); -static bool disable_runtime = false; +static bool __initdata disable_runtime = false; static int __init setup_noefi(char *arg) { disable_runtime = true; -- cgit v1.1 From 7c10093692ed2e6f318387d96b829320aa0ca64c Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 27 Feb 2013 12:46:40 -0800 Subject: x86: Make sure we can boot in the case the BDA contains pure garbage On non-BIOS platforms it is possible that the BIOS data area contains garbage instead of being zeroed or something equivalent (firmware people: we are talking of 1.5K here, so please do the sane thing.) We need on the order of 20-30K of low memory in order to boot, which may grow up to < 64K in the future. We probably want to avoid the lowest of the low memory. At the same time, it seems extremely unlikely that a legitimate EBDA would ever reach down to the 128K (which would require it to be over half a megabyte in size.) Thus, pick 128K as the cutoff for "this is insane, ignore." We may still end up reserving a bunch of extra memory on the low megabyte, but that is not really a major issue these days. In the worst case we lose 512K of RAM. This code really should be merged with trim_bios_range() in arch/x86/kernel/setup.c, but that is a bigger patch for a later merge window. Reported-by: Darren Hart Signed-off-by: H. 
Peter Anvin Cc: Matt Fleming Cc: Link: http://lkml.kernel.org/n/tip-oebml055yyfm8yxmria09rja@git.kernel.org --- arch/x86/kernel/head.c | 53 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 48d9d4e..992f442 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c @@ -5,8 +5,6 @@ #include #include -#define BIOS_LOWMEM_KILOBYTES 0x413 - /* * The BIOS places the EBDA/XBDA at the top of conventional * memory, and usually decreases the reported amount of @@ -16,17 +14,30 @@ * chipset: reserve a page before VGA to prevent PCI prefetch * into it (errata #56). Usually the page is reserved anyways, * unless you have no PS/2 mouse plugged in. + * + * This functions is deliberately very conservative. Losing + * memory in the bottom megabyte is rarely a problem, as long + * as we have enough memory to install the trampoline. Using + * memory that is in use by the BIOS or by some DMA device + * the BIOS didn't shut down *is* a big problem. */ + +#define BIOS_LOWMEM_KILOBYTES 0x413 +#define LOWMEM_CAP 0x9f000U /* Absolute maximum */ +#define INSANE_CUTOFF 0x20000U /* Less than this = insane */ + void __init reserve_ebda_region(void) { unsigned int lowmem, ebda_addr; - /* To determine the position of the EBDA and the */ - /* end of conventional memory, we need to look at */ - /* the BIOS data area. In a paravirtual environment */ - /* that area is absent. We'll just have to assume */ - /* that the paravirt case can handle memory setup */ - /* correctly, without our help. */ + /* + * To determine the position of the EBDA and the + * end of conventional memory, we need to look at + * the BIOS data area. In a paravirtual environment + * that area is absent. We'll just have to assume + * that the paravirt case can handle memory setup + * correctly, without our help. + */ if (paravirt_enabled()) return; @@ -37,19 +48,23 @@ void __init reserve_ebda_region(void) /* start of EBDA area */ ebda_addr = get_bios_ebda(); - /* Fixup: bios puts an EBDA in the top 64K segment */ - /* of conventional memory, but does not adjust lowmem. */ - if ((lowmem - ebda_addr) <= 0x10000) - lowmem = ebda_addr; + /* + * Note: some old Dells seem to need 4k EBDA without + * reporting so, so just consider the memory above 0x9f000 + * to be off limits (bugzilla 2990). + */ + + /* If the EBDA address is below 128K, assume it is bogus */ + if (ebda_addr < INSANE_CUTOFF) + ebda_addr = LOWMEM_CAP; - /* Fixup: bios does not report an EBDA at all. */ - /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */ - if ((ebda_addr == 0) && (lowmem >= 0x9f000)) - lowmem = 0x9f000; + /* If lowmem is less than 128K, assume it is bogus */ + if (lowmem < INSANE_CUTOFF) + lowmem = LOWMEM_CAP; - /* Paranoia: should never happen, but... 
*/ - if ((lowmem == 0) || (lowmem >= 0x100000)) - lowmem = 0x9f000; + /* Use the lower of the lowmem and EBDA markers as the cutoff */ + lowmem = min(lowmem, ebda_addr); + lowmem = min(lowmem, LOWMEM_CAP); /* Absolute cap */ /* reserve all memory between lowmem and the 1MB mark */ memblock_reserve(lowmem, 0x100000 - lowmem); -- cgit v1.1 From 6131ffaa1f091415b7a24abb01f033d9c0a727f4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Feb 2013 16:59:05 -0500 Subject: more file_inode() open-coded instances Signed-off-by: Al Viro --- arch/x86/kernel/msr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 4929502..ce13049 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -71,7 +71,7 @@ static ssize_t msr_read(struct file *file, char __user *buf, u32 __user *tmp = (u32 __user *) buf; u32 data[2]; u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); + int cpu = iminor(file_inode(file)); int err = 0; ssize_t bytes = 0; @@ -99,7 +99,7 @@ static ssize_t msr_write(struct file *file, const char __user *buf, const u32 __user *tmp = (const u32 __user *)buf; u32 data[2]; u32 reg = *ppos; - int cpu = iminor(file->f_path.dentry->d_inode); + int cpu = iminor(file_inode(file)); int err = 0; ssize_t bytes = 0; @@ -125,7 +125,7 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) { u32 __user *uregs = (u32 __user *)arg; u32 regs[8]; - int cpu = iminor(file->f_path.dentry->d_inode); + int cpu = iminor(file_inode(file)); int err; switch (ioc) { @@ -171,13 +171,12 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) static int msr_open(struct inode *inode, struct file *file) { - unsigned int cpu; + unsigned int cpu = iminor(file_inode(file)); struct cpuinfo_x86 *c; if (!capable(CAP_SYS_RAWIO)) return -EPERM; - cpu = iminor(file->f_path.dentry->d_inode); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ -- cgit v1.1 From 887cbce0adead8dc394157b8e53603ed001a3060 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 27 Feb 2013 17:05:48 -0800 Subject: arch Kconfig: centralise CONFIG_ARCH_NO_VIRT_TO_BUS Change it to CONFIG_HAVE_VIRT_TO_BUS and set it in all architecures that already provide virt_to_bus(). Signed-off-by: Stephen Rothwell Reviewed-by: James Hogan Cc: Bjorn Helgaas Cc: H Hartley Sweeten Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "David S. Miller" Cc: Paul Mundt Cc: Vineet Gupta Cc: James Bottomley Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6a93833..a4f24f5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -112,6 +112,7 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_VIRT_TO_BUS select MODULES_USE_ELF_REL if X86_32 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 -- cgit v1.1 From b67bfe0d42cac56c512dd5da4b1b347a23f4b70a Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Wed, 27 Feb 2013 17:06:00 -0800 Subject: hlist: drop the node parameter from iterators I'm not sure why, but the hlist for each entry iterators were conceived list_for_each_entry(pos, head, member) The hlist ones were greedy and wanted an extra parameter: hlist_for_each_entry(tpos, pos, head, member) Why did they need an extra pos parameter? 
I'm not quite sure. Not only they don't really need it, it also prevents the iterator from looking exactly like the list iterator, which is unfortunate. Besides the semantic patch, there was some manual work required: - Fix up the actual hlist iterators in linux/list.h - Fix up the declaration of other iterators based on the hlist ones. - A very small amount of places were using the 'node' parameter, this was modified to use 'obj->member' instead. - Coccinelle didn't handle the hlist_for_each_entry_safe iterator properly, so those had to be fixed up manually. The semantic patch which is mostly the work of Peter Senna Tschudin is here: @@ iterator name hlist_for_each_entry, hlist_for_each_entry_continue, hlist_for_each_entry_from, hlist_for_each_entry_rcu, hlist_for_each_entry_rcu_bh, hlist_for_each_entry_continue_rcu_bh, for_each_busy_worker, ax25_uid_for_each, ax25_for_each, inet_bind_bucket_for_each, sctp_for_each_hentry, sk_for_each, sk_for_each_rcu, sk_for_each_from, sk_for_each_safe, sk_for_each_bound, hlist_for_each_entry_safe, hlist_for_each_entry_continue_rcu, nr_neigh_for_each, nr_neigh_for_each_safe, nr_node_for_each, nr_node_for_each_safe, for_each_gfn_indirect_valid_sp, for_each_gfn_sp, for_each_host; type T; expression a,c,d,e; identifier b; statement S; @@ -T b; <+... when != b ( hlist_for_each_entry(a, - b, c, d) S | hlist_for_each_entry_continue(a, - b, c) S | hlist_for_each_entry_from(a, - b, c) S | hlist_for_each_entry_rcu(a, - b, c, d) S | hlist_for_each_entry_rcu_bh(a, - b, c, d) S | hlist_for_each_entry_continue_rcu_bh(a, - b, c) S | for_each_busy_worker(a, c, - b, d) S | ax25_uid_for_each(a, - b, c) S | ax25_for_each(a, - b, c) S | inet_bind_bucket_for_each(a, - b, c) S | sctp_for_each_hentry(a, - b, c) S | sk_for_each(a, - b, c) S | sk_for_each_rcu(a, - b, c) S | sk_for_each_from -(a, b) +(a) S + sk_for_each_from(a) S | sk_for_each_safe(a, - b, c, d) S | sk_for_each_bound(a, - b, c) S | hlist_for_each_entry_safe(a, - b, c, d, e) S | hlist_for_each_entry_continue_rcu(a, - b, c) S | nr_neigh_for_each(a, - b, c) S | nr_neigh_for_each_safe(a, - b, c, d) S | nr_node_for_each(a, - b, c) S | nr_node_for_each_safe(a, - b, c, d) S | - for_each_gfn_sp(a, c, d, b) S + for_each_gfn_sp(a, c, d) S | - for_each_gfn_indirect_valid_sp(a, c, d, b) S + for_each_gfn_indirect_valid_sp(a, c, d) S | for_each_host(a, - b, c) S | for_each_host_safe(a, - b, c, d) S | for_each_mesh_entry(a, - b, c, d) S ) ...+> [akpm@linux-foundation.org: drop bogus change from net/ipv4/raw.c] [akpm@linux-foundation.org: drop bogus hunk from net/ipv6/raw.c] [akpm@linux-foundation.org: checkpatch fixes] [akpm@linux-foundation.org: fix warnings] [akpm@linux-foudnation.org: redo intrusive kvm changes] Tested-by: Peter Senna Tschudin Acked-by: Paul E. 
McKenney Signed-off-by: Sasha Levin Cc: Wu Fengguang Cc: Marcelo Tosatti Cc: Gleb Natapov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/kernel/kprobes/core.c | 8 ++++---- arch/x86/kvm/mmu.c | 26 ++++++++++---------------- 2 files changed, 14 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index e124554..3f06e61 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -652,7 +652,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; kprobe_opcode_t *correct_ret_addr = NULL; @@ -682,7 +682,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) * will be the real return address, and all the rest will * point to kretprobe_trampoline. */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -701,7 +701,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); correct_ret_addr = ri->ret_addr; - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; @@ -728,7 +728,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_hash_unlock(current, &flags); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 4ed3edb..956ca35 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1644,13 +1644,13 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); -#define for_each_gfn_sp(kvm, sp, gfn, pos) \ - hlist_for_each_entry(sp, pos, \ +#define for_each_gfn_sp(kvm, sp, gfn) \ + hlist_for_each_entry(sp, \ &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ if ((sp)->gfn != (gfn)) {} else -#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn, pos) \ - hlist_for_each_entry(sp, pos, \ +#define for_each_gfn_indirect_valid_sp(kvm, sp, gfn) \ + hlist_for_each_entry(sp, \ &(kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(gfn)], hash_link) \ if ((sp)->gfn != (gfn) || (sp)->role.direct || \ (sp)->role.invalid) {} else @@ -1706,11 +1706,10 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) { struct kvm_mmu_page *s; - struct hlist_node *node; LIST_HEAD(invalid_list); bool flush = false; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (!s->unsync) continue; @@ -1848,7 +1847,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, union kvm_mmu_page_role role; unsigned quadrant; struct kvm_mmu_page *sp; - struct hlist_node *node; bool need_sync = false; role = vcpu->arch.mmu.base_role; @@ -1863,7 +1861,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu 
*vcpu, quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; role.quadrant = quadrant; } - for_each_gfn_sp(vcpu->kvm, sp, gfn, node) { + for_each_gfn_sp(vcpu->kvm, sp, gfn) { if (!need_sync && sp->unsync) need_sync = true; @@ -2151,14 +2149,13 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int goal_nr_mmu_pages) int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn) { struct kvm_mmu_page *sp; - struct hlist_node *node; LIST_HEAD(invalid_list); int r; pgprintk("%s: looking for gfn %llx\n", __func__, gfn); r = 0; spin_lock(&kvm->mmu_lock); - for_each_gfn_indirect_valid_sp(kvm, sp, gfn, node) { + for_each_gfn_indirect_valid_sp(kvm, sp, gfn) { pgprintk("%s: gfn %llx role %x\n", __func__, gfn, sp->role.word); r = 1; @@ -2288,9 +2285,8 @@ static void __kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) static void kvm_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn) { struct kvm_mmu_page *s; - struct hlist_node *node; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (s->unsync) continue; WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); @@ -2302,10 +2298,9 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync) { struct kvm_mmu_page *s; - struct hlist_node *node; bool need_unsync = false; - for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, s, gfn) { if (!can_unsync) return 1; @@ -3933,7 +3928,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, gfn_t gfn = gpa >> PAGE_SHIFT; union kvm_mmu_page_role mask = { .word = 0 }; struct kvm_mmu_page *sp; - struct hlist_node *node; LIST_HEAD(invalid_list); u64 entry, gentry, *spte; int npte; @@ -3964,7 +3958,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); mask.cr0_wp = mask.cr4_pae = mask.nxe = 1; - for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn, node) { + for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { if (detect_write_misaligned(sp, gpa, bytes) || detect_write_flooding(sp)) { zap_page |= !!kvm_mmu_prepare_zap_page(vcpu->kvm, sp, -- cgit v1.1 From 3d2a80a230250c2534ce5b17503670adaf1d7fff Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Wed, 27 Feb 2013 15:28:28 -0500 Subject: x86/kvm: Fix pvclock vsyscall fixmap The physical memory fixmapped for the pvclock clock_gettime vsyscall was allocated, and thus is not a kernel symbol. __pa() is the proper method to use in this case. Fixes the crash below when booting a next-20130204+ smp guest on a 3.8-rc5+ KVM host. 
[ 0.666410] udevd[97]: starting version 175 [ 0.674043] udevd[97]: udevd:[97]: segfault at ffffffffff5fd020 ip 00007fff069e277f sp 00007fff068c9ef8 error d Acked-by: Marcelo Tosatti Signed-off-by: Peter Hurley Signed-off-by: Gleb Natapov --- arch/x86/kernel/pvclock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 85c3959..2cb9470 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -185,7 +185,7 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, - __pa_symbol(i) + (idx*PAGE_SIZE), + __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } -- cgit v1.1 From c79c49826270b8b0061b2fca840fc3f013c8a78a Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 26 Feb 2013 12:51:27 -0500 Subject: xen/pat: Disable PAT using pat_enabled value. The git commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1 (xen/pat: Disable PAT support for now) explains in details why we want to disable PAT for right now. However that change was not enough and we should have also disabled the pat_enabled value. Otherwise we end up with: mmap-example:3481 map pfn expected mapping type write-back for [mem 0x00010000-0x00010fff], got uncached-minus ------------[ cut here ]------------ WARNING: at /build/buildd/linux-3.8.0/arch/x86/mm/pat.c:774 untrack_pfn+0xb8/0xd0() mem 0x00010000-0x00010fff], got uncached-minus ------------[ cut here ]------------ WARNING: at /build/buildd/linux-3.8.0/arch/x86/mm/pat.c:774 untrack_pfn+0xb8/0xd0() ... Pid: 3481, comm: mmap-example Tainted: GF 3.8.0-6-generic #13-Ubuntu Call Trace: [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] untrack_pfn+0xb8/0xd0 [] unmap_single_vma+0xac/0x100 [] unmap_vmas+0x49/0x90 [] exit_mmap+0x98/0x170 [] mmput+0x64/0x100 [] dup_mm+0x445/0x660 [] copy_process.part.22+0xa5f/0x1510 [] do_fork+0x91/0x350 [] sys_clone+0x16/0x20 [] stub_clone+0x69/0x90 [] ? system_call_fastpath+0x1a/0x1f ---[ end trace 4918cdd0a4c9fea4 ]--- (a similar message shows up if you end up launching 'mcelog') The call chain is (as analyzed by Liu, Jinsong): do_fork --> copy_process --> dup_mm --> dup_mmap --> copy_page_range --> track_pfn_copy --> reserve_pfn_range --> line 624: flags != want_flags It comes from different memory types of page table (_PAGE_CACHE_WB) and MTRR (_PAGE_CACHE_UC_MINUS). Stefan Bader dug in this deep and found out that: "That makes it clearer as this will do reserve_memtype(...) --> pat_x_mtrr_type --> mtrr_type_lookup --> __mtrr_type_lookup And that can return -1/0xff in case of MTRR not being enabled/initialized. Which is not the case (given there are no messages for it in dmesg). This is not equal to MTRR_TYPE_WRBACK and thus becomes _PAGE_CACHE_UC_MINUS. It looks like the problem starts early in reserve_memtype: if (!pat_enabled) { /* This is identical to page table setting without PAT */ if (new_type) { if (req_type == _PAGE_CACHE_WC) *new_type = _PAGE_CACHE_UC_MINUS; else *new_type = req_type & _PAGE_CACHE_MASK; } return 0; } This would be what we want, that is clearing the PWT and PCD flags from the supported flags - if pat_enabled is disabled." This patch does that - disabling PAT. 
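To make the quoted reserve_memtype() behavior concrete: with pat_enabled forced to 0, a write-combining request degrades to UC_MINUS, and every other request simply keeps its PWT/PCD cache bits. A userspace restatement of just that branch (the constants mirror the 3.x kernel's _PAGE_CACHE_* encoding; the full PAT path is elided):

#include <stdio.h>

#define _PAGE_CACHE_MASK     0x18UL /* PCD | PWT */
#define _PAGE_CACHE_WB       0x00UL
#define _PAGE_CACHE_WC       0x08UL
#define _PAGE_CACHE_UC_MINUS 0x10UL

static int pat_enabled; /* the flag this patch forces to 0 under Xen */

static unsigned long reserve_type(unsigned long req_type)
{
        if (!pat_enabled) {
                /* identical to page table setting without PAT */
                if (req_type == _PAGE_CACHE_WC)
                        return _PAGE_CACHE_UC_MINUS;
                return req_type & _PAGE_CACHE_MASK;
        }
        return req_type; /* full PAT handling elided */
}

int main(void)
{
        printf("WC request -> %#lx (UC_MINUS when PAT is off)\n",
               reserve_type(_PAGE_CACHE_WC));
        return 0;
}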
CC: stable@vger.kernel.org # 3.3 and further Reported-by: Sander Eikelenboom Reported-and-Tested-by: Konrad Rzeszutek Wilk Reported-and-Tested-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 39928d1..c8e1c7b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -67,6 +67,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI #include @@ -1417,7 +1418,14 @@ asmlinkage void __init xen_start_kernel(void) */ acpi_numa = -1; #endif - +#ifdef CONFIG_X86_PAT + /* + * For right now disable the PAT. We should remove this once + * git commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1 + * (xen/pat: Disable PAT support for now) is reverted. + */ + pat_enabled = 0; +#endif /* Don't do the full vcpu_info placement stuff until we have a possible map and a non-dummy shared_info. */ per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; -- cgit v1.1 From 884ac2978a295b7df3c4a686d3bff6932bbbb460 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 28 Feb 2013 09:05:41 -0500 Subject: xen/pci: We don't do multiple MSI's. There is no hypercall to setup multiple MSI per PCI device. As such with these two new commits: - 08261d87f7d1b6253ab3223756625a5c74532293 PCI/MSI: Enable multiple MSIs with pci_enable_msi_block_auto() - 5ca72c4f7c412c2002363218901eba5516c476b1 AHCI: Support multiple MSIs we would call the PHYSDEVOP_map_pirq 'nvec' times with the same contents of the PCI device. Sander discovered that we would get the same PIRQ value 'nvec' times and return said values to the caller. That of course meant that the device was configured only with one MSI and AHCI would fail with: ahci 0000:00:11.0: version 3.0 xen: registering gsi 19 triggering 0 polarity 1 xen: --> pirq=19 -> irq=19 (gsi=19) (XEN) [2013-02-27 19:43:07] IOAPIC[0]: Set PCI routing entry (6-19 -> 0x99 -> IRQ 19 Mode:1 Active:1) ahci 0000:00:11.0: AHCI 0001.0200 32 slots 4 ports 6 Gbps 0xf impl SATA mode ahci 0000:00:11.0: flags: 64bit ncq sntf ilck pm led clo pmp pio slum part ahci: probe of 0000:00:11.0 failed with error -22 That is b/c in ahci_host_activate the second call to devm_request_threaded_irq would return -EINVAL as we passed in (on the second run) an IRQ that was never initialized. 
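The early return 1 added above leans on the MSI core's fallback contract: a positive return from the arch setup hook means "only that many vectors are available, retry with fewer". A hypothetical sketch of that contract (stand-in function names, not the kernel's exact API):

#include <stdio.h>

#define PCI_CAP_ID_MSI 0x05

/* stand-in for xen_setup_msi_irqs(): no hypercall maps multiple MSIs */
static int setup_msi_irqs(int nvec, int type)
{
        if (type == PCI_CAP_ID_MSI && nvec > 1)
                return 1; /* multi-MSI unsupported: offer one vector */
        /* ... map a single pirq for the device here ... */
        return 0;
}

int main(void)
{
        int nvec = 4, ret;

        /* the core retries with the advertised count until success */
        while ((ret = setup_msi_irqs(nvec, PCI_CAP_ID_MSI)) > 0)
                nvec = ret;
        printf("configured with %d MSI vector(s)\n", nvec);
        return 0;
}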
CC: stable@vger.kernel.org Reported-and-Tested-by: Sander Eikelenboom Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 56ab749..94e7662 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -162,6 +162,9 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_desc *msidesc; int *v; + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + v = kzalloc(sizeof(int) * max(1, nvec), GFP_KERNEL); if (!v) return -ENOMEM; @@ -220,6 +223,9 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) struct msi_desc *msidesc; struct msi_msg msg; + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + list_for_each_entry(msidesc, &dev->msi_list, list) { __read_msi_msg(msidesc, &msg); pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | @@ -263,6 +269,9 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) int ret = 0; struct msi_desc *msidesc; + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + list_for_each_entry(msidesc, &dev->msi_list, list) { struct physdev_map_pirq map_irq; domid_t domid; -- cgit v1.1 From 20e6926dcbafa1b361f1c29d967688be14b6ca4b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 1 Mar 2013 14:51:27 -0800 Subject: x86, ACPI, mm: Revert movablemem_map support Tim found: WARNING: at arch/x86/kernel/smpboot.c:324 topology_sane.isra.2+0x6f/0x80() Hardware name: S2600CP sched: CPU #1's llc-sibling CPU #0 is not on the same node! [node: 1 != 0]. Ignoring dependency. smpboot: Booting Node 1, Processors #1 Modules linked in: Pid: 0, comm: swapper/1 Not tainted 3.9.0-0-generic #1 Call Trace: set_cpu_sibling_map+0x279/0x449 start_secondary+0x11d/0x1e5 Don Morris reproduced it on an HP z620 workstation, and bisected it to commit e8d195525809 ("acpi, memory-hotplug: parse SRAT before memblock is ready"). It turns out movablemem_map has some problems, and it breaks several things: 1. numa_init is called several times, NOT just for srat, so the nodes_clear(numa_nodes_parsed) and memset(&numa_meminfo, 0, sizeof(numa_meminfo)) calls cannot simply be removed; the sequence to consider is: numaq, srat, amd, dummy, and the fall-back path has to keep working (see the sketch after this list). 2. simply splitting acpi_numa_init into early_parse_srat has problems: a. early_parse_srat is NOT called for ia64, so it breaks ia64. b. the for (i = 0; i < MAX_LOCAL_APIC; i++) set_apicid_to_node(i, NUMA_NO_NODE) loop is still left in numa_init, so it will just clear the result from early_parse_srat; it should be moved before that.... c. it breaks ACPI_TABLE_OVERRIDE... as the ACPI table scan is moved early, before the override from the INITRD is settled. 3. the patch TITLE is totally misleading: there is NO x86 in the title, but it changes critical x86 code. That caused the x86 maintainers not to pay attention, so the problem was not found early. Those patches really should have been routed via tip/x86/mm. 4. after that commit, the following ranges can not use movable ram: a. real_mode code.... well..funny, legacy Node0 [0,1M) could be hot-removed? b. initrd... it will be freed after booting, so it could be on movable... c. crashkernel for kdump...: looks like we cannot put the kdump kernel above 4G anymore. d. init_mem_mapping: cannot put the page table high anymore. e. initmem_init: vmemmap cannot be on the high local node anymore. That is not good. If a node is hotpluggable, memory-related ranges like the page table and vmemmap could be on that node without problem, and should be on that node.
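Point 1 above refers to the initializer fall-back chain in arch/x86/mm/numa.c, which in this era looked roughly like the following (an abridged editor's sketch of the 3.9 code, shown for orientation; not part of the patch):

	static void __init x86_numa_init(void)
	{
		if (!numa_off) {
	#ifdef CONFIG_X86_NUMAQ
			if (!numa_init(numaq_numa_init))
				return;
	#endif
	#ifdef CONFIG_ACPI_NUMA
			if (!numa_init(x86_acpi_numa_init))
				return;
	#endif
	#ifdef CONFIG_AMD_NUMA
			if (!numa_init(amd_numa_init))
				return;
	#endif
		}
		/* every real initializer failed or was disabled: fake one node */
		numa_init(dummy_numa_init);
	}

Each numa_init() invocation resets numa_nodes_parsed and numa_meminfo before calling its init function, which is exactly why those resets cannot simply be dropped.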
We have a workaround patch that could fix some of the problems, but some cannot be fixed. So just remove the offending commit and related ones, including: f7210e6c4ac7 ("mm/memblock.c: use CONFIG_HAVE_MEMBLOCK_NODE_MAP to protect movablecore_map in memblock_overlaps_region().") 01a178a94e8e ("acpi, memory-hotplug: support getting hotplug info from SRAT") 27168d38fa20 ("acpi, memory-hotplug: extend movablemem_map ranges to the end of node") e8d195525809 ("acpi, memory-hotplug: parse SRAT before memblock is ready") fb06bc8e5f42 ("page_alloc: bootmem limit with movablecore_map") 42f47e27e761 ("page_alloc: make movablemem_map have higher priority") 6981ec31146c ("page_alloc: introduce zone_movable_limit[] to keep movable limit for nodes") 34b71f1e04fc ("page_alloc: add movable_memmap kernel parameter") 4d59a75125d5 ("x86: get pg_data_t's memory from other node") Later we should have patches that make sure the kernel puts page tables and vmemmap on local node RAM instead of pushing them down to node0. We also need to find a way to put other kernel-used RAM on local node RAM. Reported-by: Tim Gardner Reported-by: Don Morris Bisected-by: Don Morris Tested-by: Don Morris Signed-off-by: Yinghai Lu Cc: Tony Luck Cc: Thomas Renninger Cc: Tejun Heo Cc: Tang Chen Cc: Yasuaki Ishimatsu Signed-off-by: Linus Torvalds --- arch/x86/kernel/setup.c | 13 ++--- arch/x86/mm/numa.c | 11 ++--- arch/x86/mm/srat.c | 125 ++---------------------------------------------- 3 files changed, 12 insertions(+), 137 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index e89acdf..84d3285 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1056,15 +1056,6 @@ void __init setup_arch(char **cmdline_p) setup_bios_corruption_check(); #endif - /* - * In the memory hotplug case, the kernel needs info from SRAT to - * determine which memory is hotpluggable before allocating memory - * using memblock. - */ - acpi_boot_table_init(); - early_acpi_boot_init(); - early_parse_srat(); - #ifdef CONFIG_X86_32 printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n", (max_pfn_mapped<cnt; i++) { - if (end <= rgn->regions[i].base || - start >= rgn->regions[i].base + - rgn->regions[i].size) - continue; - - /* - * If the memory range overlaps the memory reserved by - * memblock, then the kernel resides in this node. - */ - node_set(node, movablemem_map.numa_nodes_kernel); - - goto out; - } - - /* - * If the kernel resides in this node, then the whole node - * should not be hotpluggable. - */ - if (node_isset(node, movablemem_map.numa_nodes_kernel)) - goto out; - - insert_movablemem_map(start_pfn, end_pfn); - - /* - * numa_nodes_hotplug nodemask represents which nodes are put - * into movablemem_map.map[]. - */ - node_set(node, movablemem_map.numa_nodes_hotplug); - goto out; - } - - /* - * For movablemem_map=nn[KMG]@ss[KMG]: - * - * SRAT: |_____| |_____| |_________| |_________| ...... - * node id: 0 1 1 2 - * user specified: |__| |___| - * movablemem_map: |___| |_________| |______| ...... - * - * Using movablemem_map, we can prevent memblock from allocating memory - * on ZONE_MOVABLE at boot time. - * - * NOTE: In this case, SRAT info will be ingored. - */ - overlap = movablemem_map_overlap(start_pfn, end_pfn); - if (overlap >= 0) { - /* - * If part of this range is in movablemem_map, we need to - * add the range after it to extend the range to the end - * of the node, because from the min address specified to - * the end of the node will be ZONE_MOVABLE.
- */ - start_pfn = max(start_pfn, - movablemem_map.map[overlap].start_pfn); - insert_movablemem_map(start_pfn, end_pfn); - - /* - * Set the nodemask, so that if the address range on one node - * is not continuse, we can add the subsequent ranges on the - * same node into movablemem_map. - */ - node_set(node, movablemem_map.numa_nodes_hotplug); - } else { - if (node_isset(node, movablemem_map.numa_nodes_hotplug)) - /* - * Insert the range if we already have movable ranges - * on the same node. - */ - insert_movablemem_map(start_pfn, end_pfn); - } -out: - return; -} -#else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ -static inline void -handle_movablemem(int node, u64 start, u64 end, u32 hotpluggable) -{ -} -#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */ - /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ int __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { u64 start, end; - u32 hotpluggable; int node, pxm; if (srat_disabled()) @@ -269,8 +154,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) goto out_err_bad_srat; if ((ma->flags & ACPI_SRAT_MEM_ENABLED) == 0) goto out_err; - hotpluggable = ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE; - if (hotpluggable && !save_add_info()) + if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && !save_add_info()) goto out_err; start = ma->base_address; @@ -290,12 +174,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) node_set(node, numa_nodes_parsed); - printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx] %s\n", + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", node, pxm, - (unsigned long long) start, (unsigned long long) end - 1, - hotpluggable ? "Hot Pluggable": ""); - - handle_movablemem(node, start, end, hotpluggable); + (unsigned long long) start, (unsigned long long) end - 1); return 0; out_err_bad_srat: -- cgit v1.1 From 576cfb404c9cab728e9462ea713f3422679d5cf7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Mar 2013 21:16:16 +0100 Subject: x86, smpboot: Remove unused variable The cpuinfo_x86 ptr is unused now. Drop it. It became obsolete with 69fb3676df33 ("x86 idle: remove mwait_idle() and "idle=mwait" cmdline param"), which removed its only user. [ hpa: fixes gcc warning ] Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1362428180-8865-2-git-send-email-bp@alien8.de Cc: Len Brown Signed-off-by: H. Peter Anvin --- arch/x86/kernel/smpboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a6ceaed..9f190a2 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1365,9 +1365,8 @@ static inline void mwait_play_dead(void) unsigned int eax, ebx, ecx, edx; unsigned int highest_cstate = 0; unsigned int highest_subcstate = 0; - int i; void *mwait_ptr; - struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); + int i; if (!this_cpu_has(X86_FEATURE_MWAIT)) return; -- cgit v1.1 From 015221fefbc93689dd47508a66326556adf2abcd Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Sun, 3 Mar 2013 00:14:42 +0100 Subject: x86: Fix 32-bit *_cpu_data initializers The commit 27be457000211a6903968dfce06d5f73f051a217 ('x86 idle: remove 32-bit-only "no-hlt" parameter, hlt_works_ok flag') removed the hlt_works_ok flag from struct cpuinfo_x86, but the boot_cpu_data and new_cpu_data initializers were not updated, causing the f00f_bug flag to be set instead of fdiv_bug. If CONFIG_X86_F00F_BUG is not set the f00f_bug flag is never cleared.
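As a quick illustration of why positional initializers are fragile here (an editor's standalone example with a simplified stand-in struct, not code from the patch): once a field is removed from the middle of the struct, every later positional value silently shifts onto the wrong member, while designated initializers keep pointing at the field they name:

	/* simplified stand-in for the relevant cpuinfo_x86 fields;
	 * char hlt_works_ok used to sit after wp_works_ok */
	struct demo {
		char wp_works_ok;
		char hard_math;
		char fdiv_bug;
		char f00f_bug;
	};

	/* before the removal this list meant: wp_works_ok = -1,
	 * hlt_works_ok = 1, hard_math = 0, fdiv_bug = -1; with
	 * hlt_works_ok gone, the same list now sets fdiv_bug = 0
	 * and f00f_bug = -1 instead -- exactly the reported bug */
	struct demo positional = { -1, 1, 0, -1 };

	/* designated initializers survive the layout change */
	struct demo designated = { .wp_works_ok = -1, .fdiv_bug = -1 };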
To avoid such problems in the future, C99-style initialization is now used. Signed-off-by: Krzysztof Mazur Acked-by: Borislav Petkov Cc: len.brown@intel.com Link: http://lkml.kernel.org/r/1362266082-2227-1-git-send-email-krzysiek@podlesie.net Signed-off-by: Ingo Molnar Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 84d3285..90d8cc9 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -171,9 +171,15 @@ static struct resource bss_resource = { #ifdef CONFIG_X86_32 /* cpu data as detected by the assembly code in head.S */ -struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 new_cpu_data __cpuinitdata = { + .wp_works_ok = -1, + .fdiv_bug = -1, +}; /* common cpu data for all cpus */ -struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1}; +struct cpuinfo_x86 boot_cpu_data __read_mostly = { + .wp_works_ok = -1, + .fdiv_bug = -1, +}; EXPORT_SYMBOL(boot_cpu_data); unsigned int def_to_bigsmp; -- cgit v1.1 From 98e7a989979b185f49e86ddaed2ad6890299d9f0 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 6 Mar 2013 20:18:21 -0800 Subject: x86, mm: Make sure to find a 2M free block for the first mapped area Henrik reported that his MacAir 3.1 would not boot with | commit 8d57470d8f859635deffe3919d7d4867b488b85a | Date: Fri Nov 16 19:38:58 2012 -0800 | | x86, mm: setup page table in top-down It turns out that we do not calculate real_end properly: we try to get a 2M-sized block with 4K alignment, and later round down to 2M, so we may get less than 2M for the first mapping - in the extreme case only 4K. Henrik's system got (1M-32K), as the last usable range is [mem 0x7f9db000-0x7fef8fff]. The problem is exposed when an EFI boot leaves several holes, which forces the mapping to use PTEs, as we only map usable areas. To fix it, just make the block 2M aligned, so we are guaranteed to be able to use large pages to map it. Reported-by: Henrik Rydberg Bisected-by: Henrik Rydberg Tested-by: Henrik Rydberg Signed-off-by: Yinghai Lu Link: http://lkml.kernel.org/r/CAE9FiQX4nQ7_1kg5RL_vh56rmcSHXUi1ExrZX7CwED4NGMnHfg@mail.gmail.com Signed-off-by: H. Peter Anvin --- arch/x86/mm/init.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 4903a03..59b7fc4 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -410,9 +410,8 @@ void __init init_mem_mapping(void) /* the ISA range is always mapped regardless of memory holes */ init_memory_mapping(0, ISA_END_ADDRESS); - /* xen has big range in reserved near end of ram, skip it at first */ - addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, - PAGE_SIZE); + /* xen has big range in reserved near end of ram, skip it at first.*/ + addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE); real_end = addr + PMD_SIZE; /* step_size need to be small so pgt_buf from BRK could cover it */ -- cgit v1.1 From 2e604c0f19dcdd433b3863ffc3da9bc0787ca765 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Wed, 6 Mar 2013 20:23:30 -0800 Subject: x86: Don't clear efi_info even if the sentinel hits When boot_params->sentinel is set, all we really know is that some undefined set of fields in struct boot_params contain garbage.
In the particular case of efi_info, however, there is a private magic for that substructure, so it is generally safe to leave it even if the bootloader is broken. kexec (for which we did the initial analysis) did not initialize this field, but of course all the EFI bootloaders do, and most EFI bootloaders are broken in this respect (and should be fixed). Reported-by: Robin Holt Link: http://lkml.kernel.org/r/CA%2B5PVA51-FT14p4CRYKbicykugVb=PiaEycdQ57CK2km_OQuRQ@mail.gmail.com Tested-by: Josh Boyer Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/bootparam_utils.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index 5b5e9cb..ff808ef 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -14,13 +14,15 @@ * analysis of kexec-tools; if other broken bootloaders initialize a * different set of fields we will need to figure out how to disambiguate. * + * Note: efi_info is commonly left uninitialized, but that field has a + * private magic, so it is better to leave it unchanged. */ static void sanitize_boot_params(struct boot_params *boot_params) { if (boot_params->sentinel) { /*fields in boot_params are not valid, clear them */ memset(&boot_params->olpc_ofw_header, 0, - (char *)&boot_params->alt_mem_k - + (char *)&boot_params->efi_info - (char *)&boot_params->olpc_ofw_header); memset(&boot_params->kbd_status, 0, (char *)&boot_params->hdr - -- cgit v1.1 From 3c4aff6b9a183b4f24eb7b8dd6c8a92cdba3bc75 Mon Sep 17 00:00:00 2001 From: Peter Jones Date: Wed, 6 Mar 2013 13:00:23 -0500 Subject: x86, doc: Be explicit about what the x86 struct boot_params requires If the sentinel triggers, we do not want the boot loader authors to just poke it and make the error go away; we want them to actually fix the problem. This should help avoid making the incorrect change in non-compliant bootloaders. [ hpa: dropped the Documentation/x86/boot.txt hunk pending clarifications ] Signed-off-by: Peter Jones Link: http://lkml.kernel.org/r/1362592823-28967-1-git-send-email-pjones@redhat.com Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/bootparam_utils.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h index ff808ef..653668d 100644 --- a/arch/x86/include/asm/bootparam_utils.h +++ b/arch/x86/include/asm/bootparam_utils.h @@ -19,8 +19,22 @@ */ static void sanitize_boot_params(struct boot_params *boot_params) { + /* + * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear + * this field. The purpose of this field is to guarantee + * compliance with the x86 boot spec located in + * Documentation/x86/boot.txt . That spec says that the + * *whole* structure should be cleared, after which only the + * portion defined by struct setup_header (boot_params->hdr) + * should be copied in. + * + * If you're having an issue because the sentinel is set, you + * need to change the whole structure to be cleared, not this + * (or any other) individual field, or you will soon have + * problems again.
+ */ if (boot_params->sentinel) { - /*fields in boot_params are not valid, clear them */ + /* fields in boot_params are left uninitialized, clear them */ memset(&boot_params->olpc_ofw_header, 0, (char *)&boot_params->efi_info - (char *)&boot_params->olpc_ofw_header); -- cgit v1.1 From 60f583d56aa515b896a9d94f860f52640c1e8a75 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Mar 2013 08:31:51 -0800 Subject: x86: Do not try to sync identity map for non-mapped pages kernel_map_sync_memtype() is called from a variety of contexts. The pat.c code that calls it seems to ensure that it is not called for non-ram areas by checking via pat_pagerange_is_ram(). It is important that it only be called on the actual identity map because there *IS* no map to sync for highmem pages, or for memory holes. The ioremap.c uses are not as careful as those from pat.c, and call kernel_map_sync_memtype() on PCI space, which is in the middle of the kernel identity map _range_, but is not actually mapped. This patch adds a check to kernel_map_sync_memtype() which probably duplicates some of the checks already in pat.c. But, it is necessary for the ioremap.c uses and shouldn't hurt other callers. I have reproduced this bug and this patch fixes it for me and the original bug reporter: https://lkml.org/lkml/2013/2/5/396 Signed-off-by: Dave Hansen Link: http://lkml.kernel.org/r/20130307163151.D9B58C4E@kernel.stglabs.ibm.com Signed-off-by: Dave Hansen Tested-by: Tetsuo Handa Signed-off-by: H. Peter Anvin --- arch/x86/mm/pat.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2610bd9..6574388 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -563,6 +563,13 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) if (base > __pa(high_memory-1)) return 0; + /* + * some areas in the middle of the kernel identity range + * are not mapped, like the PCI space. + */ + if (!page_is_ram(base >> PAGE_SHIFT)) + return 0; + id_sz = (__pa(high_memory-1) <= base + size) ? __pa(high_memory) - base : size; -- cgit v1.1 From 4febd95a8a85dd38b1a71fcf9726e19c7fd20039 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 7 Mar 2013 15:48:16 +1100 Subject: Select VIRT_TO_BUS directly where needed In commit 887cbce0adea ("arch Kconfig: centralise ARCH_NO_VIRT_TO_BUS") I introduced the config symbol HAVE_VIRT_TO_BUS and selected that where needed. I am not sure what I was thinking. Instead, just directly select VIRT_TO_BUS where it is needed. Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a4f24f5..70c0f3d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -112,7 +112,7 @@ config X86 select GENERIC_STRNLEN_USER select HAVE_CONTEXT_TRACKING if X86_64 select HAVE_IRQ_TIME_ACCOUNTING - select HAVE_VIRT_TO_BUS + select VIRT_TO_BUS select MODULES_USE_ELF_REL if X86_32 select MODULES_USE_ELF_RELA if X86_64 select CLONE_BACKWARDS if X86_32 -- cgit v1.1 From 1d9d8639c063caf6efc2447f5f26aa637f844ff6 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Fri, 15 Mar 2013 14:26:07 +0100 Subject: perf,x86: fix kernel crash with PEBS/BTS after suspend/resume This patch fixes a kernel crash when using precise sampling (PEBS) after a suspend/resume. It turns out the CPU notifier code is not invoked on CPU0 (BP).
Therefore, the DS_AREA (used by PEBS) is not restored properly by the kernel and keeps its power-on/resume value of 0, causing any PEBS measurement to crash when running on CPU0. The workaround is to add a hook in the actual resume code to restore the DS Area MSR value. It is invoked for all CPUs. So for all but CPU0, the DS_AREA will be restored twice, but this is harmless. Reported-by: Linus Torvalds Signed-off-by: Stephane Eranian Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 8 ++++++++ arch/x86/power/cpu.c | 2 ++ 2 files changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 826054a..0e9bdd3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -729,3 +729,11 @@ void intel_ds_init(void) } } } + +void perf_restore_debug_store(void) +{ + if (!x86_pmu.bts && !x86_pmu.pebs) + return; + + init_debug_store_on_cpu(smp_processor_id()); +} diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 120cee1..3c68768 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -228,6 +229,7 @@ static void __restore_processor_state(struct saved_context *ctxt) do_fpu_end(); x86_platform.restore_sched_clock_state(); mtrr_bp_restore(); + perf_restore_debug_store(); } /* Needed by apm.c */ -- cgit v1.1 From 2a6e06b2aed6995af401dcd4feb5e79a0c7ea554 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 17 Mar 2013 15:44:43 -0700 Subject: perf,x86: fix wrmsr_on_cpu() warning on suspend/resume Commit 1d9d8639c063 ("perf,x86: fix kernel crash with PEBS/BTS after suspend/resume") fixed a crash when doing PEBS performance profiling after resuming, but in using init_debug_store_on_cpu() to restore the DS_AREA MSR it also resulted in a new WARN_ON() triggering. init_debug_store_on_cpu() uses "wrmsr_on_cpu()", which in turn uses CPU cross-calls to do the MSR update. Which is not really valid at the early resume stage, and the warning is quite reasonable. Now, it all happens to _work_, for the simple reason that smp_call_function_single() ends up just doing the call directly on the CPU when the CPU number matches, but we really should just do the wrmsr() directly instead. This duplicates the wrmsr() logic, but hopefully we can just remove the wrmsr_on_cpu() version eventually. Reported-and-tested-by: Parag Warudkar Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 0e9bdd3..b05a575 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -732,8 +732,10 @@ void intel_ds_init(void) void perf_restore_debug_store(void) { + struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds); + if (!x86_pmu.bts && !x86_pmu.pebs) return; - init_debug_store_on_cpu(smp_processor_id()); + wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds); } -- cgit v1.1 From 9a556ab998071457e79b319f2527642dd6e50617 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 14 Mar 2013 20:52:43 +0900 Subject: kprobes/x86: Check Interrupt Flag modifier when registering probe Currently kprobes checks whether the copied instruction modifies IF (interrupt flag) on each probe hit.
This not only introduces overhead but also pulls inat_get_opcode_attribute() into the kprobes hot path, and it can cause an infinite recursive call (and a kernel panic in the end). Actually, since the copied instruction itself can never be modified in the buffer, it is needless to analyze the instruction on every probe hit. To fix this issue, we check it only once when registering the probe and store the result in ainsn->if_modifier. Reported-by: Timo Juhani Lindfors Signed-off-by: Masami Hiramatsu Acked-by: Ananth N Mavinakayanahalli Cc: yrl.pp-manager.tt@hitachi.com Cc: Steven Rostedt Cc: David S. Miller Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20130314115242.19690.33573.stgit@mhiramat-M0-7522 Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kprobes.h | 1 + arch/x86/kernel/kprobes/core.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index d3ddd17..5a6d287 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -77,6 +77,7 @@ struct arch_specific_insn { * a post_handler or break_handler). */ int boostable; + bool if_modifier; }; struct arch_optimized_insn { diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 3f06e61..7bfe318 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -375,6 +375,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) else p->ainsn.boostable = -1; + /* Check whether the instruction modifies Interrupt Flag or not */ + p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); + /* Also, displacement change doesn't affect the first byte */ p->opcode = p->ainsn.insn[0]; } @@ -434,7 +437,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, p); kcb->kprobe_saved_flags = kcb->kprobe_old_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); - if (is_IF_modifier(p->ainsn.insn)) + if (p->ainsn.if_modifier) kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; } -- cgit v1.1 From fd4a5aef002bb57e8a35ed34d8a878034b9bde94 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Sun, 17 Mar 2013 14:49:57 +0100 Subject: perf/x86: Add SNB/SNB-EP scheduling constraints for cycle_activity event Add scheduling constraints for the SNB/SNB-EP CYCLE_ACTIVITY event as defined by the SDM, Jan 2013 edition. The STALLS umasks are combinations with the NO_DISPATCH umask.
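The "combinations" the last sentence describes can be read straight off the event codes added in the diff below (an editor's annotation: event 0xa3 is CYCLE_ACTIVITY, with the umask in the next byte):

	CYCLES_NO_DISPATCH  0x04a3  umask 0x04
	CYCLES_L1D_PENDING  0x02a3  umask 0x02
	STALLS_L2_PENDING   0x05a3  umask 0x05 = 0x04 | 0x01 (NO_DISPATCH | L2_PENDING)
	STALLS_L1D_PENDING  0x06a3  umask 0x06 = 0x04 | 0x02 (NO_DISPATCH | L1D_PENDING)

This also explains the counter masks in the diff: the two L1D-based events are constrained to counter 2 only (mask 0x4), while the others may schedule on counters 0-3 (mask 0xf).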
Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Cc: ak@linux.intel.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/20130317134957.GA8550@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 529c893..dab7580 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -101,6 +101,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly = FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */ + INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ + INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ -- cgit v1.1 From 66db3feb486c01349f767b98ebb10b0c3d2d021b Mon Sep 17 00:00:00 2001 From: CQ Tang Date: Mon, 18 Mar 2013 11:02:21 -0400 Subject: x86-64: Fix the failure case in copy_user_handle_tail() The "to" pointer in copy_user_handle_tail() is incremented before a failure has been noted. This causes us to skip a byte in the failure case. Only do the increment when assured there is no failure. Signed-off-by: CQ Tang Link: http://lkml.kernel.org/r/20130318150221.8439.993.stgit@phlsvslse11.ph.intel.com Signed-off-by: Mike Marciniszyn Signed-off-by: H. Peter Anvin Cc: --- arch/x86/lib/usercopy_64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 05928aa..906fea3 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -74,10 +74,10 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) char c; unsigned zero_len; - for (; len; --len) { + for (; len; --len, to++) { if (__get_user_nocheck(c, from++, sizeof(char))) break; - if (__put_user_nocheck(c, to++, sizeof(char))) + if (__put_user_nocheck(c, to, sizeof(char))) break; } -- cgit v1.1 From c09664bb44184b3846e8c5254db4eae4b932682a Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 18 Mar 2013 13:54:32 -0300 Subject: KVM: x86: fix deadlock in clock-in-progress request handling There is a deadlock in pvclock handling: cpu0: cpu1: kvm_gen_update_masterclock() kvm_guest_time_update() spin_lock(pvclock_gtod_sync_lock) local_irq_save(flags) spin_lock(pvclock_gtod_sync_lock) kvm_make_mclock_inprogress_request(kvm) make_all_cpus_request() smp_call_function_many() Now if smp_call_function_many() called by cpu0 tries to call a function on cpu1, there will be a deadlock. Fix this by moving the pvclock_gtod_sync_lock protected section outside the irq-disabled section.
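In other words, after the fix kvm_guest_time_update() takes and releases the spinlock before disabling interrupts, so no CPU can ever spin on pvclock_gtod_sync_lock with interrupts off while the lock holder waits for IPI replies. A condensed sketch of the resulting order (an editor's paraphrase of the diff below; ka and flags are the function's locals):

	/* kvm_guest_time_update(), after the fix -- sketch only */
	spin_lock(&ka->pvclock_gtod_sync_lock);
	/* ... snapshot use_master_clock and the master clock values ... */
	spin_unlock(&ka->pvclock_gtod_sync_lock);

	local_irq_save(flags);		/* IRQs go off only after the unlock */
	/* ... read cpu_tsc_khz, TSC and kernel_ns with the clock stable ... */
	local_irq_restore(flags);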
Analyzed by Gleb Natapov Acked-by: Gleb Natapov Reported-and-Tested-by: Yongjie Ren Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f71500a..f7c850b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1416,15 +1416,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = 0; host_tsc = 0; - /* Keep irq disabled to prevent changes to the clock */ - local_irq_save(flags); - this_tsc_khz = __get_cpu_var(cpu_tsc_khz); - if (unlikely(this_tsc_khz == 0)) { - local_irq_restore(flags); - kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); - return 1; - } - /* * If the host uses TSC clock, then passthrough TSC as stable * to the guest. @@ -1436,6 +1427,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) kernel_ns = ka->master_kernel_ns; } spin_unlock(&ka->pvclock_gtod_sync_lock); + + /* Keep irq disabled to prevent changes to the clock */ + local_irq_save(flags); + this_tsc_khz = __get_cpu_var(cpu_tsc_khz); + if (unlikely(this_tsc_khz == 0)) { + local_irq_restore(flags); + kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); + return 1; + } if (!use_master_clock) { host_tsc = native_read_tsc(); kernel_ns = get_kernel_ns(); -- cgit v1.1 From c300aa64ddf57d9c5d9c898a64b36877345dd4a9 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Mon, 11 Mar 2013 09:34:52 -0700 Subject: KVM: x86: fix for buffer overflow in handling of MSR_KVM_SYSTEM_TIME (CVE-2013-1796) If the guest sets the GPA of the time_page so that the request to update the time straddles a page, then KVM will write onto an incorrect page. The write is done by using kmap_atomic() to get a pointer to the page for the time structure and then performing a memcpy to that page starting at an offset that the guest controls. Well-behaved guests always provide a 32-byte aligned address; however, a malicious guest could use this to corrupt host kernel memory. Tested: Tested against kvmclock unit test. Signed-off-by: Andrew Honig Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f7c850b..2ade60c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1959,6 +1959,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* ...but clean it before doing the actual write */ vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); + /* Check that the address is 32-byte aligned. */ + if (vcpu->arch.time_offset & + (sizeof(struct pvclock_vcpu_time_info) - 1)) + break; + vcpu->arch.time_page = gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); -- cgit v1.1 From 0b79459b482e85cb7426aa7da683a9f2c97aeae1 Mon Sep 17 00:00:00 2001 From: Andy Honig Date: Wed, 20 Feb 2013 14:48:10 -0800 Subject: KVM: x86: Convert MSR_KVM_SYSTEM_TIME to use gfn_to_hva_cache functions (CVE-2013-1797) There is a potential use-after-free issue with the handling of MSR_KVM_SYSTEM_TIME. If the guest specifies a GPA in movable or removable memory, such as a frame buffer, then KVM might continue to write to that address even after it's removed via KVM_SET_USER_MEMORY_REGION. KVM pins the page in memory so it's unlikely to cause an issue, but if the user space component re-purposes the memory previously used for the guest, then the guest will be able to corrupt that memory.
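The conversion that follows uses the standard gfn_to_hva_cache pattern. A condensed sketch of that pattern (editor's illustration with the function names from the diff; kvm, gpa and the locals are stand-ins, and error handling is simplified): translate the GPA once when the guest programs the MSR, then route all later accesses through the cache, which KVM revalidates against the current memslots instead of holding a pinned page:

	struct gfn_to_hva_cache cache;
	struct pvclock_vcpu_time_info hv;
	bool enabled;

	/* at MSR-write time: translate GPA -> HVA once (returns 0 on success) */
	enabled = !kvm_gfn_to_hva_cache_init(kvm, &cache, gpa);

	/* at clock-update time: these fail safely if the backing memslot
	 * has since been moved or removed, instead of scribbling on a
	 * stale pinned page */
	if (enabled && !kvm_read_guest_cached(kvm, &cache, &hv, sizeof(hv))) {
		/* ... refresh hv ... */
		kvm_write_guest_cached(kvm, &cache, &hv, sizeof(hv));
	}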
Tested: Tested against kvmclock unit test Signed-off-by: Andrew Honig Signed-off-by: Marcelo Tosatti --- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/x86.c | 47 ++++++++++++++++++----------------------- 2 files changed, 22 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 635a74d..4979778 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -414,8 +414,8 @@ struct kvm_vcpu_arch { gpa_t time; struct pvclock_vcpu_time_info hv_clock; unsigned int hw_tsc_khz; - unsigned int time_offset; - struct page *time_page; + struct gfn_to_hva_cache pv_time; + bool pv_time_enabled; /* set guest stopped flag in pvclock flags field */ bool pvclock_set_guest_stopped_request; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2ade60c..f19ac0a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1406,10 +1406,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) unsigned long flags, this_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; - void *shared_kaddr; s64 kernel_ns, max_kernel_ns; u64 tsc_timestamp, host_tsc; - struct pvclock_vcpu_time_info *guest_hv_clock; + struct pvclock_vcpu_time_info guest_hv_clock; u8 pvclock_flags; bool use_master_clock; @@ -1463,7 +1462,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) local_irq_restore(flags); - if (!vcpu->time_page) + if (!vcpu->pv_time_enabled) return 0; /* @@ -1525,12 +1524,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) */ vcpu->hv_clock.version += 2; - shared_kaddr = kmap_atomic(vcpu->time_page); - - guest_hv_clock = shared_kaddr + vcpu->time_offset; + if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + &guest_hv_clock, sizeof(guest_hv_clock)))) + return 0; /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */ - pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED); + pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED); if (vcpu->pvclock_set_guest_stopped_request) { pvclock_flags |= PVCLOCK_GUEST_STOPPED; @@ -1543,12 +1542,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hv_clock.flags = pvclock_flags; - memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); - - kunmap_atomic(shared_kaddr); - - mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); + kvm_write_guest_cached(v->kvm, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); return 0; } @@ -1837,10 +1833,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { - if (vcpu->arch.time_page) { - kvm_release_page_dirty(vcpu->arch.time_page); - vcpu->arch.time_page = NULL; - } + vcpu->arch.pv_time_enabled = false; } static void accumulate_steal_time(struct kvm_vcpu *vcpu) @@ -1947,6 +1940,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_KVM_SYSTEM_TIME_NEW: case MSR_KVM_SYSTEM_TIME: { + u64 gpa_offset; kvmclock_reset(vcpu); vcpu->arch.time = data; @@ -1956,19 +1950,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & 1)) break; - /* ...but clean it before doing the actual write */ - vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); + gpa_offset = data & ~(PAGE_MASK | 1); /* Check that the address is 32-byte aligned. 
*/ - if (vcpu->arch.time_offset & - (sizeof(struct pvclock_vcpu_time_info) - 1)) + if (gpa_offset & (sizeof(struct pvclock_vcpu_time_info) - 1)) break; - vcpu->arch.time_page = - gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); - - if (is_error_page(vcpu->arch.time_page)) - vcpu->arch.time_page = NULL; + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.pv_time, data & ~1ULL)) + vcpu->arch.pv_time_enabled = false; + else + vcpu->arch.pv_time_enabled = true; break; } @@ -2972,7 +2964,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu, */ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) { - if (!vcpu->arch.time_page) + if (!vcpu->arch.pv_time_enabled) return -EINVAL; vcpu->arch.pvclock_set_guest_stopped_request = true; kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -6723,6 +6715,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_wbinvd_dirty_mask; vcpu->arch.ia32_tsc_adjust_msr = 0x0; + vcpu->arch.pv_time_enabled = false; kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); -- cgit v1.1 From c83a9d5e425d4678b05ca058fec6254f18601474 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Tue, 19 Mar 2013 08:04:44 -0700 Subject: x86-32, microcode_intel_early: Fix crash with CONFIG_DEBUG_VIRTUAL In 32-bit, __pa_symbol() in CONFIG_DEBUG_VIRTUAL accesses kernel data (e.g. max_low_pfn) that not only hasn't been set up yet in such an early boot phase, but, since we are in linear mode, cannot even be detected as uninitialized. Thus, use __pa_nodebug() rather than __pa_symbol() to get a global symbol's physical address. Signed-off-by: Fenghua Yu Link: http://lkml.kernel.org/r/1363705484-27645-1-git-send-email-fenghua.yu@intel.com Reported-and-tested-by: Tetsuo Handa Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_intel_early.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c index 7890bc8..5992ee8 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/microcode_intel_early.c @@ -90,13 +90,13 @@ microcode_phys(struct microcode_intel **mc_saved_tmp, struct microcode_intel ***mc_saved; mc_saved = (struct microcode_intel ***) - __pa_symbol(&mc_saved_data->mc_saved); + __pa_nodebug(&mc_saved_data->mc_saved); for (i = 0; i < mc_saved_data->mc_saved_count; i++) { struct microcode_intel *p; p = *(struct microcode_intel **) - __pa(mc_saved_data->mc_saved + i); - mc_saved_tmp[i] = (struct microcode_intel *)__pa(p); + __pa_nodebug(mc_saved_data->mc_saved + i); + mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p); } } #endif @@ -562,7 +562,7 @@ scan_microcode(unsigned long start, unsigned long end, struct cpio_data cd; long offset = 0; #ifdef CONFIG_X86_32 - char *p = (char *)__pa_symbol(ucode_name); + char *p = (char *)__pa_nodebug(ucode_name); #else char *p = ucode_name; #endif @@ -630,8 +630,8 @@ static void __cpuinit print_ucode(struct ucode_cpu_info *uci) if (mc_intel == NULL) return; - delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info); - current_mc_date_p = (int *)__pa_symbol(&current_mc_date); + delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info); + current_mc_date_p = (int *)__pa_nodebug(&current_mc_date); *delay_ucode_info_p = 1; *current_mc_date_p = mc_intel->hdr.date; @@ -741,15 +741,15 @@ load_ucode_intel_bsp(void) #ifdef CONFIG_X86_32 struct boot_params *boot_params_p; - boot_params_p = (struct boot_params *)__pa_symbol(&boot_params); + boot_params_p = (struct boot_params
*)__pa_nodebug(&boot_params); ramdisk_image = boot_params_p->hdr.ramdisk_image; ramdisk_size = boot_params_p->hdr.ramdisk_size; initrd_start_early = ramdisk_image; initrd_end_early = initrd_start_early + ramdisk_size; _load_ucode_intel_bsp( - (struct mc_saved_data *)__pa_symbol(&mc_saved_data), - (unsigned long *)__pa_symbol(&mc_saved_in_initrd), + (struct mc_saved_data *)__pa_nodebug(&mc_saved_data), + (unsigned long *)__pa_nodebug(&mc_saved_in_initrd), initrd_start_early, initrd_end_early, &uci); #else ramdisk_image = boot_params.hdr.ramdisk_image; @@ -772,10 +772,10 @@ void __cpuinit load_ucode_intel_ap(void) unsigned long *initrd_start_p; mc_saved_in_initrd_p = - (unsigned long *)__pa_symbol(mc_saved_in_initrd); - mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data); - initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start); - initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p); + (unsigned long *)__pa_nodebug(mc_saved_in_initrd); + mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data); + initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start); + initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p); #else mc_saved_data_p = &mc_saved_data; mc_saved_in_initrd_p = mc_saved_in_initrd; -- cgit v1.1 From f564c24103f87dc740c1c293c975565ac46b12ef Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Thu, 21 Mar 2013 17:32:36 -0700 Subject: x86, microcode_intel_early: Mark apply_microcode_early() as cpuinit Add missing __cpuinit annotation to apply_microcode_early(). Reported-by: Shaun Ruffell Cc: Fenghua Yu Link: http://lkml.kernel.org/r/20130320170310.GA23362@digium.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/microcode_intel_early.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c index 5992ee8..d893e8e 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/microcode_intel_early.c @@ -659,8 +659,8 @@ static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci) } #endif -static int apply_microcode_early(struct mc_saved_data *mc_saved_data, - struct ucode_cpu_info *uci) +static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data, + struct ucode_cpu_info *uci) { struct microcode_intel *mc_intel; unsigned int val[2]; -- cgit v1.1 From 909b3fdb0dd4f3db07b2d75425a00a2adb551383 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 12 Mar 2013 15:06:23 +0000 Subject: xen-pciback: notify hypervisor about devices intended to be assigned to guests For MSI-X capable devices the hypervisor wants to write protect the MSI-X table and PBA, yet it can't assume that resources have been assigned to their final values at device enumeration time. Thus have pciback do that notification, as having the device controlled by it is a prerequisite to assigning the device to guests anyway. This is the kernel part of hypervisor side commit 4245d33 ("x86/MSI: add mechanism to fully protect MSI-X table from PV guest accesses") on the master branch of git://xenbits.xen.org/xen.git. 
CC: stable@vger.kernel.org Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/hypercall.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index c20d1ce..e709884 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -382,14 +382,14 @@ HYPERVISOR_console_io(int cmd, int count, char *str) return _hypercall3(int, console_io, cmd, count, str); } -extern int __must_check HYPERVISOR_physdev_op_compat(int, void *); +extern int __must_check xen_physdev_op_compat(int, void *); static inline int HYPERVISOR_physdev_op(int cmd, void *arg) { int rc = _hypercall2(int, physdev_op, cmd, arg); if (unlikely(rc == -ENOSYS)) - rc = HYPERVISOR_physdev_op_compat(cmd, arg); + rc = xen_physdev_op_compat(cmd, arg); return rc; } -- cgit v1.1 From 05e99c8cf9d4e53ef6e016815db40a89a6156529 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 20 Mar 2013 14:21:10 +0000 Subject: intel-pstate: Use #defines instead of hard-coded values. They are defined in coreboot (MSR_PLATFORM_INFO) and the other one is already defined in msr-index.h. Let's use those. Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Viresh Kumar Acked-by: Dirk Brandewie Signed-off-by: Rafael J. Wysocki --- arch/x86/include/uapi/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 892ce40..7a060f4 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -44,6 +44,7 @@ #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) +#define MSR_PLATFORM_INFO 0x000000ce #define MSR_MTRRcap 0x000000fe #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e -- cgit v1.1 From d3eb2c89e7ba996e8781b22a6e7d0a895ef55630 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Mar 2013 10:34:28 -0400 Subject: xen/mmu: Move the setting of pvops.write_cr3 to later phase in bootup. We move the setting of write_cr3 from the early bootup variant (see git commit 0cc9129d75ef8993702d97ab0e49542c15ac6ab9 "x86-64, xen, mmu: Provide an early version of write_cr3.") to a more appropriate location. This new location sets all of the other non-early variants of pvops calls - and most importantly is before the alternative_asm mechanism kicks in. Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index e8e3493..6afbb2c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1467,8 +1467,6 @@ static void __init xen_write_cr3_init(unsigned long cr3) __xen_write_cr3(true, cr3); xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ - - pv_mmu_ops.write_cr3 = &xen_write_cr3; } #endif @@ -2122,6 +2120,7 @@ static void __init xen_post_allocator_init(void) #endif #ifdef CONFIG_X86_64 + pv_mmu_ops.write_cr3 = &xen_write_cr3; SetPagePinned(virt_to_page(level3_user_vsyscall)); #endif xen_mark_init_mm_pinned(); -- cgit v1.1
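For orientation on the MSR_PLATFORM_INFO change above, this is roughly how a consumer such as intel_pstate reads the newly #defined MSR (an editor's sketch; the function name is hypothetical, and the bit layout - bits 15:8 hold the maximum non-turbo ratio on these parts - comes from the SDM, not from the patch):

	#include <asm/msr.h>

	/* Sketch: read the maximum non-turbo ratio from
	 * MSR_PLATFORM_INFO (0xce), bits 15:8 per the SDM. */
	static int example_max_nonturbo_ratio(void)
	{
		u64 value;

		rdmsrl(MSR_PLATFORM_INFO, value);
		return (value >> 8) & 0xff;
	}

Using the msr-index.h #define instead of the raw 0xce is what the commit is after: one authoritative name for the register across drivers.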