From 1cd9c22fee3ac21db52a0997d08cf2f065d2c0c0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 31 Jan 2018 16:54:02 +0300 Subject: x86/mm/encrypt: Move page table helpers into separate translation unit There are a bunch of functions in mem_encrypt.c that operate on the identity mapping, which means they want virtual addresses to be equal to the physical ones, without the PAGE_OFFSET shift. We also need to avoid paravirtualization calls there. Getting this done is tricky. We cannot use the usual page table helpers, which forces us to open-code a lot of things and makes the code ugly and hard to modify. We can get it to work with the page table helpers, but it requires a few preprocessor tricks, and these tricks may have side effects for the rest of the file. Let's isolate such functions into their own translation unit. Tested-by: Tom Lendacky Signed-off-by: Kirill A. Shutemov Reviewed-by: Tom Lendacky Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180131135404.40692-2-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mem_encrypt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 22c5f3e..8fe61ad 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -22,6 +22,7 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT extern u64 sme_me_mask; +extern bool sev_enabled; void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, unsigned long decrypted_kernel_vaddr, -- cgit v1.1 From ccf5355d05cd891522267f04b2723002e7f061de Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 17 Jan 2018 15:37:48 +0800 Subject: x86/apic: Simplify init_bsp_APIC() usage Since CONFIG_X86_64 selects CONFIG_X86_LOCAL_APIC, the following condition: #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) is equivalent to: #if defined(CONFIG_X86_LOCAL_APIC) ... and we can eliminate that #ifdef by providing an empty init_bsp_APIC() stub in the !CONFIG_X86_LOCAL_APIC case. Also add some comments to explain why we call init_bsp_APIC(). Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: mroos@linux.ee Cc: ville.syrjala@linux.intel.com Link: http://lkml.kernel.org/r/20180117073748.23905-1-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 9872277..6e1990d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -183,6 +183,7 @@ static inline void disable_local_APIC(void) { } # define setup_boot_APIC_clock x86_init_noop # define setup_secondary_APIC_clock x86_init_noop static inline void lapic_update_tsc_freq(void) { } +static inline void init_bsp_APIC(void) { } static inline void apic_intr_mode_init(void) { } static inline void lapic_assign_system_vectors(void) { } static inline void lapic_assign_legacy_vector(unsigned int i, bool r) { } -- cgit v1.1 From 1acdbf7ea8a80706561013346d281d282a3c00f9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 13 Jan 2018 21:50:48 +0300 Subject: x86/asm: Clobber flags in clear_page() All clear_page() implementations use XOR, which clobbers the flags. Judging by an allyesconfig disassembly, no generated code is affected. 
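As a minimal illustration of the convention (a user-space sketch, not part of the patch; the function name is hypothetical): an asm statement that executes XOR rewrites the arithmetic flags, so "cc" belongs in its clobber list, which is what the hunk below adds to clear_page():

#include <stdio.h>

static unsigned long zero_it(unsigned long v)
{
	/* XOR zeroes the register and also modifies EFLAGS, hence the "cc" clobber. */
	asm volatile("xor %0, %0" : "+r" (v) : : "cc");
	return v;
}

int main(void)
{
	printf("%lu\n", zero_it(42));	/* prints 0 */
	return 0;
}

(On x86, GCC already treats the flags as clobbered by every asm statement, which is why no generated code changes; spelling out "cc" still documents the side effect.)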
Signed-off-by: Alexey Dobriyan Reviewed-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180113185048.GA23111@avx2 Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 4baa6bc..f8a85c6 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -47,7 +47,7 @@ static inline void clear_page(void *page) clear_page_erms, X86_FEATURE_ERMS, "=D" (page), "0" (page) - : "memory", "rax", "rcx"); + : "cc", "memory", "rax", "rcx"); } void copy_page(void *to, void *from); -- cgit v1.1 From b83ce5ee91471d19c403ff91227204fb37c95fb2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:48 +0300 Subject: x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52 __PHYSICAL_MASK_SHIFT is used to define the mask that helps to extract physical address from a page table entry. Although real physical address space available may differ between machines, it's safe to use 52 as __PHYSICAL_MASK_SHIFT. Unused bits above log2(MAXPHYADDR) up to bit 51 are reserved and must be 0. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-2-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64_types.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index e140731..f68e652 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -52,11 +52,12 @@ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#ifdef CONFIG_X86_5LEVEL + #define __PHYSICAL_MASK_SHIFT 52 + +#ifdef CONFIG_X86_5LEVEL #define __VIRTUAL_MASK_SHIFT 56 #else -#define __PHYSICAL_MASK_SHIFT 46 #define __VIRTUAL_MASK_SHIFT 47 #endif -- cgit v1.1 From 02390b87a9459937cdb299e6b34ff33992512ec7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:49 +0300 Subject: mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS With boot-time switching between paging mode we will have variable MAX_PHYSMEM_BITS. Let's use the maximum variable possible for CONFIG_X86_5LEVEL=y configuration to define zsmalloc data structures. The patch introduces MAX_POSSIBLE_PHYSMEM_BITS to cover such case. It also suits well to handle PAE special case. Signed-off-by: Kirill A. 
Shutemov Reviewed-by: Nitin Gupta Acked-by: Minchan Kim Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-3-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable-3level_types.h | 1 + arch/x86/include/asm/pgtable_64_types.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 876b4c7..6a59a6d 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -44,5 +44,6 @@ typedef union { */ #define PTRS_PER_PTE 512 +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6b8f73d..7168de7 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -40,6 +40,8 @@ typedef struct { pteval_t pte; } pte_t; #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define P4D_MASK (~(P4D_SIZE - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 52 + #else /* CONFIG_X86_5LEVEL */ /* -- cgit v1.1 From eedb92abb9bb03ef21442614a6f5867eaac6e77f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:50 +0300 Subject: x86/mm: Make virtual memory layout dynamic for CONFIG_X86_5LEVEL=y We need to be able to adjust virtual memory layout at runtime to be able to switch between 4- and 5-level paging at boot-time. KASLR already has movable __VMALLOC_BASE, __VMEMMAP_BASE and __PAGE_OFFSET. Let's re-use it. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kaslr.h | 4 ---- arch/x86/include/asm/page_64.h | 4 ++++ arch/x86/include/asm/page_64_types.h | 4 ++-- arch/x86/include/asm/pgtable_64_types.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 460991e..db7ba2f 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -5,10 +5,6 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY -extern unsigned long page_offset_base; -extern unsigned long vmalloc_base; -extern unsigned long vmemmap_base; - void kernel_randomize_memory(void); #else static inline void kernel_randomize_memory(void) { } diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 4baa6bc..09637865 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -11,6 +11,10 @@ extern unsigned long max_pfn; extern unsigned long phys_base; +extern unsigned long page_offset_base; +extern unsigned long vmalloc_base; +extern unsigned long vmemmap_base; + static inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index f68e652..d54a3d5 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -43,11 +43,11 @@ #define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) #endif -#ifdef CONFIG_RANDOMIZE_MEMORY +#ifdef 
CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base #else #define __PAGE_OFFSET __PAGE_OFFSET_BASE -#endif /* CONFIG_RANDOMIZE_MEMORY */ +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 7168de7..a0db91a 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -100,13 +100,13 @@ typedef struct { pteval_t pte; } pte_t; # define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif -#ifdef CONFIG_RANDOMIZE_MEMORY +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base # define VMEMMAP_START vmemmap_base #else # define VMALLOC_START __VMALLOC_BASE # define VMEMMAP_START __VMEMMAP_BASE -#endif /* CONFIG_RANDOMIZE_MEMORY */ +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) -- cgit v1.1 From e626e6bb0dfaca41487241d49ce0ae827716101a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:51 +0300 Subject: x86/mm: Introduce 'pgtable_l5_enabled' The new flag would indicate what paging mode we are in. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_32_types.h | 2 ++ arch/x86/include/asm/pgtable_64_types.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0777e18..e3225e8 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -15,6 +15,8 @@ # include #endif +#define pgtable_l5_enabled 0 + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index a0db91a..5e2d724 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -20,6 +20,12 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; +#ifdef CONFIG_X86_5LEVEL +extern unsigned int pgtable_l5_enabled; +#else +#define pgtable_l5_enabled 0 +#endif + #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 -- cgit v1.1 From 5c7919bb1994f8dc7fed219a5db09e6bb9d473a5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:52 +0300 Subject: x86/mm: Make LDT_BASE_ADDR dynamic LDT_BASE_ADDR has different value in 4- and 5-level paging configurations. We need to make it dynamic in preparation for boot-time switching between paging modes. Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 5e2d724..903e4d05 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -92,18 +92,19 @@ extern unsigned int pgtable_l5_enabled; */ #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define LDT_PGD_ENTRY_L4 -3UL +#define LDT_PGD_ENTRY_L5 -112UL +#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) +#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) + #ifdef CONFIG_X86_5LEVEL # define VMALLOC_SIZE_TB _AC(12800, UL) # define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -# define LDT_PGD_ENTRY _AC(-112, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #else # define VMALLOC_SIZE_TB _AC(32, UL) # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -# define LDT_PGD_ENTRY _AC(-3, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT -- cgit v1.1 From c65e774fb3f6af212641538694b9778ff9ab4300 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:53 +0300 Subject: x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable For boot-time switching between 4- and 5-level paging we need to be able to fold p4d page table level at runtime. It requires variable PGDIR_SHIFT and PTRS_PER_P4D. The change doesn't affect the kernel image size much: text data bss dec hex filename 8628091 4734304 1368064 14730459 e0c4db vmlinux.before 8628393 4734340 1368064 14730797 e0c62d vmlinux.after Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-7-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_32.h | 2 ++ arch/x86/include/asm/pgtable_64_types.h | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e67c062..d829360 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -33,6 +33,8 @@ static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +static inline int pgd_large(pgd_t pgd) { return 0; } + /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 903e4d05..0c48d80 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -26,6 +26,9 @@ extern unsigned int pgtable_l5_enabled; #define pgtable_l5_enabled 0 #endif +extern unsigned int pgdir_shift; +extern unsigned int ptrs_per_p4d; + #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 @@ -35,16 +38,17 @@ extern unsigned int pgtable_l5_enabled; /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 48 +#define PGDIR_SHIFT pgdir_shift #define PTRS_PER_PGD 512 /* * 4th level page in 5-level paging case */ -#define P4D_SHIFT 39 -#define PTRS_PER_P4D 512 -#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) -#define P4D_MASK (~(P4D_SIZE - 1)) +#define P4D_SHIFT 39 +#define MAX_PTRS_PER_P4D 512 +#define PTRS_PER_P4D ptrs_per_p4d +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) #define MAX_POSSIBLE_PHYSMEM_BITS 52 @@ -53,8 +57,9 @@ extern unsigned int pgtable_l5_enabled; /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 39 -#define PTRS_PER_PGD 512 +#define PGDIR_SHIFT 39 +#define PTRS_PER_PGD 512 +#define MAX_PTRS_PER_P4D 1 #endif /* CONFIG_X86_5LEVEL */ -- cgit v1.1 From 162434e7f58b21f0b6c9cc5fb02222cd7d9064cc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:54 +0300 Subject: x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic For boot-time switching between paging modes, we need to be able to adjust the size of the physical address space at runtime. As part of making the physical address space size variable, we have to make X86_5LEVEL dependent on SPARSEMEM_VMEMMAP. The !SPARSEMEM_VMEMMAP configuration doesn't build with a variable MAX_PHYSMEM_BITS. For !SPARSEMEM_VMEMMAP, SECTIONS_WIDTH depends on MAX_PHYSMEM_BITS through the chain SECTIONS_WIDTH -> SECTIONS_SHIFT -> MAX_PHYSMEM_BITS. And SECTIONS_WIDTH is used at the preprocessor stage; it doesn't work if it's dynamic. See include/linux/page-flags-layout.h. Effect on kernel image size: text data bss dec hex filename 8628393 4734340 1368064 14730797 e0c62d vmlinux.before 8628892 4734340 1368064 14731296 e0c820 vmlinux.after Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-8-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/include/asm/sparsemem.h | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 0c48d80..59d971c 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -95,7 +95,7 @@ extern unsigned int ptrs_per_p4d; * range must not overlap with anything except the KASAN shadow area, which * is correct as KASAN disables KASLR. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM (1UL << MAX_PHYSMEM_BITS) #define LDT_PGD_ENTRY_L4 -3UL #define LDT_PGD_ENTRY_L5 -112UL diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 4fc1e9d..4617a2b 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,13 +27,8 @@ # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# ifdef CONFIG_X86_5LEVEL -# define MAX_PHYSADDR_BITS 52 -# define MAX_PHYSMEM_BITS 52 -# else -# define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 46 -# endif +# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44) +# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ -- cgit v1.1 From 09e61a779e7f171c50325e6d7108a593afb2e5d4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:55 +0300 Subject: x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic For boot-time switching between paging modes, we need to be able to adjust virtual mask shifts. The change doesn't affect the kernel image size much: text data bss dec hex filename 8628892 4734340 1368064 14731296 e0c820 vmlinux.before 8628966 4734340 1368064 14731370 e0c86a vmlinux.after Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-9-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d54a3d5..fa7dc7c 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -56,7 +56,7 @@ #define __PHYSICAL_MASK_SHIFT 52 #ifdef CONFIG_X86_5LEVEL -#define __VIRTUAL_MASK_SHIFT 56 +#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47) #else #define __VIRTUAL_MASK_SHIFT 47 #endif -- cgit v1.1 From b753a2b79a5bbad35dfaf8d3dba964727c30654a Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 14 Feb 2018 14:25:54 +0800 Subject: x86/apic: Make setup_local_APIC() static This function isn't used outside of apic.c, so let's mark it static. 
Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bhe@redhat.com Cc: ebiederm@xmission.com Link: http://lkml.kernel.org/r/20180214062554.21020-1-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 6e1990d..c6a3201 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -138,7 +138,6 @@ extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); extern void init_bsp_APIC(void); extern void apic_intr_mode_init(void); -extern void setup_local_APIC(void); extern void init_apic_mappings(void); void register_lapic_address(unsigned long address); extern void setup_boot_APIC_clock(void); -- cgit v1.1 From 4fa5662b6b49611f11856db8be346710217473ef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:36 +0300 Subject: x86/mm: Initialize 'page_offset_base' at boot-time For 4- and 5-level paging we have a different 'page_offset_base'. Let's initialize it at boot time according to the machine's capability. We also have to split __PAGE_OFFSET_BASE into two constants -- for 4- and 5-level paging. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64_types.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index fa7dc7c..2c5a966 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -37,16 +37,13 @@ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * what Xen requires. */ -#ifdef CONFIG_X86_5LEVEL -#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) -#else -#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) -#endif +#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL) +#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL) #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base #else -#define __PAGE_OFFSET __PAGE_OFFSET_BASE +#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -- cgit v1.1 From a7412546d8cb5ad578805060b4006f2a021b5868 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:37 +0300 Subject: x86/mm: Adjust vmalloc base and size at boot-time The vmalloc area has a different placement and size depending on the paging mode. Let's adjust it during early boot according to the machine's capability. Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 59d971c..6863299 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -102,25 +102,29 @@ extern unsigned int ptrs_per_p4d; #define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) +#define __VMALLOC_BASE_L4 0xffffc90000000000 +#define __VMALLOC_BASE_L5 0xffa0000000000000 + +#define VMALLOC_SIZE_TB_L4 32UL +#define VMALLOC_SIZE_TB_L5 12800UL + #ifdef CONFIG_X86_5LEVEL -# define VMALLOC_SIZE_TB _AC(12800, UL) -# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #else -# define VMALLOC_SIZE_TB _AC(32, UL) -# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #endif #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base +# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) # define VMEMMAP_START vmemmap_base #else -# define VMALLOC_START __VMALLOC_BASE +# define VMALLOC_START __VMALLOC_BASE_L4 +# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 # define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) +#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -- cgit v1.1 From 9b46a051e43461a9afda2bdd50e0e0ae349341df Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:38 +0300 Subject: x86/mm: Initialize vmemmap_base at boot-time The vmemmap area has a different placement depending on the paging mode. Let's adjust it during early boot according to the machine's capability. Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6863299..68909a6 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -108,11 +108,8 @@ extern unsigned int ptrs_per_p4d; #define VMALLOC_SIZE_TB_L4 32UL #define VMALLOC_SIZE_TB_L5 12800UL -#ifdef CONFIG_X86_5LEVEL -# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -#else -# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -#endif +#define __VMEMMAP_BASE_L4 0xffffea0000000000 +#define __VMEMMAP_BASE_L5 0xffd4000000000000 #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base @@ -121,7 +118,7 @@ extern unsigned int ptrs_per_p4d; #else # define VMALLOC_START __VMALLOC_BASE_L4 # define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 -# define VMEMMAP_START __VMEMMAP_BASE +# define VMEMMAP_START __VMEMMAP_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) -- cgit v1.1 From 98219dda2ab56ce2a967fdebf81e838d676d9ddc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:40 +0300 Subject: x86/mm: Fold p4d page table layer at runtime Change page table helpers to fold p4d at runtime. The logic is the same as in . Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-8-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 10 ++++++---- arch/x86/include/asm/pgalloc.h | 5 ++++- arch/x86/include/asm/pgtable.h | 11 ++++++++++- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 892df37..3fbaad2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -569,14 +569,16 @@ static inline p4dval_t p4d_val(p4d_t p4d) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { - pgdval_t val = native_pgd_val(pgd); - - PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val); + if (pgtable_l5_enabled) + PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); + else + set_p4d((p4d_t *)(pgdp), (p4d_t) { pgd.pgd }); } static inline void pgd_clear(pgd_t *pgdp) { - set_pgd(pgdp, __pgd(0)); + if (pgtable_l5_enabled) + set_pgd(pgdp, __pgd(0)); } #endif /* CONFIG_PGTABLE_LEVELS == 5 */ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index aff42e1..263c142 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #if CONFIG_PGTABLE_LEVELS > 4 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { + if (!pgtable_l5_enabled) + return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } @@ -191,7 
+193,8 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { - ___p4d_free_tlb(tlb, p4d); + if (pgtable_l5_enabled) + ___p4d_free_tlb(tlb, p4d); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 63c2552..c8baa7f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags; #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) -#define pgd_clear(pgd) native_pgd_clear(pgd) +#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d @@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address) #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { + if (!pgtable_l5_enabled) + return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } @@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) /* to find an entry in a page-table-directory. */ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { + if (!pgtable_l5_enabled) + return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } @@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; + if (!pgtable_l5_enabled) + return 0; + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; @@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { + if (!pgtable_l5_enabled) + return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables -- cgit v1.1 From 91f606a8fa68264224cbc76888fa8649cdbe9990 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:41 +0300 Subject: x86/mm: Replace compile-time checks for 5-level paging with runtime-time checks This patch converts the of CONFIG_X86_5LEVEL check to runtime checks for p4d folding. Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-9-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81462e9..81dda8d 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -217,29 +217,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { -#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) - p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); -#else - *p4dp = p4d; -#endif + pgd_t pgd; + + if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + *p4dp = p4d; + return; + } + + pgd = native_make_pgd(p4d_val(p4d)); + pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd); + *p4dp = native_make_p4d(pgd_val(pgd)); } static inline void native_p4d_clear(p4d_t *p4d) { -#ifdef CONFIG_X86_5LEVEL native_set_p4d(p4d, native_make_p4d(0)); -#else - native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)}); -#endif } static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION *pgdp = pti_set_user_pgd(pgdp, pgd); -#else - *pgdp = pgd; -#endif } static inline void native_pgd_clear(pgd_t *pgd) -- cgit v1.1 From 6657fca06e3ffab8d0b3f9d8b397f5ee498952d7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:42 +0300 Subject: x86/mm: Allow to boot without LA57 if CONFIG_X86_5LEVEL=y All pieces of the puzzle are in place and we can now allow to boot with CONFIG_X86_5LEVEL=y on a machine without LA57 support. Kernel will detect that LA57 is missing and fold p4d at runtime. Update the documentation and the Kconfig option description to reflect the change. Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-10-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/required-features.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fb3a6de..6847d85 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -53,12 +53,6 @@ # define NEED_MOVBE 0 #endif -#ifdef CONFIG_X86_5LEVEL -# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#else -# define NEED_LA57 0 -#endif - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -104,7 +98,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 (NEED_LA57) +#define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) -- cgit v1.1 From ce279cdc04aafd5c41ae49f941ee2c3342e35e3e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 14 Feb 2018 13:46:51 +0800 Subject: x86/apic: Split out restore_boot_irq_mode() from disable_IO_APIC() This is a preparation patch. Split out the code which restores boot irq mode from disable_IO_APIC() into the new restore_boot_irq_mode() function. No functional changes. Signed-off-by: Baoquan He Reviewed-by: Eric W. Biederman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: douly.fnst@cn.fujitsu.com Cc: joro@8bytes.org Cc: prarit@redhat.com Cc: uobergfe@redhat.com Link: http://lkml.kernel.org/r/20180214054656.3780-2-bhe@redhat.com [ Build fix for !CONFIG_IO_APIC and rewrote the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index a8834dd..4e3bb13 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -193,6 +193,7 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); +extern void restore_boot_irq_mode(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); extern void print_IO_APICs(void); #else /* !CONFIG_X86_IO_APIC */ @@ -232,6 +233,7 @@ static inline void io_apic_init_mappings(void) { } static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } +static inline void restore_boot_irq_mode(void) { } #endif -- cgit v1.1 From 3c9e76dbea004b2c7c3ce872022ceaf5ff0dae79 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 14 Feb 2018 13:46:52 +0800 Subject: x86/apic: Split disable_IO_APIC() into two functions to fix CONFIG_KEXEC_JUMP=y Split following patches disable_IO_APIC() will be broken up into clear_IO_APIC() and restore_boot_irq_mode(). These two functions will be called separately where they are needed to fix a regression introduced by: 522e66464467 ("x86/apic: Disable I/O APIC before shutdown of the local APIC"). While the CONFIG_KEXEC_JUMP=y code doesn't call lapic_shutdown() before jump like kexec/kdump, so it's not impacted by commit 522e66464467. 
Hence here change clear_IO_APIC() as public, and replace disable_IO_APIC() with clear_IO_APIC() and restore_boot_irq_mode() to keep CONFIG_KEXEC_JUMP=y code unchanged in essence. No functional change. Signed-off-by: Baoquan He Reviewed-by: Eric W. Biederman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: douly.fnst@cn.fujitsu.com Cc: joro@8bytes.org Cc: prarit@redhat.com Cc: uobergfe@redhat.com Link: http://lkml.kernel.org/r/20180214054656.3780-3-bhe@redhat.com [ Rewrote the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 4e3bb13..2ae1b424c 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -193,6 +193,7 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); extern void disable_IO_APIC(void); +extern void clear_IO_APIC(void); extern void restore_boot_irq_mode(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); extern void print_IO_APICs(void); -- cgit v1.1 From 50374b96d2d30c03c8d42b3f8846d8938748d454 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 14 Feb 2018 13:46:54 +0800 Subject: x86/apic: Remove the (now) unused disable_IO_APIC() function No one uses it anymore. Signed-off-by: Baoquan He Reviewed-by: Eric W. Biederman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: douly.fnst@cn.fujitsu.com Cc: joro@8bytes.org Cc: prarit@redhat.com Cc: uobergfe@redhat.com Link: http://lkml.kernel.org/r/20180214054656.3780-5-bhe@redhat.com [ Rewrote the changelog. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 2ae1b424c..8018fc4 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -192,7 +192,6 @@ static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) extern void setup_IO_APIC(void); extern void enable_IO_APIC(void); -extern void disable_IO_APIC(void); extern void clear_IO_APIC(void); extern void restore_boot_irq_mode(void); extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin); -- cgit v1.1 From 51b146c572201e3c368e0baa3e565760aefcf25f Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Wed, 14 Feb 2018 13:46:55 +0800 Subject: x86/apic: Rename variables and functions related to x86_io_apic_ops The names of x86_io_apic_ops and its two member variables are misleading: The ->read() member is to read IO_APIC reg, while ->disable() which is called by native_disable_io_apic()/irq_remapping_disable_io_apic() is actually used to restore boot IRQ mode, not to disable the IO-APIC. So rename x86_io_apic_ops to 'x86_apic_ops' since it doesn't only handle the IO-APIC, but also the local APIC. Also rename its member variables and the related callbacks. Signed-off-by: Baoquan He Reviewed-by: Eric W. Biederman Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: douly.fnst@cn.fujitsu.com Cc: joro@8bytes.org Cc: prarit@redhat.com Cc: uobergfe@redhat.com Link: http://lkml.kernel.org/r/20180214054656.3780-6-bhe@redhat.com [ Rewrote the changelog. 
] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io_apic.h | 6 +++--- arch/x86/include/asm/x86_init.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 8018fc4..fd20a23 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -183,11 +183,11 @@ extern void disable_ioapic_support(void); extern void __init io_apic_init_mappings(void); extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg); -extern void native_disable_io_apic(void); +extern void native_restore_boot_irq_mode(void); static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) { - return x86_io_apic_ops.read(apic, reg); + return x86_apic_ops.io_apic_read(apic, reg); } extern void setup_IO_APIC(void); @@ -229,7 +229,7 @@ static inline void mp_save_irq(struct mpc_intsrc *m) { } static inline void disable_ioapic_support(void) { } static inline void io_apic_init_mappings(void) { } #define native_io_apic_read NULL -#define native_disable_io_apic NULL +#define native_restore_boot_irq_mode NULL static inline void setup_IO_APIC(void) { } static inline void enable_IO_APIC(void) { } diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index fc2f082..8830605 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -274,16 +274,16 @@ struct x86_msi_ops { void (*restore_msi_irqs)(struct pci_dev *dev); }; -struct x86_io_apic_ops { - unsigned int (*read) (unsigned int apic, unsigned int reg); - void (*disable)(void); +struct x86_apic_ops { + unsigned int (*io_apic_read) (unsigned int apic, unsigned int reg); + void (*restore)(void); }; extern struct x86_init_ops x86_init; extern struct x86_cpuinit_ops x86_cpuinit; extern struct x86_platform_ops x86_platform; extern struct x86_msi_ops x86_msi; -extern struct x86_io_apic_ops x86_io_apic_ops; +extern struct x86_apic_ops x86_apic_ops; extern void x86_early_init_platform_quirks(void); extern void x86_init_noop(void); -- cgit v1.1 From 92e1c5b3f7bf5407cfdbf13613e7101831216dc5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 16 Feb 2018 14:49:47 +0300 Subject: x86/mm: Redefine some of page table helpers as macros This is preparation for the next patch, which would change pgtable_l5_enabled to be cpu_feature_enabled(X86_FEATURE_LA57). The change makes few helpers in paravirt.h dependent on cpu_feature_enabled() definition from cpufeature.h. And cpufeature.h is dependent on paravirt.h. Let's re-define some of helpers as macros to break this dependency loop. Signed-off-by: Kirill A. 
Shutemov Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180216114948.68868-3-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 3fbaad2..2c0c8c9 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -567,19 +567,22 @@ static inline p4dval_t p4d_val(p4d_t p4d) return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d); } -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) { - if (pgtable_l5_enabled) - PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); - else - set_p4d((p4d_t *)(pgdp), (p4d_t) { pgd.pgd }); + PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); } -static inline void pgd_clear(pgd_t *pgdp) -{ - if (pgtable_l5_enabled) - set_pgd(pgdp, __pgd(0)); -} +#define set_pgd(pgdp, pgdval) do { \ + if (pgtable_l5_enabled) \ + __set_pgd(pgdp, pgdval); \ + else \ + set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ +} while (0) + +#define pgd_clear(pgdp) do { \ + if (pgtable_l5_enabled) \ + set_pgd(pgdp, __pgd(0)); \ +} while (0) #endif /* CONFIG_PGTABLE_LEVELS == 5 */ -- cgit v1.1 From 39b9552281abfcdfc54162897018890dafe7ffef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 16 Feb 2018 14:49:48 +0300 Subject: x86/mm: Optimize boot-time paging mode switching cost By this point we have functioning boot-time switching between 4- and 5-level paging mode. But naive approach comes with cost. Numbers below are for kernel build, allmodconfig, 5 times. CONFIG_X86_5LEVEL=n: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17308719.892691 task-clock:u (msec) # 26.772 CPUs utilized ( +- 0.11% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,993,164 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,614,978,867,455 cycles:u # 2.520 GHz ( +- 0.01% ) 39,371,534,575,126 stalled-cycles-frontend:u # 90.27% frontend cycles idle ( +- 0.09% ) 28,363,350,152,428 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,784,066,413 branches:u # 364.948 M/sec ( +- 0.00% ) 250,808,144,781 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 646.531974142 seconds time elapsed ( +- 1.15% ) CONFIG_X86_5LEVEL=y: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17411536.780625 task-clock:u (msec) # 26.426 CPUs utilized ( +- 0.10% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,868,663 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,865,909,056,301 cycles:u # 2.519 GHz ( +- 0.01% ) 39,740,130,365,581 stalled-cycles-frontend:u # 90.59% frontend cycles idle ( +- 0.05% ) 28,363,358,997,959 instructions:u # 0.65 insn per cycle # 1.40 stalled cycles per insn ( +- 0.00% ) 6,316,784,937,460 branches:u # 362.793 M/sec ( +- 0.00% ) 251,531,919,485 branch-misses:u # 3.98% of all branches ( +- 0.00% ) 658.886307752 seconds time elapsed ( +- 0.92% ) The patch tries to fix the performance regression by using cpu_feature_enabled(X86_FEATURE_LA57) instead of pgtable_l5_enabled in all hot code paths. 
These will statically patch the target code for additional performance. CONFIG_X86_5LEVEL=y + the patch: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17381990.268506 task-clock:u (msec) # 26.907 CPUs utilized ( +- 0.19% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,862,625 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,697,726,320,051 cycles:u # 2.514 GHz ( +- 0.03% ) 39,480,408,690,401 stalled-cycles-frontend:u # 90.35% frontend cycles idle ( +- 0.05% ) 28,363,394,221,388 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,794,985,573 branches:u # 363.410 M/sec ( +- 0.00% ) 251,013,232,547 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 645.991174661 seconds time elapsed ( +- 1.19% ) Unfortunately, this approach doesn't help with text size: vmlinux.before .text size: 8190319 vmlinux.after .text size: 8200623 The .text section is increased by about 4k. Not sure if we can do anything about this. Signed-off-by: Kirill A. Shuemov Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180216114948.68868-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 68909a6..d5c21a3 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -21,7 +21,10 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; #ifdef CONFIG_X86_5LEVEL -extern unsigned int pgtable_l5_enabled; +extern unsigned int __pgtable_l5_enabled; +#ifndef pgtable_l5_enabled +#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57) +#endif #else #define pgtable_l5_enabled 0 #endif -- cgit v1.1 From a189c03235639a31343215f82b83b49985c55336 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 21 Feb 2018 11:18:53 +0100 Subject: x86/mce: Put private structures and definitions into the internal header ... because they don't need to be exported outside of MCE. Signed-off-by: Borislav Petkov Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/20180221101900.10326-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 52 ---------------------------------------------- 1 file changed, 52 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 96ea4b5..c3fb9a7 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -138,58 +138,6 @@ struct mce_log_buffer { struct mce entry[MCE_LOG_LEN]; }; -struct mca_config { - bool dont_log_ce; - bool cmci_disabled; - bool lmce_disabled; - bool ignore_ce; - bool disabled; - bool ser; - bool recovery; - bool bios_cmci_threshold; - u8 banks; - s8 bootlog; - int tolerant; - int monarch_timeout; - int panic_timeout; - u32 rip_msr; -}; - -struct mce_vendor_flags { - /* - * Indicates that overflow conditions are not fatal, when set. - */ - __u64 overflow_recov : 1, - - /* - * (AMD) SUCCOR stands for S/W UnCorrectable error COntainment and - * Recovery. 
It indicates support for data poisoning in HW and deferred - * error interrupts. - */ - succor : 1, - - /* - * (AMD) SMCA: This bit indicates support for Scalable MCA which expands - * the register space for each MCA bank and also increases number of - * banks. Also, to accommodate the new banks and registers, the MCA - * register space is moved to a new MSR range. - */ - smca : 1, - - __reserved_0 : 61; -}; - -struct mca_msr_regs { - u32 (*ctl) (int bank); - u32 (*status) (int bank); - u32 (*addr) (int bank); - u32 (*misc) (int bank); -}; - -extern struct mce_vendor_flags mce_flags; - -extern struct mca_msr_regs msr_ops; - enum mce_notifier_prios { MCE_PRIO_FIRST = INT_MAX, MCE_PRIO_SRAO = INT_MAX - 1, -- cgit v1.1 From 68627a697c195937672ce07683094c72b1174786 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Wed, 21 Feb 2018 11:18:58 +0100 Subject: x86/mce/AMD, EDAC/mce_amd: Enumerate Reserved SMCA bank type Currently, bank 4 is reserved on Fam17h, so we chose not to initialize bank 4 in the smca_banks array. This means that when we check if a bank is initialized, like during boot or resume, we will see that bank 4 is not initialized and try to initialize it. This will cause a call trace, when resuming from suspend, due to rdmsr_*on_cpu() calls in the init path. The rdmsr_*on_cpu() calls issue an IPI but we're running with interrupts disabled. This triggers: WARNING: CPU: 0 PID: 11523 at kernel/smp.c:291 smp_call_function_single+0xdc/0xe0 ... Reserved banks will be read-as-zero, so their MCA_IPID register will be zero. So, like the smca_banks array, the threshold_banks array will not have an entry for a reserved bank since all its MCA_MISC* registers will be zero. Enumerate a "Reserved" bank type that matches on a HWID_MCATYPE of 0,0. Use the "Reserved" type when checking if a bank is reserved. It's possible that other bank numbers may be reserved on future systems. Don't try to find the block address on reserved banks. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: # 4.14.x Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/20180221101900.10326-7-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c3fb9a7..8c7b3e5 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -294,6 +294,7 @@ enum smca_bank_types { SMCA_IF, /* Instruction Fetch */ SMCA_L2_CACHE, /* L2 Cache */ SMCA_DE, /* Decoder Unit */ + SMCA_RESERVED, /* Reserved */ SMCA_EX, /* Execution Unit */ SMCA_FP, /* Floating Point */ SMCA_L3_CACHE, /* L3 Cache */ -- cgit v1.1 From 038bac2b02989acf1fc938cedcb7944c02672b9f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 19 Feb 2018 11:09:05 +0100 Subject: x86/acpi: Add a new x86_init_acpi structure to x86_init_ops Add a new struct x86_init_acpi to x86_init_ops. For now it contains only one init function to get the RSDP table address. Signed-off-by: Juergen Gross Reviewed-by: Andy Shevchenko Acked-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Cc: Borislav Petkov Cc: Eric Biederman Cc: H. Peter Anvin Cc: Kees Cook Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: boris.ostrovsky@oracle.com Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20180219100906.14265-3-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 7 +++++++ arch/x86/include/asm/x86_init.h | 9 +++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 1188172..6609dd7 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI_APEI # include @@ -133,6 +134,12 @@ static inline bool acpi_has_cpu_in_madt(void) return !!acpi_lapic; } +#define ACPI_HAVE_ARCH_GET_ROOT_POINTER +static inline u64 acpi_arch_get_root_pointer(void) +{ + return x86_init.acpi.get_root_pointer(); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index fc2f082..2e2c34d 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -131,6 +131,14 @@ struct x86_hyper_init { }; /** + * struct x86_init_acpi - x86 ACPI init functions + * @get_root_pointer: get RSDP address + */ +struct x86_init_acpi { + u64 (*get_root_pointer)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -144,6 +152,7 @@ struct x86_init_ops { struct x86_init_iommu iommu; struct x86_init_pci pci; struct x86_hyper_init hyper; + struct x86_init_acpi acpi; }; /** -- cgit v1.1 From 8f1561680f42a5491b371b513f1ab8197f31fd62 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 1 Mar 2018 13:59:30 +0800 Subject: x86/apic: Drop logical_smp_processor_id() inline The logical_smp_processor_id() inline which is only called in setup_local_APIC() on x86_32 systems has no real value. Drop it and directly use GET_APIC_LOGICAL_ID() at the call site and use a more suitable variable name for readability Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: andy.shevchenko@gmail.com Cc: bhe@redhat.com Cc: ebiederm@xmission.com Link: https://lkml.kernel.org/r/20180301055930.2396-4-douly.fnst@cn.fujitsu.com --- arch/x86/include/asm/smp.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 461f53d..e205778 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -176,16 +176,6 @@ static inline int wbinvd_on_all_cpus(void) extern unsigned disabled_cpus; #ifdef CONFIG_X86_LOCAL_APIC - -#ifndef CONFIG_X86_64 -static inline int logical_smp_processor_id(void) -{ - /* we don't want to mark this access volatile - bad code generation */ - return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); -} - -#endif - extern int hard_smp_processor_id(void); #else /* CONFIG_X86_LOCAL_APIC */ -- cgit v1.1 From a41e2ab08ed62fffc81f71a9bc9c642495a52308 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 6 Mar 2018 22:18:04 +0100 Subject: x86/entry: Remove stale syscall prototype sys32_vm86_warning() is long gone. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Cc: viro@zeniv.linux.org.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/sys_ia32.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 82c34ee..43d59ca 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -37,7 +37,6 @@ asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); -long sys32_vm86_warning(void); asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, -- cgit v1.1 From 4ddb45db30851b2269101ee8969f079b028dd257 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 6 Mar 2018 22:18:07 +0100 Subject: x86/syscalls: Use COMPAT_SYSCALL_DEFINEx() macros for x86-only compat syscalls While at it, convert declarations of type "unsigned" to "unsigned int". Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Cc: viro@zeniv.linux.org.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/sys_ia32.h | 45 +++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 43d59ca..3283190 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -20,30 +20,41 @@ #include /* ia32/sys_ia32.c */ -asmlinkage long sys32_truncate64(const char __user *, unsigned long, unsigned long); -asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); +asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long, + unsigned long); +asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long, + unsigned long); -asmlinkage long sys32_stat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_lstat64(const char __user *, struct stat64 __user *); -asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long sys32_fstatat(unsigned int, const char __user *, +asmlinkage long compat_sys_x86_stat64(const char __user *, + struct stat64 __user *); +asmlinkage long compat_sys_x86_lstat64(const char __user *, + struct stat64 __user *); +asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *); +asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, struct stat64 __user *, int); struct mmap_arg_struct32; -asmlinkage long sys32_mmap(struct mmap_arg_struct32 __user *); +asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); -asmlinkage long sys32_waitpid(compat_pid_t, unsigned int __user *, int); +asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *, + int); -asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); -asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32); +asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, + u32); +asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, + u32, u32); -long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, 
int); +asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32, + int); -asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); -asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, - unsigned, unsigned, int); -asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); -asmlinkage long sys32_fallocate(int, int, unsigned, - unsigned, unsigned, unsigned); +asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int, + size_t); +asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int, + unsigned int, unsigned int, + int); +asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, + size_t, int); +asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, + unsigned int, unsigned int); /* ia32/ia32_signal.c */ asmlinkage long sys32_sigreturn(void); -- cgit v1.1 From af52201d991624d2d5adce2c123805b3d42a8d4d Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Tue, 6 Mar 2018 22:18:08 +0100 Subject: x86/entry: Do not special-case clone(2) in compat entry With the CPU renaming registers on its own, and all the overhead of the syscall entry/exit, it is doubtful whether the compiled output of mov %r8, %rax mov %rcx, %r8 mov %rax, %rcx jmpq sys_clone is measurably slower than the hand-crafted version of xchg %r8, %rcx So get rid of this special case. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Acked-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: luto@amacapital.net Cc: viro@zeniv.linux.org.uk Signed-off-by: Ingo Molnar --- arch/x86/include/asm/sys_ia32.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 3283190..906794a 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -55,6 +55,8 @@ asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, size_t, int); asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, unsigned int, unsigned int); +asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *, + unsigned long, int __user *); /* ia32/ia32_signal.c */ asmlinkage long sys32_sigreturn(void); -- cgit v1.1 From 076ca272a14cea558b1092ec85cea08510283f2a Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 7 Mar 2018 11:12:27 -0800 Subject: x86/vsyscall/64: Drop "native" vsyscalls Since Linux v3.2, vsyscalls have been deprecated and slow. From v3.2 on, Linux had three vsyscall modes: "native", "emulate", and "none". "emulate" is the default. All known user programs work correctly in emulate mode, but vsyscalls turn into page faults and are emulated. This is very slow. In "native" mode, the vsyscall page is easily usable as an exploit gadget, but vsyscalls are a bit faster -- they turn into normal syscalls. (This is in contrast to vDSO functions, which can be much faster than syscalls.) In "none" mode, there are no vsyscalls. For all practical purposes, "native" was really just a chicken bit in case something went wrong with the emulation. It's been over six years, and nothing has gone wrong. Delete it. 
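For context, a rough sketch of the legacy user-space call that this mode distinction exists for; the fixed address and the gettimeofday() entry at offset 0 are the well-known legacy vsyscall ABI, while the wrapper itself is only an illustration and not part of this patch:

	/* Old static binaries call gettimeofday() through the fixed vsyscall
	 * address. In "emulate" mode the call traps and the kernel performs
	 * the syscall on the caller's behalf; "native" mode mapped the page
	 * executable instead. */
	#include <sys/time.h>

	#define VSYSCALL_ADDR	0xffffffffff600000UL

	typedef int (*vgtod_t)(struct timeval *tv, struct timezone *tz);

	static int legacy_vgettimeofday(struct timeval *tv)
	{
		vgtod_t vgtod = (vgtod_t)VSYSCALL_ADDR;

		return vgtod(tv, NULL);
	}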
Signed-off-by: Andy Lutomirski Acked-by: Kees Cook Acked-by: Linus Torvalds Cc: Borislav Petkov Cc: Dominik Brodowski Cc: Kernel Hardening Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/519fee5268faea09ae550776ce969fa6e88668b0.1520449896.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 246f15b..acfe755 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -174,7 +174,6 @@ enum page_cache_mode { #define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) #define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) #define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_NOCACHE) -#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) #define __PAGE_KERNEL_VVAR (__PAGE_KERNEL_RO | _PAGE_USER) #define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) #define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) @@ -206,7 +205,6 @@ enum page_cache_mode { #define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE | _PAGE_ENC) #define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE | _PAGE_ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC | _PAGE_ENC) -#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL | _PAGE_ENC) #define PAGE_KERNEL_VVAR __pgprot(__PAGE_KERNEL_VVAR | _PAGE_ENC) #define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) -- cgit v1.1 From c996f3802006a585a6c3f8eaa73e375330efc0e7 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 1 Mar 2018 16:13:36 +0100 Subject: x86/MSR: Move native_* variants to msr.h ... where they belong. No functional change. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Darren Kenny Cc: kvm@vger.kernel.org Link: https://lkml.kernel.org/r/20180301151336.12948-1-bp@alien8.de --- arch/x86/include/asm/microcode.h | 14 -------------- arch/x86/include/asm/msr.h | 14 ++++++++++++++ 2 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 7fb1047..871714e 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -6,20 +6,6 @@ #include #include -#define native_rdmsr(msr, val1, val2) \ -do { \ - u64 __val = __rdmsr((msr)); \ - (void)((val1) = (u32)__val); \ - (void)((val2) = (u32)(__val >> 32)); \ -} while (0) - -#define native_wrmsr(msr, low, high) \ - __wrmsr(msr, low, high) - -#define native_wrmsrl(msr, val) \ - __wrmsr((msr), (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)) - struct ucode_patch { struct list_head plist; void *data; /* Intel uses only this one */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 30df295..77254c9 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -108,6 +108,20 @@ static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) : : "c" (msr), "a"(low), "d" (high) : "memory"); } +#define native_rdmsr(msr, val1, val2) \ +do { \ + u64 __val = __rdmsr((msr)); \ + (void)((val1) = (u32)__val); \ + (void)((val2) = (u32)(__val >> 32)); \ +} while (0) + +#define native_wrmsr(msr, low, high) \ + __wrmsr(msr, low, high) + +#define native_wrmsrl(msr, val) \ + __wrmsr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)) + static inline unsigned long long native_read_msr(unsigned int msr) { unsigned long long val; -- cgit v1.1 From 
16d1cb0bc43642a4d934631a73c5210ad2499e2f Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 6 Mar 2018 10:49:14 +0100 Subject: x86/dumpstack: Unify show_regs() The 32-bit version uses KERN_EMERG and commit b0f4c4b32c8e ("bugs, x86: Fix printk levels for panic, softlockups and stack dumps") changed the 64-bit version to KERN_DEFAULT. The same justification in that commit that those messages do not belong in the terminal, holds true for 32-bit also, so make it so. Make code_bytes static, while at it. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20180306094920.16917-4-bp@alien8.de --- arch/x86/include/asm/stacktrace.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index f737068..133d942 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -87,8 +87,6 @@ get_stack_pointer(struct task_struct *task, struct pt_regs *regs) void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, char *log_lvl); -extern unsigned int code_bytes; - /* The form of the top of the frame on the stack */ struct stack_frame { struct stack_frame *next_frame; -- cgit v1.1 From 63338a38db955cb4e0352c11b78732157c78d30b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 7 Mar 2018 08:39:12 +0100 Subject: jailhouse: Provide detection for non-x86 systems Implement jailhouse_paravirt() via device tree probing on architectures != x86. Will be used by the PCI core. Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross Cc: jailhouse-dev@googlegroups.com Cc: Mark Rutland Cc: linux-pci@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: Andy Shevchenko Cc: Rob Herring Cc: Bjorn Helgaas Link: https://lkml.kernel.org/r/dae9fe0c6e63141c28ca90492fa5712b4c33ffb5.1520408357.git.jan.kiszka@siemens.com --- arch/x86/include/asm/jailhouse_para.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/jailhouse_para.h b/arch/x86/include/asm/jailhouse_para.h index 875b543..b885a96 100644 --- a/arch/x86/include/asm/jailhouse_para.h +++ b/arch/x86/include/asm/jailhouse_para.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL2.0 */ /* - * Jailhouse paravirt_ops implementation + * Jailhouse paravirt detection * * Copyright (c) Siemens AG, 2015-2017 * -- cgit v1.1 From 6fa4a94e150be25d02547d51f03b4bc34aaec32f Mon Sep 17 00:00:00 2001 From: Otavio Pontes Date: Wed, 7 Mar 2018 08:39:14 +0100 Subject: x86/jailhouse: Enable PCI mmconfig access in inmates Use the PCI mmconfig base address exported by jailhouse in boot parameters in order to access the memory mapped PCI configuration space. 
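Roughly, the inmate-side setup this enables looks like the sketch below; only pci_mmconfig_add() and pcibios_last_bus come from the exported header, while the hook name and the way the base address reaches it are assumptions for illustration:

	/* Sketch: register the MMCONFIG region reported by the hypervisor in
	 * the boot parameters so that ECAM config space accesses work inside
	 * the cell. */
	static void __init jailhouse_pci_mmcfg_setup(u64 mmcfg_base)
	{
		if (!mmcfg_base)
			return;

		/* segment 0, buses 0 .. pcibios_last_bus, at the reported base */
		pci_mmconfig_add(0, 0, pcibios_last_bus, mmcfg_base);
	}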
[Jan: rebased, fixed !CONFIG_PCI_MMCONFIG, used pcibios_last_bus] Signed-off-by: Otavio Pontes Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Reviewed-by: Andy Shevchenko Cc: jailhouse-dev@googlegroups.com Cc: linux-pci@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: Bjorn Helgaas Link: https://lkml.kernel.org/r/2ee9e4401fa22377b3965893a558120f169be82b.1520408357.git.jan.kiszka@siemens.com --- arch/x86/include/asm/pci_x86.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index eb66fa9..959d618 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -151,6 +151,8 @@ extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end, phys_addr_t addr); extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end); extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); +extern struct pci_mmcfg_region *__init pci_mmconfig_add(int segment, int start, + int end, u64 addr); extern struct list_head pci_mmcfg_list; -- cgit v1.1 From fa94d0c6e0f3431523f5701084d799c77c7d4a4f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 6 Mar 2018 15:21:41 +0100 Subject: x86/MCE: Save microcode revision in machine check records Updating microcode used to be relatively rare. Now that it has become more common we should save the microcode version in a machine check record to make sure that those people looking at the error have this important information bundled with the rest of the logged information. [ Borislav: Simplify a bit. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Yazen Ghannam Cc: linux-edac Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180301233449.24311-1-tony.luck@intel.com --- arch/x86/include/uapi/asm/mce.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 91723461..435db58 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -30,6 +30,7 @@ struct mce { __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ __u64 ppin; /* Protected Processor Inventory Number */ + __u32 microcode;/* Microcode revision */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) -- cgit v1.1 From 24193c5de470358d0ed70e1f8e58fdaf83823b95 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 6 Mar 2018 15:21:42 +0100 Subject: x86/MCE: Cleanup and complete struct mce fields definitions The struct is part of the uapi, document that fact and all fields properly and fix formatting. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Tony Luck Link: https://lkml.kernel.org/r/20180306142143.19990-3-bp@alien8.de --- arch/x86/include/uapi/asm/mce.h | 52 ++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 24 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index 435db58..955c2a2 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h @@ -5,32 +5,36 @@ #include #include -/* Fields are zero when not available */ +/* + * Fields are zero when not available. Also, this struct is shared with + * userspace mcelog and thus must keep existing fields at current offsets. 
+ * Only add new fields to the end of the structure + */ struct mce { - __u64 status; - __u64 misc; - __u64 addr; - __u64 mcgstatus; - __u64 ip; - __u64 tsc; /* cpu time stamp counter */ - __u64 time; /* wall time_t when error was detected */ - __u8 cpuvendor; /* cpu vendor as encoded in system.h */ - __u8 inject_flags; /* software inject flags */ - __u8 severity; + __u64 status; /* Bank's MCi_STATUS MSR */ + __u64 misc; /* Bank's MCi_MISC MSR */ + __u64 addr; /* Bank's MCi_ADDR MSR */ + __u64 mcgstatus; /* Machine Check Global Status MSR */ + __u64 ip; /* Instruction Pointer when the error happened */ + __u64 tsc; /* CPU time stamp counter */ + __u64 time; /* Wall time_t when error was detected */ + __u8 cpuvendor; /* Kernel's X86_VENDOR enum */ + __u8 inject_flags; /* Software inject flags */ + __u8 severity; /* Error severity */ __u8 pad; - __u32 cpuid; /* CPUID 1 EAX */ - __u8 cs; /* code segment */ - __u8 bank; /* machine check bank */ - __u8 cpu; /* cpu number; obsolete; use extcpu now */ - __u8 finished; /* entry is valid */ - __u32 extcpu; /* linux cpu number that detected the error */ - __u32 socketid; /* CPU socket ID */ - __u32 apicid; /* CPU initial apic ID */ - __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ - __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ - __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ - __u64 ppin; /* Protected Processor Inventory Number */ - __u32 microcode;/* Microcode revision */ + __u32 cpuid; /* CPUID 1 EAX */ + __u8 cs; /* Code segment */ + __u8 bank; /* Machine check bank reporting the error */ + __u8 cpu; /* CPU number; obsoleted by extcpu */ + __u8 finished; /* Entry is valid */ + __u32 extcpu; /* Linux CPU number that detected the error */ + __u32 socketid; /* CPU socket ID */ + __u32 apicid; /* CPU initial APIC ID */ + __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ + __u64 synd; /* MCA_SYND MSR: only valid on SMCA systems */ + __u64 ipid; /* MCA_IPID MSR: only valid on SMCA systems */ + __u64 ppin; /* Protected Processor Inventory Number */ + __u32 microcode; /* Microcode revision */ }; #define MCE_GET_RECORD_LEN _IOR('M', 1, int) -- cgit v1.1 From c07a8f8b08ba683ea24f3ac9159f37ae94daf47f Mon Sep 17 00:00:00 2001 From: Francis Deslauriers Date: Thu, 8 Mar 2018 22:18:12 -0500 Subject: x86/kprobes: Fix kernel crash when probing .entry_trampoline code Disable the kprobe probing of the entry trampoline: .entry_trampoline is a code area that is used to ensure page table isolation between userspace and kernelspace. At the beginning of the execution of the trampoline, we load the kernel's CR3 register. This has the effect of enabling the translation of the kernel virtual addresses to physical addresses. Before this happens most kernel addresses can not be translated because the running process' CR3 is still used. If a kprobe is placed on the trampoline code before that change of the CR3 register happens the kernel crashes because int3 handling pages are not accessible. To fix this, add the .entry_trampoline section to the kprobe blacklist to prohibit the probing of code before all the kernel pages are accessible. Signed-off-by: Francis Deslauriers Reviewed-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: mathieu.desnoyers@efficios.com Cc: mhiramat@kernel.org Link: http://lkml.kernel.org/r/1520565492-4637-2-git-send-email-francis.deslauriers@efficios.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/sections.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h index d6baf23..5c019d2 100644 --- a/arch/x86/include/asm/sections.h +++ b/arch/x86/include/asm/sections.h @@ -10,6 +10,7 @@ extern struct exception_table_entry __stop___ex_table[]; #if defined(CONFIG_X86_64) extern char __end_rodata_hpage_align[]; +extern char __entry_trampoline_start[], __entry_trampoline_end[]; #endif #endif /* _ASM_X86_SECTIONS_H */ -- cgit v1.1 From 7e904a91bf6049071ef9d605a52f863ae774081d Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Mon, 12 Mar 2018 08:44:56 +0000 Subject: efi: Use efi_mm in x86 as well as ARM Presently, only ARM uses mm_struct to manage EFI page tables and EFI runtime region mappings. As this is the preferred approach, let's make this data structure common across architectures. Specially, for x86, using this data structure improves code maintainability and readability. Tested-by: Bhupesh Sharma [ardb: don't #include the world to get a declaration of struct mm_struct] Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20180312084500.10764-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a399c1e..c62443f 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -7,6 +7,7 @@ #include #include #include +#include /* * We map the EFI regions needed for runtime services non-contiguously, -- cgit v1.1 From a5b162b2ecb013ed517ab5ce90079117ada743f4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 11:16:41 +0300 Subject: x86/mm: Do not use paravirtualized calls in native_set_p4d() In 4-level paging mode, native_set_p4d() updates the entry in the top-level page table. With PTI, update to the top-level kernel page table requires update to the userspace copy of the table as well, using pti_set_user_pgd(). native_set_p4d() uses p4d_val() and pgd_val() to convert types between p4d_t and pgd_t. p4d_val() and pgd_val() are paravirtualized and we must not use them in native helpers, as they crash the boot in paravirtualized environments. Replace p4d_val() and pgd_val() with native_p4d_val() and native_pgd_val() in native_set_p4d(). Reported-by: Fengguang Wu Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 91f606a8fa68 ("x86/mm: Replace compile-time checks for 5-level paging with runtime-time checks") Link: http://lkml.kernel.org/r/20180305081641.4290-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81dda8d..163e01a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -224,9 +224,9 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) return; } - pgd = native_make_pgd(p4d_val(p4d)); + pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd); - *p4dp = native_make_p4d(pgd_val(pgd)); + *p4dp = native_make_p4d(native_pgd_val(pgd)); } static inline void native_p4d_clear(p4d_t *p4d) -- cgit v1.1 From 03781e40890c18bdea40092355b61431d0073c1d Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Mon, 12 Mar 2018 09:43:55 +0000 Subject: x86/efi: Use efi_switch_mm() rather than manually twiddling with %cr3 Use the helper function efi_switch_mm() to switch to/from efi_mm when invoking any UEFI runtime services. Likewise, we need to switch back to the previous mm (the mm context stolen by efi_mm) after the above calls return successfully. We can use the efi_switch_mm() helper only with an x86_64 kernel and "efi=old_map" disabled, because x86_32 and efi=old_map do not use efi_pgd; they use swapper_pg_dir instead. Tested-by: Bhupesh Sharma [ardb: add #include of sched/task.h for task_lock/_unlock] Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Michael S. Tsirkin Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-efi@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/efi.h | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index c62443f..cec5fae 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -70,14 +70,13 @@ extern asmlinkage u64 efi_call(void *fp, ...); #define efi_call_phys(f, args...) efi_call((f), args) /* - * Scratch space used for switching the pagetable in the EFI stub + * struct efi_scratch - Scratch space used while switching to/from efi_mm + * @phys_stack: stack used during EFI Mixed Mode + * @prev_mm: store/restore stolen mm_struct while switching to/from efi_mm */ struct efi_scratch { - u64 r15; - u64 prev_cr3; - pgd_t *efi_pgt; - bool use_pgd; - u64 phys_stack; + u64 phys_stack; + struct mm_struct *prev_mm; } __packed; #define arch_efi_call_virt_setup() \ @@ -87,11 +86,8 @@ struct efi_scratch { __kernel_fpu_begin(); \ firmware_restrict_branch_speculation_start(); \ \ - if (efi_scratch.use_pgd) { \ - efi_scratch.prev_cr3 = __read_cr3(); \ - write_cr3((unsigned long)efi_scratch.efi_pgt); \ - __flush_tlb_all(); \ - } \ + if (!efi_enabled(EFI_OLD_MEMMAP)) \ + efi_switch_mm(&efi_mm); \ }) #define arch_efi_call_virt(p, f, args...) 
\ @@ -99,10 +95,8 @@ struct efi_scratch { #define arch_efi_call_virt_teardown() \ ({ \ - if (efi_scratch.use_pgd) { \ - write_cr3(efi_scratch.prev_cr3); \ - __flush_tlb_all(); \ - } \ + if (!efi_enabled(EFI_OLD_MEMMAP)) \ + efi_switch_mm(efi_scratch.prev_mm); \ \ firmware_restrict_branch_speculation_end(); \ __kernel_fpu_end(); \ @@ -145,6 +139,7 @@ extern void __init efi_dump_pagetable(void); extern void __init efi_apply_memmap_quirks(void); extern int __init efi_reuse_config(u64 tables, int nr_tables); extern void efi_delete_dummy_variable(void); +extern void efi_switch_mm(struct mm_struct *mm); struct efi_setup_data { u64 fw_vendor; -- cgit v1.1 From 1da961d72ab0cfbe8b7c26cba731dc2bb6b9494b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:49 +0300 Subject: x86/cpufeatures: Add Intel Total Memory Encryption cpufeature CPUID.0x7.0x0:ECX[13] indicates whether the CPU supports Intel Total Memory Encryption. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-2-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f41079d..16898eb 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -316,6 +316,7 @@ #define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ #define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ #define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -- cgit v1.1 From 7958b2246fadf54b7ff820a2a5a2c5ca1554716f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:51 +0300 Subject: x86/cpufeatures: Add Intel PCONFIG cpufeature CPUID.0x7.0x0:EDX[18] indicates whether the CPU supports the Intel PCONFIG instruction. Signed-off-by: Kirill A. 
Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 16898eb..d554c11 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -329,6 +329,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -- cgit v1.1 From be7825c19b4866ddc7b1431740b69ede2eeb93c1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:52 +0300 Subject: x86/pconfig: Detect PCONFIG targets Intel PCONFIG targets are enumerated via the new CPUID leaf 0x1b. This patch detects all supported PCONFIG targets and implements a helper to check whether a given target is supported. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel_pconfig.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 arch/x86/include/asm/intel_pconfig.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h new file mode 100644 index 0000000..fb7a37c --- /dev/null +++ b/arch/x86/include/asm/intel_pconfig.h @@ -0,0 +1,15 @@ +#ifndef _ASM_X86_INTEL_PCONFIG_H +#define _ASM_X86_INTEL_PCONFIG_H + +#include +#include + +enum pconfig_target { + INVALID_TARGET = 0, + MKTME_TARGET = 1, + PCONFIG_TARGET_NR +}; + +int pconfig_target_supported(enum pconfig_target target); + +#endif /* _ASM_X86_INTEL_PCONFIG_H */ -- cgit v1.1 From 24c517856af6511be1339dd55edd131160e37aac Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:53 +0300 Subject: x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf MKTME_KEY_PROG allows manipulating MKTME keys in the CPU. Signed-off-by: Kirill A. 
Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel_pconfig.h | 50 ++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h index fb7a37c..3cb002b 100644 --- a/arch/x86/include/asm/intel_pconfig.h +++ b/arch/x86/include/asm/intel_pconfig.h @@ -12,4 +12,54 @@ enum pconfig_target { int pconfig_target_supported(enum pconfig_target target); +enum pconfig_leaf { + MKTME_KEY_PROGRAM = 0, + PCONFIG_LEAF_INVALID, +}; + +#define PCONFIG ".byte 0x0f, 0x01, 0xc5" + +/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */ + +/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */ +#define MKTME_KEYID_SET_KEY_DIRECT 0 +#define MKTME_KEYID_SET_KEY_RANDOM 1 +#define MKTME_KEYID_CLEAR_KEY 2 +#define MKTME_KEYID_NO_ENCRYPT 3 + +/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */ +#define MKTME_AES_XTS_128 (1 << 8) + +/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */ +#define MKTME_PROG_SUCCESS 0 +#define MKTME_INVALID_PROG_CMD 1 +#define MKTME_ENTROPY_ERROR 2 +#define MKTME_INVALID_KEYID 3 +#define MKTME_INVALID_ENC_ALG 4 +#define MKTME_DEVICE_BUSY 5 + +/* Hardware requires the structure to be 256 byte alinged. Otherwise #GP(0). */ +struct mktme_key_program { + u16 keyid; + u32 keyid_ctrl; + u8 __rsvd[58]; + u8 key_field_1[64]; + u8 key_field_2[64]; +} __packed __aligned(256); + +static inline int mktme_key_program(struct mktme_key_program *key_program) +{ + unsigned long rax = MKTME_KEY_PROGRAM; + + if (!pconfig_target_supported(MKTME_TARGET)) + return -ENXIO; + + asm volatile(PCONFIG + : "=a" (rax), "=b" (key_program) + : "0" (rax), "1" (key_program) + : "memory", "cc"); + + return rax; +} + #endif /* _ASM_X86_INTEL_PCONFIG_H */ -- cgit v1.1 From 8bf705d130396e69c04cd8e6e010244ad2ce71f4 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 29 Jan 2018 18:26:05 +0100 Subject: locking/atomic/x86: Switch atomic.h to use atomic-instrumented.h Add arch_ prefix to all atomic operations and include . This will allow to add KASAN instrumentation to all atomic ops. Signed-off-by: Dmitry Vyukov Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/54f0eb64260b84199e538652e079a89b5423ad41.1517246437.git.dvyukov@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 102 ++++++++++++++++++----------------- arch/x86/include/asm/atomic64_32.h | 106 ++++++++++++++++++------------------ arch/x86/include/asm/atomic64_64.h | 108 ++++++++++++++++++------------------- arch/x86/include/asm/cmpxchg.h | 12 ++--- arch/x86/include/asm/cmpxchg_32.h | 8 +-- arch/x86/include/asm/cmpxchg_64.h | 4 +- 6 files changed, 172 insertions(+), 168 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 72759f1..33afc96 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -17,36 +17,36 @@ #define ATOMIC_INIT(i) { (i) } /** - * atomic_read - read atomic variable + * arch_atomic_read - read atomic variable * @v: pointer of type atomic_t * * Atomically reads the value of @v. 
*/ -static __always_inline int atomic_read(const atomic_t *v) +static __always_inline int arch_atomic_read(const atomic_t *v) { return READ_ONCE((v)->counter); } /** - * atomic_set - set atomic variable + * arch_atomic_set - set atomic variable * @v: pointer of type atomic_t * @i: required value * * Atomically sets the value of @v to @i. */ -static __always_inline void atomic_set(atomic_t *v, int i) +static __always_inline void arch_atomic_set(atomic_t *v, int i) { WRITE_ONCE(v->counter, i); } /** - * atomic_add - add integer to atomic variable + * arch_atomic_add - add integer to atomic variable * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v. */ -static __always_inline void atomic_add(int i, atomic_t *v) +static __always_inline void arch_atomic_add(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "addl %1,%0" : "+m" (v->counter) @@ -54,13 +54,13 @@ static __always_inline void atomic_add(int i, atomic_t *v) } /** - * atomic_sub - subtract integer from atomic variable + * arch_atomic_sub - subtract integer from atomic variable * @i: integer value to subtract * @v: pointer of type atomic_t * * Atomically subtracts @i from @v. */ -static __always_inline void atomic_sub(int i, atomic_t *v) +static __always_inline void arch_atomic_sub(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "subl %1,%0" : "+m" (v->counter) @@ -68,7 +68,7 @@ static __always_inline void atomic_sub(int i, atomic_t *v) } /** - * atomic_sub_and_test - subtract value from variable and test result + * arch_atomic_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer of type atomic_t * @@ -76,63 +76,63 @@ static __always_inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) +static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } /** - * atomic_inc - increment atomic variable + * arch_atomic_inc - increment atomic variable * @v: pointer of type atomic_t * * Atomically increments @v by 1. */ -static __always_inline void atomic_inc(atomic_t *v) +static __always_inline void arch_atomic_inc(atomic_t *v) { asm volatile(LOCK_PREFIX "incl %0" : "+m" (v->counter)); } /** - * atomic_dec - decrement atomic variable + * arch_atomic_dec - decrement atomic variable * @v: pointer of type atomic_t * * Atomically decrements @v by 1. */ -static __always_inline void atomic_dec(atomic_t *v) +static __always_inline void arch_atomic_dec(atomic_t *v) { asm volatile(LOCK_PREFIX "decl %0" : "+m" (v->counter)); } /** - * atomic_dec_and_test - decrement and test + * arch_atomic_dec_and_test - decrement and test * @v: pointer of type atomic_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ -static __always_inline bool atomic_dec_and_test(atomic_t *v) +static __always_inline bool arch_atomic_dec_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } /** - * atomic_inc_and_test - increment and test + * arch_atomic_inc_and_test - increment and test * @v: pointer of type atomic_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. 
*/ -static __always_inline bool atomic_inc_and_test(atomic_t *v) +static __always_inline bool arch_atomic_inc_and_test(atomic_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } /** - * atomic_add_negative - add and test if negative + * arch_atomic_add_negative - add and test if negative * @i: integer value to add * @v: pointer of type atomic_t * @@ -140,65 +140,65 @@ static __always_inline bool atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static __always_inline bool atomic_add_negative(int i, atomic_t *v) +static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } /** - * atomic_add_return - add integer and return + * arch_atomic_add_return - add integer and return * @i: integer value to add * @v: pointer of type atomic_t * * Atomically adds @i to @v and returns @i + @v */ -static __always_inline int atomic_add_return(int i, atomic_t *v) +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return i + xadd(&v->counter, i); } /** - * atomic_sub_return - subtract integer and return + * arch_atomic_sub_return - subtract integer and return * @v: pointer of type atomic_t * @i: integer value to subtract * * Atomically subtracts @i from @v and returns @v - @i */ -static __always_inline int atomic_sub_return(int i, atomic_t *v) +static __always_inline int arch_atomic_sub_return(int i, atomic_t *v) { - return atomic_add_return(-i, v); + return arch_atomic_add_return(-i, v); } -#define atomic_inc_return(v) (atomic_add_return(1, v)) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) +#define arch_atomic_inc_return(v) (arch_atomic_add_return(1, v)) +#define arch_atomic_dec_return(v) (arch_atomic_sub_return(1, v)) -static __always_inline int atomic_fetch_add(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return xadd(&v->counter, i); } -static __always_inline int atomic_fetch_sub(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v) { return xadd(&v->counter, -i); } -static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { - return cmpxchg(&v->counter, old, new); + return arch_cmpxchg(&v->counter, old, new); } -#define atomic_try_cmpxchg atomic_try_cmpxchg -static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new) +#define arch_atomic_try_cmpxchg arch_atomic_try_cmpxchg +static __always_inline bool arch_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { return try_cmpxchg(&v->counter, old, new); } -static inline int atomic_xchg(atomic_t *v, int new) +static inline int arch_atomic_xchg(atomic_t *v, int new) { return xchg(&v->counter, new); } -static inline void atomic_and(int i, atomic_t *v) +static inline void arch_atomic_and(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "andl %1,%0" : "+m" (v->counter) @@ -206,16 +206,16 @@ static inline void atomic_and(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_and(int i, atomic_t *v) +static inline int arch_atomic_fetch_and(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val & i)); + do { } while (!arch_atomic_try_cmpxchg(v, &val, val & i)); return val; } -static inline void atomic_or(int i, atomic_t *v) +static inline void arch_atomic_or(int i, atomic_t *v) { asm 
volatile(LOCK_PREFIX "orl %1,%0" : "+m" (v->counter) @@ -223,16 +223,16 @@ static inline void atomic_or(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_or(int i, atomic_t *v) +static inline int arch_atomic_fetch_or(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val | i)); + do { } while (!arch_atomic_try_cmpxchg(v, &val, val | i)); return val; } -static inline void atomic_xor(int i, atomic_t *v) +static inline void arch_atomic_xor(int i, atomic_t *v) { asm volatile(LOCK_PREFIX "xorl %1,%0" : "+m" (v->counter) @@ -240,17 +240,17 @@ static inline void atomic_xor(int i, atomic_t *v) : "memory"); } -static inline int atomic_fetch_xor(int i, atomic_t *v) +static inline int arch_atomic_fetch_xor(int i, atomic_t *v) { - int val = atomic_read(v); + int val = arch_atomic_read(v); - do { } while (!atomic_try_cmpxchg(v, &val, val ^ i)); + do { } while (!arch_atomic_try_cmpxchg(v, &val, val ^ i)); return val; } /** - * __atomic_add_unless - add unless the number is already a given value + * __arch_atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t * @a: the amount to add to v... * @u: ...unless v is equal to u. @@ -258,14 +258,14 @@ static inline int atomic_fetch_xor(int i, atomic_t *v) * Atomically adds @a to @v, so long as @v was not already @u. * Returns the old value of @v. */ -static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) +static __always_inline int __arch_atomic_add_unless(atomic_t *v, int a, int u) { - int c = atomic_read(v); + int c = arch_atomic_read(v); do { if (unlikely(c == u)) break; - } while (!atomic_try_cmpxchg(v, &c, c + a)); + } while (!arch_atomic_try_cmpxchg(v, &c, c + a)); return c; } @@ -276,4 +276,6 @@ static __always_inline int __atomic_add_unless(atomic_t *v, int a, int u) # include #endif +#include + #endif /* _ASM_X86_ATOMIC_H */ diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 97c46b8..46e1ef1 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -62,7 +62,7 @@ ATOMIC64_DECL(add_unless); #undef ATOMIC64_EXPORT /** - * atomic64_cmpxchg - cmpxchg atomic64 variable + * arch_atomic64_cmpxchg - cmpxchg atomic64 variable * @v: pointer to type atomic64_t * @o: expected value * @n: new value @@ -71,20 +71,21 @@ ATOMIC64_DECL(add_unless); * the old value. */ -static inline long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n) +static inline long long arch_atomic64_cmpxchg(atomic64_t *v, long long o, + long long n) { - return cmpxchg64(&v->counter, o, n); + return arch_cmpxchg64(&v->counter, o, n); } /** - * atomic64_xchg - xchg atomic64 variable + * arch_atomic64_xchg - xchg atomic64 variable * @v: pointer to type atomic64_t * @n: value to assign * * Atomically xchgs the value of @v to @n and returns * the old value. */ -static inline long long atomic64_xchg(atomic64_t *v, long long n) +static inline long long arch_atomic64_xchg(atomic64_t *v, long long n) { long long o; unsigned high = (unsigned)(n >> 32); @@ -96,13 +97,13 @@ static inline long long atomic64_xchg(atomic64_t *v, long long n) } /** - * atomic64_set - set atomic64 variable + * arch_atomic64_set - set atomic64 variable * @v: pointer to type atomic64_t * @i: value to assign * * Atomically sets the value of @v to @n. 
*/ -static inline void atomic64_set(atomic64_t *v, long long i) +static inline void arch_atomic64_set(atomic64_t *v, long long i) { unsigned high = (unsigned)(i >> 32); unsigned low = (unsigned)i; @@ -112,12 +113,12 @@ static inline void atomic64_set(atomic64_t *v, long long i) } /** - * atomic64_read - read atomic64 variable + * arch_atomic64_read - read atomic64 variable * @v: pointer to type atomic64_t * * Atomically reads the value of @v and returns it. */ -static inline long long atomic64_read(const atomic64_t *v) +static inline long long arch_atomic64_read(const atomic64_t *v) { long long r; alternative_atomic64(read, "=&A" (r), "c" (v) : "memory"); @@ -125,13 +126,13 @@ static inline long long atomic64_read(const atomic64_t *v) } /** - * atomic64_add_return - add and return + * arch_atomic64_add_return - add and return * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v and returns @i + *@v */ -static inline long long atomic64_add_return(long long i, atomic64_t *v) +static inline long long arch_atomic64_add_return(long long i, atomic64_t *v) { alternative_atomic64(add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -142,7 +143,7 @@ static inline long long atomic64_add_return(long long i, atomic64_t *v) /* * Other variants with different arithmetic operators: */ -static inline long long atomic64_sub_return(long long i, atomic64_t *v) +static inline long long arch_atomic64_sub_return(long long i, atomic64_t *v) { alternative_atomic64(sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -150,7 +151,7 @@ static inline long long atomic64_sub_return(long long i, atomic64_t *v) return i; } -static inline long long atomic64_inc_return(atomic64_t *v) +static inline long long arch_atomic64_inc_return(atomic64_t *v) { long long a; alternative_atomic64(inc_return, "=&A" (a), @@ -158,7 +159,7 @@ static inline long long atomic64_inc_return(atomic64_t *v) return a; } -static inline long long atomic64_dec_return(atomic64_t *v) +static inline long long arch_atomic64_dec_return(atomic64_t *v) { long long a; alternative_atomic64(dec_return, "=&A" (a), @@ -167,13 +168,13 @@ static inline long long atomic64_dec_return(atomic64_t *v) } /** - * atomic64_add - add integer to atomic64 variable + * arch_atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v. */ -static inline long long atomic64_add(long long i, atomic64_t *v) +static inline long long arch_atomic64_add(long long i, atomic64_t *v) { __alternative_atomic64(add, add_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -182,13 +183,13 @@ static inline long long atomic64_add(long long i, atomic64_t *v) } /** - * atomic64_sub - subtract the atomic64 variable + * arch_atomic64_sub - subtract the atomic64 variable * @i: integer value to subtract * @v: pointer to type atomic64_t * * Atomically subtracts @i from @v. 
*/ -static inline long long atomic64_sub(long long i, atomic64_t *v) +static inline long long arch_atomic64_sub(long long i, atomic64_t *v) { __alternative_atomic64(sub, sub_return, ASM_OUTPUT2("+A" (i), "+c" (v)), @@ -197,7 +198,7 @@ static inline long long atomic64_sub(long long i, atomic64_t *v) } /** - * atomic64_sub_and_test - subtract value from variable and test result + * arch_atomic64_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer to type atomic64_t * @@ -205,46 +206,46 @@ static inline long long atomic64_sub(long long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic64_sub_and_test(long long i, atomic64_t *v) +static inline int arch_atomic64_sub_and_test(long long i, atomic64_t *v) { - return atomic64_sub_return(i, v) == 0; + return arch_atomic64_sub_return(i, v) == 0; } /** - * atomic64_inc - increment atomic64 variable + * arch_atomic64_inc - increment atomic64 variable * @v: pointer to type atomic64_t * * Atomically increments @v by 1. */ -static inline void atomic64_inc(atomic64_t *v) +static inline void arch_atomic64_inc(atomic64_t *v) { __alternative_atomic64(inc, inc_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } /** - * atomic64_dec - decrement atomic64 variable + * arch_atomic64_dec - decrement atomic64 variable * @v: pointer to type atomic64_t * * Atomically decrements @v by 1. */ -static inline void atomic64_dec(atomic64_t *v) +static inline void arch_atomic64_dec(atomic64_t *v) { __alternative_atomic64(dec, dec_return, /* no output */, "S" (v) : "memory", "eax", "ecx", "edx"); } /** - * atomic64_dec_and_test - decrement and test + * arch_atomic64_dec_and_test - decrement and test * @v: pointer to type atomic64_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. */ -static inline int atomic64_dec_and_test(atomic64_t *v) +static inline int arch_atomic64_dec_and_test(atomic64_t *v) { - return atomic64_dec_return(v) == 0; + return arch_atomic64_dec_return(v) == 0; } /** @@ -255,13 +256,13 @@ static inline int atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic64_inc_and_test(atomic64_t *v) +static inline int arch_atomic64_inc_and_test(atomic64_t *v) { - return atomic64_inc_return(v) == 0; + return arch_atomic64_inc_return(v) == 0; } /** - * atomic64_add_negative - add and test if negative + * arch_atomic64_add_negative - add and test if negative * @i: integer value to add * @v: pointer to type atomic64_t * @@ -269,13 +270,13 @@ static inline int atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic64_add_negative(long long i, atomic64_t *v) +static inline int arch_atomic64_add_negative(long long i, atomic64_t *v) { - return atomic64_add_return(i, v) < 0; + return arch_atomic64_add_return(i, v) < 0; } /** - * atomic64_add_unless - add unless the number is a given value + * arch_atomic64_add_unless - add unless the number is a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. @@ -283,7 +284,8 @@ static inline int atomic64_add_negative(long long i, atomic64_t *v) * Atomically adds @a to @v, so long as it was not @u. * Returns non-zero if the add was done, zero otherwise. 
*/ -static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) +static inline int arch_atomic64_add_unless(atomic64_t *v, long long a, + long long u) { unsigned low = (unsigned)u; unsigned high = (unsigned)(u >> 32); @@ -294,7 +296,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u) } -static inline int atomic64_inc_not_zero(atomic64_t *v) +static inline int arch_atomic64_inc_not_zero(atomic64_t *v) { int r; alternative_atomic64(inc_not_zero, "=&a" (r), @@ -302,7 +304,7 @@ static inline int atomic64_inc_not_zero(atomic64_t *v) return r; } -static inline long long atomic64_dec_if_positive(atomic64_t *v) +static inline long long arch_atomic64_dec_if_positive(atomic64_t *v) { long long r; alternative_atomic64(dec_if_positive, "=&A" (r), @@ -313,70 +315,70 @@ static inline long long atomic64_dec_if_positive(atomic64_t *v) #undef alternative_atomic64 #undef __alternative_atomic64 -static inline void atomic64_and(long long i, atomic64_t *v) +static inline void arch_atomic64_and(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c & i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; } -static inline long long atomic64_fetch_and(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_and(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c & i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c) c = old; return old; } -static inline void atomic64_or(long long i, atomic64_t *v) +static inline void arch_atomic64_or(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c | i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; } -static inline long long atomic64_fetch_or(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_or(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c | i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c) c = old; return old; } -static inline void atomic64_xor(long long i, atomic64_t *v) +static inline void arch_atomic64_xor(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; } -static inline long long atomic64_fetch_xor(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_xor(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c ^ i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c) c = old; return old; } -static inline long long atomic64_fetch_add(long long i, atomic64_t *v) +static inline long long arch_atomic64_fetch_add(long long i, atomic64_t *v) { long long old, c = 0; - while ((old = atomic64_cmpxchg(v, c, c + i)) != c) + while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c) c = old; return old; } -#define atomic64_fetch_sub(i, v) atomic64_fetch_add(-(i), (v)) +#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), (v)) #endif /* _ASM_X86_ATOMIC64_32_H */ diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 738495c..6106b59 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -11,37 +11,37 @@ #define ATOMIC64_INIT(i) { (i) } /** - * atomic64_read - read atomic64 variable + * arch_atomic64_read - read atomic64 variable * @v: pointer of type atomic64_t * * Atomically 
reads the value of @v. * Doesn't imply a read memory barrier. */ -static inline long atomic64_read(const atomic64_t *v) +static inline long arch_atomic64_read(const atomic64_t *v) { return READ_ONCE((v)->counter); } /** - * atomic64_set - set atomic64 variable + * arch_atomic64_set - set atomic64 variable * @v: pointer to type atomic64_t * @i: required value * * Atomically sets the value of @v to @i. */ -static inline void atomic64_set(atomic64_t *v, long i) +static inline void arch_atomic64_set(atomic64_t *v, long i) { WRITE_ONCE(v->counter, i); } /** - * atomic64_add - add integer to atomic64 variable + * arch_atomic64_add - add integer to atomic64 variable * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v. */ -static __always_inline void atomic64_add(long i, atomic64_t *v) +static __always_inline void arch_atomic64_add(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "addq %1,%0" : "=m" (v->counter) @@ -49,13 +49,13 @@ static __always_inline void atomic64_add(long i, atomic64_t *v) } /** - * atomic64_sub - subtract the atomic64 variable + * arch_atomic64_sub - subtract the atomic64 variable * @i: integer value to subtract * @v: pointer to type atomic64_t * * Atomically subtracts @i from @v. */ -static inline void atomic64_sub(long i, atomic64_t *v) +static inline void arch_atomic64_sub(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "subq %1,%0" : "=m" (v->counter) @@ -63,7 +63,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) } /** - * atomic64_sub_and_test - subtract value from variable and test result + * arch_atomic64_sub_and_test - subtract value from variable and test result * @i: integer value to subtract * @v: pointer to type atomic64_t * @@ -71,18 +71,18 @@ static inline void atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline bool atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } /** - * atomic64_inc - increment atomic64 variable + * arch_atomic64_inc - increment atomic64 variable * @v: pointer to type atomic64_t * * Atomically increments @v by 1. */ -static __always_inline void atomic64_inc(atomic64_t *v) +static __always_inline void arch_atomic64_inc(atomic64_t *v) { asm volatile(LOCK_PREFIX "incq %0" : "=m" (v->counter) @@ -90,12 +90,12 @@ static __always_inline void atomic64_inc(atomic64_t *v) } /** - * atomic64_dec - decrement atomic64 variable + * arch_atomic64_dec - decrement atomic64 variable * @v: pointer to type atomic64_t * * Atomically decrements @v by 1. */ -static __always_inline void atomic64_dec(atomic64_t *v) +static __always_inline void arch_atomic64_dec(atomic64_t *v) { asm volatile(LOCK_PREFIX "decq %0" : "=m" (v->counter) @@ -103,33 +103,33 @@ static __always_inline void atomic64_dec(atomic64_t *v) } /** - * atomic64_dec_and_test - decrement and test + * arch_atomic64_dec_and_test - decrement and test * @v: pointer to type atomic64_t * * Atomically decrements @v by 1 and * returns true if the result is 0, or false for all other * cases. 
*/ -static inline bool atomic64_dec_and_test(atomic64_t *v) +static inline bool arch_atomic64_dec_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } /** - * atomic64_inc_and_test - increment and test + * arch_atomic64_inc_and_test - increment and test * @v: pointer to type atomic64_t * * Atomically increments @v by 1 * and returns true if the result is zero, or false for all * other cases. */ -static inline bool atomic64_inc_and_test(atomic64_t *v) +static inline bool arch_atomic64_inc_and_test(atomic64_t *v) { GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } /** - * atomic64_add_negative - add and test if negative + * arch_atomic64_add_negative - add and test if negative * @i: integer value to add * @v: pointer to type atomic64_t * @@ -137,59 +137,59 @@ static inline bool atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline bool atomic64_add_negative(long i, atomic64_t *v) +static inline bool arch_atomic64_add_negative(long i, atomic64_t *v) { GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } /** - * atomic64_add_return - add and return + * arch_atomic64_add_return - add and return * @i: integer value to add * @v: pointer to type atomic64_t * * Atomically adds @i to @v and returns @i + @v */ -static __always_inline long atomic64_add_return(long i, atomic64_t *v) +static __always_inline long arch_atomic64_add_return(long i, atomic64_t *v) { return i + xadd(&v->counter, i); } -static inline long atomic64_sub_return(long i, atomic64_t *v) +static inline long arch_atomic64_sub_return(long i, atomic64_t *v) { - return atomic64_add_return(-i, v); + return arch_atomic64_add_return(-i, v); } -static inline long atomic64_fetch_add(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_add(long i, atomic64_t *v) { return xadd(&v->counter, i); } -static inline long atomic64_fetch_sub(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_sub(long i, atomic64_t *v) { return xadd(&v->counter, -i); } -#define atomic64_inc_return(v) (atomic64_add_return(1, (v))) -#define atomic64_dec_return(v) (atomic64_sub_return(1, (v))) +#define arch_atomic64_inc_return(v) (arch_atomic64_add_return(1, (v))) +#define arch_atomic64_dec_return(v) (arch_atomic64_sub_return(1, (v))) -static inline long atomic64_cmpxchg(atomic64_t *v, long old, long new) +static inline long arch_atomic64_cmpxchg(atomic64_t *v, long old, long new) { - return cmpxchg(&v->counter, old, new); + return arch_cmpxchg(&v->counter, old, new); } -#define atomic64_try_cmpxchg atomic64_try_cmpxchg -static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) +#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg +static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, long new) { return try_cmpxchg(&v->counter, old, new); } -static inline long atomic64_xchg(atomic64_t *v, long new) +static inline long arch_atomic64_xchg(atomic64_t *v, long new) { return xchg(&v->counter, new); } /** - * atomic64_add_unless - add unless the number is a given value + * arch_atomic64_add_unless - add unless the number is a given value * @v: pointer of type atomic64_t * @a: the amount to add to v... * @u: ...unless v is equal to u. @@ -197,37 +197,37 @@ static inline long atomic64_xchg(atomic64_t *v, long new) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. 
*/ -static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) +static inline bool arch_atomic64_add_unless(atomic64_t *v, long a, long u) { - s64 c = atomic64_read(v); + s64 c = arch_atomic64_read(v); do { if (unlikely(c == u)) return false; - } while (!atomic64_try_cmpxchg(v, &c, c + a)); + } while (!arch_atomic64_try_cmpxchg(v, &c, c + a)); return true; } -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) +#define arch_atomic64_inc_not_zero(v) arch_atomic64_add_unless((v), 1, 0) /* - * atomic64_dec_if_positive - decrement by 1 if old value positive + * arch_atomic64_dec_if_positive - decrement by 1 if old value positive * @v: pointer of type atomic_t * * The function returns the old value of *v minus 1, even if * the atomic variable, v, was not decremented. */ -static inline long atomic64_dec_if_positive(atomic64_t *v) +static inline long arch_atomic64_dec_if_positive(atomic64_t *v) { - s64 dec, c = atomic64_read(v); + s64 dec, c = arch_atomic64_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; - } while (!atomic64_try_cmpxchg(v, &c, dec)); + } while (!arch_atomic64_try_cmpxchg(v, &c, dec)); return dec; } -static inline void atomic64_and(long i, atomic64_t *v) +static inline void arch_atomic64_and(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "andq %1,%0" : "+m" (v->counter) @@ -235,16 +235,16 @@ static inline void atomic64_and(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_and(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_and(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val & i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val & i)); return val; } -static inline void atomic64_or(long i, atomic64_t *v) +static inline void arch_atomic64_or(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "orq %1,%0" : "+m" (v->counter) @@ -252,16 +252,16 @@ static inline void atomic64_or(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_or(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_or(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val | i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val | i)); return val; } -static inline void atomic64_xor(long i, atomic64_t *v) +static inline void arch_atomic64_xor(long i, atomic64_t *v) { asm volatile(LOCK_PREFIX "xorq %1,%0" : "+m" (v->counter) @@ -269,12 +269,12 @@ static inline void atomic64_xor(long i, atomic64_t *v) : "memory"); } -static inline long atomic64_fetch_xor(long i, atomic64_t *v) +static inline long arch_atomic64_fetch_xor(long i, atomic64_t *v) { - s64 val = atomic64_read(v); + s64 val = arch_atomic64_read(v); do { - } while (!atomic64_try_cmpxchg(v, &val, val ^ i)); + } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i)); return val; } diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h index 56bd436..e3efd8a 100644 --- a/arch/x86/include/asm/cmpxchg.h +++ b/arch/x86/include/asm/cmpxchg.h @@ -145,13 +145,13 @@ extern void __add_wrong_size(void) # include #endif -#define cmpxchg(ptr, old, new) \ +#define arch_cmpxchg(ptr, old, new) \ __cmpxchg(ptr, old, new, sizeof(*(ptr))) -#define sync_cmpxchg(ptr, old, new) \ +#define arch_sync_cmpxchg(ptr, old, new) \ __sync_cmpxchg(ptr, old, new, sizeof(*(ptr))) -#define cmpxchg_local(ptr, old, new) \ +#define arch_cmpxchg_local(ptr, old, new) \ __cmpxchg_local(ptr, old, new, 
sizeof(*(ptr))) @@ -221,7 +221,7 @@ extern void __add_wrong_size(void) #define __try_cmpxchg(ptr, pold, new, size) \ __raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX) -#define try_cmpxchg(ptr, pold, new) \ +#define try_cmpxchg(ptr, pold, new) \ __try_cmpxchg((ptr), (pold), (new), sizeof(*(ptr))) /* @@ -250,10 +250,10 @@ extern void __add_wrong_size(void) __ret; \ }) -#define cmpxchg_double(p1, p2, o1, o2, n1, n2) \ +#define arch_cmpxchg_double(p1, p2, o1, o2, n1, n2) \ __cmpxchg_double(LOCK_PREFIX, p1, p2, o1, o2, n1, n2) -#define cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ +#define arch_cmpxchg_double_local(p1, p2, o1, o2, n1, n2) \ __cmpxchg_double(, p1, p2, o1, o2, n1, n2) #endif /* ASM_X86_CMPXCHG_H */ diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h index 1732704..1a2eafca 100644 --- a/arch/x86/include/asm/cmpxchg_32.h +++ b/arch/x86/include/asm/cmpxchg_32.h @@ -36,10 +36,10 @@ static inline void set_64bit(volatile u64 *ptr, u64 value) } #ifdef CONFIG_X86_CMPXCHG64 -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \ (unsigned long long)(n))) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \ (unsigned long long)(n))) #endif @@ -76,7 +76,7 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) * to simulate the cmpxchg8b on the 80386 and 80486 CPU. */ -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __old = (o); \ @@ -93,7 +93,7 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new) __ret; }) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ({ \ __typeof__(*(ptr)) __ret; \ __typeof__(*(ptr)) __old = (o); \ diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h index 03cad19..bfca3b3 100644 --- a/arch/x86/include/asm/cmpxchg_64.h +++ b/arch/x86/include/asm/cmpxchg_64.h @@ -7,13 +7,13 @@ static inline void set_64bit(volatile u64 *ptr, u64 val) *ptr = val; } -#define cmpxchg64(ptr, o, n) \ +#define arch_cmpxchg64(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg((ptr), (o), (n)); \ }) -#define cmpxchg64_local(ptr, o, n) \ +#define arch_cmpxchg64_local(ptr, o, n) \ ({ \ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \ cmpxchg_local((ptr), (o), (n)); \ -- cgit v1.1 From ac605bee0bfab40fd5d11964705e907d2d5a32de Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Mon, 29 Jan 2018 18:26:07 +0100 Subject: locking/atomic, asm-generic, x86: Add comments for atomic instrumentation The comments are factored out from the code changes to make them easier to read. Add them separately to explain some non-obvious aspects. 
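For illustration, the generic instrumented wrappers that sit on top of this arch_atomic*() layer follow roughly the pattern below (a minimal sketch in the spirit of include/asm-generic/atomic-instrumented.h, assuming the kasan_check_read()/kasan_check_write() helpers; it is not a quote of that file):

	static __always_inline int atomic_read(const atomic_t *v)
	{
		/* KASAN sees the access once, here in the generic wrapper ... */
		kasan_check_read(v, sizeof(*v));
		/* ... while the arch_ helper below stays uninstrumented. */
		return arch_atomic_read(v);
	}

	static __always_inline void atomic_set(atomic_t *v, int i)
	{
		kasan_check_write(v, sizeof(*v));
		arch_atomic_set(v, i);
	}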
Signed-off-by: Dmitry Vyukov Cc: Andrew Morton Cc: Andrey Ryabinin Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Cc: kasan-dev@googlegroups.com Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/cc595efc644bb905407012d82d3eb8bac3368e7a.1517246437.git.dvyukov@google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/atomic.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 33afc96..0db6bec 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -24,6 +24,10 @@ */ static __always_inline int arch_atomic_read(const atomic_t *v) { + /* + * Note for KASAN: we deliberately don't use READ_ONCE_NOCHECK() here, + * it's non-inlined function that increases binary size and stack usage. + */ return READ_ONCE((v)->counter); } -- cgit v1.1 From 50beba07a0e42ebd4454adc97515a2a2a969645b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Feb 2018 20:05:04 +0200 Subject: ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export This is a preparation patch to allow override the hardware reduced initialization on ACPI enabled platforms. No functional change intended. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Cc: Eric Biederman Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Link: http://lkml.kernel.org/r/20180220180506.65523-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 6609dd7..a303d7b 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -140,6 +140,8 @@ static inline u64 acpi_arch_get_root_pointer(void) return x86_init.acpi.get_root_pointer(); } +void acpi_generic_reduced_hw_init(void); + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -149,6 +151,8 @@ static inline void acpi_noirq_set(void) { } static inline void acpi_disable_pci(void) { } static inline void disable_acpi(void) { } +static inline void acpi_generic_reduced_hw_init(void) { } + #endif /* !CONFIG_ACPI */ #define ARCH_HAS_POWER_INIT 1 -- cgit v1.1 From 81b53e5ff21e09b42525cfa08f2b0af2b8c5f465 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Feb 2018 20:05:05 +0200 Subject: ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback Some ACPI hardware reduced platforms need to initialize certain devices defined by the ACPI hardware specification even though in principle those devices should not be present in an ACPI hardware reduced platform. To allow that to happen, make it possible to override the generic x86_init callbacks and provide a custom legacy_pic value, add a new ->reduced_hw_early_init() callback to struct x86_init_acpi and make acpi_reduced_hw_init() use it. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Cc: Eric Biederman Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . 
Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Link: http://lkml.kernel.org/r/20180220180506.65523-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2e2c34d..5bd45a8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -133,9 +133,11 @@ struct x86_hyper_init { /** * struct x86_init_acpi - x86 ACPI init functions * @get_root_pointer: get RSDP address + * @reduced_hw_early_init: hardware reduced platform early init */ struct x86_init_acpi { u64 (*get_root_pointer)(void); + void (*reduced_hw_early_init)(void); }; /** -- cgit v1.1 From a14bff131108faf50cc0cf864589fd71ee216c96 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 14 Mar 2018 11:24:27 +0000 Subject: x86/speculation, objtool: Annotate indirect calls/jumps for objtool on 32-bit kernels In the following commit: 9e0e3c5130e9 ("x86/speculation, objtool: Annotate indirect calls/jumps for objtool") ... we added annotations for CALL_NOSPEC/JMP_NOSPEC on 64-bit x86 kernels, but we did not annotate the 32-bit path. Annotate it similarly. Signed-off-by: Andy Whitcroft Acked-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: David Woodhouse Cc: Greg Kroah-Hartman Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180314112427.22351-1-apw@canonical.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nospec-branch.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index b7063cf..b3996d6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -183,7 +183,10 @@ * otherwise we'll run out of registers. We don't care about CET * here, anyway. */ -# define CALL_NOSPEC ALTERNATIVE("call *%[thunk_target]\n", \ +# define CALL_NOSPEC \ + ALTERNATIVE( \ + ANNOTATE_RETPOLINE_SAFE \ + "call *%[thunk_target]\n", \ " jmp 904f;\n" \ " .align 16\n" \ "901: call 903f;\n" \ -- cgit v1.1 From fc804f65d46236c211f530174904c1ed70db5888 Mon Sep 17 00:00:00 2001 From: Rajvi Jingar Date: Thu, 8 Mar 2018 09:28:36 -0800 Subject: x86/tsc: Convert ART in nanoseconds to TSC Device drivers use get_device_system_crosststamp() to produce precise system/device cross-timestamps. The PHC clock and ALSA interfaces, for example, make the cross-timestamps available to user applications. On Intel platforms, get_device_system_crosststamp() requires a TSC value derived from ART (Always Running Timer) to compute the monotonic raw and realtime system timestamps. Starting with Intel Goldmont platforms, the PCIe root complex supports the PTM time sync protocol. PTM requires all timestamps to be in units of nanoseconds. The Intel root complex hardware propagates system time derived from ART in units of nanoseconds performing the conversion as follows: ART_NS = ART * 1e9 / <ART frequency> When user software requests a cross-timestamp, the system timestamps (generally read from device registers) must be converted to TSC by the driver software as follows: TSC = ART_NS * TSC_KHZ / 1e6 This is valid when CPU feature flag X86_FEATURE_TSC_KNOWN_FREQ is set indicating that tsc_khz is derived from CPUID[15H].
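Expressed as code, the driver-side step amounts to something like the sketch below (illustrative only, not the in-kernel convert_art_ns_to_tsc() implementation; mul_u64_u32_div() from linux/math64.h is assumed to be available):

	#include <linux/math64.h>

	/* TSC = ART_NS * TSC_KHZ / 1e6, using a wide intermediate product
	 * so that large nanosecond values do not overflow. */
	static u64 art_ns_to_tsc_sketch(u64 art_ns, u32 tsc_khz)
	{
		return mul_u64_u32_div(art_ns, tsc_khz, 1000000);
	}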
Drivers should check whether this flag is set before conversion to TSC is attempted. Suggested-by: Christopher S. Hall Signed-off-by: Rajvi Jingar Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Link: https://lkml.kernel.org/r/1520530116-4925-1-git-send-email-rajvi.jingar@intel.com --- arch/x86/include/asm/tsc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index cf5d53c..2701d22 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -31,6 +31,7 @@ static inline cycles_t get_cycles(void) } extern struct system_counterval_t convert_art_to_tsc(u64 art); +extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns); extern void tsc_early_delay_calibrate(void); extern void tsc_init(void); -- cgit v1.1 From 2613f36ed965d0e5a595a1d931fd3b480e82d6fd Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 14 Mar 2018 19:36:14 +0100 Subject: x86/microcode: Attempt late loading only when new microcode is present Return UCODE_NEW from the scanning functions to denote that new microcode was found and only then attempt the expensive synchronization dance. Reported-by: Emanuel Czirai Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Tested-by: Emanuel Czirai Tested-by: Ashok Raj Tested-by: Tom Lendacky Link: https://lkml.kernel.org/r/20180314183615.17629-1-bp@alien8.de --- arch/x86/include/asm/microcode.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 7fb1047..6cf0e4c 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -39,6 +39,7 @@ struct device; enum ucode_state { UCODE_OK = 0, + UCODE_NEW, UCODE_UPDATED, UCODE_NFOUND, UCODE_ERROR, -- cgit v1.1 From 5927145efd5de71976e62e2822511b13014d7e56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:13 +0100 Subject: x86/cpu: Remove the CONFIG_X86_PPRO_FENCE=y quirk There were only a few Pentium Pro multiprocessor systems where this erratum applied. They are more than 20 years old now, and we've slowly dropped places which put the workarounds in and discouraged anyone from enabling the workaround. Get rid of it for good.
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-2-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/barrier.h | 30 ------------------------------ arch/x86/include/asm/io.h | 15 --------------- 2 files changed, 45 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index e1259f0..042b5e8 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -52,11 +52,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define barrier_nospec() alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC, \ "lfence", X86_FEATURE_LFENCE_RDTSC) -#ifdef CONFIG_X86_PPRO_FENCE -#define dma_rmb() rmb() -#else #define dma_rmb() barrier() -#endif #define dma_wmb() barrier() #ifdef CONFIG_X86_32 @@ -68,30 +64,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -#if defined(CONFIG_X86_PPRO_FENCE) - -/* - * For this option x86 doesn't have a strong TSO memory - * model and we should fall back to full barriers. - */ - -#define __smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - WRITE_ONCE(*p, v); \ -} while (0) - -#define __smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = READ_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - __smp_mb(); \ - ___p1; \ -}) - -#else /* regular x86 TSO memory ordering */ - #define __smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ @@ -107,8 +79,6 @@ do { \ ___p1; \ }) -#endif - /* Atomic operations are already serializing on x86 */ #define __smp_mb__before_atomic() barrier() #define __smp_mb__after_atomic() barrier() diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 95e9486..f6e5b93 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -232,21 +232,6 @@ extern void set_iounmap_nonlazy(void); */ #define __ISA_IO_base ((char __iomem *)(PAGE_OFFSET)) -/* - * Cache management - * - * This needed for two cases - * 1. Out of order aware processors - * 2. Accidentally out of order processors (PPro errata #51) - */ - -static inline void flush_write_buffers(void) -{ -#if defined(CONFIG_X86_PPRO_FENCE) - asm volatile("lock; addl $0,0(%%esp)": : :"memory"); -#endif -} - #endif /* __KERNEL__ */ extern void native_io_delay(void); -- cgit v1.1 From 038d07a283d62336b32cc23b62aecdf9418cfc11 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:14 +0100 Subject: x86/dma: Remove dma_alloc_coherent_mask() These days all devices (including the ISA fallback device) have a coherent DMA mask set, so remove the workaround. 
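A device that needs a narrower mask is expected to state it explicitly through the DMA API; a minimal sketch of a driver probe path (the pdev pointer and the 32-bit limit are example values only):

	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)))
		return -EIO;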
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-3-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 6277c83..545bf37 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -44,26 +44,12 @@ extern void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs); -static inline unsigned long dma_alloc_coherent_mask(struct device *dev, - gfp_t gfp) -{ - unsigned long dma_mask = 0; - - dma_mask = dev->coherent_dma_mask; - if (!dma_mask) - dma_mask = (gfp & GFP_DMA) ? DMA_BIT_MASK(24) : DMA_BIT_MASK(32); - - return dma_mask; -} - static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) { - unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); - - if (dma_mask <= DMA_BIT_MASK(24)) + if (dev->coherent_dma_mask <= DMA_BIT_MASK(24)) gfp |= GFP_DMA; #ifdef CONFIG_X86_64 - if (dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) + if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; #endif return gfp; -- cgit v1.1 From fec777c385b6376048fc4b08f039366545b335cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:15 +0100 Subject: x86/dma: Use DMA-direct (CONFIG_DMA_DIRECT_OPS=y) The generic DMA-direct (CONFIG_DMA_DIRECT_OPS=y) implementation is now functionally equivalent to the x86 nommu dma_map implementation, so switch over to using it. That includes switching from using x86_dma_supported in various IOMMU drivers to use dma_direct_supported instead, which provides the same functionality. 
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-4-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 8 -------- arch/x86/include/asm/iommu.h | 3 --- 2 files changed, 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 545bf37..df9816b 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -36,14 +36,6 @@ int arch_dma_supported(struct device *dev, u64 mask); bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_addr, gfp_t flag, - unsigned long attrs); - -extern void dma_generic_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) { if (dev->coherent_dma_mask <= DMA_BIT_MASK(24)) diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 1e5d5d9..baedab8 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -2,13 +2,10 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern const struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; -int x86_dma_supported(struct device *dev, u64 mask); - /* 10 seconds */ #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) -- cgit v1.1 From 6e4bf586778315b3fc53b728c53eefc247cfc3ff Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:16 +0100 Subject: x86/dma: Use generic swiotlb_ops The generic swiotlb DMA ops were based on the x86 ones and provide equivalent functionality, so use them. Also fix the sta2x11 case. For that SOC the DMA map ops need an additional physical to DMA address translation. For swiotlb buffers that is done through the phys_to_dma helper, but the sta2x11_dma_ops also added an additional translation on the return value from x86_swiotlb_alloc_coherent, which is only correct if that function returns a direct allocation and not a swiotlb buffer. With the generic swiotlb and DMA-direct code phys_to_dma is not always used and the separate sta2x11_dma_ops can be replaced with a simple bit that marks if the additional physical to DMA address translation is needed.
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-5-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/device.h | 3 +++ arch/x86/include/asm/swiotlb.h | 8 -------- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 5e12c63..a8f6c80 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -6,6 +6,9 @@ struct dev_archdata { #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif +#ifdef CONFIG_STA2X11 + bool is_sta2x11; +#endif }; #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 1c6a6cb..ff6c92e 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h @@ -27,12 +27,4 @@ static inline void pci_swiotlb_late_init(void) { } #endif - -extern void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs); -extern void x86_swiotlb_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_addr, - unsigned long attrs); - #endif /* _ASM_X86_SWIOTLB_H */ -- cgit v1.1 From 178c5682447ac0e315f0f3e27664fd4e0d2721cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:21 +0100 Subject: x86/dma: Remove dma_alloc_coherent_gfp_flags() All dma_ops implementations used on x86 now take care of setting their own required GFP_ masks for the allocation. And given that the common code now clears harmful flags itself that means we can stop the flags in all the IOMMU implementations as well. Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-10-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-mapping.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index df9816b..89ce4bf 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -36,15 +36,4 @@ int arch_dma_supported(struct device *dev, u64 mask); bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); #define arch_dma_alloc_attrs arch_dma_alloc_attrs -static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) -{ - if (dev->coherent_dma_mask <= DMA_BIT_MASK(24)) - gfp |= GFP_DMA; -#ifdef CONFIG_X86_64 - if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; -#endif - return gfp; -} - #endif -- cgit v1.1 From e7de6c7cc207be78369d45fb833d7d53aeda47f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:23 +0100 Subject: dma/swiotlb: Remove swiotlb_set_mem_attributes() Now that set_memory_decrypted() is always available we can just call it directly. 
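At the former wrapper's call sites the direct call then looks roughly like this (sketch; vaddr and bytes stand in for the buffer being shared):

	/* mark the buffer unencrypted under SME/SEV before handing it out */
	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);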
Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Reviewed-by: Konrad Rzeszutek Wilk Reviewed-by: Tom Lendacky Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-12-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mem_encrypt.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 8fe61ad..c064383 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -49,8 +49,6 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); /* Architecture __weak replacement functions */ void __init mem_encrypt_init(void); -void swiotlb_set_mem_attributes(void *vaddr, unsigned long size); - bool sme_active(void); bool sev_active(void); -- cgit v1.1 From b6e05477c10c12e36141558fc14f04b00ea634d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 19 Mar 2018 11:38:24 +0100 Subject: dma/direct: Handle the memory encryption bit in common code Give the basic phys_to_dma() and dma_to_phys() helpers a __-prefix and add the memory encryption mask to the non-prefixed versions. Use the __-prefixed versions directly instead of clearing the mask again in various places. Tested-by: Tom Lendacky Signed-off-by: Christoph Hellwig Reviewed-by: Thomas Gleixner Cc: David Woodhouse Cc: Joerg Roedel Cc: Jon Mason Cc: Konrad Rzeszutek Wilk Cc: Linus Torvalds Cc: Muli Ben-Yehuda Cc: Peter Zijlstra Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20180319103826.12853-13-hch@lst.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dma-direct.h | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/dma-direct.h b/arch/x86/include/asm/dma-direct.h index 1295bc6..1a19251 100644 --- a/arch/x86/include/asm/dma-direct.h +++ b/arch/x86/include/asm/dma-direct.h @@ -2,29 +2,8 @@ #ifndef ASM_X86_DMA_DIRECT_H #define ASM_X86_DMA_DIRECT_H 1 -#include - -#ifdef CONFIG_X86_DMA_REMAP /* Platform code defines bridge-specific code */ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size); -dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); -#else -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return 0; - - return addr + size - 1 <= *dev->dma_mask; -} - -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(paddr); -} +dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(daddr); -} -#endif /* CONFIG_X86_DMA_REMAP */ #endif /* ASM_X86_DMA_DIRECT_H */ -- cgit v1.1 From d0266046ad54e0c964941364cd82a0d0478ce286 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 19 Mar 2018 16:41:26 +0100 Subject: x86: Remove FAST_FEATURE_TESTS Since we want to rely on static branches to avoid speculation, remove any possible fallback code for static_cpu_has. 
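A typical static_cpu_has() call site looks like this (illustrative; the copy helpers are made up):

	if (static_cpu_has(X86_FEATURE_ERMS))
		copy_fast();	/* branch direction patched at boot */
	else
		copy_slow();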
Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Cc: torvalds@linux-foundation.org Link: https://lkml.kernel.org/r/20180319154717.705383007@infradead.org --- arch/x86/include/asm/cpufeature.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 736771c..b27da96 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -140,7 +140,6 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS) /* * Static testing of CPU features. Used the same as boot_cpu_has(). * These will statically patch the target code for additional @@ -196,13 +195,6 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) -#else -/* - * Fall back to dynamic for gcc versions which don't support asm goto. Should be - * a minority now anyway. - */ -#define static_cpu_has(bit) boot_cpu_has(bit) -#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) -- cgit v1.1 From 32d43cd391bacb5f0814c2624399a5dad3501d09 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 20 Mar 2018 12:16:59 -0700 Subject: kvm/x86: fix icebp instruction handling The undocumented 'icebp' instruction (aka 'int1') works pretty much like 'int3' in the absence of in-circuit probing equipment (except, obviously, that it raises #DB instead of raising #BP), and is used by some validation test-suites as such. But Andy Lutomirski noticed that his test suite acted differently in kvm than on bare hardware. The reason is that kvm used an inexact test for the icebp instruction: it just assumed that an all-zero VM exit qualification value meant that the VM exit was due to icebp. That is not unlike the guess that do_debug() does for the actual exception handling case, but it's purely a heuristic, not an absolute rule. do_debug() does it because it wants to ascribe _some_ reasons to the #DB that happened, and an empty %dr6 value means that 'icebp' is the most likely cause and we have no better information. But kvm can just do it right, because unlike the do_debug() case, kvm actually sees the real reason for the #DB in the VM-exit interruption information field. So instead of relying on an inexact heuristic, just use the actual VM exit information that says "it was 'icebp'". Right now the 'icebp' instruction isn't technically documented by Intel, but that will hopefully change. The special "privileged software exception" information _is_ actually mentioned in the Intel SDM, even though the cause of it isn't enumerated. Reported-by: Andy Lutomirski Tested-by: Paolo Bonzini Signed-off-by: Linus Torvalds --- arch/x86/include/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 8b67807..5db8b0b 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field { #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ #define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */ #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ +#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */ #define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */ /* GUEST_INTERRUPTIBILITY_INFO flags.
*/ -- cgit v1.1 From ea89c065482179b2bf9f9b6788b06a6e0c68a73b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 23 Mar 2018 00:05:29 +0100 Subject: x86/tsc: Get rid of rdtscll() Commit 99770737ca7e ("x86/asm/tsc: Add rdtscll() merge helper") added rdtscll() in August 2015 along with the comment: /* Deprecated, keep it for a cycle for easier merging: */ 12 cycles later it's really overdue for removal. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/msr.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 30df295..392f05d 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -218,9 +218,6 @@ static __always_inline unsigned long long rdtsc_ordered(void) return rdtsc(); } -/* Deprecated, keep it for a cycle for easier merging: */ -#define rdtscll(now) do { (now) = rdtsc_ordered(); } while (0) - static inline unsigned long long native_read_pmc(int counter) { DECLARE_ARGS(val, low, high); -- cgit v1.1 From e25283bf83bd97a61007ab383695f4872e2eb43f Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Sun, 25 Mar 2018 14:04:30 -0700 Subject: x86/apic: Finish removing unused callbacks The ->cpu_mask_to_apicid() and ->vector_allocation_domain() callbacks are now unused, so remove them. Signed-off-by: David Rientjes Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: baab1e84b112 ("x86/apic: Remove unused callbacks") Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1803251403540.80485@chino.kir.corp.google.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index c6a3201..40a3d36 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -304,12 +304,6 @@ struct apic { u32 irq_delivery_mode; u32 irq_dest_mode; - /* Functions and data related to vector allocation */ - void (*vector_allocation_domain)(int cpu, struct cpumask *retmask, - const struct cpumask *mask); - int (*cpu_mask_to_apicid)(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); u32 (*calc_dest_apicid)(unsigned int cpu); /* ICR related functions */ @@ -499,17 +493,7 @@ extern void default_setup_apic_routing(void); extern u32 apic_default_calc_apicid(unsigned int cpu); extern u32 apic_flat_calc_apicid(unsigned int cpu); -extern int flat_cpu_mask_to_apicid(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); -extern int default_cpu_mask_to_apicid(const struct cpumask *cpumask, - struct irq_data *irqdata, - unsigned int *apicid); extern bool default_check_apicid_used(physid_mask_t *map, int apicid); -extern void flat_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask); -extern void default_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask); extern void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap); extern int default_cpu_present_to_apicid(int mps_cpu); extern int default_check_phys_apicid_present(int phys_apicid); -- cgit v1.1 From 631fe154edb0a37308d0116a0f9b7bba9dca6218 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 26 Mar 2018 14:09:27 -0700 Subject: perf/x86: Update rdpmc_always_available static key to the modern API No changes in refcount semantics -- use DEFINE_STATIC_KEY_FALSE() for initialization and replace: 
static_key_slow_inc|dec() => static_branch_inc|dec() static_key_false() => static_branch_unlikely() Added a '_key' suffix to rdpmc_always_available, for better self-documentation. Signed-off-by: Davidlohr Bueso Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Link: http://lkml.kernel.org/r/20180326210929.5244-5-dave@stgolabs.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mmu_context.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 1de72ce..57e3785 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -24,11 +24,12 @@ static inline void paravirt_activate_mm(struct mm_struct *prev, #endif /* !CONFIG_PARAVIRT */ #ifdef CONFIG_PERF_EVENTS -extern struct static_key rdpmc_always_available; + +DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); static inline void load_mm_cr4(struct mm_struct *mm) { - if (static_key_false(&rdpmc_always_available) || + if (static_branch_unlikely(&rdpmc_always_available_key) || atomic_read(&mm->context.perf_rdpmc_allowed)) cr4_set_bits(X86_CR4_PCE); else -- cgit v1.1 From bd6271039ee6f0c9b468148fc2d73e0584af6b4f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 14 Jan 2018 15:05:04 +0300 Subject: x86/alternatives: Fixup alternative_call_2 The following pattern fails to compile while the same pattern with alternative_call() does: if (...) alternative_call_2(...); else alternative_call_2(...); as it expands into if (...) { }; <=== else { }; Signed-off-by: Alexey Dobriyan Signed-off-by: Thomas Gleixner Acked-by: Borislav Petkov Link: https://lkml.kernel.org/r/20180114120504.GA11368@avx2 --- arch/x86/include/asm/alternative.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index cf5961c..4cd6a3b 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -218,13 +218,11 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ output, input...) \ -{ \ asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ "call %P[new2]", feature2) \ : output, ASM_CALL_CONSTRAINT \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ - [new2] "i" (newfunc2), ## input); \ -} + [new2] "i" (newfunc2), ## input) /* * use this macro(s) if you need more than one output parameter -- cgit v1.1 From 151ad17fbe5e56afa59709f41980508672c777ce Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Tue, 27 Mar 2018 17:09:06 -0500 Subject: x86/platform/uv/BAU: Add APIC idt entry BAU uses the old alloc_initr_gate90 method to setup its interrupt. This fails silently as the BAU vector is in the range of APIC vectors that are registered to the spurious interrupt handler. As a consequence BAU broadcasts are not handled, and the broadcast source CPU hangs. Update BAU to use new idt structure. Fixes: dc20b2d52653 ("x86/idt: Move interrupt gate initialization to IDT code") Signed-off-by: Andrew Banman Signed-off-by: Thomas Gleixner Acked-by: Mike Travis Cc: Dimitri Sivanich Cc: Russ Anderson Cc: stable@vger.kernel.org Cc: "H. 
Peter Anvin" Link: https://lkml.kernel.org/r/1522188546-196177-1-git-send-email-abanman@hpe.com --- arch/x86/include/asm/hw_irq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 2851077..32e666e 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -36,6 +36,7 @@ extern asmlinkage void kvm_posted_intr_wakeup_ipi(void); extern asmlinkage void kvm_posted_intr_nested_ipi(void); extern asmlinkage void error_interrupt(void); extern asmlinkage void irq_work_interrupt(void); +extern asmlinkage void uv_bau_message_intr1(void); extern asmlinkage void spurious_interrupt(void); extern asmlinkage void thermal_interrupt(void); -- cgit v1.1 From bd47a85acd727e27b7283daff557865ad04c59f6 Mon Sep 17 00:00:00 2001 From: "mike.travis@hpe.com" Date: Wed, 28 Mar 2018 12:40:11 -0500 Subject: x86/platform/UV: Fix critical UV MMR address error A critical error was found testing the fixed UV4 HUB in that an MMR address was found to be incorrect. This causes the virtual address space for accessing the MMIOH1 region to be allocated with the incorrect size. Fixes: 673aa20c55a1 ("x86/platform/UV: Update uv_mmrs.h to prepare for UV4A fixes") Signed-off-by: Mike Travis Signed-off-by: Thomas Gleixner Cc: Dimitri Sivanich Cc: Russ Anderson Cc: Andrew Banman Link: https://lkml.kernel.org/r/20180328174011.041801248@stormcage.americas.sgi.com --- arch/x86/include/asm/uv/uv_mmrs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index ecb9dde..62c79e2 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -3833,7 +3833,7 @@ union uvh_rh_gam_mmioh_overlay_config0_mmr_u { #define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") #define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR uv_undefined("UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR") #define UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x1603000UL -#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x483000UL +#define UV4H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR 0x484000UL #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR ( \ is_uv1_hub() ? UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ is_uv2_hub() ? UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR : \ -- cgit v1.1 From b51d3cdf44d5ca9000de1e3d64551337d67b1cad Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 18 Mar 2018 08:03:01 +0100 Subject: x86: remove compat_sys_x86_waitpid() compat_sys_x86_waitpid() is not needed, as it takes the same parameters (int, *int, int) as the native syscall. 
Suggested-by: Al Viro Cc: Ingo Molnar Cc: Jiri Slaby Cc: x86@kernel.org Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- arch/x86/include/asm/sys_ia32.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 906794a..2ee6e3b 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -35,9 +35,6 @@ asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, struct mmap_arg_struct32; asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); -asmlinkage long compat_sys_x86_waitpid(compat_pid_t, unsigned int __user *, - int); - asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, u32); asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, -- cgit v1.1 From 66f4e88cc69da7d9ec4d68cf370cc69742d4af81 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sun, 11 Mar 2018 11:34:38 +0100 Subject: x86/ioport: add ksys_ioperm() helper; remove in-kernel calls to sys_ioperm() Using this helper allows us to avoid the in-kernel calls to the sys_ioperm() syscall. The ksys_ prefix denotes that this function is meant as a drop-in replacement for the syscall. In particular, it uses the same calling convention as sys_ioperm(). This patch is part of a series which removes in-kernel calls to syscalls. On this basis, the syscall entry path can be streamlined. For details, see http://lkml.kernel.org/r/20180325162527.GA17492@light.dominikbrodowski.net Cc: Ingo Molnar Cc: Jiri Slaby Cc: x86@kernel.org Acked-by: Greg Kroah-Hartman Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- arch/x86/include/asm/syscalls.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index bad25bb..1c0bebb 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -17,6 +17,7 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); -- cgit v1.1 From 025bd3905acab2cdfeb1a521491bee5e33a8fc90 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 22 Mar 2018 08:29:36 +0100 Subject: x86: fix sys_sigreturn() return type to be long, not unsigned long Same as with other system calls, sys_sigreturn() should return a value of type long, not unsigned long. This also matches the behaviour for IA32_EMULATION, see sys32_sigreturn() in arch/x86/ia32/ia32_signal.c . 
Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Slaby Cc: x86@kernel.org Cc: Michael Tautschnig Reviewed-by: Thomas Gleixner Signed-off-by: Dominik Brodowski --- arch/x86/include/asm/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 1c0bebb..ae6e05f 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -35,7 +35,7 @@ asmlinkage long sys_get_thread_area(struct user_desc __user *); #ifdef CONFIG_X86_32 /* kernel/signal.c */ -asmlinkage unsigned long sys_sigreturn(void); +asmlinkage long sys_sigreturn(void); /* kernel/vm86_32.c */ struct vm86_struct; -- cgit v1.1 From 3e2052e5dd4062ccc7a10e8860aa7d2e58627001 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 22 Mar 2018 14:09:17 +0100 Subject: syscalls/x86: auto-create compat_sys_*() prototypes compat_sys_*() functions are no longer called from within the kernel on x86 except from the system call table. Linking the system call does not require compat_sys_*() function prototypes at least on x86. Therefore, generate compat_sys_*() prototypes on-the-fly within the COMPAT_SYSCALL_DEFINEx() macro, and remove x86-specific prototypes from various header files. Suggested-by: Andy Lutomirski Cc: Arnd Bergmann Cc: David S. Miller Cc: netdev@vger.kernel.org Cc: Thomas Gleixner Cc: Andi Kleen Cc: Ingo Molnar Cc: Andrew Morton Cc: Al Viro Cc: x86@kernel.org Signed-off-by: Dominik Brodowski --- arch/x86/include/asm/sys_ia32.h | 64 ----------------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 arch/x86/include/asm/sys_ia32.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h deleted file mode 100644 index 2ee6e3b..0000000 --- a/arch/x86/include/asm/sys_ia32.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * sys_ia32.h - Linux ia32 syscall interfaces - * - * Copyright (c) 2008 Jaswinder Singh Rajput - * - * This file is released under the GPLv2. - * See the file COPYING for more details. 
- */ - -#ifndef _ASM_X86_SYS_IA32_H -#define _ASM_X86_SYS_IA32_H - -#ifdef CONFIG_COMPAT - -#include -#include -#include -#include -#include -#include - -/* ia32/sys_ia32.c */ -asmlinkage long compat_sys_x86_truncate64(const char __user *, unsigned long, - unsigned long); -asmlinkage long compat_sys_x86_ftruncate64(unsigned int, unsigned long, - unsigned long); - -asmlinkage long compat_sys_x86_stat64(const char __user *, - struct stat64 __user *); -asmlinkage long compat_sys_x86_lstat64(const char __user *, - struct stat64 __user *); -asmlinkage long compat_sys_x86_fstat64(unsigned int, struct stat64 __user *); -asmlinkage long compat_sys_x86_fstatat(unsigned int, const char __user *, - struct stat64 __user *, int); -struct mmap_arg_struct32; -asmlinkage long compat_sys_x86_mmap(struct mmap_arg_struct32 __user *); - -asmlinkage long compat_sys_x86_pread(unsigned int, char __user *, u32, u32, - u32); -asmlinkage long compat_sys_x86_pwrite(unsigned int, const char __user *, u32, - u32, u32); - -asmlinkage long compat_sys_x86_fadvise64_64(int, __u32, __u32, __u32, __u32, - int); - -asmlinkage ssize_t compat_sys_x86_readahead(int, unsigned int, unsigned int, - size_t); -asmlinkage long compat_sys_x86_sync_file_range(int, unsigned int, unsigned int, - unsigned int, unsigned int, - int); -asmlinkage long compat_sys_x86_fadvise64(int, unsigned int, unsigned int, - size_t, int); -asmlinkage long compat_sys_x86_fallocate(int, int, unsigned int, unsigned int, - unsigned int, unsigned int); -asmlinkage long compat_sys_x86_clone(unsigned long, unsigned long, int __user *, - unsigned long, int __user *); - -/* ia32/ia32_signal.c */ -asmlinkage long sys32_sigreturn(void); -asmlinkage long sys32_rt_sigreturn(void); - -#endif /* CONFIG_COMPAT */ - -#endif /* _ASM_X86_SYS_IA32_H */ -- cgit v1.1 From fa697140f9a20119a9ec8fd7460cc4314fbdaff3 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:02 +0200 Subject: syscalls/x86: Use 'struct pt_regs' based syscall calling convention for 64-bit syscalls Let's make use of ARCH_HAS_SYSCALL_WRAPPER=y on pure 64-bit x86-64 systems: Each syscall defines a stub which takes struct pt_regs as its only argument. It decodes just those parameters it needs, e.g: asmlinkage long sys_xyzzy(const struct pt_regs *regs) { return SyS_xyzzy(regs->di, regs->si, regs->dx); } This approach avoids leaking random user-provided register content down the call chain. For example, for sys_recv() which is a 4-parameter syscall, the assembly now is (in slightly reordered fashion): : callq <__fentry__> /* decode regs->di, ->si, ->dx and ->r10 */ mov 0x70(%rdi),%rdi mov 0x68(%rdi),%rsi mov 0x60(%rdi),%rdx mov 0x38(%rdi),%rcx [ SyS_recv() is automatically inlined by the compiler, as it is not [yet] used anywhere else ] /* clear %r9 and %r8, the 5th and 6th args */ xor %r9d,%r9d xor %r8d,%r8d /* do the actual work */ callq __sys_recvfrom /* cleanup and return */ cltq retq The only valid place in an x86-64 kernel which rightfully calls a syscall function on its own -- vsyscall -- needs to be modified to pass struct pt_regs onwards as well. To keep the syscall table generation working independent of SYSCALL_PTREGS being enabled, the stubs are named the same as the "original" syscall stubs, i.e. sys_*(). 
This patch is based on an original proof-of-concept | From: Linus Torvalds | Signed-off-by: Linus Torvalds and was split up and heavily modified by me, in particular to base it on ARCH_HAS_SYSCALL_WRAPPER, to limit it to 64-bit-only for the time being, and to update the vsyscall to the new calling convention. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscall.h | 4 ++ arch/x86/include/asm/syscall_wrapper.h | 70 ++++++++++++++++++++++++++++++++++ arch/x86/include/asm/syscalls.h | 7 ++++ 3 files changed, 81 insertions(+) create mode 100644 arch/x86/include/asm/syscall_wrapper.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 03eedc2..17c6237 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -20,9 +20,13 @@ #include /* for TS_COMPAT */ #include +#ifdef CONFIG_SYSCALL_PTREGS +typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *); +#else typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); +#endif /* CONFIG_SYSCALL_PTREGS */ extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h new file mode 100644 index 0000000..702bdee --- /dev/null +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * syscall_wrapper.h - x86 specific wrappers to syscall definitions + */ + +#ifndef _ASM_X86_SYSCALL_WRAPPER_H +#define _ASM_X86_SYSCALL_WRAPPER_H + +/* + * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes + * struct pt_regs *regs as the only argument of the syscall stub named + * sys_*(). It decodes just the registers it needs and passes them on to + * the SyS_*() wrapper and then to the SYSC_*() function doing the actual job. + * These wrappers and functions are inlined, meaning that the assembly looks + * as follows (slightly re-ordered): + * + * : <-- syscall with 4 parameters + * callq <__fentry__> + * + * mov 0x70(%rdi),%rdi <-- decode regs->di + * mov 0x68(%rdi),%rsi <-- decode regs->si + * mov 0x60(%rdi),%rdx <-- decode regs->dx + * mov 0x38(%rdi),%rcx <-- decode regs->r10 + * + * xor %r9d,%r9d <-- clear %r9 + * xor %r8d,%r8d <-- clear %r8 + * + * callq __sys_recvfrom <-- do the actual work in __sys_recvfrom() + * which takes 6 arguments + * + * cltq <-- extend return value to 64-bit + * retq <-- return + * + * This approach avoids leaking random user-provided register content down + * the call chain. + * + * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for + * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not + * hurt, there is no need to override it. + */ +#define __SYSCALL_DEFINEx(x, name, ...) 
\ + asmlinkage long sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ + static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + asmlinkage long sys##name(const struct pt_regs *regs) \ + { \ + return SyS##name(__MAP(x,__SC_ARGS \ + ,,regs->di,,regs->si,,regs->dx \ + ,,regs->r10,,regs->r8,,regs->r9)); \ + } \ + static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + __MAP(x,__SC_TEST,__VA_ARGS__); \ + __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ + return ret; \ + } \ + static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * For VSYSCALLS, we need to declare these three syscalls with the new + * pt_regs-based calling convention for in-kernel use. + */ +struct pt_regs; +asmlinkage long sys_getcpu(const struct pt_regs *regs); /* di,si,dx */ +asmlinkage long sys_gettimeofday(const struct pt_regs *regs); /* di,si */ +asmlinkage long sys_time(const struct pt_regs *regs); /* di */ + +#endif /* _ASM_X86_SYSCALL_WRAPPER_H */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index ae6e05f..e4ad93c 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -18,6 +18,12 @@ /* Common in X86_32 and X86_64 */ /* kernel/ioport.c */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); + +#ifndef CONFIG_SYSCALL_PTREGS +/* + * If CONFIG_SYSCALL_PTREGS is enabled, a different syscall calling convention + * is used. Do not include these -- invalid -- prototypes then + */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); @@ -53,4 +59,5 @@ asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); #endif /* CONFIG_X86_32 */ +#endif /* CONFIG_SYSCALL_PTREGS */ #endif /* _ASM_X86_SYSCALLS_H */ -- cgit v1.1 From ebeb8c82ffaf94435806ff0b686fffd41dd410b5 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:04 +0200 Subject: syscalls/x86: Use 'struct pt_regs' based syscall calling for IA32_EMULATION and x32 Extend ARCH_HAS_SYSCALL_WRAPPER for i386 emulation and for x32 on 64-bit x86. For x32, all we need to do is to create an additional stub for each compat syscall which decodes the parameters in x86-64 ordering, e.g.: asmlinkage long __compat_sys_x32_xyzzy(struct pt_regs *regs) { return c_SyS_xyzzy(regs->di, regs->si, regs->dx); } For i386 emulation, we need to teach compat_sys_*() to take struct pt_regs as its only argument, e.g.: asmlinkage long __compat_sys_ia32_xyzzy(struct pt_regs *regs) { return c_SyS_xyzzy(regs->bx, regs->cx, regs->dx); } In addition, we need to create additional stubs for common syscalls (that is, for syscalls which have the same parameters on 32-bit and 64-bit), e.g.: asmlinkage long __sys_ia32_xyzzy(struct pt_regs *regs) { return c_sys_xyzzy(regs->bx, regs->cx, regs->dx); } This approach avoids leaking random user-provided register content down the call chain. This patch is based on an original proof-of-concept | From: Linus Torvalds | Signed-off-by: Linus Torvalds and was split up and heavily modified by me, in particular to base it on ARCH_HAS_SYSCALL_WRAPPER. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-6-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscall_wrapper.h | 117 +++++++++++++++++++++++++++++++-- 1 file changed, 113 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 702bdee..49d7e49 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -6,6 +6,111 @@ #ifndef _ASM_X86_SYSCALL_WRAPPER_H #define _ASM_X86_SYSCALL_WRAPPER_H +/* Mapping of registers to parameters for syscalls on x86-64 and x32 */ +#define SC_X86_64_REGS_TO_ARGS(x, ...) \ + __MAP(x,__SC_ARGS \ + ,,regs->di,,regs->si,,regs->dx \ + ,,regs->r10,,regs->r8,,regs->r9) \ + +/* Mapping of registers to parameters for syscalls on i386 */ +#define SC_IA32_REGS_TO_ARGS(x, ...) \ + __MAP(x,__SC_ARGS \ + ,,(unsigned int)regs->bx,,(unsigned int)regs->cx \ + ,,(unsigned int)regs->dx,,(unsigned int)regs->si \ + ,,(unsigned int)regs->di,,(unsigned int)regs->bp) + +#ifdef CONFIG_IA32_EMULATION +/* + * For IA32 emulation, we need to handle "compat" syscalls *and* create + * additional wrappers (aptly named __sys_ia32_sys_xyzzy) which decode the + * ia32 regs in the proper order for shared or "common" syscalls. As some + * syscalls may not be implemented, we need to expand COND_SYSCALL in + * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this + * case as well. + */ +#define COMPAT_SC_IA32_STUBx(x, name, ...) \ + asmlinkage long __compat_sys_ia32##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__compat_sys_ia32##name, ERRNO); \ + asmlinkage long __compat_sys_ia32##name(const struct pt_regs *regs)\ + { \ + return c_SyS##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ + } \ + +#define SC_IA32_WRAPPERx(x, name, ...) \ + asmlinkage long __sys_ia32##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__sys_ia32##name, ERRNO); \ + asmlinkage long __sys_ia32##name(const struct pt_regs *regs) \ + { \ + return SyS##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__)); \ + } + +#define COND_SYSCALL(name) \ + cond_syscall(sys_##name); \ + cond_syscall(__sys_ia32_##name) + +#define SYS_NI(name) \ + SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__sys_ia32_##name, sys_ni_posix_timers) + +#else /* CONFIG_IA32_EMULATION */ +#define COMPAT_SC_IA32_STUBx(x, name, ...) +#define SC_IA32_WRAPPERx(x, fullname, name, ...) +#endif /* CONFIG_IA32_EMULATION */ + + +#ifdef CONFIG_X86_X32 +/* + * For the x32 ABI, we need to create a stub for compat_sys_*() which is aware + * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common + * with x86_64 obviously do not need such care. + */ +#define COMPAT_SC_X32_STUBx(x, name, ...) \ + asmlinkage long __compat_sys_x32##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__compat_sys_x32##name, ERRNO); \ + asmlinkage long __compat_sys_x32##name(const struct pt_regs *regs)\ + { \ + return c_SyS##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ + } \ + +#else /* CONFIG_X86_X32 */ +#define COMPAT_SC_X32_STUBx(x, name, ...) +#endif /* CONFIG_X86_X32 */ + + +#ifdef CONFIG_COMPAT +/* + * Compat means IA32_EMULATION and/or X86_X32. As they use a different + * mapping of registers to parameters, we need to generate stubs for each + * of them. There is no need to implement COMPAT_SYSCALL_DEFINE0, as it is + * unused on x86. 
+ */ +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + static long c_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + COMPAT_SC_IA32_STUBx(x, name, __VA_ARGS__) \ + COMPAT_SC_X32_STUBx(x, name, __VA_ARGS__) \ + static long c_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ + } \ + static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + +/* + * As some compat syscalls may not be implemented, we need to expand + * COND_SYSCALL_COMPAT in kernel/sys_ni.c and COMPAT_SYS_NI in + * kernel/time/posix-stubs.c to cover this case as well. + */ +#define COND_SYSCALL_COMPAT(name) \ + cond_syscall(__compat_sys_ia32_##name); \ + cond_syscall(__compat_sys_x32_##name) + +#define COMPAT_SYS_NI(name) \ + SYSCALL_ALIAS(__compat_sys_ia32_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__compat_sys_x32_##name, sys_ni_posix_timers) + +#endif /* CONFIG_COMPAT */ + + /* * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes * struct pt_regs *regs as the only argument of the syscall stub named @@ -34,9 +139,14 @@ * This approach avoids leaking random user-provided register content down * the call chain. * + * If IA32_EMULATION is enabled, this macro generates an additional wrapper + * named __sys_ia32_*() which decodes the struct pt_regs *regs according + * to the i386 calling convention (bx, cx, dx, si, di, bp). + * * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not - * hurt, there is no need to override it. + * hurt, there is no need to override it, or to define it differently for + * IA32_EMULATION. */ #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(const struct pt_regs *regs); \ @@ -45,10 +155,9 @@ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ asmlinkage long sys##name(const struct pt_regs *regs) \ { \ - return SyS##name(__MAP(x,__SC_ARGS \ - ,,regs->di,,regs->si,,regs->dx \ - ,,regs->r10,,regs->r8,,regs->r9)); \ + return SyS##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ } \ + SC_IA32_WRAPPERx(x, name, __VA_ARGS__) \ static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ -- cgit v1.1 From f8781c4a226319fe60e652118b90cf094ccfe747 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Thu, 5 Apr 2018 11:53:05 +0200 Subject: syscalls/x86: Unconditionally enable 'struct pt_regs' based syscalls on x86_64 Removing CONFIG_SYSCALL_PTREGS from arch/x86/Kconfig and simply selecting ARCH_HAS_SYSCALL_WRAPPER unconditionally on x86-64 allows us to simplify several codepaths. Signed-off-by: Dominik Brodowski Acked-by: Linus Torvalds Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. 
Peter Anvin Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180405095307.3730-7-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscall.h | 4 ++-- arch/x86/include/asm/syscalls.h | 20 ++++---------------- 2 files changed, 6 insertions(+), 18 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 17c6237..d653139 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -20,13 +20,13 @@ #include /* for TS_COMPAT */ #include -#ifdef CONFIG_SYSCALL_PTREGS +#ifdef CONFIG_X86_64 typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *); #else typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); -#endif /* CONFIG_SYSCALL_PTREGS */ +#endif /* CONFIG_X86_64 */ extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index e4ad93c..d4d18d9 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -19,10 +19,10 @@ /* kernel/ioport.c */ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); -#ifndef CONFIG_SYSCALL_PTREGS -/* - * If CONFIG_SYSCALL_PTREGS is enabled, a different syscall calling convention - * is used. Do not include these -- invalid -- prototypes then +#ifdef CONFIG_X86_32 +/* + * These definitions are only valid on pure 32-bit systems; x86-64 uses a + * different syscall calling convention */ asmlinkage long sys_ioperm(unsigned long, unsigned long, int); asmlinkage long sys_iopl(unsigned int); @@ -38,7 +38,6 @@ asmlinkage long sys_set_thread_area(struct user_desc __user *); asmlinkage long sys_get_thread_area(struct user_desc __user *); /* X86_32 only */ -#ifdef CONFIG_X86_32 /* kernel/signal.c */ asmlinkage long sys_sigreturn(void); @@ -48,16 +47,5 @@ struct vm86_struct; asmlinkage long sys_vm86old(struct vm86_struct __user *); asmlinkage long sys_vm86(unsigned long, unsigned long); -#else /* CONFIG_X86_32 */ - -/* X86_64 only */ -/* kernel/process_64.c */ -asmlinkage long sys_arch_prctl(int, unsigned long); - -/* kernel/sys_x86_64.c */ -asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); - #endif /* CONFIG_X86_32 */ -#endif /* CONFIG_SYSCALL_PTREGS */ #endif /* _ASM_X86_SYSCALLS_H */ -- cgit v1.1 From e145242ea0df6b7d28fd7186e61d6840fa4bb06e Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 9 Apr 2018 12:51:42 +0200 Subject: syscalls/core, syscalls/x86: Clean up syscall stub naming convention Tidy the naming convention for syscall stubs. Hints which describe the purpose of the stub go in front and receive a double underscore to denote that they are generated on-the-fly by the SYSCALL_DEFINEx() macro. 
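[ Note: in hand-expanded sketch form (hypothetical syscall, illustrative names only), the x86 stubs generated by SYSCALL_DEFINEx() now forward to the renamed helpers roughly as below; the 64-bit stub keeps its sys_*() name until the follow-up rename, and the listings further down show the real symbols produced for waitid: ]

    static long __se_sys_xyzzy(long fd, long buf, long count);

    asmlinkage long sys_xyzzy(const struct pt_regs *regs)
    {
            /* 64-bit ptregs stub: x86-64 argument registers di, si, dx */
            return __se_sys_xyzzy(regs->di, regs->si, regs->dx);
    }

    asmlinkage long __ia32_sys_xyzzy(const struct pt_regs *regs)
    {
            /* IA32_EMULATION ptregs stub: i386 argument registers bx, cx, dx */
            return __se_sys_xyzzy((unsigned int)regs->bx,
                                  (unsigned int)regs->cx,
                                  (unsigned int)regs->dx);
    }

    /* __se_sys_xyzzy() sign-extends/casts and calls the inlined __do_sys_xyzzy() */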
For the generic case, this means (0xffffffff prefix removed): 810f08d0 t kernel_waitid # common C function (see kernel/exit.c) __do_sys_waitid # inlined helper doing the actual work # (takes original parameters as declared) 810f1aa0 T __se_sys_waitid # sign-extending C function calling inlined # helper (takes parameters of type long; # casts them to the declared type) 810f1aa0 T sys_waitid # alias to __se_sys_waitid() (taking # parameters as declared), to be included # in syscall table For x86, the naming is as follows: 810efc70 t kernel_waitid # common C function (see kernel/exit.c) __do_sys_waitid # inlined helper doing the actual work # (takes original parameters as declared) 810efd60 t __se_sys_waitid # sign-extending C function calling inlined # helper (takes parameters of type long; # casts them to the declared type) 810f1140 T __ia32_sys_waitid # IA32_EMULATION 32-bit-ptregs -> C stub, # calls __se_sys_waitid(); to be included # in syscall table 810f1110 T sys_waitid # x86 64-bit-ptregs -> C stub, calls # __se_sys_waitid(); to be included in # syscall table For x86, sys_waitid() will be re-named to __x64_sys_waitid in a follow-up patch. Suggested-by: Ingo Molnar Signed-off-by: Dominik Brodowski Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180409105145.5364-2-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscall_wrapper.h | 35 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 49d7e49..798a3c2 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -22,7 +22,7 @@ #ifdef CONFIG_IA32_EMULATION /* * For IA32 emulation, we need to handle "compat" syscalls *and* create - * additional wrappers (aptly named __sys_ia32_sys_xyzzy) which decode the + * additional wrappers (aptly named __ia32_sys_xyzzy) which decode the * ia32 regs in the proper order for shared or "common" syscalls. As some * syscalls may not be implemented, we need to expand COND_SYSCALL in * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this @@ -37,20 +37,20 @@ } \ #define SC_IA32_WRAPPERx(x, name, ...) \ - asmlinkage long __sys_ia32##name(const struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(__sys_ia32##name, ERRNO); \ - asmlinkage long __sys_ia32##name(const struct pt_regs *regs) \ + asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \ + asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \ { \ - return SyS##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__)); \ + return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ } #define COND_SYSCALL(name) \ cond_syscall(sys_##name); \ - cond_syscall(__sys_ia32_##name) + cond_syscall(__ia32_sys_##name) #define SYS_NI(name) \ SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__sys_ia32_##name, sys_ni_posix_timers) + SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers) #else /* CONFIG_IA32_EMULATION */ #define COMPAT_SC_IA32_STUBx(x, name, ...) @@ -115,9 +115,10 @@ * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes * struct pt_regs *regs as the only argument of the syscall stub named * sys_*(). 
It decodes just the registers it needs and passes them on to - * the SyS_*() wrapper and then to the SYSC_*() function doing the actual job. - * These wrappers and functions are inlined, meaning that the assembly looks - * as follows (slightly re-ordered): + * the __se_sys_*() wrapper performing sign extension and then to the + * __do_sys_*() function doing the actual job. These wrappers and functions + * are inlined (at least in very most cases), meaning that the assembly looks + * as follows (slightly re-ordered for better readability): * * : <-- syscall with 4 parameters * callq <__fentry__> @@ -140,7 +141,7 @@ * the call chain. * * If IA32_EMULATION is enabled, this macro generates an additional wrapper - * named __sys_ia32_*() which decodes the struct pt_regs *regs according + * named __ia32_sys_*() which decodes the struct pt_regs *regs according * to the i386 calling convention (bx, cx, dx, si, di, bp). * * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for @@ -151,21 +152,21 @@ #define __SYSCALL_DEFINEx(x, name, ...) \ asmlinkage long sys##name(const struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ - static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ asmlinkage long sys##name(const struct pt_regs *regs) \ { \ - return SyS##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ + return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ } \ SC_IA32_WRAPPERx(x, name, __VA_ARGS__) \ - static long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ - long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__)); \ + long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\ __MAP(x,__SC_TEST,__VA_ARGS__); \ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \ return ret; \ } \ - static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) + static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) /* * For VSYSCALLS, we need to declare these three syscalls with the new -- cgit v1.1 From 5ac9efa3c50d7caff9f3933bb8a3ad1139d92d92 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 9 Apr 2018 12:51:43 +0200 Subject: syscalls/core, syscalls/x86: Clean up compat syscall stub naming convention Tidy the naming convention for compat syscall stubs. Hints which describe the purpose of the stub go in front and receive a double underscore to denote that they are generated on-the-fly by the COMPAT_SYSCALL_DEFINEx() macro. 
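[ Note: hand-expanded sketch of the corresponding compat stubs for a hypothetical compat syscall, assuming both IA32_EMULATION and X86_X32 are enabled; names are illustrative only, and the listings further down show the real symbols produced for waitid: ]

    static long __se_compat_sys_xyzzy(long fd, long buf, long count);

    asmlinkage long __ia32_compat_sys_xyzzy(const struct pt_regs *regs)
    {
            /* i386 register order: bx, cx, dx */
            return __se_compat_sys_xyzzy((unsigned int)regs->bx,
                                         (unsigned int)regs->cx,
                                         (unsigned int)regs->dx);
    }

    asmlinkage long __x32_compat_sys_xyzzy(const struct pt_regs *regs)
    {
            /* x32 keeps the x86-64 register order: di, si, dx */
            return __se_compat_sys_xyzzy(regs->di, regs->si, regs->dx);
    }

    /* __se_compat_sys_xyzzy() delouses the compat arguments (__SC_DELOUSE) and
       calls the inlined __do_compat_sys_xyzzy() helper */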
For the generic case, this means: t kernel_waitid # common C function (see kernel/exit.c) __do_compat_sys_waitid # inlined helper doing the actual work # (takes original parameters as declared) T __se_compat_sys_waitid # sign-extending C function calling inlined # helper (takes parameters of type long, # casts them to unsigned long and then to # the declared type) T compat_sys_waitid # alias to __se_compat_sys_waitid() # (taking parameters as declared), to # be included in syscall table For x86, the naming is as follows: t kernel_waitid # common C function (see kernel/exit.c) __do_compat_sys_waitid # inlined helper doing the actual work # (takes original parameters as declared) t __se_compat_sys_waitid # sign-extending C function calling inlined # helper (takes parameters of type long, # casts them to unsigned long and then to # the declared type) T __ia32_compat_sys_waitid # IA32_EMULATION 32-bit-ptregs -> C stub, # calls __se_compat_sys_waitid(); to be # included in syscall table T __x32_compat_sys_waitid # x32 64-bit-ptregs -> C stub, calls # __se_compat_sys_waitid(); to be included # in syscall table If only one of IA32_EMULATION and x32 is enabled, __se_compat_sys_waitid() may be inlined into the stub __{ia32,x32}_compat_sys_waitid(). Suggested-by: Ingo Molnar Signed-off-by: Dominik Brodowski Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180409105145.5364-3-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscall_wrapper.h | 44 +++++++++++++++++----------------- arch/x86/include/asm/syscalls.h | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 798a3c2..8d0951c 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -29,11 +29,11 @@ * case as well. */ #define COMPAT_SC_IA32_STUBx(x, name, ...) \ - asmlinkage long __compat_sys_ia32##name(const struct pt_regs *regs);\ - ALLOW_ERROR_INJECTION(__compat_sys_ia32##name, ERRNO); \ - asmlinkage long __compat_sys_ia32##name(const struct pt_regs *regs)\ + asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \ + asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\ { \ - return c_SyS##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ + return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ } \ #define SC_IA32_WRAPPERx(x, name, ...) \ @@ -65,11 +65,11 @@ * with x86_64 obviously do not need such care. */ #define COMPAT_SC_X32_STUBx(x, name, ...) \ - asmlinkage long __compat_sys_x32##name(const struct pt_regs *regs);\ - ALLOW_ERROR_INJECTION(__compat_sys_x32##name, ERRNO); \ - asmlinkage long __compat_sys_x32##name(const struct pt_regs *regs)\ + asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ + ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ + asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ { \ - return c_SyS##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ + return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ } \ #else /* CONFIG_X86_X32 */ @@ -84,16 +84,16 @@ * of them. There is no need to implement COMPAT_SYSCALL_DEFINE0, as it is * unused on x86. */ -#define COMPAT_SYSCALL_DEFINEx(x, name, ...) 
\ - static long c_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ - static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - COMPAT_SC_IA32_STUBx(x, name, __VA_ARGS__) \ - COMPAT_SC_X32_STUBx(x, name, __VA_ARGS__) \ - static long c_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ - { \ - return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \ - } \ - static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)) +#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ + COMPAT_SC_IA32_STUBx(x, name, __VA_ARGS__) \ + COMPAT_SC_X32_STUBx(x, name, __VA_ARGS__) \ + static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + { \ + return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ + } \ + static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) /* * As some compat syscalls may not be implemented, we need to expand @@ -101,12 +101,12 @@ * kernel/time/posix-stubs.c to cover this case as well. */ #define COND_SYSCALL_COMPAT(name) \ - cond_syscall(__compat_sys_ia32_##name); \ - cond_syscall(__compat_sys_x32_##name) + cond_syscall(__ia32_compat_sys_##name); \ + cond_syscall(__x32_compat_sys_##name) #define COMPAT_SYS_NI(name) \ - SYSCALL_ALIAS(__compat_sys_ia32_##name, sys_ni_posix_timers); \ - SYSCALL_ALIAS(__compat_sys_x32_##name, sys_ni_posix_timers) + SYSCALL_ALIAS(__ia32_compat_sys_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__x32_compat_sys_##name, sys_ni_posix_timers) #endif /* CONFIG_COMPAT */ diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index d4d18d9..9fa979d 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -20,7 +20,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on); #ifdef CONFIG_X86_32 -/* +/* * These definitions are only valid on pure 32-bit systems; x86-64 uses a * different syscall calling convention */ -- cgit v1.1 From d5a00528b58cdb2c71206e18bd021e34c4eab878 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 9 Apr 2018 12:51:44 +0200 Subject: syscalls/core, syscalls/x86: Rename struct pt_regs-based sys_*() to __x64_sys_*() This rename allows us to have a coherent syscall stub naming convention on 64-bit x86 (0xffffffff prefix removed): 810f0af0 t kernel_waitid # common (32/64) kernel helper __do_sys_waitid # inlined helper doing actual work 810f0be0 t __se_sys_waitid # C func calling inlined helper __do_compat_sys_waitid # inlined helper doing actual work 810f0d80 t __se_compat_sys_waitid # compat C func calling inlined helper 810f2080 T __x64_sys_waitid # x64 64-bit-ptregs -> C stub 810f20b0 T __ia32_sys_waitid # ia32 32-bit-ptregs -> C stub[*] 810f2470 T __ia32_compat_sys_waitid # ia32 32-bit-ptregs -> compat C stub 810f2490 T __x32_compat_sys_waitid # x32 64-bit-ptregs -> compat C stub [*] This stub is unused, as the syscall table links __ia32_compat_sys_waitid instead of __ia32_sys_waitid as we need a compat variant here. 
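[ Note: a hedged illustration of the end result on the 64-bit side, using hypothetical table entries; since every stub now takes struct pt_regs, the 64-bit syscall table reduces to an array of ptregs-based function pointers referencing the __x64_sys_*() stubs, roughly: ]

    typedef asmlinkage long (*sys_call_ptr_t)(const struct pt_regs *);

    /* sketch only, not the literal generated table in arch/x86/entry/syscall_64.c */
    const sys_call_ptr_t sys_call_table[] = {
            /* ... */
            [__NR_waitid] = &__x64_sys_waitid,      /* 64-bit-ptregs -> C stub */
            /* ... */
    };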
Suggested-by: Ingo Molnar Signed-off-by: Dominik Brodowski Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180409105145.5364-4-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscall_wrapper.h | 63 +++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 17 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index 8d0951c..a598013 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -44,12 +44,23 @@ return __se_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ } +/* + * To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias + * named __ia32_sys_*() + */ +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \ + asmlinkage long __x64_sys_##sname(void) + #define COND_SYSCALL(name) \ - cond_syscall(sys_##name); \ + cond_syscall(__x64_sys_##name); \ cond_syscall(__ia32_sys_##name) #define SYS_NI(name) \ - SYSCALL_ALIAS(sys_##name, sys_ni_posix_timers); \ + SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \ SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers) #else /* CONFIG_IA32_EMULATION */ @@ -81,8 +92,7 @@ /* * Compat means IA32_EMULATION and/or X86_X32. As they use a different * mapping of registers to parameters, we need to generate stubs for each - * of them. There is no need to implement COMPAT_SYSCALL_DEFINE0, as it is - * unused on x86. + * of them. */ #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ @@ -114,13 +124,13 @@ /* * Instead of the generic __SYSCALL_DEFINEx() definition, this macro takes * struct pt_regs *regs as the only argument of the syscall stub named - * sys_*(). It decodes just the registers it needs and passes them on to + * __x64_sys_*(). It decodes just the registers it needs and passes them on to * the __se_sys_*() wrapper performing sign extension and then to the * __do_sys_*() function doing the actual job. These wrappers and functions * are inlined (at least in very most cases), meaning that the assembly looks * as follows (slightly re-ordered for better readability): * - * : <-- syscall with 4 parameters + * <__x64_sys_recv>: <-- syscall with 4 parameters * callq <__fentry__> * * mov 0x70(%rdi),%rdi <-- decode regs->di @@ -143,18 +153,13 @@ * If IA32_EMULATION is enabled, this macro generates an additional wrapper * named __ia32_sys_*() which decodes the struct pt_regs *regs according * to the i386 calling convention (bx, cx, dx, si, di, bp). - * - * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for - * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not - * hurt, there is no need to override it, or to define it differently for - * IA32_EMULATION. */ #define __SYSCALL_DEFINEx(x, name, ...) 
\ - asmlinkage long sys##name(const struct pt_regs *regs); \ - ALLOW_ERROR_INJECTION(sys##name, ERRNO); \ + asmlinkage long __x64_sys##name(const struct pt_regs *regs); \ + ALLOW_ERROR_INJECTION(__x64_sys##name, ERRNO); \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - asmlinkage long sys##name(const struct pt_regs *regs) \ + asmlinkage long __x64_sys##name(const struct pt_regs *regs) \ { \ return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ } \ @@ -169,12 +174,36 @@ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) /* + * As the generic SYSCALL_DEFINE0() macro does not decode any parameters for + * obvious reasons, and passing struct pt_regs *regs to it in %rdi does not + * hurt, we only need to re-define it here to keep the naming congruent to + * SYSCALL_DEFINEx() -- which is essential for the COND_SYSCALL() and SYS_NI() + * macros to work correctly. + */ +#ifndef SYSCALL_DEFINE0 +#define SYSCALL_DEFINE0(sname) \ + SYSCALL_METADATA(_##sname, 0); \ + asmlinkage long __x64_sys_##sname(void); \ + ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \ + asmlinkage long __x64_sys_##sname(void) +#endif + +#ifndef COND_SYSCALL +#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name) +#endif + +#ifndef SYS_NI +#define SYS_NI(name) SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); +#endif + + +/* * For VSYSCALLS, we need to declare these three syscalls with the new * pt_regs-based calling convention for in-kernel use. */ struct pt_regs; -asmlinkage long sys_getcpu(const struct pt_regs *regs); /* di,si,dx */ -asmlinkage long sys_gettimeofday(const struct pt_regs *regs); /* di,si */ -asmlinkage long sys_time(const struct pt_regs *regs); /* di */ +asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs); +asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs); +asmlinkage long __x64_sys_time(const struct pt_regs *regs); #endif /* _ASM_X86_SYSCALL_WRAPPER_H */ -- cgit v1.1 From c76fc98260751e71c884dc1a18a07e427ef033b5 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Mon, 9 Apr 2018 12:51:45 +0200 Subject: syscalls/x86: Adapt syscall_wrapper.h to the new syscall stub naming convention Make the code in syscall_wrapper.h more readable by naming the stub macros similar to the stub they provide. While at it, fix a stray newline at the end of the __IA32_COMPAT_SYS_STUBx macro. Signed-off-by: Dominik Brodowski Cc: Al Viro Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180409105145.5364-5-linux@dominikbrodowski.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/syscall_wrapper.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h index a598013..e046a40 100644 --- a/arch/x86/include/asm/syscall_wrapper.h +++ b/arch/x86/include/asm/syscall_wrapper.h @@ -28,7 +28,7 @@ * kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this * case as well. */ -#define COMPAT_SC_IA32_STUBx(x, name, ...) \ +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) 
\ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \ asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\ @@ -36,7 +36,7 @@ return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\ } \ -#define SC_IA32_WRAPPERx(x, name, ...) \ +#define __IA32_SYS_STUBx(x, name, ...) \ asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \ ALLOW_ERROR_INJECTION(__ia32_sys##name, ERRNO); \ asmlinkage long __ia32_sys##name(const struct pt_regs *regs) \ @@ -64,8 +64,8 @@ SYSCALL_ALIAS(__ia32_sys_##name, sys_ni_posix_timers) #else /* CONFIG_IA32_EMULATION */ -#define COMPAT_SC_IA32_STUBx(x, name, ...) -#define SC_IA32_WRAPPERx(x, fullname, name, ...) +#define __IA32_COMPAT_SYS_STUBx(x, name, ...) +#define __IA32_SYS_STUBx(x, fullname, name, ...) #endif /* CONFIG_IA32_EMULATION */ @@ -75,7 +75,7 @@ * of the x86-64-style parameter ordering of x32 syscalls. The syscalls common * with x86_64 obviously do not need such care. */ -#define COMPAT_SC_X32_STUBx(x, name, ...) \ +#define __X32_COMPAT_SYS_STUBx(x, name, ...) \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\ ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \ asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\ @@ -84,7 +84,7 @@ } \ #else /* CONFIG_X86_X32 */ -#define COMPAT_SC_X32_STUBx(x, name, ...) +#define __X32_COMPAT_SYS_STUBx(x, name, ...) #endif /* CONFIG_X86_X32 */ @@ -97,8 +97,8 @@ #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\ - COMPAT_SC_IA32_STUBx(x, name, __VA_ARGS__) \ - COMPAT_SC_X32_STUBx(x, name, __VA_ARGS__) \ + __IA32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \ + __X32_COMPAT_SYS_STUBx(x, name, __VA_ARGS__) \ static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ return __do_compat_sys##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__));\ @@ -163,7 +163,7 @@ { \ return __se_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\ } \ - SC_IA32_WRAPPERx(x, name, __VA_ARGS__) \ + __IA32_SYS_STUBx(x, name, __VA_ARGS__) \ static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ { \ long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\ -- cgit v1.1
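[ Note: to tie the series together, a hedged end-to-end example; "xyzzy" and do_xyzzy() are made up, and the generated symbol list assumes an x86-64 kernel with CONFIG_IA32_EMULATION=y: ]

    /* Written by the syscall author, unchanged by this series: */
    SYSCALL_DEFINE2(xyzzy, unsigned int, fd, unsigned int, flags)
    {
            return do_xyzzy(fd, flags);     /* hypothetical worker function */
    }

    /*
     * Symbols generated by the x86 syscall wrappers:
     *   __do_sys_xyzzy()   - inlined helper containing the body above
     *   __se_sys_xyzzy()   - sign-extending helper calling __do_sys_xyzzy()
     *   __x64_sys_xyzzy()  - 64-bit-ptregs -> C stub, referenced by the
     *                        64-bit syscall table
     *   __ia32_sys_xyzzy() - i386-ptregs -> C stub, referenced by the
     *                        32-bit (IA32_EMULATION) syscall table
     */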