From 1cd9c22fee3ac21db52a0997d08cf2f065d2c0c0 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 31 Jan 2018 16:54:02 +0300 Subject: x86/mm/encrypt: Move page table helpers into separate translation unit There are bunch of functions in mem_encrypt.c that operate on the identity mapping, which means they want virtual addresses to be equal to physical one, without PAGE_OFFSET shift. We also need to avoid paravirtualizaion call there. Getting this done is tricky. We cannot use usual page table helpers. It forces us to open-code a lot of things. It makes code ugly and hard to modify. We can get it work with the page table helpers, but it requires few preprocessor tricks. These tricks may have side effects for the rest of the file. Let's isolate such functions into own translation unit. Tested-by: Tom Lendacky Signed-off-by: Kirill A. Shutemov Reviewed-by: Tom Lendacky Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180131135404.40692-2-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mem_encrypt.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 22c5f3e..8fe61ad 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -22,6 +22,7 @@ #ifdef CONFIG_AMD_MEM_ENCRYPT extern u64 sme_me_mask; +extern bool sev_enabled; void sme_encrypt_execute(unsigned long encrypted_kernel_vaddr, unsigned long decrypted_kernel_vaddr, -- cgit v1.1 From b83ce5ee91471d19c403ff91227204fb37c95fb2 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:48 +0300 Subject: x86/mm/64: Make __PHYSICAL_MASK_SHIFT always 52 __PHYSICAL_MASK_SHIFT is used to define the mask that helps to extract physical address from a page table entry. Although real physical address space available may differ between machines, it's safe to use 52 as __PHYSICAL_MASK_SHIFT. Unused bits above log2(MAXPHYADDR) up to bit 51 are reserved and must be 0. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-2-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64_types.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index e140731..f68e652 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -52,11 +52,12 @@ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ -#ifdef CONFIG_X86_5LEVEL + #define __PHYSICAL_MASK_SHIFT 52 + +#ifdef CONFIG_X86_5LEVEL #define __VIRTUAL_MASK_SHIFT 56 #else -#define __PHYSICAL_MASK_SHIFT 46 #define __VIRTUAL_MASK_SHIFT 47 #endif -- cgit v1.1 From 02390b87a9459937cdb299e6b34ff33992512ec7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:49 +0300 Subject: mm/zsmalloc: Prepare to variable MAX_PHYSMEM_BITS With boot-time switching between paging mode we will have variable MAX_PHYSMEM_BITS. Let's use the maximum variable possible for CONFIG_X86_5LEVEL=y configuration to define zsmalloc data structures. The patch introduces MAX_POSSIBLE_PHYSMEM_BITS to cover such case. It also suits well to handle PAE special case. Signed-off-by: Kirill A. Shutemov Reviewed-by: Nitin Gupta Acked-by: Minchan Kim Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sergey Senozhatsky Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-3-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable-3level_types.h | 1 + arch/x86/include/asm/pgtable_64_types.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 876b4c7..6a59a6d 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -44,5 +44,6 @@ typedef union { */ #define PTRS_PER_PTE 512 +#define MAX_POSSIBLE_PHYSMEM_BITS 36 #endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6b8f73d..7168de7 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -40,6 +40,8 @@ typedef struct { pteval_t pte; } pte_t; #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define P4D_MASK (~(P4D_SIZE - 1)) +#define MAX_POSSIBLE_PHYSMEM_BITS 52 + #else /* CONFIG_X86_5LEVEL */ /* -- cgit v1.1 From eedb92abb9bb03ef21442614a6f5867eaac6e77f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:50 +0300 Subject: x86/mm: Make virtual memory layout dynamic for CONFIG_X86_5LEVEL=y We need to be able to adjust virtual memory layout at runtime to be able to switch between 4- and 5-level paging at boot-time. KASLR already has movable __VMALLOC_BASE, __VMEMMAP_BASE and __PAGE_OFFSET. Let's re-use it. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kaslr.h | 4 ---- arch/x86/include/asm/page_64.h | 4 ++++ arch/x86/include/asm/page_64_types.h | 4 ++-- arch/x86/include/asm/pgtable_64_types.h | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index 460991e..db7ba2f 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -5,10 +5,6 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY -extern unsigned long page_offset_base; -extern unsigned long vmalloc_base; -extern unsigned long vmemmap_base; - void kernel_randomize_memory(void); #else static inline void kernel_randomize_memory(void) { } diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h index 4baa6bc..09637865 100644 --- a/arch/x86/include/asm/page_64.h +++ b/arch/x86/include/asm/page_64.h @@ -11,6 +11,10 @@ extern unsigned long max_pfn; extern unsigned long phys_base; +extern unsigned long page_offset_base; +extern unsigned long vmalloc_base; +extern unsigned long vmemmap_base; + static inline unsigned long __phys_addr_nodebug(unsigned long x) { unsigned long y = x - __START_KERNEL_map; diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index f68e652..d54a3d5 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -43,11 +43,11 @@ #define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) #endif -#ifdef CONFIG_RANDOMIZE_MEMORY +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base #else #define __PAGE_OFFSET __PAGE_OFFSET_BASE -#endif /* CONFIG_RANDOMIZE_MEMORY */ +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 7168de7..a0db91a 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -100,13 +100,13 @@ typedef struct { pteval_t pte; } pte_t; # define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif -#ifdef CONFIG_RANDOMIZE_MEMORY +#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base # define VMEMMAP_START vmemmap_base #else # define VMALLOC_START __VMALLOC_BASE # define VMEMMAP_START __VMEMMAP_BASE -#endif /* CONFIG_RANDOMIZE_MEMORY */ +#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) -- cgit v1.1 From e626e6bb0dfaca41487241d49ce0ae827716101a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:51 +0300 Subject: x86/mm: Introduce 'pgtable_l5_enabled' The new flag would indicate what paging mode we are in. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_32_types.h | 2 ++ arch/x86/include/asm/pgtable_64_types.h | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 0777e18..e3225e8 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -15,6 +15,8 @@ # include #endif +#define pgtable_l5_enabled 0 + #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index a0db91a..5e2d724 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -20,6 +20,12 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; +#ifdef CONFIG_X86_5LEVEL +extern unsigned int pgtable_l5_enabled; +#else +#define pgtable_l5_enabled 0 +#endif + #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 -- cgit v1.1 From 5c7919bb1994f8dc7fed219a5db09e6bb9d473a5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:52 +0300 Subject: x86/mm: Make LDT_BASE_ADDR dynamic LDT_BASE_ADDR has different value in 4- and 5-level paging configurations. We need to make it dynamic in preparation for boot-time switching between paging modes. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 5e2d724..903e4d05 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -92,18 +92,19 @@ extern unsigned int pgtable_l5_enabled; */ #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define LDT_PGD_ENTRY_L4 -3UL +#define LDT_PGD_ENTRY_L5 -112UL +#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) +#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) + #ifdef CONFIG_X86_5LEVEL # define VMALLOC_SIZE_TB _AC(12800, UL) # define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -# define LDT_PGD_ENTRY _AC(-112, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #else # define VMALLOC_SIZE_TB _AC(32, UL) # define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -# define LDT_PGD_ENTRY _AC(-3, UL) -# define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #endif #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT -- cgit v1.1 From c65e774fb3f6af212641538694b9778ff9ab4300 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:53 +0300 Subject: x86/mm: Make PGDIR_SHIFT and PTRS_PER_P4D variable For boot-time switching between 4- and 5-level paging we need to be able to fold p4d page table level at runtime. It requires variable PGDIR_SHIFT and PTRS_PER_P4D. The change doesn't affect the kernel image size much: text data bss dec hex filename 8628091 4734304 1368064 14730459 e0c4db vmlinux.before 8628393 4734340 1368064 14730797 e0c62d vmlinux.after Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-7-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_32.h | 2 ++ arch/x86/include/asm/pgtable_64_types.h | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index e67c062..d829360 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -33,6 +33,8 @@ static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +static inline int pgd_large(pgd_t pgd) { return 0; } + /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 903e4d05..0c48d80 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -26,6 +26,9 @@ extern unsigned int pgtable_l5_enabled; #define pgtable_l5_enabled 0 #endif +extern unsigned int pgdir_shift; +extern unsigned int ptrs_per_p4d; + #endif /* !__ASSEMBLY__ */ #define SHARED_KERNEL_PMD 0 @@ -35,16 +38,17 @@ extern unsigned int pgtable_l5_enabled; /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 48 +#define PGDIR_SHIFT pgdir_shift #define PTRS_PER_PGD 512 /* * 4th level page in 5-level paging case */ -#define P4D_SHIFT 39 -#define PTRS_PER_P4D 512 -#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) -#define P4D_MASK (~(P4D_SIZE - 1)) +#define P4D_SHIFT 39 +#define MAX_PTRS_PER_P4D 512 +#define PTRS_PER_P4D ptrs_per_p4d +#define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) +#define P4D_MASK (~(P4D_SIZE - 1)) #define MAX_POSSIBLE_PHYSMEM_BITS 52 @@ -53,8 +57,9 @@ extern unsigned int pgtable_l5_enabled; /* * PGDIR_SHIFT determines what a top-level page table entry can map */ -#define PGDIR_SHIFT 39 -#define PTRS_PER_PGD 512 +#define PGDIR_SHIFT 39 +#define PTRS_PER_PGD 512 +#define MAX_PTRS_PER_P4D 1 #endif /* CONFIG_X86_5LEVEL */ -- cgit v1.1 From 162434e7f58b21f0b6c9cc5fb02222cd7d9064cc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:54 +0300 Subject: x86/mm: Make MAX_PHYSADDR_BITS and MAX_PHYSMEM_BITS dynamic For boot-time switching between paging modes, we need to be able to adjust size of physical address space at runtime. As part of making physical address space size variable, we have to make X86_5LEVEL dependent on SPARSEMEM_VMEMMAP. !SPARSEMEM_VMEMMAP configuration doesn't build with variable MAX_PHYSMEM_BITS. For !SPARSEMEM_VMEMMAP SECTIONS_WIDTH depends on MAX_PHYSMEM_BITS: SECTIONS_WIDTH SECTIONS_SHIFT MAX_PHYSMEM_BITS And SECTIONS_WIDTH is used on pre-processor stage, it doesn't work if it's dyncamic. See include/linux/page-flags-layout.h. Effect on kernel image size: text data bss dec hex filename 8628393 4734340 1368064 14730797 e0c62d vmlinux.before 8628892 4734340 1368064 14731296 e0c820 vmlinux.after Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-8-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 2 +- arch/x86/include/asm/sparsemem.h | 9 ++------- 2 files changed, 3 insertions(+), 8 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 0c48d80..59d971c 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -95,7 +95,7 @@ extern unsigned int ptrs_per_p4d; * range must not overlap with anything except the KASAN shadow area, which * is correct as KASAN disables KASLR. */ -#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +#define MAXMEM (1UL << MAX_PHYSMEM_BITS) #define LDT_PGD_ENTRY_L4 -3UL #define LDT_PGD_ENTRY_L5 -112UL diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 4fc1e9d..4617a2b 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,13 +27,8 @@ # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# ifdef CONFIG_X86_5LEVEL -# define MAX_PHYSADDR_BITS 52 -# define MAX_PHYSMEM_BITS 52 -# else -# define MAX_PHYSADDR_BITS 44 -# define MAX_PHYSMEM_BITS 46 -# endif +# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44) +# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ -- cgit v1.1 From 09e61a779e7f171c50325e6d7108a593afb2e5d4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 14:16:55 +0300 Subject: x86/mm: Make __VIRTUAL_MASK_SHIFT dynamic For boot-time switching between paging modes, we need to be able to adjust virtual mask shifts. The change doesn't affect the kernel image size much: text data bss dec hex filename 8628892 4734340 1368064 14731296 e0c820 vmlinux.before 8628966 4734340 1368064 14731370 e0c86a vmlinux.after Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214111656.88514-9-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d54a3d5..fa7dc7c 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -56,7 +56,7 @@ #define __PHYSICAL_MASK_SHIFT 52 #ifdef CONFIG_X86_5LEVEL -#define __VIRTUAL_MASK_SHIFT 56 +#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47) #else #define __VIRTUAL_MASK_SHIFT 47 #endif -- cgit v1.1 From 4fa5662b6b49611f11856db8be346710217473ef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:36 +0300 Subject: x86/mm: Initialize 'page_offset_base' at boot-time For 4- and 5-level paging we have different 'page_offset_base'. Let's initialize it at boot-time accordingly to machine capability. We also have to split __PAGE_OFFSET_BASE into two constants -- for 4- and 5-level paging. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/page_64_types.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index fa7dc7c..2c5a966 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -37,16 +37,13 @@ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's * what Xen requires. */ -#ifdef CONFIG_X86_5LEVEL -#define __PAGE_OFFSET_BASE _AC(0xff10000000000000, UL) -#else -#define __PAGE_OFFSET_BASE _AC(0xffff880000000000, UL) -#endif +#define __PAGE_OFFSET_BASE_L5 _AC(0xff10000000000000, UL) +#define __PAGE_OFFSET_BASE_L4 _AC(0xffff880000000000, UL) #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT #define __PAGE_OFFSET page_offset_base #else -#define __PAGE_OFFSET __PAGE_OFFSET_BASE +#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define __START_KERNEL_map _AC(0xffffffff80000000, UL) -- cgit v1.1 From a7412546d8cb5ad578805060b4006f2a021b5868 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:37 +0300 Subject: x86/mm: Adjust vmalloc base and size at boot-time vmalloc area has different placement and size depending on paging mode. Let's adjust it during early boot accodring to machine capability. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 59d971c..6863299 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -102,25 +102,29 @@ extern unsigned int ptrs_per_p4d; #define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) +#define __VMALLOC_BASE_L4 0xffffc90000000000 +#define __VMALLOC_BASE_L5 0xffa0000000000000 + +#define VMALLOC_SIZE_TB_L4 32UL +#define VMALLOC_SIZE_TB_L5 12800UL + #ifdef CONFIG_X86_5LEVEL -# define VMALLOC_SIZE_TB _AC(12800, UL) -# define __VMALLOC_BASE _AC(0xffa0000000000000, UL) # define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) #else -# define VMALLOC_SIZE_TB _AC(32, UL) -# define __VMALLOC_BASE _AC(0xffffc90000000000, UL) # define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) #endif #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base +# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) # define VMEMMAP_START vmemmap_base #else -# define VMALLOC_START __VMALLOC_BASE +# define VMALLOC_START __VMALLOC_BASE_L4 +# define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 # define VMEMMAP_START __VMEMMAP_BASE #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ -#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL)) +#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) /* The module sections ends with the start of the fixmap */ -- cgit v1.1 From 9b46a051e43461a9afda2bdd50e0e0ae349341df Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:38 +0300 Subject: x86/mm: Initialize vmemmap_base at boot-time vmemmap area has different placement depending on paging mode. Let's adjust it during early boot accodring to machine capability. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 6863299..68909a6 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -108,11 +108,8 @@ extern unsigned int ptrs_per_p4d; #define VMALLOC_SIZE_TB_L4 32UL #define VMALLOC_SIZE_TB_L5 12800UL -#ifdef CONFIG_X86_5LEVEL -# define __VMEMMAP_BASE _AC(0xffd4000000000000, UL) -#else -# define __VMEMMAP_BASE _AC(0xffffea0000000000, UL) -#endif +#define __VMEMMAP_BASE_L4 0xffffea0000000000 +#define __VMEMMAP_BASE_L5 0xffd4000000000000 #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base @@ -121,7 +118,7 @@ extern unsigned int ptrs_per_p4d; #else # define VMALLOC_START __VMALLOC_BASE_L4 # define VMALLOC_SIZE_TB VMALLOC_SIZE_TB_L4 -# define VMEMMAP_START __VMEMMAP_BASE +# define VMEMMAP_START __VMEMMAP_BASE_L4 #endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */ #define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1) -- cgit v1.1 From 98219dda2ab56ce2a967fdebf81e838d676d9ddc Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:40 +0300 Subject: x86/mm: Fold p4d page table layer at runtime Change page table helpers to fold p4d at runtime. The logic is the same as in . Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-8-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 10 ++++++---- arch/x86/include/asm/pgalloc.h | 5 ++++- arch/x86/include/asm/pgtable.h | 11 ++++++++++- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 892df37..3fbaad2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -569,14 +569,16 @@ static inline p4dval_t p4d_val(p4d_t p4d) static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { - pgdval_t val = native_pgd_val(pgd); - - PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, val); + if (pgtable_l5_enabled) + PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); + else + set_p4d((p4d_t *)(pgdp), (p4d_t) { pgd.pgd }); } static inline void pgd_clear(pgd_t *pgdp) { - set_pgd(pgdp, __pgd(0)); + if (pgtable_l5_enabled) + set_pgd(pgdp, __pgd(0)); } #endif /* CONFIG_PGTABLE_LEVELS == 5 */ diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index aff42e1..263c142 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -167,6 +167,8 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #if CONFIG_PGTABLE_LEVELS > 4 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { + if (!pgtable_l5_enabled) + return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); } @@ -191,7 +193,8 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { - ___p4d_free_tlb(tlb, p4d); + if (pgtable_l5_enabled) + ___p4d_free_tlb(tlb, p4d); } #endif /* CONFIG_PGTABLE_LEVELS > 4 */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 63c2552..c8baa7f 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags; #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) -#define pgd_clear(pgd) native_pgd_clear(pgd) +#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d @@ -859,6 +859,8 @@ static inline unsigned long p4d_index(unsigned long address) #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { + if (!pgtable_l5_enabled) + return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } @@ -876,6 +878,8 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) /* to find an entry in a page-table-directory. */ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { + if (!pgtable_l5_enabled) + return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } @@ -883,6 +887,9 @@ static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; + if (!pgtable_l5_enabled) + return 0; + if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) ignore_flags |= _PAGE_NX; @@ -891,6 +898,8 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { + if (!pgtable_l5_enabled) + return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables -- cgit v1.1 From 91f606a8fa68264224cbc76888fa8649cdbe9990 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:41 +0300 Subject: x86/mm: Replace compile-time checks for 5-level paging with runtime-time checks This patch converts the of CONFIG_X86_5LEVEL check to runtime checks for p4d folding. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-9-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81462e9..81dda8d 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -217,29 +217,26 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { -#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL) - p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd); -#else - *p4dp = p4d; -#endif + pgd_t pgd; + + if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + *p4dp = p4d; + return; + } + + pgd = native_make_pgd(p4d_val(p4d)); + pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd); + *p4dp = native_make_p4d(pgd_val(pgd)); } static inline void native_p4d_clear(p4d_t *p4d) { -#ifdef CONFIG_X86_5LEVEL native_set_p4d(p4d, native_make_p4d(0)); -#else - native_set_p4d(p4d, (p4d_t) { .pgd = native_make_pgd(0)}); -#endif } static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { -#ifdef CONFIG_PAGE_TABLE_ISOLATION *pgdp = pti_set_user_pgd(pgdp, pgd); -#else - *pgdp = pgd; -#endif } static inline void native_pgd_clear(pgd_t *pgd) -- cgit v1.1 From 6657fca06e3ffab8d0b3f9d8b397f5ee498952d7 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 14 Feb 2018 21:25:42 +0300 Subject: x86/mm: Allow to boot without LA57 if CONFIG_X86_5LEVEL=y All pieces of the puzzle are in place and we can now allow to boot with CONFIG_X86_5LEVEL=y on a machine without LA57 support. Kernel will detect that LA57 is missing and fold p4d at runtime. Update the documentation and the Kconfig option description to reflect the change. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180214182542.69302-10-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/required-features.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index fb3a6de..6847d85 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -53,12 +53,6 @@ # define NEED_MOVBE 0 #endif -#ifdef CONFIG_X86_5LEVEL -# define NEED_LA57 (1<<(X86_FEATURE_LA57 & 31)) -#else -# define NEED_LA57 0 -#endif - #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT /* Paravirtualized systems may not have PSE or PGE available */ @@ -104,7 +98,7 @@ #define REQUIRED_MASK13 0 #define REQUIRED_MASK14 0 #define REQUIRED_MASK15 0 -#define REQUIRED_MASK16 (NEED_LA57) +#define REQUIRED_MASK16 0 #define REQUIRED_MASK17 0 #define REQUIRED_MASK18 0 #define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) -- cgit v1.1 From 92e1c5b3f7bf5407cfdbf13613e7101831216dc5 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 16 Feb 2018 14:49:47 +0300 Subject: x86/mm: Redefine some of page table helpers as macros This is preparation for the next patch, which would change pgtable_l5_enabled to be cpu_feature_enabled(X86_FEATURE_LA57). The change makes few helpers in paravirt.h dependent on cpu_feature_enabled() definition from cpufeature.h. And cpufeature.h is dependent on paravirt.h. Let's re-define some of helpers as macros to break this dependency loop. Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180216114948.68868-3-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/paravirt.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 3fbaad2..2c0c8c9 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -567,19 +567,22 @@ static inline p4dval_t p4d_val(p4d_t p4d) return PVOP_CALLEE1(p4dval_t, pv_mmu_ops.p4d_val, p4d.p4d); } -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) { - if (pgtable_l5_enabled) - PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); - else - set_p4d((p4d_t *)(pgdp), (p4d_t) { pgd.pgd }); + PVOP_VCALL2(pv_mmu_ops.set_pgd, pgdp, native_pgd_val(pgd)); } -static inline void pgd_clear(pgd_t *pgdp) -{ - if (pgtable_l5_enabled) - set_pgd(pgdp, __pgd(0)); -} +#define set_pgd(pgdp, pgdval) do { \ + if (pgtable_l5_enabled) \ + __set_pgd(pgdp, pgdval); \ + else \ + set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ +} while (0) + +#define pgd_clear(pgdp) do { \ + if (pgtable_l5_enabled) \ + set_pgd(pgdp, __pgd(0)); \ +} while (0) #endif /* CONFIG_PGTABLE_LEVELS == 5 */ -- cgit v1.1 From 39b9552281abfcdfc54162897018890dafe7ffef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 16 Feb 2018 14:49:48 +0300 Subject: x86/mm: Optimize boot-time paging mode switching cost By this point we have functioning boot-time switching between 4- and 5-level paging mode. But naive approach comes with cost. Numbers below are for kernel build, allmodconfig, 5 times. CONFIG_X86_5LEVEL=n: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17308719.892691 task-clock:u (msec) # 26.772 CPUs utilized ( +- 0.11% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,993,164 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,614,978,867,455 cycles:u # 2.520 GHz ( +- 0.01% ) 39,371,534,575,126 stalled-cycles-frontend:u # 90.27% frontend cycles idle ( +- 0.09% ) 28,363,350,152,428 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,784,066,413 branches:u # 364.948 M/sec ( +- 0.00% ) 250,808,144,781 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 646.531974142 seconds time elapsed ( +- 1.15% ) CONFIG_X86_5LEVEL=y: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17411536.780625 task-clock:u (msec) # 26.426 CPUs utilized ( +- 0.10% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,868,663 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,865,909,056,301 cycles:u # 2.519 GHz ( +- 0.01% ) 39,740,130,365,581 stalled-cycles-frontend:u # 90.59% frontend cycles idle ( +- 0.05% ) 28,363,358,997,959 instructions:u # 0.65 insn per cycle # 1.40 stalled cycles per insn ( +- 0.00% ) 6,316,784,937,460 branches:u # 362.793 M/sec ( +- 0.00% ) 251,531,919,485 branch-misses:u # 3.98% of all branches ( +- 0.00% ) 658.886307752 seconds time elapsed ( +- 0.92% ) The patch tries to fix the performance regression by using cpu_feature_enabled(X86_FEATURE_LA57) instead of pgtable_l5_enabled in all hot code paths. These will statically patch the target code for additional performance. CONFIG_X86_5LEVEL=y + the patch: Performance counter stats for 'sh -c make -j100 -B -k >/dev/null' (5 runs): 17381990.268506 task-clock:u (msec) # 26.907 CPUs utilized ( +- 0.19% ) 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 331,862,625 page-faults:u # 0.019 M/sec ( +- 0.01% ) 43,697,726,320,051 cycles:u # 2.514 GHz ( +- 0.03% ) 39,480,408,690,401 stalled-cycles-frontend:u # 90.35% frontend cycles idle ( +- 0.05% ) 28,363,394,221,388 instructions:u # 0.65 insn per cycle # 1.39 stalled cycles per insn ( +- 0.00% ) 6,316,794,985,573 branches:u # 363.410 M/sec ( +- 0.00% ) 251,013,232,547 branch-misses:u # 3.97% of all branches ( +- 0.01% ) 645.991174661 seconds time elapsed ( +- 1.19% ) Unfortunately, this approach doesn't help with text size: vmlinux.before .text size: 8190319 vmlinux.after .text size: 8200623 The .text section is increased by about 4k. Not sure if we can do anything about this. Signed-off-by: Kirill A. Shuemov Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Borislav Petkov Cc: Dan Williams Cc: Dave Hansen Cc: David Woodhouse Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180216114948.68868-4-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64_types.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 68909a6..d5c21a3 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -21,7 +21,10 @@ typedef unsigned long pgprotval_t; typedef struct { pteval_t pte; } pte_t; #ifdef CONFIG_X86_5LEVEL -extern unsigned int pgtable_l5_enabled; +extern unsigned int __pgtable_l5_enabled; +#ifndef pgtable_l5_enabled +#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57) +#endif #else #define pgtable_l5_enabled 0 #endif -- cgit v1.1 From 038bac2b02989acf1fc938cedcb7944c02672b9f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 19 Feb 2018 11:09:05 +0100 Subject: x86/acpi: Add a new x86_init_acpi structure to x86_init_ops Add a new struct x86_init_acpi to x86_init_ops. For now it contains only one init function to get the RSDP table address. Signed-off-by: Juergen Gross Reviewed-by: Andy Shevchenko Acked-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Cc: Borislav Petkov Cc: Eric Biederman Cc: H. Peter Anvin Cc: Kees Cook Cc: Kirill A. Shutemov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: boris.ostrovsky@oracle.com Cc: lenb@kernel.org Cc: linux-acpi@vger.kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20180219100906.14265-3-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 7 +++++++ arch/x86/include/asm/x86_init.h | 9 +++++++++ 2 files changed, 16 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 1188172..6609dd7 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI_APEI # include @@ -133,6 +134,12 @@ static inline bool acpi_has_cpu_in_madt(void) return !!acpi_lapic; } +#define ACPI_HAVE_ARCH_GET_ROOT_POINTER +static inline u64 acpi_arch_get_root_pointer(void) +{ + return x86_init.acpi.get_root_pointer(); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index fc2f082..2e2c34d 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -131,6 +131,14 @@ struct x86_hyper_init { }; /** + * struct x86_init_acpi - x86 ACPI init functions + * @get_root_pointer: get RSDP address + */ +struct x86_init_acpi { + u64 (*get_root_pointer)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -144,6 +152,7 @@ struct x86_init_ops { struct x86_init_iommu iommu; struct x86_init_pci pci; struct x86_hyper_init hyper; + struct x86_init_acpi acpi; }; /** -- cgit v1.1 From a5b162b2ecb013ed517ab5ce90079117ada743f4 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 11:16:41 +0300 Subject: x86/mm: Do not use paravirtualized calls in native_set_p4d() In 4-level paging mode, native_set_p4d() updates the entry in the top-level page table. With PTI, update to the top-level kernel page table requires update to the userspace copy of the table as well, using pti_set_user_pgd(). native_set_p4d() uses p4d_val() and pgd_val() to convert types between p4d_t and pgd_t. p4d_val() and pgd_val() are paravirtualized and we must not use them in native helpers, as they crash the boot in paravirtualized environments. Replace p4d_val() and pgd_val() with native_p4d_val() and native_pgd_val() in native_set_p4d(). Reported-by: Fengguang Wu Signed-off-by: Kirill A. Shutemov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 91f606a8fa68 ("x86/mm: Replace compile-time checks for 5-level paging with runtime-time checks") Link: http://lkml.kernel.org/r/20180305081641.4290-1-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 81dda8d..163e01a 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -224,9 +224,9 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) return; } - pgd = native_make_pgd(p4d_val(p4d)); + pgd = native_make_pgd(native_p4d_val(p4d)); pgd = pti_set_user_pgd((pgd_t *)p4dp, pgd); - *p4dp = native_make_p4d(pgd_val(pgd)); + *p4dp = native_make_p4d(native_pgd_val(pgd)); } static inline void native_p4d_clear(p4d_t *p4d) -- cgit v1.1 From be7825c19b4866ddc7b1431740b69ede2eeb93c1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:52 +0300 Subject: x86/pconfig: Detect PCONFIG targets Intel PCONFIG targets are enumerated via new CPUID leaf 0x1b. This patch detects all supported targets of PCONFIG and implements helper to check if the target is supported. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-5-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel_pconfig.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 arch/x86/include/asm/intel_pconfig.h (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h new file mode 100644 index 0000000..fb7a37c --- /dev/null +++ b/arch/x86/include/asm/intel_pconfig.h @@ -0,0 +1,15 @@ +#ifndef _ASM_X86_INTEL_PCONFIG_H +#define _ASM_X86_INTEL_PCONFIG_H + +#include +#include + +enum pconfig_target { + INVALID_TARGET = 0, + MKTME_TARGET = 1, + PCONFIG_TARGET_NR +}; + +int pconfig_target_supported(enum pconfig_target target); + +#endif /* _ASM_X86_INTEL_PCONFIG_H */ -- cgit v1.1 From 24c517856af6511be1339dd55edd131160e37aac Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 5 Mar 2018 19:25:53 +0300 Subject: x86/pconfig: Provide defines and helper to run MKTME_KEY_PROG leaf MKTME_KEY_PROG allows to manipulate MKTME keys in the CPU. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Cc: Kai Huang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tom Lendacky Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20180305162610.37510-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel_pconfig.h | 50 ++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h index fb7a37c..3cb002b 100644 --- a/arch/x86/include/asm/intel_pconfig.h +++ b/arch/x86/include/asm/intel_pconfig.h @@ -12,4 +12,54 @@ enum pconfig_target { int pconfig_target_supported(enum pconfig_target target); +enum pconfig_leaf { + MKTME_KEY_PROGRAM = 0, + PCONFIG_LEAF_INVALID, +}; + +#define PCONFIG ".byte 0x0f, 0x01, 0xc5" + +/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */ + +/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */ +#define MKTME_KEYID_SET_KEY_DIRECT 0 +#define MKTME_KEYID_SET_KEY_RANDOM 1 +#define MKTME_KEYID_CLEAR_KEY 2 +#define MKTME_KEYID_NO_ENCRYPT 3 + +/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */ +#define MKTME_AES_XTS_128 (1 << 8) + +/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */ +#define MKTME_PROG_SUCCESS 0 +#define MKTME_INVALID_PROG_CMD 1 +#define MKTME_ENTROPY_ERROR 2 +#define MKTME_INVALID_KEYID 3 +#define MKTME_INVALID_ENC_ALG 4 +#define MKTME_DEVICE_BUSY 5 + +/* Hardware requires the structure to be 256 byte alinged. Otherwise #GP(0). */ +struct mktme_key_program { + u16 keyid; + u32 keyid_ctrl; + u8 __rsvd[58]; + u8 key_field_1[64]; + u8 key_field_2[64]; +} __packed __aligned(256); + +static inline int mktme_key_program(struct mktme_key_program *key_program) +{ + unsigned long rax = MKTME_KEY_PROGRAM; + + if (!pconfig_target_supported(MKTME_TARGET)) + return -ENXIO; + + asm volatile(PCONFIG + : "=a" (rax), "=b" (key_program) + : "0" (rax), "1" (key_program) + : "memory", "cc"); + + return rax; +} + #endif /* _ASM_X86_INTEL_PCONFIG_H */ -- cgit v1.1 From 50beba07a0e42ebd4454adc97515a2a2a969645b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Feb 2018 20:05:04 +0200 Subject: ACPI, x86/boot: Split out acpi_generic_reduce_hw_init() and export This is a preparation patch to allow override the hardware reduced initialization on ACPI enabled platforms. No functional change intended. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Cc: Eric Biederman Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Link: http://lkml.kernel.org/r/20180220180506.65523-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/acpi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 6609dd7..a303d7b 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -140,6 +140,8 @@ static inline u64 acpi_arch_get_root_pointer(void) return x86_init.acpi.get_root_pointer(); } +void acpi_generic_reduced_hw_init(void); + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 @@ -149,6 +151,8 @@ static inline void acpi_noirq_set(void) { } static inline void acpi_disable_pci(void) { } static inline void disable_acpi(void) { } +static inline void acpi_generic_reduced_hw_init(void) { } + #endif /* !CONFIG_ACPI */ #define ARCH_HAS_POWER_INIT 1 -- cgit v1.1 From 81b53e5ff21e09b42525cfa08f2b0af2b8c5f465 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Feb 2018 20:05:05 +0200 Subject: ACPI, x86/boot: Introduce the ->reduced_hw_early_init() ACPI callback Some ACPI hardware reduced platforms need to initialize certain devices defined by the ACPI hardware specification even though in principle those devices should not be present in an ACPI hardware reduced platform. To allow that to happen, make it possible to override the generic x86_init callbacks and provide a custom legacy_pic value, add a new ->reduced_hw_early_init() callback to struct x86_init_acpi and make acpi_reduced_hw_init() use it. Signed-off-by: Andy Shevchenko Reviewed-by: Rafael J. Wysocki Cc: Eric Biederman Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J . Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Link: http://lkml.kernel.org/r/20180220180506.65523-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/x86_init.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/include') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 2e2c34d..5bd45a8 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -133,9 +133,11 @@ struct x86_hyper_init { /** * struct x86_init_acpi - x86 ACPI init functions * @get_root_pointer: get RSDP address + * @reduced_hw_early_init: hardware reduced platform early init */ struct x86_init_acpi { u64 (*get_root_pointer)(void); + void (*reduced_hw_early_init)(void); }; /** -- cgit v1.1