From 99c13b8c8896d7bcb92753bf0c63a8de4326e78d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 4 Jul 2017 19:04:23 -0400 Subject: x86/mm/pat: Don't report PAT on CPUs that don't support it The pat_enabled() logic is broken on CPUs which do not support PAT and where the initialization code fails to call pat_init(). Due to that the enabled flag stays true and pat_enabled() returns true wrongfully. As a consequence the mappings, e.g. for Xorg, are set up with the wrong caching mode and the required MTRR setups are omitted. To cure this the following changes are required: 1) Make pat_enabled() return true only if PAT initialization was invoked and successful. 2) Invoke init_cache_modes() unconditionally in setup_arch() and remove the extra callsites in pat_disable() and the pat disabled code path in pat_init(). Also rename __pat_enabled to pat_disabled to reflect the real purpose of this variable. Fixes: 9cd25aac1f44 ("x86/mm/pat: Emulate PAT when it is disabled") Signed-off-by: Mikulas Patocka Signed-off-by: Thomas Gleixner Cc: Bernhard Held Cc: Denys Vlasenko Cc: Peter Zijlstra Cc: Brian Gerst Cc: "Luis R. Rodriguez" Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Josh Poimboeuf Cc: Andrew Morton Cc: Linus Torvalds Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1707041749300.3456@file01.intranet.prod.int.rdu2.redhat.com --- arch/x86/kernel/setup.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 65622f0..3486d04 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1076,6 +1076,13 @@ void __init setup_arch(char **cmdline_p) max_possible_pfn = max_pfn; /* + * This call is required when the CPU does not support PAT. If + * mtrr_bp_init() invoked it already via pat_init() the call has no + * effect. + */ + init_cache_modes(); + + /* * Define random base addresses for memory sections after max_pfn is * defined and before each memory section base is used. */ -- cgit v1.1 From b7a67e02cd2b0d632114dcfb4bfb9b1d85dee325 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Mon, 3 Jul 2017 01:06:28 +0800 Subject: x86/boot/e820: Avoid overwriting e820_table_firmware The following commit in 2013: 77ea8c948953 ("x86: Reserve setup_data ranges late after parsing memmap cmdline") has fixed the issue of losing setup_data information by deferring the e820_reserve_setup_data() call until the early params have been parsed. But this also introduced a new problem that, during early params parsing, the kexec kernel might fake a mptable and saves it into the e820_table_firmware[] table (without saving the mptable to the e820_table[]), however the subsequent invoking of e820_reserve_setup_data() will overwrite the e820_table_firmware[] according to the e820_table[], thus the fake mptable information is lost. Fix this issue by updating the e820_table_firmware[] according to the setup_data information, but without overwriting it. Signed-off-by: Chen Yu Cc: Dave Young Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Xunlei Pang Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d78a586..c87223e 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -669,6 +669,8 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) __append_e820_table(extmap, entries); e820__update_table(e820_table); + memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); + early_memunmap(sdata, data_len); pr_info("e820: extended physical RAM map:\n"); e820__print_table("extended"); @@ -923,13 +925,13 @@ void __init e820__reserve_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + e820__range_update_firmware(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); pa_data = data->next; early_memunmap(data, sizeof(*data)); } e820__update_table(e820_table); - - memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); + e820__update_table(e820_table_firmware); pr_info("extended physical RAM map:\n"); e820__print_table("reserve setup_data"); -- cgit v1.1 From a09bae0f8aa08d4d76d0ebece26062a49ec51ac9 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Mon, 3 Jul 2017 01:07:12 +0800 Subject: x86/boot/e820: Rename the e820_table_firmware to e820_table_kexec Currently the e820_table_firmware[] table is mainly used by the kexec, and it is not what it's supposed to be - despite its name it might be modified by the kernel. So change its name to e820_table_kexec[]. In the next patch we will introduce the real e820_table_firmware[] table. No functional change. Signed-off-by: Chen Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Xunlei Pang Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 42 +++++++++++++++++++-------------------- arch/x86/kernel/kexec-bzimage64.c | 4 ++-- 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c87223e..5910190 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -22,7 +22,7 @@ /* * We organize the E820 table into two main data structures: * - * - 'e820_table_firmware': the original firmware version passed to us by the + * - 'e820_table_kexec': the original firmware version passed to us by the * bootloader - not modified by the kernel. We use this to: * * - inform the user about the firmware's notion of memory layout @@ -46,10 +46,10 @@ * specific memory layout data during early bootup. */ static struct e820_table e820_table_init __initdata; -static struct e820_table e820_table_firmware_init __initdata; +static struct e820_table e820_table_kexec_init __initdata; struct e820_table *e820_table __refdata = &e820_table_init; -struct e820_table *e820_table_firmware __refdata = &e820_table_firmware_init; +struct e820_table *e820_table_kexec __refdata = &e820_table_kexec_init; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0xaeedbabe; @@ -470,9 +470,9 @@ u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum return __e820__range_update(e820_table, start, size, old_type, new_type); } -static u64 __init e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) +static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) { - return __e820__range_update(e820_table_firmware, start, size, old_type, new_type); + return __e820__range_update(e820_table_kexec, start, size, old_type, new_type); } /* Remove a range of memory from the E820 table: */ @@ -546,9 +546,9 @@ void __init e820__update_table_print(void) e820__print_table("modified"); } -static void __init e820__update_table_firmware(void) +static void __init e820__update_table_kexec(void) { - e820__update_table(e820_table_firmware); + e820__update_table(e820_table_kexec); } #define MAX_GAP_END 0x100000000ull @@ -623,7 +623,7 @@ __init void e820__setup_pci_gap(void) /* * Called late during init, in free_initmem(). * - * Initial e820_table and e820_table_firmware are largish __initdata arrays. + * Initial e820_table and e820_table_kexec are largish __initdata arrays. * * Copy them to a (usually much smaller) dynamically allocated area that is * sized precisely after the number of e820 entries. @@ -643,11 +643,11 @@ __init void e820__reallocate_tables(void) memcpy(n, e820_table, size); e820_table = n; - size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries; + size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries; n = kmalloc(size, GFP_KERNEL); BUG_ON(!n); - memcpy(n, e820_table_firmware, size); - e820_table_firmware = n; + memcpy(n, e820_table_kexec, size); + e820_table_kexec = n; } /* @@ -669,7 +669,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) __append_e820_table(extmap, entries); e820__update_table(e820_table); - memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); + memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec)); early_memunmap(sdata, data_len); pr_info("e820: extended physical RAM map:\n"); @@ -729,7 +729,7 @@ core_initcall(e820__register_nvs_regions); /* * Allocate the requested number of bytes with the requsted alignment * and return (the physical address) to the caller. Also register this - * range in the 'firmware' E820 table as a reserved range. + * range in the 'kexec' E820 table as a reserved range. * * This allows kexec to fake a new mptable, as if it came from the real * system. @@ -740,9 +740,9 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); if (addr) { - e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); - pr_info("e820: update e820_table_firmware for e820__memblock_alloc_reserved()\n"); - e820__update_table_firmware(); + e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); + pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n"); + e820__update_table_kexec(); } return addr; @@ -925,13 +925,13 @@ void __init e820__reserve_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_firmware(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); pa_data = data->next; early_memunmap(data, sizeof(*data)); } e820__update_table(e820_table); - e820__update_table(e820_table_firmware); + e820__update_table(e820_table_kexec); pr_info("extended physical RAM map:\n"); e820__print_table("reserve setup_data"); @@ -1064,8 +1064,8 @@ void __init e820__reserve_resources(void) res++; } - for (i = 0; i < e820_table_firmware->nr_entries; i++) { - struct e820_entry *entry = e820_table_firmware->entries + i; + for (i = 0; i < e820_table_kexec->nr_entries; i++) { + struct e820_entry *entry = e820_table_kexec->entries + i; firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry)); } @@ -1177,7 +1177,7 @@ void __init e820__memory_setup(void) who = x86_init.resources.memory_setup(); - memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); + memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec)); pr_info("e820: BIOS-provided physical RAM map:\n"); e820__print_table(who); diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c index 9d7fd5e..fb095ba 100644 --- a/arch/x86/kernel/kexec-bzimage64.c +++ b/arch/x86/kernel/kexec-bzimage64.c @@ -100,14 +100,14 @@ static int setup_e820_entries(struct boot_params *params) { unsigned int nr_e820_entries; - nr_e820_entries = e820_table_firmware->nr_entries; + nr_e820_entries = e820_table_kexec->nr_entries; /* TODO: Pass entries more than E820_MAX_ENTRIES_ZEROPAGE in bootparams setup data */ if (nr_e820_entries > E820_MAX_ENTRIES_ZEROPAGE) nr_e820_entries = E820_MAX_ENTRIES_ZEROPAGE; params->e820_entries = nr_e820_entries; - memcpy(¶ms->e820_table, &e820_table_firmware->entries, nr_e820_entries*sizeof(struct e820_entry)); + memcpy(¶ms->e820_table, &e820_table_kexec->entries, nr_e820_entries*sizeof(struct e820_entry)); return 0; } -- cgit v1.1 From 12df216c61c89e31e27e74146115a9728880ca6f Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Mon, 3 Jul 2017 01:07:32 +0800 Subject: x86/boot/e820: Introduce the bootloader provided e820_table_firmware[] table Add the real e820_tabel_firmware[] that will not be modified by the kernel or the EFI boot stub under any circumstance. In addition to that modify the code so that e820_table_firmwarep[] is exposed via sysfs to represent the real firmware memory layout, rather than exposing the e820_table_kexec[] table. This fixes a hibernation bug/warning, which uses e820_table_kexec[] to check RAM layout consistency across hibernation/resume: The suspend kernel: [ 0.000000] e820: update [mem 0x76671018-0x76679457] usable ==> usable The resume kernel: [ 0.000000] e820: update [mem 0x7666f018-0x76677457] usable ==> usable ... [ 15.752088] PM: Using 3 thread(s) for decompression. [ 15.752088] PM: Loading and decompressing image data (471870 pages)... [ 15.764971] Hibernate inconsistent memory map detected! [ 15.770833] PM: Image mismatch: architecture specific data Actually it is safe to restore these pages because E820_TYPE_RAM and E820_TYPE_RESERVED_KERN are treated the same during hibernation, so the original e820 table provided by the bootloader is used for hibernation MD5 fingerprint checking. The side effect is that, this newly introduced variable might increase the kernel size at compile time. Suggested-by: Ingo Molnar Signed-off-by: Chen Yu Cc: Dave Young Cc: Len Brown Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: Xunlei Pang Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 5910190..532da61 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -20,10 +20,12 @@ #include /* - * We organize the E820 table into two main data structures: + * We organize the E820 table into three main data structures: * - * - 'e820_table_kexec': the original firmware version passed to us by the - * bootloader - not modified by the kernel. We use this to: + * - 'e820_table_firmware': the original firmware version passed to us by the + * bootloader - not modified by the kernel. It is composed of two parts: + * the first 128 E820 memory entries in boot_params.e820_table and the remaining + * (if any) entries of the SETUP_E820_EXT nodes. We use this to: * * - inform the user about the firmware's notion of memory layout * via /sys/firmware/memmap @@ -31,6 +33,14 @@ * - the hibernation code uses it to generate a kernel-independent MD5 * fingerprint of the physical memory layout of a system. * + * - 'e820_table_kexec': a slightly modified (by the kernel) firmware version + * passed to us by the bootloader - the major difference between + * e820_table_firmware[] and this one is that, the latter marks the setup_data + * list created by the EFI boot stub as reserved, so that kexec can reuse the + * setup_data information in the second kernel. Besides, e820_table_kexec[] + * might also be modified by the kexec itself to fake a mptable. + * We use this to: + * * - kexec, which is a bootloader in disguise, uses the original E820 * layout to pass to the kexec-ed kernel. This way the original kernel * can have a restricted E820 map while the kexec()-ed kexec-kernel @@ -47,9 +57,11 @@ */ static struct e820_table e820_table_init __initdata; static struct e820_table e820_table_kexec_init __initdata; +static struct e820_table e820_table_firmware_init __initdata; struct e820_table *e820_table __refdata = &e820_table_init; struct e820_table *e820_table_kexec __refdata = &e820_table_kexec_init; +struct e820_table *e820_table_firmware __refdata = &e820_table_firmware_init; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0xaeedbabe; @@ -648,6 +660,12 @@ __init void e820__reallocate_tables(void) BUG_ON(!n); memcpy(n, e820_table_kexec, size); e820_table_kexec = n; + + size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries; + n = kmalloc(size, GFP_KERNEL); + BUG_ON(!n); + memcpy(n, e820_table_firmware, size); + e820_table_firmware = n; } /* @@ -670,6 +688,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) e820__update_table(e820_table); memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec)); + memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); early_memunmap(sdata, data_len); pr_info("e820: extended physical RAM map:\n"); @@ -1064,8 +1083,9 @@ void __init e820__reserve_resources(void) res++; } - for (i = 0; i < e820_table_kexec->nr_entries; i++) { - struct e820_entry *entry = e820_table_kexec->entries + i; + /* Expose the bootloader-provided memory layout to the sysfs. */ + for (i = 0; i < e820_table_firmware->nr_entries; i++) { + struct e820_entry *entry = e820_table_firmware->entries + i; firmware_map_add_early(entry->addr, entry->addr + entry->size, e820_type_to_string(entry)); } @@ -1178,6 +1198,7 @@ void __init e820__memory_setup(void) who = x86_init.resources.memory_setup(); memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec)); + memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); pr_info("e820: BIOS-provided physical RAM map:\n"); e820__print_table(who); -- cgit v1.1