summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorSteve French <sfrench@us.ibm.com>2008-02-15 21:06:08 +0000
committerSteve French <sfrench@us.ibm.com>2008-02-15 21:06:08 +0000
commit0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41 (patch)
treeb80b1d344ec24cad28b057ef803cebac9434be01 /arch/x86
parent70eff55d2d979cca700aa6906494f0c474f3f7ff (diff)
parent101142c37be8e5af9b847860219217e6b958c739 (diff)
downloadop-kernel-dev-0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41.zip
op-kernel-dev-0a3abcf75bf391fec4e32356ab5ddb8f5d2e6b41.tar.gz
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig23
-rw-r--r--arch/x86/Kconfig.debug10
-rw-r--r--arch/x86/Makefile4
-rw-r--r--arch/x86/boot/.gitignore2
-rw-r--r--arch/x86/boot/printf.c24
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/ia32/ia32_aout.c3
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/acpi/boot.c47
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/acpi/processor.c6
-rw-r--r--arch/x86/kernel/cpu/common.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/Kconfig4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c61
-rw-r--r--arch/x86/kernel/cpu/cpufreq/gx-suspmod.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k7.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c10
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.h2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-lib.c5
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c19
-rw-r--r--arch/x86/kernel/efi.c14
-rw-r--r--arch/x86/kernel/efi_64.c32
-rw-r--r--arch/x86/kernel/entry_32.S15
-rw-r--r--arch/x86/kernel/entry_64.S40
-rw-r--r--arch/x86/kernel/geode_32.c5
-rw-r--r--arch/x86/kernel/head_32.S151
-rw-r--r--arch/x86/kernel/head_64.S15
-rw-r--r--arch/x86/kernel/i8253.c2
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c5
-rw-r--r--arch/x86/kernel/mfgpt_32.c123
-rw-r--r--arch/x86/kernel/mpparse_32.c6
-rw-r--r--arch/x86/kernel/pci-gart_64.c9
-rw-r--r--arch/x86/kernel/process_32.c50
-rw-r--r--arch/x86/kernel/process_64.c1
-rw-r--r--arch/x86/kernel/ptrace.c25
-rw-r--r--arch/x86/kernel/quirks.c35
-rw-r--r--arch/x86/kernel/reboot.c46
-rw-r--r--arch/x86/kernel/setup_32.c22
-rw-r--r--arch/x86/kernel/setup_64.c38
-rw-r--r--arch/x86/kernel/smpboot_32.c2
-rw-r--r--arch/x86/kernel/srat_32.c2
-rw-r--r--arch/x86/kernel/test_nx.c2
-rw-r--r--arch/x86/kernel/test_rodata.c2
-rw-r--r--arch/x86/kernel/time_64.c2
-rw-r--r--arch/x86/kernel/topology.c2
-rw-r--r--arch/x86/kernel/traps_32.c15
-rw-r--r--arch/x86/kernel/traps_64.c4
-rw-r--r--arch/x86/lib/delay_32.c4
-rw-r--r--arch/x86/lib/delay_64.c4
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c2
-rw-r--r--arch/x86/mm/discontig_32.c3
-rw-r--r--arch/x86/mm/fault.c30
-rw-r--r--arch/x86/mm/init_32.c75
-rw-r--r--arch/x86/mm/init_64.c22
-rw-r--r--arch/x86/mm/ioremap.c57
-rw-r--r--arch/x86/mm/numa_64.c5
-rw-r--r--arch/x86/mm/pageattr-test.c72
-rw-r--r--arch/x86/mm/pageattr.c175
-rw-r--r--arch/x86/mm/pgtable_32.c5
-rw-r--r--arch/x86/mm/srat_64.c3
-rw-r--r--arch/x86/pci/common.c25
-rw-r--r--arch/x86/pci/direct.c4
-rw-r--r--arch/x86/pci/fixup.c6
-rw-r--r--arch/x86/pci/legacy.c2
-rw-r--r--arch/x86/pci/mmconfig-shared.c41
-rw-r--r--arch/x86/pci/mmconfig_32.c20
-rw-r--r--arch/x86/pci/mmconfig_64.c18
-rw-r--r--arch/x86/pci/pci.h22
-rw-r--r--arch/x86/pci/visws.c3
-rw-r--r--arch/x86/power/Makefile4
-rw-r--r--arch/x86/power/cpu_32.c (renamed from arch/x86/power/cpu.c)2
-rw-r--r--arch/x86/power/cpu_64.c (renamed from arch/x86/kernel/suspend_64.c)160
-rw-r--r--arch/x86/power/hibernate_32.c (renamed from arch/x86/power/suspend.c)6
-rw-r--r--arch/x86/power/hibernate_64.c169
-rw-r--r--arch/x86/power/hibernate_asm_32.S (renamed from arch/x86/power/swsusp.S)3
-rw-r--r--arch/x86/power/hibernate_asm_64.S (renamed from arch/x86/kernel/suspend_asm_64.S)9
-rw-r--r--arch/x86/vdso/Makefile22
-rw-r--r--arch/x86/xen/enlighten.c4
-rw-r--r--arch/x86/xen/mmu.c6
-rw-r--r--arch/x86/xen/time.c10
84 files changed, 1078 insertions, 820 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4348211..3be2305 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -18,8 +18,11 @@ config X86_64
### Arch settings
config X86
def_bool y
+ select HAVE_IDE
select HAVE_OPROFILE
select HAVE_KPROBES
+ select HAVE_KVM
+
config GENERIC_LOCKBREAK
def_bool n
@@ -52,6 +55,10 @@ config HAVE_LATENCYTOP_SUPPORT
config SEMAPHORE_SLEEPERS
def_bool y
+config FAST_CMPXCHG_LOCAL
+ bool
+ default y
+
config MMU
def_bool y
@@ -98,6 +105,9 @@ config ARCH_HAS_ILOG2_U32
config ARCH_HAS_ILOG2_U64
def_bool n
+config ARCH_HAS_CPU_IDLE_WAIT
+ def_bool y
+
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -105,11 +115,12 @@ config GENERIC_TIME_VSYSCALL
bool
default X86_64
+config ARCH_HAS_CPU_RELAX
+ def_bool y
+
config HAVE_SETUP_PER_CPU_AREA
def_bool X86_64
-select HAVE_KVM
-
config ARCH_HIBERNATION_POSSIBLE
def_bool y
depends on !SMP || !X86_VOYAGER
@@ -129,6 +140,9 @@ config AUDIT_ARCH
bool
default X86_64
+config ARCH_SUPPORTS_AOUT
+ def_bool y
+
# Use the generic interrupt handling code in kernel/irq/:
config GENERIC_HARDIRQS
bool
@@ -415,7 +429,7 @@ config HPET_TIMER
config HPET_EMULATE_RTC
def_bool y
- depends on HPET_TIMER && (RTC=y || RTC=m)
+ depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
# Mark as embedded because too many people got it wrong.
# The code disables itself when not needed.
@@ -631,7 +645,6 @@ config TOSHIBA
config I8K
tristate "Dell laptop support"
- depends on X86_32
---help---
This adds a driver to safely access the System Management Mode
of the CPU on the Dell Inspiron 8000. The System Management Mode
@@ -1571,7 +1584,7 @@ config IA32_EMULATION
config IA32_AOUT
tristate "IA32 a.out support"
- depends on IA32_EMULATION
+ depends on IA32_EMULATION && ARCH_SUPPORTS_AOUT
help
Support old a.out binaries in the 32bit emulation.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 2e1e3af..864affc 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -34,13 +34,9 @@ config DEBUG_STACK_USAGE
This option will slow down process creation somewhat.
-comment "Page alloc debug is incompatible with Software Suspend on i386"
- depends on DEBUG_KERNEL && HIBERNATION
- depends on X86_32
-
config DEBUG_PAGEALLOC
bool "Debug page memory allocations"
- depends on DEBUG_KERNEL && X86_32
+ depends on DEBUG_KERNEL
help
Unmap pages from the kernel linear mapping after free_pages().
This results in a large slowdown, but helps to find certain types
@@ -220,9 +216,9 @@ config DEBUG_BOOT_PARAMS
This option will cause struct boot_params to be exported via debugfs.
config CPA_DEBUG
- bool "CPA self test code"
+ bool "CPA self-test code"
depends on DEBUG_KERNEL
help
- Do change_page_attr self tests at boot.
+ Do change_page_attr() self-tests every 30 seconds.
endmenu
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 364865b..204af43 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -191,8 +191,10 @@ drivers-$(CONFIG_PCI) += arch/x86/pci/
# must be linked after kernel/
drivers-$(CONFIG_OPROFILE) += arch/x86/oprofile/
-ifeq ($(CONFIG_X86_32),y)
+# suspend and hibernation support
drivers-$(CONFIG_PM) += arch/x86/power/
+
+ifeq ($(CONFIG_X86_32),y)
drivers-$(CONFIG_FB) += arch/x86/video/
endif
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 1846514..b1bdc4c 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -3,3 +3,5 @@ bzImage
setup
setup.bin
setup.elf
+cpustr.h
+mkcpustr
diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c
index 1a09f93..7e7e890 100644
--- a/arch/x86/boot/printf.c
+++ b/arch/x86/boot/printf.c
@@ -33,8 +33,8 @@ static int skip_atoi(const char **s)
#define PLUS 4 /* show plus */
#define SPACE 8 /* space if plus */
#define LEFT 16 /* left justified */
-#define SPECIAL 32 /* 0x */
-#define LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
#define do_div(n,base) ({ \
int __res; \
@@ -45,12 +45,16 @@ __res; })
static char *number(char *str, long num, int base, int size, int precision,
int type)
{
- char c, sign, tmp[66];
- const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz";
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char c, sign, locase;
int i;
- if (type & LARGE)
- digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ /* locase = 0 or 0x20. ORing digits or letters with 'locase'
+ * produces same digits or (maybe lowercased) letters */
+ locase = (type & SMALL);
if (type & LEFT)
type &= ~ZEROPAD;
if (base < 2 || base > 36)
@@ -81,7 +85,7 @@ static char *number(char *str, long num, int base, int size, int precision,
tmp[i++] = '0';
else
while (num != 0)
- tmp[i++] = digits[do_div(num, base)];
+ tmp[i++] = (digits[do_div(num, base)] | locase);
if (i > precision)
precision = i;
size -= precision;
@@ -95,7 +99,7 @@ static char *number(char *str, long num, int base, int size, int precision,
*str++ = '0';
else if (base == 16) {
*str++ = '0';
- *str++ = digits[33];
+ *str++ = ('X' | locase);
}
}
if (!(type & LEFT))
@@ -244,9 +248,9 @@ int vsprintf(char *buf, const char *fmt, va_list args)
base = 8;
break;
- case 'X':
- flags |= LARGE;
case 'x':
+ flags |= SMALL;
+ case 'X':
base = 16;
break;
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 77562e7..3df340b 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1421,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 9e2b0ef..eef98cb 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1346,7 +1346,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
-# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index e4c1207..58cccb6 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -172,8 +172,7 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
has_dumped = 1;
current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(current->comm));
- dump.u_ar0 = (u32)(((unsigned long)(&dump.regs)) -
- ((unsigned long)(&dump)));
+ dump.u_ar0 = offsetof(struct user32, regs);
dump.signal = signr;
dump_thread32(regs, &dump);
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 21dc1a0..76ec0f8 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -84,8 +84,6 @@ ifeq ($(CONFIG_X86_64),y)
obj-y += genapic_64.o genapic_flat_64.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
obj-$(CONFIG_AUDIT) += audit_64.o
- obj-$(CONFIG_PM) += suspend_64.o
- obj-$(CONFIG_HIBERNATION) += suspend_asm_64.o
obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d2a5843..680b730 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -78,7 +78,6 @@ int acpi_ht __initdata = 1; /* enable HT */
int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
-EXPORT_SYMBOL(acpi_strict);
u8 acpi_sci_flags __initdata;
int acpi_sci_override_gsi __initdata;
@@ -106,7 +105,7 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
#ifdef CONFIG_X86_64
/* rely on all ACPI tables being in the direct mapping */
-char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
+char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
{
if (!phys_addr || !size)
return NULL;
@@ -131,7 +130,7 @@ char *__acpi_map_table(unsigned long phys_addr, unsigned long size)
* from the fixed base. That's why we start at FIX_IO_APIC_BASE_END and
* count idx down while incrementing the phys address.
*/
-char *__acpi_map_table(unsigned long phys, unsigned long size)
+char *__init __acpi_map_table(unsigned long phys, unsigned long size)
{
unsigned long base, offset, mapped_size;
int idx;
@@ -490,8 +489,6 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
return irq;
}
-EXPORT_SYMBOL(acpi_register_gsi);
-
/*
* ACPI based hotplug support for CPU
*/
@@ -587,25 +584,6 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
EXPORT_SYMBOL(acpi_unregister_ioapic);
-static unsigned long __init
-acpi_scan_rsdp(unsigned long start, unsigned long length)
-{
- unsigned long offset = 0;
- unsigned long sig_len = sizeof("RSD PTR ") - 1;
-
- /*
- * Scan all 16-byte boundaries of the physical memory region for the
- * RSDP signature.
- */
- for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len))
- continue;
- return (start + offset);
- }
-
- return 0;
-}
-
static int __init acpi_parse_sbf(struct acpi_table_header *table)
{
struct acpi_table_boot *sb;
@@ -748,27 +726,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
return 0;
}
-unsigned long __init acpi_find_rsdp(void)
-{
- unsigned long rsdp_phys = 0;
-
- if (efi_enabled) {
- if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
- return efi.acpi20;
- else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
- return efi.acpi;
- }
- /*
- * Scan memory looking for the RSDP signature. First search EBDA (low
- * memory) paragraphs and then search upper memory (E0000-FFFFF).
- */
- rsdp_phys = acpi_scan_rsdp(0, 0x400);
- if (!rsdp_phys)
- rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
-
- return rsdp_phys;
-}
-
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Parse LAPIC entries in MADT
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 10b6717..8ca3557 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -126,6 +126,8 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d "
"state\n", cx->type);
}
+ snprintf(cx->desc, ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x",
+ cx->address);
out:
set_cpus_allowed(current, saved_mask);
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index a25db51..324eb0c 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -46,6 +46,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
buf[1] = 1;
buf[2] = ACPI_PDC_C_CAPABILITY_SMP;
+ /*
+ * The default of PDC_SMP_T_SWCOORD bit is set for intel x86 cpu so
+ * that OSPM is capable of native ACPI throttling software
+ * coordination using BIOS supplied _TSD info.
+ */
+ buf[2] |= ACPI_PDC_SMP_T_SWCOORD;
if (cpu_has(c, X86_FEATURE_EST))
buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d9313d9..f86a3c4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -637,7 +637,7 @@ void __init early_cpu_init(void)
}
/* Make sure %fs is initialized properly in idle threads */
-struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
+struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig
index 151eda0..cb7a571 100644
--- a/arch/x86/kernel/cpu/cpufreq/Kconfig
+++ b/arch/x86/kernel/cpu/cpufreq/Kconfig
@@ -29,7 +29,7 @@ config X86_ACPI_CPUFREQ
config ELAN_CPUFREQ
tristate "AMD Elan SC400 and SC410"
select CPU_FREQ_TABLE
- depends on X86_32 && X86_ELAN
+ depends on X86_ELAN
---help---
This adds the CPUFreq driver for AMD Elan SC400 and SC410
processors.
@@ -45,7 +45,7 @@ config ELAN_CPUFREQ
config SC520_CPUFREQ
tristate "AMD Elan SC520"
select CPU_FREQ_TABLE
- depends on X86_32 && X86_ELAN
+ depends on X86_ELAN
---help---
This adds the CPUFreq driver for AMD Elan SC520 processor.
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index 326a4c8..39f8cb1 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -23,6 +23,7 @@
#define EPS_BRAND_C7 1
#define EPS_BRAND_EDEN 2
#define EPS_BRAND_C3 3
+#define EPS_BRAND_C7D 4
struct eps_cpu_data {
u32 fsb;
@@ -54,6 +55,7 @@ static int eps_set_state(struct eps_cpu_data *centaur,
{
struct cpufreq_freqs freqs;
u32 lo, hi;
+ u8 current_multiplier, current_voltage;
int err = 0;
int i;
@@ -93,6 +95,15 @@ postchange:
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
freqs.new = centaur->fsb * ((lo >> 8) & 0xff);
+ /* Print voltage and multiplier */
+ rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
+ current_voltage = lo & 0xff;
+ printk(KERN_INFO "eps: Current voltage = %dmV\n",
+ current_voltage * 16 + 700);
+ current_multiplier = (lo >> 8) & 0xff;
+ printk(KERN_INFO "eps: Current multiplier = %d\n",
+ current_multiplier);
+
cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE);
return err;
}
@@ -141,9 +152,10 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
u8 current_multiplier, current_voltage;
u8 max_multiplier, max_voltage;
u8 min_multiplier, min_voltage;
- u8 brand;
+ u8 brand = 0;
u32 fsb;
struct eps_cpu_data *centaur;
+ struct cpuinfo_x86 *c = &cpu_data(0);
struct cpufreq_frequency_table *f_table;
int k, step, voltage;
int ret;
@@ -153,21 +165,36 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
return -ENODEV;
/* Check brand */
- printk("eps: Detected VIA ");
- rdmsr(0x1153, lo, hi);
- brand = (((lo >> 2) ^ lo) >> 18) & 3;
+ printk(KERN_INFO "eps: Detected VIA ");
+
+ switch (c->x86_model) {
+ case 10:
+ rdmsr(0x1153, lo, hi);
+ brand = (((lo >> 2) ^ lo) >> 18) & 3;
+ printk(KERN_CONT "Model A ");
+ break;
+ case 13:
+ rdmsr(0x1154, lo, hi);
+ brand = (((lo >> 4) ^ (lo >> 2))) & 0x000000ff;
+ printk(KERN_CONT "Model D ");
+ break;
+ }
+
switch(brand) {
case EPS_BRAND_C7M:
- printk("C7-M\n");
+ printk(KERN_CONT "C7-M\n");
break;
case EPS_BRAND_C7:
- printk("C7\n");
+ printk(KERN_CONT "C7\n");
break;
case EPS_BRAND_EDEN:
- printk("Eden\n");
+ printk(KERN_CONT "Eden\n");
+ break;
+ case EPS_BRAND_C7D:
+ printk(KERN_CONT "C7-D\n");
break;
case EPS_BRAND_C3:
- printk("C3\n");
+ printk(KERN_CONT "C3\n");
return -ENODEV;
break;
}
@@ -179,7 +206,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
/* Can be locked at 0 */
rdmsrl(MSR_IA32_MISC_ENABLE, val);
if (!(val & 1 << 16)) {
- printk("eps: Can't enable Enhanced PowerSaver\n");
+ printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
return -ENODEV;
}
}
@@ -187,19 +214,19 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
/* Print voltage and multiplier */
rdmsr(MSR_IA32_PERF_STATUS, lo, hi);
current_voltage = lo & 0xff;
- printk("eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Current voltage = %dmV\n", current_voltage * 16 + 700);
current_multiplier = (lo >> 8) & 0xff;
- printk("eps: Current multiplier = %d\n", current_multiplier);
+ printk(KERN_INFO "eps: Current multiplier = %d\n", current_multiplier);
/* Print limits */
max_voltage = hi & 0xff;
- printk("eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Highest voltage = %dmV\n", max_voltage * 16 + 700);
max_multiplier = (hi >> 8) & 0xff;
- printk("eps: Highest multiplier = %d\n", max_multiplier);
+ printk(KERN_INFO "eps: Highest multiplier = %d\n", max_multiplier);
min_voltage = (hi >> 16) & 0xff;
- printk("eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
+ printk(KERN_INFO "eps: Lowest voltage = %dmV\n", min_voltage * 16 + 700);
min_multiplier = (hi >> 24) & 0xff;
- printk("eps: Lowest multiplier = %d\n", min_multiplier);
+ printk(KERN_INFO "eps: Lowest multiplier = %d\n", min_multiplier);
/* Sanity checks */
if (current_multiplier == 0 || max_multiplier == 0
@@ -208,7 +235,7 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
if (current_multiplier > max_multiplier
|| max_multiplier <= min_multiplier)
return -EINVAL;
- if (current_voltage > 0x1c || max_voltage > 0x1c)
+ if (current_voltage > 0x1f || max_voltage > 0x1f)
return -EINVAL;
if (max_voltage < min_voltage)
return -EINVAL;
@@ -310,7 +337,7 @@ static int __init eps_init(void)
/* This driver will work only on Centaur C7 processors with
* Enhanced SpeedStep/PowerSaver registers */
if (c->x86_vendor != X86_VENDOR_CENTAUR
- || c->x86 != 6 || c->x86_model != 10)
+ || c->x86 != 6 || c->x86_model < 10)
return -ENODEV;
if (!cpu_has(c, X86_FEATURE_EST))
return -ENODEV;
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
index 2ed7db2..9d9eae8 100644
--- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c
@@ -181,8 +181,8 @@ static __init struct pci_dev *gx_detect_chipset(void)
struct pci_dev *gx_pci = NULL;
/* check if CPU is a MediaGX or a Geode. */
- if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
- (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
+ if ((boot_cpu_data.x86_vendor != X86_VENDOR_NSC) &&
+ (boot_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) {
dprintk("error: no MediaGX/Geode processor found!\n");
return NULL;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index b5a9863..0a61159 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -460,7 +460,7 @@ static int powernow_decode_bios (int maxfid, int startvid)
latency = psb->settlingtime;
if (latency < 100) {
- printk (KERN_INFO PFX "BIOS set settling time to %d microseconds."
+ printk(KERN_INFO PFX "BIOS set settling time to %d microseconds. "
"Should be at least 100. Correcting.\n", latency);
latency = 100;
}
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index a052273..c99d59d 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -578,10 +578,9 @@ static void print_basics(struct powernow_k8_data *data)
for (j = 0; j < data->numps; j++) {
if (data->powernow_table[j].frequency != CPUFREQ_ENTRY_INVALID) {
if (cpu_family == CPU_HW_PSTATE) {
- printk(KERN_INFO PFX " %d : fid 0x%x did 0x%x (%d MHz)\n",
+ printk(KERN_INFO PFX " %d : pstate %d (%d MHz)\n",
j,
- (data->powernow_table[j].index & 0xff00) >> 8,
- (data->powernow_table[j].index & 0xff0000) >> 16,
+ data->powernow_table[j].index,
data->powernow_table[j].frequency/1000);
} else {
printk(KERN_INFO PFX " %d : fid 0x%x (%d MHz), vid 0x%x\n",
@@ -827,7 +826,6 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf
for (i = 0; i < data->acpi_data.state_count; i++) {
u32 index;
- u32 hi = 0, lo = 0;
index = data->acpi_data.states[i].control & HW_PSTATE_MASK;
if (index > data->max_hw_pstate) {
@@ -1236,8 +1234,10 @@ static unsigned int powernowk8_get (unsigned int cpu)
struct powernow_k8_data *data;
cpumask_t oldmask = current->cpus_allowed;
unsigned int khz = 0;
+ unsigned int first;
- data = per_cpu(powernow_data, first_cpu(per_cpu(cpu_core_map, cpu)));
+ first = first_cpu(per_cpu(cpu_core_map, cpu));
+ data = per_cpu(powernow_data, first);
if (!data)
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index afd2b52..ab48cfe 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -47,7 +47,7 @@ struct powernow_k8_data {
#define CPUID_XFAM 0x0ff00000 /* extended family */
#define CPUID_XFAM_K8 0
#define CPUID_XMOD 0x000f0000 /* extended model */
-#define CPUID_XMOD_REV_MASK 0x00080000
+#define CPUID_XMOD_REV_MASK 0x000c0000
#define CPUID_XFAM_10H 0x00100000 /* family 0x10 */
#define CPUID_USE_XFAM_XMOD 0x00000f00
#define CPUID_GET_MAX_CAPABILITIES 0x80000000
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
index 76c3ab0..98d4fdb 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c
@@ -189,10 +189,7 @@ static unsigned int pentium4_get_frequency(void)
printk(KERN_DEBUG "speedstep-lib: couldn't detect FSB speed. Please send an e-mail to <linux@brodo.de>\n");
/* Multiplier. */
- if (c->x86_model < 2)
- mult = msr_lo >> 27;
- else
- mult = msr_lo >> 24;
+ mult = msr_lo >> 24;
dprintk("P4 - FSB %u kHz; Multiplier %u; Speed %u kHz\n", fsb, mult, (fsb * mult));
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 404a6a2..7139b02 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -83,8 +83,6 @@ static char cyrix_model_mult2[] __cpuinitdata = "12233445";
* FIXME: our newer udelay uses the tsc. We don't need to frob with SLOP
*/
-extern void calibrate_delay(void) __init;
-
static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
{
unsigned long flags;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 24885be..9b7e01d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -118,7 +118,7 @@ static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
{
- return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
+ sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
}
/* Mutex protecting device creation against CPU hotplug */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 1e27b69..b6e136f 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -659,7 +659,7 @@ static __init int amd_special_default_mtrr(void)
*/
int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
{
- unsigned long i, base, size, highest_addr = 0, def, dummy;
+ unsigned long i, base, size, highest_pfn = 0, def, dummy;
mtrr_type type;
u64 trim_start, trim_size;
@@ -682,28 +682,27 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
mtrr_if->get(i, &base, &size, &type);
if (type != MTRR_TYPE_WRBACK)
continue;
- base <<= PAGE_SHIFT;
- size <<= PAGE_SHIFT;
- if (highest_addr < base + size)
- highest_addr = base + size;
+ if (highest_pfn < base + size)
+ highest_pfn = base + size;
}
/* kvm/qemu doesn't have mtrr set right, don't trim them all */
- if (!highest_addr) {
+ if (!highest_pfn) {
printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n");
WARN_ON(1);
return 0;
}
- if ((highest_addr >> PAGE_SHIFT) < end_pfn) {
+ if (highest_pfn < end_pfn) {
printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
- " all of memory, losing %LdMB of RAM.\n",
- (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20);
+ " all of memory, losing %luMB of RAM.\n",
+ (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
WARN_ON(1);
printk(KERN_INFO "update e820 for mtrr\n");
- trim_start = highest_addr;
+ trim_start = highest_pfn;
+ trim_start <<= PAGE_SHIFT;
trim_size = end_pfn;
trim_size <<= PAGE_SHIFT;
trim_size -= trim_start;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 32dd62b..0c0eeb1 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -384,9 +384,6 @@ static void __init runtime_code_page_mkexec(void)
efi_memory_desc_t *md;
void *p;
- if (!(__supported_pte_mask & _PAGE_NX))
- return;
-
/* Make EFI runtime service code area executable */
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
@@ -394,7 +391,7 @@ static void __init runtime_code_page_mkexec(void)
if (md->type != EFI_RUNTIME_SERVICES_CODE)
continue;
- set_memory_x(md->virt_addr, md->num_pages << EFI_PAGE_SHIFT);
+ set_memory_x(md->virt_addr, md->num_pages);
}
}
@@ -428,9 +425,6 @@ void __init efi_enter_virtual_mode(void)
else
va = efi_ioremap(md->phys_addr, size);
- if (md->attribute & EFI_MEMORY_WB)
- set_memory_uc(md->virt_addr, size);
-
md->virt_addr = (u64) (unsigned long) va;
if (!va) {
@@ -439,6 +433,9 @@ void __init efi_enter_virtual_mode(void)
continue;
}
+ if (!(md->attribute & EFI_MEMORY_WB))
+ set_memory_uc(md->virt_addr, md->num_pages);
+
systab = (u64) (unsigned long) efi_phys.systab;
if (md->phys_addr <= systab && systab < end) {
systab += md->virt_addr - md->phys_addr;
@@ -476,7 +473,8 @@ void __init efi_enter_virtual_mode(void)
efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count;
efi.reset_system = virt_efi_reset_system;
efi.set_virtual_address_map = virt_efi_set_virtual_address_map;
- runtime_code_page_mkexec();
+ if (__supported_pte_mask & _PAGE_NX)
+ runtime_code_page_mkexec();
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
}
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 09d5c23..d143a1e 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -35,6 +35,7 @@
#include <asm/tlbflush.h>
#include <asm/proto.h>
#include <asm/efi.h>
+#include <asm/cacheflush.h>
static pgd_t save_pgd __initdata;
static unsigned long efi_flags __initdata;
@@ -43,22 +44,15 @@ static void __init early_mapping_set_exec(unsigned long start,
unsigned long end,
int executable)
{
- pte_t *kpte;
- unsigned int level;
-
- while (start < end) {
- kpte = lookup_address((unsigned long)__va(start), &level);
- BUG_ON(!kpte);
- if (executable)
- set_pte(kpte, pte_mkexec(*kpte));
- else
- set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \
- __supported_pte_mask));
- if (level == PG_LEVEL_4K)
- start = (start + PAGE_SIZE) & PAGE_MASK;
- else
- start = (start + PMD_SIZE) & PMD_MASK;
- }
+ unsigned long num_pages;
+
+ start &= PMD_MASK;
+ end = (end + PMD_SIZE - 1) & PMD_MASK;
+ num_pages = (end - start) >> PAGE_SHIFT;
+ if (executable)
+ set_memory_x((unsigned long)__va(start), num_pages);
+ else
+ set_memory_nx((unsigned long)__va(start), num_pages);
}
static void __init early_runtime_code_mapping_set_exec(int executable)
@@ -74,7 +68,7 @@ static void __init early_runtime_code_mapping_set_exec(int executable)
md = p;
if (md->type == EFI_RUNTIME_SERVICES_CODE) {
unsigned long end;
- end = md->phys_addr + (md->num_pages << PAGE_SHIFT);
+ end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
early_mapping_set_exec(md->phys_addr, end, executable);
}
}
@@ -84,8 +78,8 @@ void __init efi_call_phys_prelog(void)
{
unsigned long vaddress;
- local_irq_save(efi_flags);
early_runtime_code_mapping_set_exec(1);
+ local_irq_save(efi_flags);
vaddress = (unsigned long)__va(0x0UL);
save_pgd = *pgd_offset_k(0x0UL);
set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
@@ -98,9 +92,9 @@ void __init efi_call_phys_epilog(void)
* After the lock is released, the original page table is restored.
*/
set_pgd(pgd_offset_k(0x0UL), save_pgd);
- early_runtime_code_mapping_set_exec(0);
__flush_tlb_all();
local_irq_restore(efi_flags);
+ early_runtime_code_mapping_set_exec(0);
}
void __init efi_reserve_bootmem(void)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index be5c31d..824e21b 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,8 @@ restore_nocheck_notrace:
RESTORE_REGS
addl $4, %esp # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
-1: INTERRUPT_RETURN
+ENTRY(irq_return)
+ INTERRUPT_RETURN
.section .fixup,"ax"
iret_exc:
pushl $0 # no error code
@@ -418,7 +419,7 @@ iret_exc:
.previous
.section __ex_table,"a"
.align 4
- .long 1b,iret_exc
+ .long irq_return,iret_exc
.previous
CFI_RESTORE_STATE
@@ -865,20 +866,16 @@ nmi_espfix_stack:
RESTORE_REGS
lss 12+4(%esp), %esp # back to espfix stack
CFI_ADJUST_CFA_OFFSET -24
-1: INTERRUPT_RETURN
+ jmp irq_return
CFI_ENDPROC
-.section __ex_table,"a"
- .align 4
- .long 1b,iret_exc
-.previous
KPROBE_END(nmi)
#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
-1: iret
+ iret
.section __ex_table,"a"
.align 4
- .long 1b,iret_exc
+ .long native_iret, iret_exc
.previous
END(native_iret)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bea8474..6be39a3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -581,25 +581,41 @@ retint_restore_args: /* return to kernel space */
*/
TRACE_IRQS_IRETQ
restore_args:
- RESTORE_ARGS 0,8,0
-iret_label:
-#ifdef CONFIG_PARAVIRT
+ RESTORE_ARGS 0,8,0
+
+ENTRY(irq_return)
INTERRUPT_RETURN
-#endif
+
+ .section __ex_table, "a"
+ .quad irq_return, bad_iret
+ .previous
+
+#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
iretq
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
+#endif
+
.section .fixup,"ax"
- /* force a signal here? this matches i386 behaviour */
- /* running with kernel gs */
bad_iret:
- movq $11,%rdi /* SIGSEGV */
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI))
- jmp do_exit
+ /*
+ * The iret traps when the %cs or %ss being restored is bogus.
+ * We've lost the original trap vector and error code.
+ * #GPF is the most likely one to get for an invalid selector.
+ * So pretend we completed the iret and took the #GPF in user mode.
+ *
+ * We are now running with the kernel GS after exception recovery.
+ * But error_entry expects us to have user GS to match the user %cs,
+ * so swap back.
+ */
+ pushq $0
+
+ SWAPGS
+ jmp general_protection
+
.previous
/* edi: workmask, edx: work */
@@ -796,7 +812,7 @@ paranoid_swapgs\trace:
SWAPGS_UNSAFE_STACK
paranoid_restore\trace:
RESTORE_ALL 8
- INTERRUPT_RETURN
+ jmp irq_return
paranoid_userspace\trace:
GET_THREAD_INFO(%rcx)
movl threadinfo_flags(%rcx),%ebx
@@ -911,7 +927,7 @@ error_kernelspace:
iret run with kernel gs again, so don't set the user space flag.
B stepping K8s sometimes report an truncated RIP for IRET
exceptions returning to compat mode. Check for these here too. */
- leaq iret_label(%rip),%rbp
+ leaq irq_return(%rip),%rbp
cmpq %rbp,RIP(%rsp)
je error_swapgs
movl %ebp,%ebp /* zero extend */
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c
index 9c7f7d3..9dad6ca 100644
--- a/arch/x86/kernel/geode_32.c
+++ b/arch/x86/kernel/geode_32.c
@@ -163,14 +163,11 @@ EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
static int __init geode_southbridge_init(void)
{
- int timers;
-
if (!is_geode())
return -ENODEV;
init_lbars();
- timers = geode_mfgpt_detect();
- printk(KERN_INFO "geode: %d MFGPT timers available.\n", timers);
+ (void) mfgpt_timer_setup();
return 0;
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 5d8c573..74ef4a4 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,10 @@
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/setup.h>
+#include <asm/processor-flags.h>
+
+/* Physical address */
+#define pa(X) ((X) - __PAGE_OFFSET)
/*
* References to members of the new_cpu_data structure.
@@ -80,10 +84,6 @@ INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_
*/
.section .text.head,"ax",@progbits
ENTRY(startup_32)
- /* check to see if KEEP_SEGMENTS flag is meaningful */
- cmpw $0x207, BP_version(%esi)
- jb 1f
-
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
testb $(1<<6), BP_loadflags(%esi)
@@ -92,7 +92,7 @@ ENTRY(startup_32)
/*
* Set segments to known values.
*/
-1: lgdt boot_gdt_descr - __PAGE_OFFSET
+ lgdt pa(boot_gdt_descr)
movl $(__BOOT_DS),%eax
movl %eax,%ds
movl %eax,%es
@@ -105,8 +105,8 @@ ENTRY(startup_32)
*/
cld
xorl %eax,%eax
- movl $__bss_start - __PAGE_OFFSET,%edi
- movl $__bss_stop - __PAGE_OFFSET,%ecx
+ movl $pa(__bss_start),%edi
+ movl $pa(__bss_stop),%ecx
subl %edi,%ecx
shrl $2,%ecx
rep ; stosl
@@ -118,31 +118,32 @@ ENTRY(startup_32)
* (kexec on panic case). Hence copy out the parameters before initializing
* page tables.
*/
- movl $(boot_params - __PAGE_OFFSET),%edi
+ movl $pa(boot_params),%edi
movl $(PARAM_SIZE/4),%ecx
cld
rep
movsl
- movl boot_params - __PAGE_OFFSET + NEW_CL_POINTER,%esi
+ movl pa(boot_params) + NEW_CL_POINTER,%esi
andl %esi,%esi
jz 1f # No comand line
- movl $(boot_command_line - __PAGE_OFFSET),%edi
+ movl $pa(boot_command_line),%edi
movl $(COMMAND_LINE_SIZE/4),%ecx
rep
movsl
1:
#ifdef CONFIG_PARAVIRT
- cmpw $0x207, (boot_params + BP_version - __PAGE_OFFSET)
+ /* This is can only trip for a broken bootloader... */
+ cmpw $0x207, pa(boot_params + BP_version)
jb default_entry
/* Paravirt-compatible boot parameters. Look to see what architecture
we're booting under. */
- movl (boot_params + BP_hardware_subarch - __PAGE_OFFSET), %eax
+ movl pa(boot_params + BP_hardware_subarch), %eax
cmpl $num_subarch_entries, %eax
jae bad_subarch
- movl subarch_entries - __PAGE_OFFSET(,%eax,4), %eax
+ movl pa(subarch_entries)(,%eax,4), %eax
subl $__PAGE_OFFSET, %eax
jmp *%eax
@@ -170,17 +171,68 @@ num_subarch_entries = (. - subarch_entries) / 4
* Mappings are created both at virtual address 0 (identity mapping)
* and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
*
- * Warning: don't use %esi or the stack in this code. However, %esp
- * can be used as a GPR if you really need it...
+ * Note that the stack is not yet set up!
*/
-page_pde_offset = (__PAGE_OFFSET >> 20);
+#define PTE_ATTR 0x007 /* PRESENT+RW+USER */
+#define PDE_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
+#define PGD_ATTR 0x001 /* PRESENT (no other attributes) */
default_entry:
- movl $(pg0 - __PAGE_OFFSET), %edi
- movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
- movl $0x007, %eax /* 0x007 = PRESENT+RW+USER */
+#ifdef CONFIG_X86_PAE
+
+ /*
+ * In PAE mode swapper_pg_dir is statically defined to contain enough
+ * entries to cover the VMSPLIT option (that is the top 1, 2 or 3
+ * entries). The identity mapping is handled by pointing two PGD
+ * entries to the first kernel PMD.
+ *
+ * Note the upper half of each PMD or PTE are always zero at
+ * this stage.
+ */
+
+#define KPMDS ((0x100000000-__PAGE_OFFSET) >> 30) /* Number of kernel PMDs */
+
+ xorl %ebx,%ebx /* %ebx is kept at zero */
+
+ movl $pa(pg0), %edi
+ movl $pa(swapper_pg_pmd), %edx
+ movl $PTE_ATTR, %eax
+10:
+ leal PDE_ATTR(%edi),%ecx /* Create PMD entry */
+ movl %ecx,(%edx) /* Store PMD entry */
+ /* Upper half already zero */
+ addl $8,%edx
+ movl $512,%ecx
+11:
+ stosl
+ xchgl %eax,%ebx
+ stosl
+ xchgl %eax,%ebx
+ addl $0x1000,%eax
+ loop 11b
+
+ /*
+ * End condition: we must map up to and including INIT_MAP_BEYOND_END
+ * bytes beyond the end of our own page tables.
+ */
+ leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
+ cmpl %ebp,%eax
+ jb 10b
+1:
+ movl %edi,pa(init_pg_tables_end)
+
+ /* Do early initialization of the fixmap area */
+ movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+ movl %eax,pa(swapper_pg_pmd+0x1000*KPMDS-8)
+#else /* Not PAE */
+
+page_pde_offset = (__PAGE_OFFSET >> 20);
+
+ movl $pa(pg0), %edi
+ movl $pa(swapper_pg_dir), %edx
+ movl $PTE_ATTR, %eax
10:
- leal 0x007(%edi),%ecx /* Create PDE entry */
+ leal PDE_ATTR(%edi),%ecx /* Create PDE entry */
movl %ecx,(%edx) /* Store identity PDE entry */
movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */
addl $4,%edx
@@ -189,19 +241,20 @@ default_entry:
stosl
addl $0x1000,%eax
loop 11b
- /* End condition: we must map up to and including INIT_MAP_BEYOND_END */
- /* bytes beyond the end of our own page tables; the +0x007 is the attribute bits */
- leal (INIT_MAP_BEYOND_END+0x007)(%edi),%ebp
+ /*
+ * End condition: we must map up to and including INIT_MAP_BEYOND_END
+ * bytes beyond the end of our own page tables; the +0x007 is
+ * the attribute bits
+ */
+ leal (INIT_MAP_BEYOND_END+PTE_ATTR)(%edi),%ebp
cmpl %ebp,%eax
jb 10b
- movl %edi,(init_pg_tables_end - __PAGE_OFFSET)
-
- /* Do an early initialization of the fixmap area */
- movl $(swapper_pg_dir - __PAGE_OFFSET), %edx
- movl $(swapper_pg_pmd - __PAGE_OFFSET), %eax
- addl $0x67, %eax /* 0x67 == _PAGE_TABLE */
- movl %eax, 4092(%edx)
+ movl %edi,pa(init_pg_tables_end)
+ /* Do early initialization of the fixmap area */
+ movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
+ movl %eax,pa(swapper_pg_dir+0xffc)
+#endif
jmp 3f
/*
* Non-boot CPU entry point; entered from trampoline.S
@@ -241,7 +294,7 @@ ENTRY(startup_32_smp)
* NOTE! We have to correct for the fact that we're
* not yet offset PAGE_OFFSET..
*/
-#define cr4_bits mmu_cr4_features-__PAGE_OFFSET
+#define cr4_bits pa(mmu_cr4_features)
movl cr4_bits,%edx
andl %edx,%edx
jz 6f
@@ -276,10 +329,10 @@ ENTRY(startup_32_smp)
/*
* Enable paging
*/
- movl $swapper_pg_dir-__PAGE_OFFSET,%eax
+ movl $pa(swapper_pg_dir),%eax
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
- orl $0x80000000,%eax
+ orl $X86_CR0_PG,%eax
movl %eax,%cr0 /* ..and set paging (PG) bit */
ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1:
@@ -552,16 +605,44 @@ ENTRY(_stext)
*/
.section ".bss.page_aligned","wa"
.align PAGE_SIZE_asm
+#ifdef CONFIG_X86_PAE
+ENTRY(swapper_pg_pmd)
+ .fill 1024*KPMDS,4,0
+#else
ENTRY(swapper_pg_dir)
.fill 1024,4,0
-ENTRY(swapper_pg_pmd)
+#endif
+ENTRY(swapper_pg_fixmap)
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
-
/*
* This starts the data section.
*/
+#ifdef CONFIG_X86_PAE
+.section ".data.page_aligned","wa"
+ /* Page-aligned for the benefit of paravirt? */
+ .align PAGE_SIZE_asm
+ENTRY(swapper_pg_dir)
+ .long pa(swapper_pg_pmd+PGD_ATTR),0 /* low identity map */
+# if KPMDS == 3
+ .long pa(swapper_pg_pmd+PGD_ATTR),0
+ .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+ .long pa(swapper_pg_pmd+PGD_ATTR+0x2000),0
+# elif KPMDS == 2
+ .long 0,0
+ .long pa(swapper_pg_pmd+PGD_ATTR),0
+ .long pa(swapper_pg_pmd+PGD_ATTR+0x1000),0
+# elif KPMDS == 1
+ .long 0,0
+ .long 0,0
+ .long pa(swapper_pg_pmd+PGD_ATTR),0
+# else
+# error "Kernel PMDs should be 1, 2 or 3"
+# endif
+ .align PAGE_SIZE_asm /* needs to be page-sized too */
+#endif
+
.data
ENTRY(stack_start)
.long init_thread_union+THREAD_SIZE
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 4f283ad..09b38d5 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -250,18 +250,13 @@ ENTRY(secondary_startup_64)
lretq
/* SMP bootup changes these two */
-#ifndef CONFIG_HOTPLUG_CPU
- .pushsection .init.data
-#endif
+ __CPUINITDATA
.align 8
- .globl initial_code
-initial_code:
+ ENTRY(initial_code)
.quad x86_64_start_kernel
-#ifndef CONFIG_HOTPLUG_CPU
- .popsection
-#endif
- .globl init_rsp
-init_rsp:
+ __FINITDATA
+
+ ENTRY(init_rsp)
.quad init_thread_union+THREAD_SIZE-8
bad_address:
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index ef62b07..8540abe 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt)
* registered. This mechanism replaces the previous #ifdef LOCAL_APIC -
* !using_apic_timer decisions in do_timer_interrupt_hook()
*/
-struct clock_event_device pit_clockevent = {
+static struct clock_event_device pit_clockevent = {
.name = "pit",
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
.set_mode = init_pit_timer,
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1cfd60..d0b234c 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -151,7 +151,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
-#ifdef CONFIG_ARCH_DISCONTIGMEM_ENABLE
+#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
#endif
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a1fef42..236d2f8 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -234,5 +234,10 @@ NORET_TYPE void machine_kexec(struct kimage *image)
void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_SYMBOL(init_level4_pgt);
+
+#ifdef CONFIG_NUMA
+ VMCOREINFO_SYMBOL(node_data);
+ VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
}
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c
index 219f86e..027fc06 100644
--- a/arch/x86/kernel/mfgpt_32.c
+++ b/arch/x86/kernel/mfgpt_32.c
@@ -12,48 +12,37 @@
*/
/*
- * We are using the 32Khz input clock - its the only one that has the
+ * We are using the 32.768kHz input clock - it's the only one that has the
* ranges we find desirable. The following table lists the suitable
- * divisors and the associated hz, minimum interval
- * and the maximum interval:
+ * divisors and the associated Hz, minimum interval and the maximum interval:
*
- * Divisor Hz Min Delta (S) Max Delta (S)
- * 1 32000 .0005 2.048
- * 2 16000 .001 4.096
- * 4 8000 .002 8.192
- * 8 4000 .004 16.384
- * 16 2000 .008 32.768
- * 32 1000 .016 65.536
- * 64 500 .032 131.072
- * 128 250 .064 262.144
- * 256 125 .128 524.288
+ * Divisor Hz Min Delta (s) Max Delta (s)
+ * 1 32768 .00048828125 2.000
+ * 2 16384 .0009765625 4.000
+ * 4 8192 .001953125 8.000
+ * 8 4096 .00390625 16.000
+ * 16 2048 .0078125 32.000
+ * 32 1024 .015625 64.000
+ * 64 512 .03125 128.000
+ * 128 256 .0625 256.000
+ * 256 128 .125 512.000
*/
#include <linux/kernel.h>
#include <linux/interrupt.h>
-#include <linux/module.h>
#include <asm/geode.h>
-#define F_AVAIL 0x01
-
static struct mfgpt_timer_t {
- int flags;
- struct module *owner;
+ unsigned int avail:1;
} mfgpt_timers[MFGPT_MAX_TIMERS];
/* Selected from the table above */
#define MFGPT_DIVISOR 16
#define MFGPT_SCALE 4 /* divisor = 2^(scale) */
-#define MFGPT_HZ (32000 / MFGPT_DIVISOR)
+#define MFGPT_HZ (32768 / MFGPT_DIVISOR)
#define MFGPT_PERIODIC (MFGPT_HZ / HZ)
-#ifdef CONFIG_GEODE_MFGPT_TIMER
-static int __init mfgpt_timer_setup(void);
-#else
-#define mfgpt_timer_setup() (0)
-#endif
-
/* Allow for disabling of MFGPTs */
static int disable;
static int __init mfgpt_disable(char *s)
@@ -85,28 +74,37 @@ __setup("mfgptfix", mfgpt_fix);
* In other cases (such as with VSAless OpenFirmware), the system firmware
* leaves timers available for us to use.
*/
-int __init geode_mfgpt_detect(void)
+
+
+static int timers = -1;
+
+static void geode_mfgpt_detect(void)
{
- int count = 0, i;
+ int i;
u16 val;
+ timers = 0;
+
if (disable) {
- printk(KERN_INFO "geode-mfgpt: Skipping MFGPT setup\n");
- return 0;
+ printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n");
+ goto done;
+ }
+
+ if (!geode_get_dev_base(GEODE_DEV_MFGPT)) {
+ printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n");
+ goto done;
}
for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
val = geode_mfgpt_read(i, MFGPT_REG_SETUP);
if (!(val & MFGPT_SETUP_SETUP)) {
- mfgpt_timers[i].flags = F_AVAIL;
- count++;
+ mfgpt_timers[i].avail = 1;
+ timers++;
}
}
- /* set up clock event device, if desired */
- i = mfgpt_timer_setup();
-
- return count;
+done:
+ printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers);
}
int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable)
@@ -183,36 +181,41 @@ int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable)
return 0;
}
-static int mfgpt_get(int timer, struct module *owner)
+static int mfgpt_get(int timer)
{
- mfgpt_timers[timer].flags &= ~F_AVAIL;
- mfgpt_timers[timer].owner = owner;
+ mfgpt_timers[timer].avail = 0;
printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer);
return timer;
}
-int geode_mfgpt_alloc_timer(int timer, int domain, struct module *owner)
+int geode_mfgpt_alloc_timer(int timer, int domain)
{
int i;
- if (!geode_get_dev_base(GEODE_DEV_MFGPT))
- return -ENODEV;
+ if (timers == -1) {
+ /* timers haven't been detected yet */
+ geode_mfgpt_detect();
+ }
+
+ if (!timers)
+ return -1;
+
if (timer >= MFGPT_MAX_TIMERS)
- return -EIO;
+ return -1;
if (timer < 0) {
/* Try to find an available timer */
for (i = 0; i < MFGPT_MAX_TIMERS; i++) {
- if (mfgpt_timers[i].flags & F_AVAIL)
- return mfgpt_get(i, owner);
+ if (mfgpt_timers[i].avail)
+ return mfgpt_get(i);
if (i == 5 && domain == MFGPT_DOMAIN_WORKING)
break;
}
} else {
/* If they requested a specific timer, try to honor that */
- if (mfgpt_timers[timer].flags & F_AVAIL)
- return mfgpt_get(timer, owner);
+ if (mfgpt_timers[timer].avail)
+ return mfgpt_get(timer);
}
/* No timers available - too bad */
@@ -244,10 +247,11 @@ static int __init mfgpt_setup(char *str)
}
__setup("mfgpt_irq=", mfgpt_setup);
-static inline void mfgpt_disable_timer(u16 clock)
+static void mfgpt_disable_timer(u16 clock)
{
- u16 val = geode_mfgpt_read(clock, MFGPT_REG_SETUP);
- geode_mfgpt_write(clock, MFGPT_REG_SETUP, val & ~MFGPT_SETUP_CNTEN);
+ /* avoid races by clearing CMP1 and CMP2 unconditionally */
+ geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN |
+ MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2);
}
static int mfgpt_next_event(unsigned long, struct clock_event_device *);
@@ -263,7 +267,7 @@ static struct clock_event_device mfgpt_clockevent = {
.shift = 32
};
-static inline void mfgpt_start_timer(u16 clock, u16 delta)
+static void mfgpt_start_timer(u16 delta)
{
geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta);
geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0);
@@ -278,21 +282,25 @@ static void mfgpt_set_mode(enum clock_event_mode mode,
mfgpt_disable_timer(mfgpt_event_clock);
if (mode == CLOCK_EVT_MODE_PERIODIC)
- mfgpt_start_timer(mfgpt_event_clock, MFGPT_PERIODIC);
+ mfgpt_start_timer(MFGPT_PERIODIC);
mfgpt_tick_mode = mode;
}
static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt)
{
- mfgpt_start_timer(mfgpt_event_clock, delta);
+ mfgpt_start_timer(delta);
return 0;
}
-/* Assume (foolishly?), that this interrupt was due to our tick */
-
static irqreturn_t mfgpt_tick(int irq, void *dev_id)
{
+ u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP);
+
+ /* See if the interrupt was for us */
+ if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1)))
+ return IRQ_NONE;
+
/* Turn off the clock (and clear the event) */
mfgpt_disable_timer(mfgpt_event_clock);
@@ -320,13 +328,12 @@ static struct irqaction mfgptirq = {
.name = "mfgpt-timer"
};
-static int __init mfgpt_timer_setup(void)
+int __init mfgpt_timer_setup(void)
{
int timer, ret;
u16 val;
- timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING,
- THIS_MODULE);
+ timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING);
if (timer < 0) {
printk(KERN_ERR
"mfgpt-timer: Could not allocate a MFPGT timer\n");
@@ -363,7 +370,7 @@ static int __init mfgpt_timer_setup(void)
&mfgpt_clockevent);
printk(KERN_INFO
- "mfgpt-timer: registering the MFGT timer as a clock event.\n");
+ "mfgpt-timer: registering the MFGPT timer as a clock event.\n");
clockevents_register_device(&mfgpt_clockevent);
return 0;
diff --git a/arch/x86/kernel/mpparse_32.c b/arch/x86/kernel/mpparse_32.c
index 67009cd..f349e68 100644
--- a/arch/x86/kernel/mpparse_32.c
+++ b/arch/x86/kernel/mpparse_32.c
@@ -736,7 +736,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
smp_found_config = 1;
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
mpf, virt_to_phys(mpf));
- reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE);
+ reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
+ BOOTMEM_DEFAULT);
if (mpf->mpf_physptr) {
/*
* We cannot access to MPC table to compute
@@ -751,7 +752,8 @@ static int __init smp_scan_config (unsigned long base, unsigned long length)
unsigned long end = max_low_pfn * PAGE_SIZE;
if (mpf->mpf_physptr + size > end)
size = end - mpf->mpf_physptr;
- reserve_bootmem(mpf->mpf_physptr, size);
+ reserve_bootmem(mpf->mpf_physptr, size,
+ BOOTMEM_DEFAULT);
}
mpf_found = mpf;
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 65f6acb..faf3229 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -749,6 +749,15 @@ void __init gart_iommu_init(void)
*/
set_memory_np((unsigned long)__va(iommu_bus_base),
iommu_size >> PAGE_SHIFT);
+ /*
+ * Tricky. The GART table remaps the physical memory range,
+ * so the CPU wont notice potential aliases and if the memory
+ * is remapped to UC later on, we might surprise the PCI devices
+ * with a stray writeout of a cacheline. So play it sure and
+ * do an explicit, full-scale wbinvd() _after_ having marked all
+ * the pages as Not-Present:
+ */
+ wbinvd();
/*
* Try to workaround a bug (thanks to BenH)
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index dabdbef..a7d50a5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -23,7 +23,6 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/user.h>
-#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
@@ -539,55 +538,6 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
return err;
}
-/*
- * fill in the user structure for a core dump..
- */
-void dump_thread(struct pt_regs * regs, struct user * dump)
-{
- u16 gs;
-
-/* changed the size calculations - should hopefully work better. lbt */
- dump->magic = CMAGIC;
- dump->start_code = 0;
- dump->start_stack = regs->sp & ~(PAGE_SIZE - 1);
- dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
- dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
- dump->u_dsize -= dump->u_tsize;
- dump->u_ssize = 0;
- dump->u_debugreg[0] = current->thread.debugreg0;
- dump->u_debugreg[1] = current->thread.debugreg1;
- dump->u_debugreg[2] = current->thread.debugreg2;
- dump->u_debugreg[3] = current->thread.debugreg3;
- dump->u_debugreg[4] = 0;
- dump->u_debugreg[5] = 0;
- dump->u_debugreg[6] = current->thread.debugreg6;
- dump->u_debugreg[7] = current->thread.debugreg7;
-
- if (dump->start_stack < TASK_SIZE)
- dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
-
- dump->regs.bx = regs->bx;
- dump->regs.cx = regs->cx;
- dump->regs.dx = regs->dx;
- dump->regs.si = regs->si;
- dump->regs.di = regs->di;
- dump->regs.bp = regs->bp;
- dump->regs.ax = regs->ax;
- dump->regs.ds = (u16)regs->ds;
- dump->regs.es = (u16)regs->es;
- dump->regs.fs = (u16)regs->fs;
- savesegment(gs,gs);
- dump->regs.orig_ax = regs->orig_ax;
- dump->regs.ip = regs->ip;
- dump->regs.cs = (u16)regs->cs;
- dump->regs.flags = regs->flags;
- dump->regs.sp = regs->sp;
- dump->regs.ss = (u16)regs->ss;
-
- dump->u_fpvalid = dump_fpu (regs, &dump->i387);
-}
-EXPORT_SYMBOL(dump_thread);
-
#ifdef CONFIG_SECCOMP
static void hard_disable_TSC(void)
{
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 137a861..b0cc8f0 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -26,7 +26,6 @@
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
-#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/utsname.h>
#include <linux/delay.h>
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 96286df..702c33ef 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -103,9 +103,26 @@ static int set_segment_reg(struct task_struct *task,
if (invalid_selector(value))
return -EIO;
- if (offset != offsetof(struct user_regs_struct, gs))
+ /*
+ * For %cs and %ss we cannot permit a null selector.
+ * We can permit a bogus selector as long as it has USER_RPL.
+ * Null selectors are fine for other segment registers, but
+ * we will never get back to user mode with invalid %cs or %ss
+ * and will take the trap in iret instead. Much code relies
+ * on user_mode() to distinguish a user trap frame (which can
+ * safely use invalid selectors) from a kernel trap frame.
+ */
+ switch (offset) {
+ case offsetof(struct user_regs_struct, cs):
+ case offsetof(struct user_regs_struct, ss):
+ if (unlikely(value == 0))
+ return -EIO;
+
+ default:
*pt_regs_access(task_pt_regs(task), offset) = value;
- else {
+ break;
+
+ case offsetof(struct user_regs_struct, gs):
task->thread.gs = value;
if (task == current)
/*
@@ -227,12 +244,16 @@ static int set_segment_reg(struct task_struct *task,
* Can't actually change these in 64-bit mode.
*/
case offsetof(struct user_regs_struct,cs):
+ if (unlikely(value == 0))
+ return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->cs = value;
#endif
break;
case offsetof(struct user_regs_struct,ss):
+ if (unlikely(value == 0))
+ return -EIO;
#ifdef CONFIG_IA32_EMULATION
if (test_tsk_thread_flag(task, TIF_IA32))
task_pt_regs(task)->ss = value;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 3cd7a2d..c47208f 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -11,7 +11,7 @@
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
{
u8 config, rev;
- u32 word;
+ u16 word;
/* BIOS may enable hardware IRQ balancing for
* E7520/E7320/E7525(revision ID 0x9 and below)
@@ -26,8 +26,11 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
pci_read_config_byte(dev, 0xf4, &config);
pci_write_config_byte(dev, 0xf4, config|0x2);
- /* read xTPR register */
- raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
+ /*
+ * read xTPR register. We may not have a pci_dev for device 8
+ * because it might be hidden until the above write.
+ */
+ pci_bus_read_config_word(dev->bus, PCI_DEVFN(8, 0), 0x4c, &word);
if (!(word & (1 << 13))) {
dev_info(&dev->dev, "Intel E7520/7320/7525 detected; "
@@ -380,19 +383,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367,
void force_hpet_resume(void)
{
switch (force_hpet_resume_type) {
- case ICH_FORCE_HPET_RESUME:
- return ich_force_hpet_resume();
-
- case OLD_ICH_FORCE_HPET_RESUME:
- return old_ich_force_hpet_resume();
-
- case VT8237_FORCE_HPET_RESUME:
- return vt8237_force_hpet_resume();
-
- case NVIDIA_FORCE_HPET_RESUME:
- return nvidia_force_hpet_resume();
-
- default:
+ case ICH_FORCE_HPET_RESUME:
+ ich_force_hpet_resume();
+ return;
+ case OLD_ICH_FORCE_HPET_RESUME:
+ old_ich_force_hpet_resume();
+ return;
+ case VT8237_FORCE_HPET_RESUME:
+ vt8237_force_hpet_resume();
+ return;
+ case NVIDIA_FORCE_HPET_RESUME:
+ nvidia_force_hpet_resume();
+ return;
+ default:
break;
}
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 5818dc2..7fd6ac4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -326,7 +326,7 @@ static inline void kb_wait(void)
}
}
-void machine_emergency_restart(void)
+static void native_machine_emergency_restart(void)
{
int i;
@@ -376,7 +376,7 @@ void machine_emergency_restart(void)
}
}
-void machine_shutdown(void)
+static void native_machine_shutdown(void)
{
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
@@ -420,7 +420,7 @@ void machine_shutdown(void)
#endif
}
-void machine_restart(char *__unused)
+static void native_machine_restart(char *__unused)
{
printk("machine restart\n");
@@ -429,11 +429,11 @@ void machine_restart(char *__unused)
machine_emergency_restart();
}
-void machine_halt(void)
+static void native_machine_halt(void)
{
}
-void machine_power_off(void)
+static void native_machine_power_off(void)
{
if (pm_power_off) {
if (!reboot_force)
@@ -443,9 +443,35 @@ void machine_power_off(void)
}
struct machine_ops machine_ops = {
- .power_off = machine_power_off,
- .shutdown = machine_shutdown,
- .emergency_restart = machine_emergency_restart,
- .restart = machine_restart,
- .halt = machine_halt
+ .power_off = native_machine_power_off,
+ .shutdown = native_machine_shutdown,
+ .emergency_restart = native_machine_emergency_restart,
+ .restart = native_machine_restart,
+ .halt = native_machine_halt
};
+
+void machine_power_off(void)
+{
+ machine_ops.power_off();
+}
+
+void machine_shutdown(void)
+{
+ machine_ops.shutdown();
+}
+
+void machine_emergency_restart(void)
+{
+ machine_ops.emergency_restart();
+}
+
+void machine_restart(char *cmd)
+{
+ machine_ops.restart(cmd);
+}
+
+void machine_halt(void)
+{
+ machine_ops.halt();
+}
+
diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c
index 62adc5f..691ab4c 100644
--- a/arch/x86/kernel/setup_32.c
+++ b/arch/x86/kernel/setup_32.c
@@ -154,7 +154,11 @@ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
EXPORT_SYMBOL(boot_cpu_data);
+#ifndef CONFIG_X86_PAE
unsigned long mmu_cr4_features;
+#else
+unsigned long mmu_cr4_features = X86_CR4_PAE;
+#endif
/* for MCA, but anyone else can use it if they want */
unsigned int machine_id;
@@ -390,7 +394,7 @@ static void __init reserve_ebda_region(void)
unsigned int addr;
addr = get_bios_ebda();
if (addr)
- reserve_bootmem(addr, PAGE_SIZE);
+ reserve_bootmem(addr, PAGE_SIZE, BOOTMEM_DEFAULT);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -484,7 +488,8 @@ static void __init reserve_crashkernel(void)
(unsigned long)(total_mem >> 20));
crashk_res.start = crash_base;
crashk_res.end = crash_base + crash_size - 1;
- reserve_bootmem(crash_base, crash_size);
+ reserve_bootmem(crash_base, crash_size,
+ BOOTMEM_DEFAULT);
} else
printk(KERN_INFO "crashkernel reservation failed - "
"you have to specify a base address\n");
@@ -525,7 +530,7 @@ static void __init reserve_initrd(void)
}
if (ramdisk_end <= end_of_lowmem) {
/* All in lowmem, easy case */
- reserve_bootmem(ramdisk_image, ramdisk_size);
+ reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
initrd_start = ramdisk_image + PAGE_OFFSET;
initrd_end = initrd_start+ramdisk_size;
return;
@@ -536,7 +541,7 @@ static void __init reserve_initrd(void)
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- reserve_bootmem(ramdisk_here, ramdisk_size);
+ reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
@@ -606,13 +611,14 @@ void __init setup_bootmem_allocator(void)
* bootmem allocator with an invalid RAM area.
*/
reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
+ bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
+ BOOTMEM_DEFAULT);
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
*/
- reserve_bootmem(0, PAGE_SIZE);
+ reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
/* reserve EBDA region, it's a 4K region */
reserve_ebda_region();
@@ -622,7 +628,7 @@ void __init setup_bootmem_allocator(void)
unless you have no PS/2 mouse plugged in. */
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
boot_cpu_data.x86 == 6)
- reserve_bootmem(0xa0000 - 4096, 4096);
+ reserve_bootmem(0xa0000 - 4096, 4096, BOOTMEM_DEFAULT);
#ifdef CONFIG_SMP
/*
@@ -630,7 +636,7 @@ void __init setup_bootmem_allocator(void)
* FIXME: Don't need the extra page at 4K, but need to fix
* trampoline before removing it. (see the GDT stuff)
*/
- reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+ reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
#endif
#ifdef CONFIG_ACPI_SLEEP
/*
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index c8939df..c0d8208 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -15,7 +15,6 @@
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/user.h>
-#include <linux/a.out.h>
#include <linux/screen_info.h>
#include <linux/ioport.h>
#include <linux/delay.h>
@@ -189,7 +188,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
e820_register_active_regions(0, start_pfn, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
- reserve_bootmem(bootmap, bootmap_size);
+ reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
}
#endif
@@ -220,28 +219,35 @@ static inline void copy_edd(void)
#ifdef CONFIG_KEXEC
static void __init reserve_crashkernel(void)
{
- unsigned long long free_mem;
+ unsigned long long total_mem;
unsigned long long crash_size, crash_base;
int ret;
- free_mem =
- ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
+ total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
- ret = parse_crashkernel(boot_command_line, free_mem,
+ ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
if (ret == 0 && crash_size) {
- if (crash_base > 0) {
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crash_base >> 20),
- (unsigned long)(free_mem >> 20));
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- reserve_bootmem(crash_base, crash_size);
- } else
+ if (crash_base <= 0) {
printk(KERN_INFO "crashkernel reservation failed - "
"you have to specify a base address\n");
+ return;
+ }
+
+ if (reserve_bootmem(crash_base, crash_size,
+ BOOTMEM_EXCLUSIVE) < 0) {
+ printk(KERN_INFO "crashkernel reservation failed - "
+ "memory is in use\n");
+ return;
+ }
+
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+ "for crashkernel (System RAM: %ldMB)\n",
+ (unsigned long)(crash_size >> 20),
+ (unsigned long)(crash_base >> 20),
+ (unsigned long)(total_mem >> 20));
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
}
}
#else
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
index 5787a0c..579b9b7 100644
--- a/arch/x86/kernel/smpboot_32.c
+++ b/arch/x86/kernel/smpboot_32.c
@@ -202,8 +202,6 @@ valid_k7:
;
}
-extern void calibrate_delay(void);
-
static atomic_t init_deasserted;
static void __cpuinit smp_callin(void)
diff --git a/arch/x86/kernel/srat_32.c b/arch/x86/kernel/srat_32.c
index 2bf6903..b72e613 100644
--- a/arch/x86/kernel/srat_32.c
+++ b/arch/x86/kernel/srat_32.c
@@ -274,7 +274,7 @@ int __init get_memcfg_from_srat(void)
int tables = 0;
int i = 0;
- rsdp_address = acpi_find_rsdp();
+ rsdp_address = acpi_os_get_root_pointer();
if (!rsdp_address) {
printk("%s: System description tables not found\n",
__FUNCTION__);
diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c
index 36c100c..10b8a6f 100644
--- a/arch/x86/kernel/test_nx.c
+++ b/arch/x86/kernel/test_nx.c
@@ -139,7 +139,6 @@ static int test_NX(void)
* Until then, don't run them to avoid too many people getting scared
* by the error message
*/
-#if 0
#ifdef CONFIG_DEBUG_RODATA
/* Test 3: Check if the .rodata section is executable */
@@ -152,6 +151,7 @@ static int test_NX(void)
}
#endif
+#if 0
/* Test 4: Check if the .data section of a module is executable */
if (test_address(&test_data)) {
printk(KERN_ERR "test_nx: .data section is executable\n");
diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c
index 4c16377..c29e235 100644
--- a/arch/x86/kernel/test_rodata.c
+++ b/arch/x86/kernel/test_rodata.c
@@ -10,8 +10,8 @@
* of the License.
*/
#include <linux/module.h>
+#include <asm/cacheflush.h>
#include <asm/sections.h>
-extern int rodata_test_data;
int rodata_test(void)
{
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 0380795..c737849 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -77,7 +77,7 @@ unsigned long __init native_calculate_cpu_khz(void)
reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
}
local_irq_save(flags);
- /* start meauring cycles, incrementing from 0 */
+ /* start measuring cycles, incrementing from 0 */
wrmsrl(MSR_K7_PERFCTR0 + i, 0);
wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
rdtscl(tsc_start);
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index e6757aa..a40051b 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -53,7 +53,7 @@ EXPORT_SYMBOL(arch_register_cpu);
void arch_unregister_cpu(int num)
{
- return unregister_cpu(&per_cpu(cpu_devices, num).cpu);
+ unregister_cpu(&per_cpu(cpu_devices, num).cpu);
}
EXPORT_SYMBOL(arch_unregister_cpu);
#else
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 3cf72977..b22c01e 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1176,17 +1176,12 @@ void __init trap_init(void)
#endif
set_trap_gate(19,&simd_coprocessor_error);
+ /*
+ * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
+ * Generate a build-time error if the alignment is wrong.
+ */
+ BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15);
if (cpu_has_fxsr) {
- /*
- * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned.
- * Generates a compile-time "error: zero width for bit-field" if
- * the alignment is wrong.
- */
- struct fxsrAlignAssert {
- int _:!(offsetof(struct task_struct,
- thread.i387.fxsave) & 15);
- };
-
printk(KERN_INFO "Enabling fast FPU save and restore... ");
set_in_cr4(X86_CR4_OSFXSR);
printk("done.\n");
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index efc66df..0454666 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -84,7 +84,7 @@ static inline void conditional_sti(struct pt_regs *regs)
static inline void preempt_conditional_sti(struct pt_regs *regs)
{
- preempt_disable();
+ inc_preempt_count();
if (regs->flags & X86_EFLAGS_IF)
local_irq_enable();
}
@@ -95,7 +95,7 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
local_irq_disable();
/* Make sure to not schedule here because we could be running
on an exception stack. */
- preempt_enable_no_resched();
+ dec_preempt_count();
}
int kstack_depth_to_print = 12;
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay_32.c
index aad9d95..4535e6d 100644
--- a/arch/x86/lib/delay_32.c
+++ b/arch/x86/lib/delay_32.c
@@ -12,8 +12,10 @@
#include <linux/module.h>
#include <linux/sched.h>
+#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
+#include <linux/init.h>
#include <asm/processor.h>
#include <asm/delay.h>
@@ -63,7 +65,7 @@ void use_tsc_delay(void)
delay_fn = delay_tsc;
}
-int read_current_timer(unsigned long *timer_val)
+int __devinit read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
rdtscl(*timer_val);
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c
index 45cdd3f..bbc6105 100644
--- a/arch/x86/lib/delay_64.c
+++ b/arch/x86/lib/delay_64.c
@@ -10,8 +10,10 @@
#include <linux/module.h>
#include <linux/sched.h>
+#include <linux/timex.h>
#include <linux/preempt.h>
#include <linux/delay.h>
+#include <linux/init.h>
#include <asm/delay.h>
#include <asm/msr.h>
@@ -20,7 +22,7 @@
#include <asm/smp.h>
#endif
-int read_current_timer(unsigned long *timer_value)
+int __devinit read_current_timer(unsigned long *timer_value)
{
rdtscll(*timer_value);
return 0;
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index dffa786..3cc8eb2 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -444,8 +444,6 @@ static __u32 __init setup_trampoline(void)
static void __init start_secondary(void *unused)
{
__u8 cpuid = hard_smp_processor_id();
- /* external functions not defined in the headers */
- extern void calibrate_delay(void);
cpu_init();
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
index 04b1d20..c394ca0 100644
--- a/arch/x86/mm/discontig_32.c
+++ b/arch/x86/mm/discontig_32.c
@@ -391,7 +391,8 @@ unsigned long __init setup_memory(void)
void __init numa_kva_reserve(void)
{
if (kva_pages)
- reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages));
+ reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
+ BOOTMEM_DEFAULT);
}
void __init zone_sizes_init(void)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ad8b973..fdc6674 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -186,7 +186,7 @@ static int bad_address(void *p)
}
#endif
-void dump_pagetable(unsigned long address)
+static void dump_pagetable(unsigned long address)
{
#ifdef CONFIG_X86_32
__typeof__(pte_val(__pte(0))) page;
@@ -428,6 +428,16 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
}
#endif
+static int spurious_fault_check(unsigned long error_code, pte_t *pte)
+{
+ if ((error_code & PF_WRITE) && !pte_write(*pte))
+ return 0;
+ if ((error_code & PF_INSTR) && !pte_exec(*pte))
+ return 0;
+
+ return 1;
+}
+
/*
* Handle a spurious fault caused by a stale TLB entry. This allows
* us to lazily refresh the TLB when increasing the permissions of a
@@ -457,20 +467,21 @@ static int spurious_fault(unsigned long address,
if (!pud_present(*pud))
return 0;
+ if (pud_large(*pud))
+ return spurious_fault_check(error_code, (pte_t *) pud);
+
pmd = pmd_offset(pud, address);
if (!pmd_present(*pmd))
return 0;
+ if (pmd_large(*pmd))
+ return spurious_fault_check(error_code, (pte_t *) pmd);
+
pte = pte_offset_kernel(pmd, address);
if (!pte_present(*pte))
return 0;
- if ((error_code & PF_WRITE) && !pte_write(*pte))
- return 0;
- if ((error_code & PF_INSTR) && !pte_exec(*pte))
- return 0;
-
- return 1;
+ return spurious_fault_check(error_code, pte);
}
/*
@@ -947,11 +958,12 @@ void vmalloc_sync_all(void)
for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
if (!test_bit(pgd_index(address), insync)) {
const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
struct page *page;
if (pgd_none(*pgd_ref))
continue;
- spin_lock(&pgd_lock);
+ spin_lock_irqsave(&pgd_lock, flags);
list_for_each_entry(page, &pgd_list, lru) {
pgd_t *pgd;
pgd = (pgd_t *)page_address(page) + pgd_index(address);
@@ -960,7 +972,7 @@ void vmalloc_sync_all(void)
else
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
- spin_unlock(&pgd_lock);
+ spin_unlock_irqrestore(&pgd_lock, flags);
set_bit(pgd_index(address), insync);
}
if (address == start)
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index d1bc040..ee1091a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -46,6 +46,8 @@
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/paravirt.h>
+#include <asm/setup.h>
+#include <asm/cacheflush.h>
unsigned int __VMALLOC_RESERVE = 128 << 20;
@@ -328,44 +330,38 @@ pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
void __init native_pagetable_setup_start(pgd_t *base)
{
-#ifdef CONFIG_X86_PAE
- int i;
+ unsigned long pfn, va;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
/*
- * Init entries of the first-level page table to the
- * zero page, if they haven't already been set up.
- *
- * In a normal native boot, we'll be running on a
- * pagetable rooted in swapper_pg_dir, but not in PAE
- * mode, so this will end up clobbering the mappings
- * for the lower 24Mbytes of the address space,
- * without affecting the kernel address space.
+ * Remove any mappings which extend past the end of physical
+ * memory from the boot time page table:
*/
- for (i = 0; i < USER_PTRS_PER_PGD; i++)
- set_pgd(&base[i],
- __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));
-
- /* Make sure kernel address space is empty so that a pagetable
- will be allocated for it. */
- memset(&base[USER_PTRS_PER_PGD], 0,
- KERNEL_PGD_PTRS * sizeof(pgd_t));
-#else
+ for (pfn = max_low_pfn + 1; pfn < 1<<(32-PAGE_SHIFT); pfn++) {
+ va = PAGE_OFFSET + (pfn<<PAGE_SHIFT);
+ pgd = base + pgd_index(va);
+ if (!pgd_present(*pgd))
+ break;
+
+ pud = pud_offset(pgd, va);
+ pmd = pmd_offset(pud, va);
+ if (!pmd_present(*pmd))
+ break;
+
+ pte = pte_offset_kernel(pmd, va);
+ if (!pte_present(*pte))
+ break;
+
+ pte_clear(NULL, va, pte);
+ }
paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
-#endif
}
void __init native_pagetable_setup_done(pgd_t *base)
{
-#ifdef CONFIG_X86_PAE
- /*
- * Add low memory identity-mappings - SMP needs it when
- * starting up on an AP from real-mode. In the non-PAE
- * case we already have these mappings through head.S.
- * All user-space mappings are explicitly cleared after
- * SMP startup.
- */
- set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
-#endif
}
/*
@@ -374,9 +370,8 @@ void __init native_pagetable_setup_done(pgd_t *base)
* the boot process.
*
* If we're booting on native hardware, this will be a pagetable
- * constructed in arch/i386/kernel/head.S, and not running in PAE mode
- * (even if we'll end up running in PAE). The root of the pagetable
- * will be swapper_pg_dir.
+ * constructed in arch/x86/kernel/head_32.S. The root of the
+ * pagetable will be swapper_pg_dir.
*
* If we're booting paravirtualized under a hypervisor, then there are
* more options: we may already be running PAE, and the pagetable may
@@ -537,14 +532,6 @@ void __init paging_init(void)
load_cr3(swapper_pg_dir);
-#ifdef CONFIG_X86_PAE
- /*
- * We will bail out later - printk doesn't work right now so
- * the user would just see a hanging kernel.
- */
- if (cpu_has_pae)
- set_in_cr4(X86_CR4_PAE);
-#endif
__flush_tlb_all();
kmap_init();
@@ -675,13 +662,11 @@ void __init mem_init(void)
BUG_ON((unsigned long)high_memory > VMALLOC_START);
#endif /* double-sanity-check paranoia */
-#ifdef CONFIG_X86_PAE
- if (!cpu_has_pae)
- panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
-#endif
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
+ cpa_init();
+
/*
* Subtle. SMP is doing it's boot stuff late (because it has to
* fork idle threads) - but it also needs low mappings for the
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3a98d6f..a4a9ccc 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -45,6 +45,7 @@
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/numa.h>
+#include <asm/cacheflush.h>
const struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
@@ -528,13 +529,15 @@ void __init mem_init(void)
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10);
+
+ cpa_init();
}
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
- unsigned long addr;
+ unsigned long addr = begin;
- if (begin >= end)
+ if (addr >= end)
return;
/*
@@ -549,7 +552,7 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
#else
printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
+ for (; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)),
@@ -591,10 +594,17 @@ void mark_rodata_ro(void)
if (end <= start)
return;
- set_memory_ro(start, (end - start) >> PAGE_SHIFT);
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
+ set_memory_ro(start, (end - start) >> PAGE_SHIFT);
+
+ /*
+ * The rodata section (but not the kernel text!) should also be
+ * not-executable.
+ */
+ start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+ set_memory_nx(start, (end - start) >> PAGE_SHIFT);
rodata_test();
@@ -637,9 +647,9 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
/* Should check here against the e820 map to avoid double free */
#ifdef CONFIG_NUMA
- reserve_bootmem_node(NODE_DATA(nid), phys, len);
+ reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
#else
- reserve_bootmem(phys, len);
+ reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
#endif
if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
dma_reserve += len / PAGE_SIZE;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index ee6648f..9f42d7e 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -260,41 +260,48 @@ static int __init early_ioremap_debug_setup(char *str)
early_param("early_ioremap_debug", early_ioremap_debug_setup);
static __initdata int after_paging_init;
-static __initdata unsigned long bm_pte[1024]
+static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
__attribute__((aligned(PAGE_SIZE)));
-static inline unsigned long * __init early_ioremap_pgd(unsigned long addr)
+static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
{
- return (unsigned long *)swapper_pg_dir + ((addr >> 22) & 1023);
+ /* Don't assume we're using swapper_pg_dir at this point */
+ pgd_t *base = __va(read_cr3());
+ pgd_t *pgd = &base[pgd_index(addr)];
+ pud_t *pud = pud_offset(pgd, addr);
+ pmd_t *pmd = pmd_offset(pud, addr);
+
+ return pmd;
}
-static inline unsigned long * __init early_ioremap_pte(unsigned long addr)
+static inline pte_t * __init early_ioremap_pte(unsigned long addr)
{
- return bm_pte + ((addr >> PAGE_SHIFT) & 1023);
+ return &bm_pte[pte_index(addr)];
}
void __init early_ioremap_init(void)
{
- unsigned long *pgd;
+ pmd_t *pmd;
if (early_ioremap_debug)
printk(KERN_INFO "early_ioremap_init()\n");
- pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
- *pgd = __pa(bm_pte) | _PAGE_TABLE;
+ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
memset(bm_pte, 0, sizeof(bm_pte));
+ pmd_populate_kernel(&init_mm, pmd, bm_pte);
+
/*
- * The boot-ioremap range spans multiple pgds, for which
+ * The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
- if (pgd != early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END))) {
+ if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
- printk(KERN_WARNING "pgd %p != %p\n",
- pgd, early_ioremap_pgd(fix_to_virt(FIX_BTMAP_END)));
+ printk(KERN_WARNING "pmd %p != %p\n",
+ pmd, early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END)));
printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
- fix_to_virt(FIX_BTMAP_BEGIN));
+ fix_to_virt(FIX_BTMAP_BEGIN));
printk(KERN_WARNING "fix_to_virt(FIX_BTMAP_END): %08lx\n",
- fix_to_virt(FIX_BTMAP_END));
+ fix_to_virt(FIX_BTMAP_END));
printk(KERN_WARNING "FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
printk(KERN_WARNING "FIX_BTMAP_BEGIN: %d\n",
@@ -304,28 +311,29 @@ void __init early_ioremap_init(void)
void __init early_ioremap_clear(void)
{
- unsigned long *pgd;
+ pmd_t *pmd;
if (early_ioremap_debug)
printk(KERN_INFO "early_ioremap_clear()\n");
- pgd = early_ioremap_pgd(fix_to_virt(FIX_BTMAP_BEGIN));
- *pgd = 0;
- paravirt_release_pt(__pa(pgd) >> PAGE_SHIFT);
+ pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
+ pmd_clear(pmd);
+ paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
__flush_tlb_all();
}
void __init early_ioremap_reset(void)
{
enum fixed_addresses idx;
- unsigned long *pte, phys, addr;
+ unsigned long addr, phys;
+ pte_t *pte;
after_paging_init = 1;
for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
addr = fix_to_virt(idx);
pte = early_ioremap_pte(addr);
- if (*pte & _PAGE_PRESENT) {
- phys = *pte & PAGE_MASK;
+ if (pte_present(*pte)) {
+ phys = pte_val(*pte) & PAGE_MASK;
set_fixmap(idx, phys);
}
}
@@ -334,7 +342,8 @@ void __init early_ioremap_reset(void)
static void __init __early_set_fixmap(enum fixed_addresses idx,
unsigned long phys, pgprot_t flags)
{
- unsigned long *pte, addr = __fix_to_virt(idx);
+ unsigned long addr = __fix_to_virt(idx);
+ pte_t *pte;
if (idx >= __end_of_fixed_addresses) {
BUG();
@@ -342,9 +351,9 @@ static void __init __early_set_fixmap(enum fixed_addresses idx,
}
pte = early_ioremap_pte(addr);
if (pgprot_val(flags))
- *pte = (phys & PAGE_MASK) | pgprot_val(flags);
+ set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
else
- *pte = 0;
+ pte_clear(NULL, addr, pte);
__flush_tlb_one(addr);
}
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 5a02bf4..1aecc65 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -238,9 +238,10 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
free_bootmem_with_active_regions(nodeid, end);
- reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size);
+ reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
+ BOOTMEM_DEFAULT);
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
- bootmap_pages<<PAGE_SHIFT);
+ bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
#ifdef CONFIG_ACPI_NUMA
srat_reserve_add_area(nodeid);
#endif
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index 398f3a5..75f1b10 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -5,6 +5,7 @@
* and compares page tables forwards and afterwards.
*/
#include <linux/bootmem.h>
+#include <linux/kthread.h>
#include <linux/random.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -14,8 +15,13 @@
#include <asm/pgtable.h>
#include <asm/kdebug.h>
+/*
+ * Only print the results of the first pass:
+ */
+static __read_mostly int print = 1;
+
enum {
- NTEST = 4000,
+ NTEST = 400,
#ifdef CONFIG_X86_64
LPS = (1 << PMD_SHIFT),
#elif defined(CONFIG_X86_PAE)
@@ -31,10 +37,9 @@ struct split_state {
long min_exec, max_exec;
};
-static __init int print_split(struct split_state *s)
+static int print_split(struct split_state *s)
{
long i, expected, missed = 0;
- int printed = 0;
int err = 0;
s->lpg = s->gpg = s->spg = s->exec = 0;
@@ -47,12 +52,6 @@ static __init int print_split(struct split_state *s)
pte = lookup_address(addr, &level);
if (!pte) {
- if (!printed) {
- dump_pagetable(addr);
- printk(KERN_INFO "CPA %lx no pte level %d\n",
- addr, level);
- printed = 1;
- }
missed++;
i++;
continue;
@@ -82,10 +81,13 @@ static __init int print_split(struct split_state *s)
s->max_exec = addr;
}
}
- printk(KERN_INFO
- "CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
- s->spg, s->lpg, s->gpg, s->exec,
- s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed);
+ if (print) {
+ printk(KERN_INFO
+ " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n",
+ s->spg, s->lpg, s->gpg, s->exec,
+ s->min_exec != ~0UL ? s->min_exec : 0,
+ s->max_exec, missed);
+ }
expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed;
if (expected != i) {
@@ -96,11 +98,11 @@ static __init int print_split(struct split_state *s)
return err;
}
-static unsigned long __initdata addr[NTEST];
-static unsigned int __initdata len[NTEST];
+static unsigned long addr[NTEST];
+static unsigned int len[NTEST];
/* Change the global bit on random pages in the direct mapping */
-static __init int exercise_pageattr(void)
+static int pageattr_test(void)
{
struct split_state sa, sb, sc;
unsigned long *bm;
@@ -110,7 +112,8 @@ static __init int exercise_pageattr(void)
int i, k;
int err;
- printk(KERN_INFO "CPA exercising pageattr\n");
+ if (print)
+ printk(KERN_INFO "CPA self-test:\n");
bm = vmalloc((max_pfn_mapped + 7) / 8);
if (!bm) {
@@ -186,7 +189,6 @@ static __init int exercise_pageattr(void)
failed += print_split(&sb);
- printk(KERN_INFO "CPA reverting everything\n");
for (i = 0; i < NTEST; i++) {
if (!addr[i])
continue;
@@ -214,12 +216,40 @@ static __init int exercise_pageattr(void)
failed += print_split(&sc);
if (failed) {
- printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n");
+ printk(KERN_ERR "NOT PASSED. Please report.\n");
WARN_ON(1);
+ return -EINVAL;
} else {
- printk(KERN_INFO "CPA selftests PASSED\n");
+ if (print)
+ printk(KERN_INFO "ok.\n");
+ }
+
+ return 0;
+}
+
+static int do_pageattr_test(void *__unused)
+{
+ while (!kthread_should_stop()) {
+ schedule_timeout_interruptible(HZ*30);
+ if (pageattr_test() < 0)
+ break;
+ if (print)
+ print--;
}
+ return 0;
+}
+
+static int start_pageattr_test(void)
+{
+ struct task_struct *p;
+
+ p = kthread_create(do_pageattr_test, NULL, "pageattr-test");
+ if (!IS_ERR(p))
+ wake_up_process(p);
+ else
+ WARN_ON(1);
return 0;
}
-module_init(exercise_pageattr);
+
+module_init(start_pageattr_test);
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 16ce841..4119379 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/interrupt.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -167,8 +168,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
pgprot_val(forbidden) |= _PAGE_NX;
-
-#ifdef CONFIG_DEBUG_RODATA
/* The .rodata section needs to be read-only */
if (within(address, (unsigned long)__start_rodata,
(unsigned long)__end_rodata))
@@ -179,7 +178,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
if (within(address, virt_to_highmap(__start_rodata),
virt_to_highmap(__end_rodata)))
pgprot_val(forbidden) |= _PAGE_RW;
-#endif
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
@@ -194,7 +192,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
* or when the present bit is not set. Otherwise we would return a
* pointer to a nonexisting mapping.
*/
-pte_t *lookup_address(unsigned long address, int *level)
+pte_t *lookup_address(unsigned long address, unsigned int *level)
{
pgd_t *pgd = pgd_offset_k(address);
pud_t *pud;
@@ -255,21 +253,11 @@ static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
struct cpa_data *cpa)
{
- unsigned long nextpage_addr, numpages, pmask, psize, flags;
+ unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
pte_t new_pte, old_pte, *tmp;
pgprot_t old_prot, new_prot;
- int level, do_split = 1;
-
- /*
- * An Athlon 64 X2 showed hard hangs if we tried to preserve
- * largepages and changed the PSE entry from RW to RO.
- *
- * As AMD CPUs have a long series of erratas in this area,
- * (and none of the known ones seem to explain this hang),
- * disable this code until the hang can be debugged:
- */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return 1;
+ int i, do_split = 1;
+ unsigned int level;
spin_lock_irqsave(&pgd_lock, flags);
/*
@@ -287,8 +275,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
break;
#ifdef CONFIG_X86_64
case PG_LEVEL_1G:
- psize = PMD_PAGE_SIZE;
- pmask = PMD_PAGE_MASK;
+ psize = PUD_PAGE_SIZE;
+ pmask = PUD_PAGE_MASK;
break;
#endif
default:
@@ -316,6 +304,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(new_prot, address);
/*
+ * We need to check the full range, whether
+ * static_protection() requires a different pgprot for one of
+ * the pages in the range we try to preserve:
+ */
+ addr = address + PAGE_SIZE;
+ for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
+ pgprot_t chk_prot = static_protections(new_prot, addr);
+
+ if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+ goto out_unlock;
+ }
+
+ /*
* If there are no changes, return. maxpages has been updated
* above:
*/
@@ -349,23 +350,103 @@ out_unlock:
return do_split;
}
+static LIST_HEAD(page_pool);
+static unsigned long pool_size, pool_pages, pool_low;
+static unsigned long pool_used, pool_failed, pool_refill;
+
+static void cpa_fill_pool(void)
+{
+ struct page *p;
+ gfp_t gfp = GFP_KERNEL;
+
+ /* Do not allocate from interrupt context */
+ if (in_irq() || irqs_disabled())
+ return;
+ /*
+ * Check unlocked. I does not matter when we have one more
+ * page in the pool. The bit lock avoids recursive pool
+ * allocations:
+ */
+ if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+ return;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ /*
+ * We could do:
+ * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
+ * but this fails on !PREEMPT kernels
+ */
+ gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
+#endif
+
+ while (pool_pages < pool_size) {
+ p = alloc_pages(gfp, 0);
+ if (!p) {
+ pool_failed++;
+ break;
+ }
+ spin_lock_irq(&pgd_lock);
+ list_add(&p->lru, &page_pool);
+ pool_pages++;
+ spin_unlock_irq(&pgd_lock);
+ }
+ clear_bit_unlock(0, &pool_refill);
+}
+
+#define SHIFT_MB (20 - PAGE_SHIFT)
+#define ROUND_MB_GB ((1 << 10) - 1)
+#define SHIFT_MB_GB 10
+#define POOL_PAGES_PER_GB 16
+
+void __init cpa_init(void)
+{
+ struct sysinfo si;
+ unsigned long gb;
+
+ si_meminfo(&si);
+ /*
+ * Calculate the number of pool pages:
+ *
+ * Convert totalram (nr of pages) to MiB and round to the next
+ * GiB. Shift MiB to Gib and multiply the result by
+ * POOL_PAGES_PER_GB:
+ */
+ gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+ pool_size = POOL_PAGES_PER_GB * gb;
+ pool_low = pool_size;
+
+ cpa_fill_pool();
+ printk(KERN_DEBUG
+ "CPA: page pool initialized %lu of %lu pages preallocated\n",
+ pool_pages, pool_size);
+}
+
static int split_large_page(pte_t *kpte, unsigned long address)
{
unsigned long flags, pfn, pfninc = 1;
- gfp_t gfp_flags = GFP_KERNEL;
unsigned int i, level;
pte_t *pbase, *tmp;
pgprot_t ref_prot;
struct page *base;
-#ifdef CONFIG_DEBUG_PAGEALLOC
- gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
-#endif
- base = alloc_pages(gfp_flags, 0);
- if (!base)
+ /*
+ * Get a page from the pool. The pool list is protected by the
+ * pgd_lock, which we have to take anyway for the split
+ * operation:
+ */
+ spin_lock_irqsave(&pgd_lock, flags);
+ if (list_empty(&page_pool)) {
+ spin_unlock_irqrestore(&pgd_lock, flags);
return -ENOMEM;
+ }
+
+ base = list_first_entry(&page_pool, struct page, lru);
+ list_del(&base->lru);
+ pool_pages--;
+
+ if (pool_pages < pool_low)
+ pool_low = pool_pages;
- spin_lock_irqsave(&pgd_lock, flags);
/*
* Check for races, another CPU might have split this page
* up for us already:
@@ -410,17 +491,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
base = NULL;
out_unlock:
+ /*
+ * If we dropped out via the lookup_address check under
+ * pgd_lock then stick the page back into the pool:
+ */
+ if (base) {
+ list_add(&base->lru, &page_pool);
+ pool_pages++;
+ } else
+ pool_used++;
spin_unlock_irqrestore(&pgd_lock, flags);
- if (base)
- __free_pages(base, 0);
-
return 0;
}
static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
{
- int level, do_split, err;
+ int do_split, err;
+ unsigned int level;
struct page *kpte_page;
pte_t *kpte;
@@ -600,6 +688,15 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
return 0;
+ /* Ensure we are PAGE_SIZE aligned */
+ if (addr & ~PAGE_MASK) {
+ addr &= PAGE_MASK;
+ /*
+ * People should not be passing in unaligned addresses:
+ */
+ WARN_ON_ONCE(1);
+ }
+
cpa.vaddr = addr;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
@@ -612,7 +709,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
* Check whether we really changed something:
*/
if (!cpa.flushtlb)
- return ret;
+ goto out;
/*
* No need to flush, when we did not set any of the caching
@@ -631,6 +728,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
else
cpa_flush_all(cache);
+out:
+ cpa_fill_pool();
return ret;
}
@@ -771,8 +870,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
return;
/*
- * The return value is ignored - the calls cannot fail,
- * large pages are disabled at boot time:
+ * The return value is ignored as the calls cannot fail.
+ * Large pages are kept enabled at boot time, and are
+ * split up quickly with DEBUG_PAGEALLOC. If a splitup
+ * fails here (due to temporary memory shortage) no damage
+ * is done because we just keep the largepage intact up
+ * to the next attempt when it will likely be split up:
*/
if (enable)
__set_pages_p(page, numpages);
@@ -784,6 +887,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* but that can deadlock->flush only current cpu:
*/
__flush_tlb_all();
+
+ /*
+ * Try to refill the page pool here. We can do this only after
+ * the tlb flush.
+ */
+ cpa_fill_pool();
}
#endif
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6c19146..73aba71 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -183,7 +183,7 @@ pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
}
-struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
struct page *pte;
@@ -192,6 +192,8 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
#else
pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
#endif
+ if (pte)
+ pgtable_page_ctor(pte);
return pte;
}
@@ -365,6 +367,7 @@ void check_pgt_cache(void)
void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
+ pgtable_page_dtor(pte);
paravirt_release_pt(page_to_pfn(pte));
tlb_remove_page(tlb, pte);
}
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 65416f8..ecd91ea 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -488,7 +488,8 @@ void __init srat_reserve_add_area(int nodeid)
printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
"pre-allocated memory.\n", (unsigned long long)total_mb);
reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
- nodes_add[nodeid].end - nodes_add[nodeid].start);
+ nodes_add[nodeid].end - nodes_add[nodeid].start,
+ BOOTMEM_DEFAULT);
}
}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 52deabc7..b7c67a1 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -26,16 +26,37 @@ int pcibios_last_bus = -1;
unsigned long pirq_table_addr;
struct pci_bus *pci_root_bus;
struct pci_raw_ops *raw_pci_ops;
+struct pci_raw_ops *raw_pci_ext_ops;
+
+int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 *val)
+{
+ if (reg < 256 && raw_pci_ops)
+ return raw_pci_ops->read(domain, bus, devfn, reg, len, val);
+ if (raw_pci_ext_ops)
+ return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val);
+ return -EINVAL;
+}
+
+int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 val)
+{
+ if (reg < 256 && raw_pci_ops)
+ return raw_pci_ops->write(domain, bus, devfn, reg, len, val);
+ if (raw_pci_ext_ops)
+ return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val);
+ return -EINVAL;
+}
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
{
- return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
+ return raw_pci_read(pci_domain_nr(bus), bus->number,
devfn, where, size, value);
}
static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
{
- return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
+ return raw_pci_write(pci_domain_nr(bus), bus->number,
devfn, where, size, value);
}
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 431c9a5..42f3e4c 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -14,7 +14,7 @@
#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
(0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
-int pci_conf1_read(unsigned int seg, unsigned int bus,
+static int pci_conf1_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
unsigned long flags;
@@ -45,7 +45,7 @@ int pci_conf1_read(unsigned int seg, unsigned int bus,
return 0;
}
-int pci_conf1_write(unsigned int seg, unsigned int bus,
+static int pci_conf1_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
unsigned long flags;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 74d30ff..a5ef5f5 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -215,7 +215,8 @@ static int quirk_aspm_offset[MAX_PCIEROOT << 3];
static int quirk_pcie_aspm_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
{
- return raw_pci_ops->read(0, bus->number, devfn, where, size, value);
+ return raw_pci_read(pci_domain_nr(bus), bus->number,
+ devfn, where, size, value);
}
/*
@@ -231,7 +232,8 @@ static int quirk_pcie_aspm_write(struct pci_bus *bus, unsigned int devfn, int wh
if ((offset) && (where == offset))
value = value & 0xfffffffc;
- return raw_pci_ops->write(0, bus->number, devfn, where, size, value);
+ return raw_pci_write(pci_domain_nr(bus), bus->number,
+ devfn, where, size, value);
}
static struct pci_ops quirk_pcie_aspm_ops = {
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index 5565d70..e041ced 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -22,7 +22,7 @@ static void __devinit pcibios_fixup_peer_bridges(void)
if (pci_find_bus(0, n))
continue;
for (devfn = 0; devfn < 256; devfn += 8) {
- if (!raw_pci_ops->read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
+ if (!raw_pci_read(0, n, devfn, PCI_VENDOR_ID, 2, &l) &&
l != 0x0000 && l != 0xffff) {
DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 4df637e..8d54df4 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -22,46 +22,13 @@
#define MMCONFIG_APER_MIN (2 * 1024*1024)
#define MMCONFIG_APER_MAX (256 * 1024*1024)
-DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
-
/* Indicate if the mmcfg resources have been placed into the resource table. */
static int __initdata pci_mmcfg_resources_inserted;
-/* K8 systems have some devices (typically in the builtin northbridge)
- that are only accessible using type1
- Normally this can be expressed in the MCFG by not listing them
- and assigning suitable _SEGs, but this isn't implemented in some BIOS.
- Instead try to discover all devices on bus 0 that are unreachable using MM
- and fallback for them. */
-static void __init unreachable_devices(void)
-{
- int i, bus;
- /* Use the max bus number from ACPI here? */
- for (bus = 0; bus < PCI_MMCFG_MAX_CHECK_BUS; bus++) {
- for (i = 0; i < 32; i++) {
- unsigned int devfn = PCI_DEVFN(i, 0);
- u32 val1, val2;
-
- pci_conf1_read(0, bus, devfn, 0, 4, &val1);
- if (val1 == 0xffffffff)
- continue;
-
- if (pci_mmcfg_arch_reachable(0, bus, devfn)) {
- raw_pci_ops->read(0, bus, devfn, 0, 4, &val2);
- if (val1 == val2)
- continue;
- }
- set_bit(i + 32 * bus, pci_mmcfg_fallback_slots);
- printk(KERN_NOTICE "PCI: No mmconfig possible on device"
- " %02x:%02x\n", bus, i);
- }
- }
-}
-
static const char __init *pci_mmcfg_e7520(void)
{
u32 win;
- pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
+ pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);
win = win & 0xf000;
if(win == 0x0000 || win == 0xf000)
@@ -86,7 +53,7 @@ static const char __init *pci_mmcfg_intel_945(void)
pci_mmcfg_config_num = 1;
- pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar);
+ pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar);
/* Enable bit */
if (!(pciexbar & 1))
@@ -151,7 +118,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
int i;
const char *name;
- pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
+ pci_direct_conf1.read(0, 0, PCI_DEVFN(0,0), 0, 4, &l);
vendor = l & 0xffff;
device = (l >> 16) & 0xffff;
@@ -270,8 +237,6 @@ void __init pci_mmcfg_init(int type)
return;
if (pci_mmcfg_arch_init()) {
- if (type == 1)
- unreachable_devices();
if (known_bridge)
pci_mmcfg_insert_resources(IORESOURCE_BUSY);
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 1bf5816..081816a 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -30,10 +30,6 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn)
struct acpi_mcfg_allocation *cfg;
int cfg_num;
- if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS &&
- test_bit(PCI_SLOT(devfn) + 32*bus, pci_mmcfg_fallback_slots))
- return 0;
-
for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) {
cfg = &pci_mmcfg_config[cfg_num];
if (cfg->pci_segment == seg &&
@@ -68,13 +64,13 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
u32 base;
if ((bus > 255) || (devfn > 255) || (reg > 4095)) {
- *value = -1;
+err: *value = -1;
return -EINVAL;
}
base = get_base_addr(seg, bus, devfn);
if (!base)
- return pci_conf1_read(seg,bus,devfn,reg,len,value);
+ goto err;
spin_lock_irqsave(&pci_config_lock, flags);
@@ -107,7 +103,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
base = get_base_addr(seg, bus, devfn);
if (!base)
- return pci_conf1_write(seg,bus,devfn,reg,len,value);
+ return -EINVAL;
spin_lock_irqsave(&pci_config_lock, flags);
@@ -134,15 +130,9 @@ static struct pci_raw_ops pci_mmcfg = {
.write = pci_mmcfg_write,
};
-int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
- unsigned int devfn)
-{
- return get_base_addr(seg, bus, devfn) != 0;
-}
-
int __init pci_mmcfg_arch_init(void)
{
- printk(KERN_INFO "PCI: Using MMCONFIG\n");
- raw_pci_ops = &pci_mmcfg;
+ printk(KERN_INFO "PCI: Using MMCONFIG for extended config space\n");
+ raw_pci_ext_ops = &pci_mmcfg;
return 1;
}
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index 4095e4d..9207fd4 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -40,9 +40,7 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus)
static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
{
char __iomem *addr;
- if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS &&
- test_bit(32*bus + PCI_SLOT(devfn), pci_mmcfg_fallback_slots))
- return NULL;
+
addr = get_virt(seg, bus);
if (!addr)
return NULL;
@@ -56,13 +54,13 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
/* Why do we have this when nobody checks it. How about a BUG()!? -AK */
if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
- *value = -1;
+err: *value = -1;
return -EINVAL;
}
addr = pci_dev_base(seg, bus, devfn);
if (!addr)
- return pci_conf1_read(seg,bus,devfn,reg,len,value);
+ goto err;
switch (len) {
case 1:
@@ -90,7 +88,7 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
addr = pci_dev_base(seg, bus, devfn);
if (!addr)
- return pci_conf1_write(seg,bus,devfn,reg,len,value);
+ return -EINVAL;
switch (len) {
case 1:
@@ -126,12 +124,6 @@ static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg)
return addr;
}
-int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
- unsigned int devfn)
-{
- return pci_dev_base(seg, bus, devfn) != NULL;
-}
-
int __init pci_mmcfg_arch_init(void)
{
int i;
@@ -152,6 +144,6 @@ int __init pci_mmcfg_arch_init(void)
return 0;
}
}
- raw_pci_ops = &pci_mmcfg;
+ raw_pci_ext_ops = &pci_mmcfg;
return 1;
}
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
index ac56d39..3431518 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/pci/pci.h
@@ -85,10 +85,17 @@ extern spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
extern void (*pcibios_disable_irq)(struct pci_dev *dev);
-extern int pci_conf1_write(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 value);
-extern int pci_conf1_read(unsigned int seg, unsigned int bus,
- unsigned int devfn, int reg, int len, u32 *value);
+struct pci_raw_ops {
+ int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 *val);
+ int (*write)(unsigned int domain, unsigned int bus, unsigned int devfn,
+ int reg, int len, u32 val);
+};
+
+extern struct pci_raw_ops *raw_pci_ops;
+extern struct pci_raw_ops *raw_pci_ext_ops;
+
+extern struct pci_raw_ops pci_direct_conf1;
extern int pci_direct_probe(void);
extern void pci_direct_init(int type);
@@ -98,13 +105,6 @@ extern void pcibios_sort(void);
/* pci-mmconfig.c */
-/* Verify the first 16 busses. We assume that systems with more busses
- get MCFG right. */
-#define PCI_MMCFG_MAX_CHECK_BUS 16
-extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
-
-extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
- unsigned int devfn);
extern int __init pci_mmcfg_arch_init(void);
/*
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 8ecb1c7..c2df4e9 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -13,9 +13,6 @@
#include "pci.h"
-
-extern struct pci_raw_ops pci_direct_conf1;
-
static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
static void pci_visws_disable_irq(struct pci_dev *dev) { }
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index d764ec9..9ff4d5b 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -1,2 +1,2 @@
-obj-$(CONFIG_PM) += cpu.o
-obj-$(CONFIG_HIBERNATION) += swsusp.o suspend.o
+obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o
+obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu_32.c
index efcf620..7f9c6da 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu_32.c
@@ -40,7 +40,7 @@ static void __save_processor_state(struct saved_context *ctxt)
savesegment(ss, ctxt->ss);
/*
- * control registers
+ * control registers
*/
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
diff --git a/arch/x86/kernel/suspend_64.c b/arch/x86/power/cpu_64.c
index 7ac7130..66bdfb5 100644
--- a/arch/x86/kernel/suspend_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -1,8 +1,9 @@
/*
- * Suspend support specific for i386.
+ * Suspend and hibernation support for x86-64
*
* Distribute under GPLv2
*
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
* Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
* Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
*/
@@ -14,9 +15,6 @@
#include <asm/pgtable.h>
#include <asm/mtrr.h>
-/* References to section boundaries */
-extern const void __nosave_begin, __nosave_end;
-
static void fix_processor_context(void);
struct saved_context saved_context;
@@ -63,7 +61,7 @@ static void __save_processor_state(struct saved_context *ctxt)
mtrr_save_fixed_ranges(NULL);
/*
- * control registers
+ * control registers
*/
rdmsrl(MSR_EFER, ctxt->efer);
ctxt->cr0 = read_cr0();
@@ -166,155 +164,3 @@ static void fix_processor_context(void)
loaddebug(&current->thread, 7);
}
}
-
-#ifdef CONFIG_HIBERNATION
-/* Defined in arch/x86_64/kernel/suspend_asm.S */
-extern int restore_image(void);
-
-/*
- * Address to jump to in the last phase of restore in order to get to the image
- * kernel's text (this value is passed in the image header).
- */
-unsigned long restore_jump_address;
-
-/*
- * Value of the cr3 register from before the hibernation (this value is passed
- * in the image header).
- */
-unsigned long restore_cr3;
-
-pgd_t *temp_level4_pgt;
-
-void *relocated_restore_code;
-
-static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
-{
- long i, j;
-
- i = pud_index(address);
- pud = pud + i;
- for (; i < PTRS_PER_PUD; pud++, i++) {
- unsigned long paddr;
- pmd_t *pmd;
-
- paddr = address + i*PUD_SIZE;
- if (paddr >= end)
- break;
-
- pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
- if (!pmd)
- return -ENOMEM;
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
- for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
- unsigned long pe;
-
- if (paddr >= end)
- break;
- pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
- pe &= __supported_pte_mask;
- set_pmd(pmd, __pmd(pe));
- }
- }
- return 0;
-}
-
-static int set_up_temporary_mappings(void)
-{
- unsigned long start, end, next;
- int error;
-
- temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
- if (!temp_level4_pgt)
- return -ENOMEM;
-
- /* It is safe to reuse the original kernel mapping */
- set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
- init_level4_pgt[pgd_index(__START_KERNEL_map)]);
-
- /* Set up the direct mapping from scratch */
- start = (unsigned long)pfn_to_kaddr(0);
- end = (unsigned long)pfn_to_kaddr(end_pfn);
-
- for (; start < end; start = next) {
- pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
- if (!pud)
- return -ENOMEM;
- next = start + PGDIR_SIZE;
- if (next > end)
- next = end;
- if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
- return error;
- set_pgd(temp_level4_pgt + pgd_index(start),
- mk_kernel_pgd(__pa(pud)));
- }
- return 0;
-}
-
-int swsusp_arch_resume(void)
-{
- int error;
-
- /* We have got enough memory and from now on we cannot recover */
- if ((error = set_up_temporary_mappings()))
- return error;
-
- relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
- if (!relocated_restore_code)
- return -ENOMEM;
- memcpy(relocated_restore_code, &core_restore_code,
- &restore_registers - &core_restore_code);
-
- restore_image();
- return 0;
-}
-
-/*
- * pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
- unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
- unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
- return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-
-struct restore_data_record {
- unsigned long jump_address;
- unsigned long cr3;
- unsigned long magic;
-};
-
-#define RESTORE_MAGIC 0x0123456789ABCDEFUL
-
-/**
- * arch_hibernation_header_save - populate the architecture specific part
- * of a hibernation image header
- * @addr: address to save the data at
- */
-int arch_hibernation_header_save(void *addr, unsigned int max_size)
-{
- struct restore_data_record *rdr = addr;
-
- if (max_size < sizeof(struct restore_data_record))
- return -EOVERFLOW;
- rdr->jump_address = restore_jump_address;
- rdr->cr3 = restore_cr3;
- rdr->magic = RESTORE_MAGIC;
- return 0;
-}
-
-/**
- * arch_hibernation_header_restore - read the architecture specific data
- * from the hibernation image header
- * @addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
- struct restore_data_record *rdr = addr;
-
- restore_jump_address = rdr->jump_address;
- restore_cr3 = rdr->cr3;
- return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
-}
-#endif /* CONFIG_HIBERNATION */
diff --git a/arch/x86/power/suspend.c b/arch/x86/power/hibernate_32.c
index a0020b9..f2b6e3f 100644
--- a/arch/x86/power/suspend.c
+++ b/arch/x86/power/hibernate_32.c
@@ -1,5 +1,5 @@
/*
- * Suspend support specific for i386 - temporary page tables
+ * Hibernation support specific for i386 - temporary page tables
*
* Distribute under GPLv2
*
@@ -13,7 +13,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
-/* Defined in arch/i386/power/swsusp.S */
+/* Defined in hibernate_asm_32.S */
extern int restore_image(void);
/* References to section boundaries */
@@ -23,7 +23,7 @@ extern const void __nosave_begin, __nosave_end;
pgd_t *resume_pg_dir;
/* The following three functions are based on the analogous code in
- * arch/i386/mm/init.c
+ * arch/x86/mm/init_32.c
*/
/*
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
new file mode 100644
index 0000000..b542355
--- /dev/null
+++ b/arch/x86/power/hibernate_64.c
@@ -0,0 +1,169 @@
+/*
+ * Hibernation support for x86-64
+ *
+ * Distribute under GPLv2
+ *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright (c) 2002 Pavel Machek <pavel@suse.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+
+#include <linux/smp.h>
+#include <linux/suspend.h>
+#include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mtrr.h>
+
+/* References to section boundaries */
+extern const void __nosave_begin, __nosave_end;
+
+/* Defined in hibernate_asm_64.S */
+extern int restore_image(void);
+
+/*
+ * Address to jump to in the last phase of restore in order to get to the image
+ * kernel's text (this value is passed in the image header).
+ */
+unsigned long restore_jump_address;
+
+/*
+ * Value of the cr3 register from before the hibernation (this value is passed
+ * in the image header).
+ */
+unsigned long restore_cr3;
+
+pgd_t *temp_level4_pgt;
+
+void *relocated_restore_code;
+
+static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
+{
+ long i, j;
+
+ i = pud_index(address);
+ pud = pud + i;
+ for (; i < PTRS_PER_PUD; pud++, i++) {
+ unsigned long paddr;
+ pmd_t *pmd;
+
+ paddr = address + i*PUD_SIZE;
+ if (paddr >= end)
+ break;
+
+ pmd = (pmd_t *)get_safe_page(GFP_ATOMIC);
+ if (!pmd)
+ return -ENOMEM;
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) {
+ unsigned long pe;
+
+ if (paddr >= end)
+ break;
+ pe = __PAGE_KERNEL_LARGE_EXEC | paddr;
+ pe &= __supported_pte_mask;
+ set_pmd(pmd, __pmd(pe));
+ }
+ }
+ return 0;
+}
+
+static int set_up_temporary_mappings(void)
+{
+ unsigned long start, end, next;
+ int error;
+
+ temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC);
+ if (!temp_level4_pgt)
+ return -ENOMEM;
+
+ /* It is safe to reuse the original kernel mapping */
+ set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map),
+ init_level4_pgt[pgd_index(__START_KERNEL_map)]);
+
+ /* Set up the direct mapping from scratch */
+ start = (unsigned long)pfn_to_kaddr(0);
+ end = (unsigned long)pfn_to_kaddr(end_pfn);
+
+ for (; start < end; start = next) {
+ pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC);
+ if (!pud)
+ return -ENOMEM;
+ next = start + PGDIR_SIZE;
+ if (next > end)
+ next = end;
+ if ((error = res_phys_pud_init(pud, __pa(start), __pa(next))))
+ return error;
+ set_pgd(temp_level4_pgt + pgd_index(start),
+ mk_kernel_pgd(__pa(pud)));
+ }
+ return 0;
+}
+
+int swsusp_arch_resume(void)
+{
+ int error;
+
+ /* We have got enough memory and from now on we cannot recover */
+ if ((error = set_up_temporary_mappings()))
+ return error;
+
+ relocated_restore_code = (void *)get_safe_page(GFP_ATOMIC);
+ if (!relocated_restore_code)
+ return -ENOMEM;
+ memcpy(relocated_restore_code, &core_restore_code,
+ &restore_registers - &core_restore_code);
+
+ restore_image();
+ return 0;
+}
+
+/*
+ * pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+
+int pfn_is_nosave(unsigned long pfn)
+{
+ unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+ unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+ return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+struct restore_data_record {
+ unsigned long jump_address;
+ unsigned long cr3;
+ unsigned long magic;
+};
+
+#define RESTORE_MAGIC 0x0123456789ABCDEFUL
+
+/**
+ * arch_hibernation_header_save - populate the architecture specific part
+ * of a hibernation image header
+ * @addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+ struct restore_data_record *rdr = addr;
+
+ if (max_size < sizeof(struct restore_data_record))
+ return -EOVERFLOW;
+ rdr->jump_address = restore_jump_address;
+ rdr->cr3 = restore_cr3;
+ rdr->magic = RESTORE_MAGIC;
+ return 0;
+}
+
+/**
+ * arch_hibernation_header_restore - read the architecture specific data
+ * from the hibernation image header
+ * @addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+ struct restore_data_record *rdr = addr;
+
+ restore_jump_address = rdr->jump_address;
+ restore_cr3 = rdr->cr3;
+ return (rdr->magic == RESTORE_MAGIC) ? 0 : -EINVAL;
+}
diff --git a/arch/x86/power/swsusp.S b/arch/x86/power/hibernate_asm_32.S
index 53662e0..b95aa6c 100644
--- a/arch/x86/power/swsusp.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -1,7 +1,6 @@
.text
-/* Originally gcc generated, modified by hand
- *
+/*
* This may not use any stack, nor any variable that is not "NoSave":
*
* Its rewriting one kernel image with another. What is stack in "old"
diff --git a/arch/x86/kernel/suspend_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index aeb9a4d..1deb3244 100644
--- a/arch/x86/kernel/suspend_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -1,7 +1,12 @@
-/* Copyright 2004,2005 Pavel Machek <pavel@suse.cz>, Andi Kleen <ak@suse.de>, Rafael J. Wysocki <rjw@sisk.pl>
+/*
+ * Hibernation support for x86-64
*
* Distribute under GPLv2.
*
+ * Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright 2005 Andi Kleen <ak@suse.de>
+ * Copyright 2004 Pavel Machek <pavel@suse.cz>
+ *
* swsusp_arch_resume must not use any stack or any nonlocal variables while
* copying pages:
*
@@ -9,7 +14,7 @@
* image could very well be data page in "new" image, and overwriting
* your own stack under you is bad idea.
*/
-
+
.text
#include <linux/linkage.h>
#include <asm/segment.h>
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index d28dda5..f385a4b 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -7,7 +7,7 @@ VDSO32-$(CONFIG_X86_32) := y
VDSO32-$(CONFIG_COMPAT) := y
vdso-install-$(VDSO64-y) += vdso.so
-vdso-install-$(VDSO32-y) += $(vdso32-y:=.so)
+vdso-install-$(VDSO32-y) += $(vdso32-images)
# files to link into the vdso
@@ -63,6 +63,8 @@ vdso32.so-$(CONFIG_X86_32) += int80
vdso32.so-$(CONFIG_COMPAT) += syscall
vdso32.so-$(VDSO32-y) += sysenter
+vdso32-images = $(vdso32.so-y:%=vdso32-%.so)
+
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
@@ -71,21 +73,21 @@ VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
override obj-dirs = $(dir $(obj)) $(obj)/vdso32/
targets += vdso32/vdso32.lds
-targets += $(vdso32.so-y:%=vdso32-%.so.dbg) $(vdso32.so-y:%=vdso32-%.so)
+targets += $(vdso32-images) $(vdso32-images:=.dbg)
targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o)
-extra-y += $(vdso32.so-y:%=vdso32-%.so)
+extra-y += $(vdso32-images)
-$(obj)/vdso32.o: $(vdso32.so-y:%=$(obj)/vdso32-%.so)
+$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%)
KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS))
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): asflags-$(CONFIG_X86_64) += -m32
+$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32)
+$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32
-$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
- $(obj)/vdso32/vdso32.lds \
- $(obj)/vdso32/note.o \
- $(obj)/vdso32/%.o
+$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \
+ $(obj)/vdso32/vdso32.lds \
+ $(obj)/vdso32/note.o \
+ $(obj)/vdso32/%.o
$(call if_changed,vdso)
# Make vdso32-*-syms.lds from each image, and then make sure they match.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index de647bc..49e5358 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -798,6 +798,10 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
* added to the table can be prepared properly for Xen.
*/
xen_write_cr3(__pa(base));
+
+ /* Unpin initial Xen pagetable */
+ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE,
+ PFN_DOWN(__pa(xen_start_info->pt_base)));
}
static __init void xen_pagetable_setup_done(pgd_t *base)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 45aa771..0144395 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -58,7 +58,7 @@
xmaddr_t arbitrary_virt_to_machine(unsigned long address)
{
- int level;
+ unsigned int level;
pte_t *pte = lookup_address(address, &level);
unsigned offset = address & PAGE_MASK;
@@ -71,7 +71,7 @@ void make_lowmem_page_readonly(void *vaddr)
{
pte_t *pte, ptev;
unsigned long address = (unsigned long)vaddr;
- int level;
+ unsigned int level;
pte = lookup_address(address, &level);
BUG_ON(pte == NULL);
@@ -86,7 +86,7 @@ void make_lowmem_page_readwrite(void *vaddr)
{
pte_t *pte, ptev;
unsigned long address = (unsigned long)vaddr;
- int level;
+ unsigned int level;
pte = lookup_address(address, &level);
BUG_ON(pte == NULL);
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index b3721fd..c39e1a5 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -217,17 +217,17 @@ unsigned long long xen_sched_clock(void)
/* Get the CPU speed from Xen */
unsigned long xen_cpu_khz(void)
{
- u64 cpu_khz = 1000000ULL << 32;
+ u64 xen_khz = 1000000ULL << 32;
const struct vcpu_time_info *info =
&HYPERVISOR_shared_info->vcpu_info[0].time;
- do_div(cpu_khz, info->tsc_to_system_mul);
+ do_div(xen_khz, info->tsc_to_system_mul);
if (info->tsc_shift < 0)
- cpu_khz <<= -info->tsc_shift;
+ xen_khz <<= -info->tsc_shift;
else
- cpu_khz >>= info->tsc_shift;
+ xen_khz >>= info->tsc_shift;
- return cpu_khz;
+ return xen_khz;
}
/*
OpenPOWER on IntegriCloud