141 files changed, 2836 insertions, 2825 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fc20fdc..e4ff5d1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
-	select HAVE_PERF_COUNTERS if (!M386 && !M486)
+	select HAVE_PERF_EVENTS if (!M386 && !M486)
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
 	select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -150,7 +150,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y
 
-config HAVE_DYNAMIC_PER_CPU_AREA
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+	def_bool y
+
+config NEED_PER_CPU_PAGE_FIRST_CHUNK
 	def_bool y
 
 config HAVE_CPUMASK_OF_CPU_MAP
@@ -179,6 +182,10 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
 config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 	def_bool y
 
+config HAVE_INTEL_TXT
+	def_bool y
+	depends on EXPERIMENTAL && DMAR && ACPI
+
 # Use the generic interrupt handling code in kernel/irq/:
 config GENERIC_HARDIRQS
 	bool
@@ -318,6 +325,7 @@ config X86_EXTENDED_PLATFORM
 		SGI 320/540 (Visual Workstation)
 		Summit/EXA (IBM x440)
 		Unisys ES7000 IA32 series
+		Moorestown MID devices
 
 	  If you have one of these systems, or if you want to build a
 	  generic distribution kernel, say Y here - otherwise say N.
@@ -377,6 +385,18 @@ config X86_ELAN
 
 	  If unsure, choose "PC-compatible" instead.
 
+config X86_MRST
+       bool "Moorestown MID platform"
+	depends on X86_32
+	depends on X86_EXTENDED_PLATFORM
+	---help---
+	  Moorestown is Intel's Low Power Intel Architecture (LPIA) based Moblin
+	  Internet Device(MID) platform. Moorestown consists of two chips:
+	  Lincroft (CPU core, graphics, and memory controller) and Langwell IOH.
+	  Unlike standard x86 PCs, Moorestown does not have many legacy devices
+	  nor standard legacy replacement devices/features. e.g. Moorestown does
+	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
+
 config X86_RDC321X
 	bool "RDC R-321x SoC"
 	depends on X86_32
@@ -776,41 +796,17 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 	  increased on these systems.
 
 config X86_MCE
-	bool "Machine Check Exception"
+	bool "Machine Check / overheating reporting"
 	---help---
-	  Machine Check Exception support allows the processor to notify the
-	  kernel if it detects a problem (e.g. overheating, component failure).
+	  Machine Check support allows the processor to notify the
+	  kernel if it detects a problem (e.g. overheating, data corruption).
 	  The action the kernel takes depends on the severity of the problem,
-	  ranging from a warning message on the console, to halting the machine.
-	  Your processor must be a Pentium or newer to support this - check the
-	  flags in /proc/cpuinfo for mce.  Note that some older Pentium systems
-	  have a design flaw which leads to false MCE events - hence MCE is
-	  disabled on all P5 processors, unless explicitly enabled with "mce"
-	  as a boot argument.  Similarly, if MCE is built in and creates a
-	  problem on some new non-standard machine, you can boot with "nomce"
-	  to disable it.  MCE support simply ignores non-MCE processors like
-	  the 386 and 486, so nearly everyone can say Y here.
-
-config X86_OLD_MCE
-	depends on X86_32 && X86_MCE
-	bool "Use legacy machine check code (will go away)"
-	default n
-	select X86_ANCIENT_MCE
-	---help---
-	  Use the old i386 machine check code. This is merely intended for
-	  testing in a transition period. Try this if you run into any machine
-	  check related software problems, but report the problem to
-	  linux-kernel.  When in doubt say no.
-
-config X86_NEW_MCE
-	depends on X86_MCE
-	bool
-	default y if (!X86_OLD_MCE && X86_32) || X86_64
+	  ranging from warning messages to halting the machine.
 
 config X86_MCE_INTEL
 	def_bool y
 	prompt "Intel MCE features"
-	depends on X86_NEW_MCE && X86_LOCAL_APIC
+	depends on X86_MCE && X86_LOCAL_APIC
 	---help---
 	   Additional support for intel specific MCE features such as
 	   the thermal monitor.
@@ -818,14 +814,14 @@ config X86_MCE_INTEL
 config X86_MCE_AMD
 	def_bool y
 	prompt "AMD MCE features"
-	depends on X86_NEW_MCE && X86_LOCAL_APIC
+	depends on X86_MCE && X86_LOCAL_APIC
 	---help---
 	   Additional support for AMD specific MCE features such as
 	   the DRAM Error Threshold.
 
 config X86_ANCIENT_MCE
 	def_bool n
-	depends on X86_32
+	depends on X86_32 && X86_MCE
 	prompt "Support for old Pentium 5 / WinChip machine checks"
 	---help---
 	  Include support for machine check handling on old Pentium 5 or WinChip
@@ -838,36 +834,16 @@ config X86_MCE_THRESHOLD
 	default y
 
 config X86_MCE_INJECT
-	depends on X86_NEW_MCE
+	depends on X86_MCE
 	tristate "Machine check injector support"
 	---help---
 	  Provide support for injecting machine checks for testing purposes.
 	  If you don't know what a machine check is and you don't do kernel
 	  QA it is safe to say n.
 
-config X86_MCE_NONFATAL
-	tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4"
-	depends on X86_OLD_MCE
-	---help---
-	  Enabling this feature starts a timer that triggers every 5 seconds which
-	  will look at the machine check registers to see if anything happened.
-	  Non-fatal problems automatically get corrected (but still logged).
-	  Disable this if you don't want to see these messages.
-	  Seeing the messages this option prints out may be indicative of dying
-	  or out-of-spec (ie, overclocked) hardware.
-	  This option only does something on certain CPUs.
-	  (AMD Athlon/Duron and Intel Pentium 4)
-
-config X86_MCE_P4THERMAL
-	bool "check for P4 thermal throttling interrupt."
-	depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP)
-	---help---
-	  Enabling this feature will cause a message to be printed when the P4
-	  enters thermal throttling.
-
 config X86_THERMAL_VECTOR
 	def_bool y
-	depends on X86_MCE_P4THERMAL || X86_MCE_INTEL
+	depends on X86_MCE_INTEL
 
 config VM86
 	bool "Enable VM86 support" if EMBEDDED
@@ -1413,6 +1389,10 @@ config X86_PAT
 
 	  If unsure, say Y.
 
+config ARCH_USES_PG_UNCACHED
+	def_bool y
+	depends on X86_PAT
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ba331bf..74619c4 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -831,5 +831,5 @@ ia32_sys_call_table:
 	.quad compat_sys_preadv
 	.quad compat_sys_pwritev
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
-	.quad sys_perf_counter_open
+	.quad sys_perf_event_open
 ia32_syscall_end:
diff --git a/arch/x86/include/asm/agp.h b/arch/x86/include/asm/agp.h
index 9825cd6..eec2a70 100644
--- a/arch/x86/include/asm/agp.h
+++ b/arch/x86/include/asm/agp.h
@@ -22,10 +22,6 @@
  */
 #define flush_agp_cache() wbinvd()
 
-/* Convert a physical address to an address suitable for the GART. */
-#define phys_to_gart(x) (x)
-#define gart_to_phys(x) (x)
-
 /* GATT allocation. Returns/accepts GATT kernel virtual address. */
 #define alloc_gatt_pages(order)		\
 	((char *)__get_free_pages(GFP_KERNEL, (order)))
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 586b7ad..474d80d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -66,13 +66,23 @@ static inline void default_inquire_remote_apic(int apicid)
 }
 
 /*
+ * With 82489DX we can't rely on apic feature bit
+ * retrieved via cpuid but still have to deal with
+ * such an apic chip so we assume that SMP configuration
+ * is found from MP table (64bit case uses ACPI mostly
+ * which set smp presence flag as well so we are safe
+ * to use this helper too).
+ */
+static inline bool apic_from_smp_config(void)
+{
+	return smp_found_config && !disable_apic;
+}
+
+/*
  * Basic functions accessing APICs.
  */
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
-#else
-#define setup_boot_clock setup_boot_APIC_clock
-#define setup_secondary_clock setup_secondary_APIC_clock
 #endif
 
 #ifdef CONFIG_X86_64
@@ -252,6 +262,8 @@ static inline void lapic_shutdown(void) { }
 static inline void init_apic_mappings(void) { }
 static inline void disable_local_APIC(void) { }
 static inline void apic_disable(void) { }
+# define setup_boot_APIC_clock x86_init_noop
+# define setup_secondary_APIC_clock x86_init_noop
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_64
@@ -300,7 +312,7 @@ struct apic {
 	int (*cpu_present_to_apicid)(int mps_cpu);
 	physid_mask_t (*apicid_to_cpu_present)(int phys_apicid);
 	void (*setup_portio_remap)(void);
-	int (*check_phys_apicid_present)(int boot_cpu_physical_apicid);
+	int (*check_phys_apicid_present)(int phys_apicid);
 	void (*enable_apic_mode)(void);
 	int (*phys_pkg_id)(int cpuid_apic, int index_msb);
 
@@ -434,7 +446,7 @@ extern struct apic apic_x2apic_uv_x;
 DECLARE_PER_CPU(int, x2apic_extra_bits);
 
 extern int default_cpu_present_to_apicid(int mps_cpu);
-extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid);
+extern int default_check_phys_apicid_present(int phys_apicid);
 #endif
 
 static inline void default_wait_for_init_deassert(atomic_t *deassert)
@@ -550,9 +562,9 @@ static inline int __default_cpu_present_to_apicid(int mps_cpu)
 }
 
 static inline int
-__default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+__default_check_phys_apicid_present(int phys_apicid)
 {
-	return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map);
+	return physid_isset(phys_apicid, phys_cpu_present_map);
 }
 
 #ifdef CONFIG_X86_32
@@ -562,13 +574,13 @@ static inline int default_cpu_present_to_apicid(int mps_cpu)
 }
 
 static inline int
-default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+default_check_phys_apicid_present(int phys_apicid)
 {
-	return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+	return __default_check_phys_apicid_present(phys_apicid);
 }
 #else
 extern int default_cpu_present_to_apicid(int mps_cpu);
-extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid);
+extern int default_check_phys_apicid_present(int phys_apicid);
 #endif
 
 static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid)
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h
index 1724e8d..6be33d8 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/asm/bootparam.h
@@ -85,7 +85,8 @@ struct efi_info {
 struct boot_params {
 	struct screen_info screen_info;			/* 0x000 */
 	struct apm_bios_info apm_bios_info;		/* 0x040 */
-	__u8  _pad2[12];				/* 0x054 */
+	__u8  _pad2[4];					/* 0x054 */
+	__u64  tboot_addr;				/* 0x058 */
 	struct ist_info ist_info;			/* 0x060 */
 	__u8  _pad3[16];				/* 0x070 */
 	__u8  hd0_info[16];	/* obsolete! */		/* 0x080 */
@@ -109,4 +110,14 @@ struct boot_params {
 	__u8  _pad9[276];				/* 0xeec */
 } __attribute__((packed));
 
+enum {
+	X86_SUBARCH_PC = 0,
+	X86_SUBARCH_LGUEST,
+	X86_SUBARCH_XEN,
+	X86_SUBARCH_MRST,
+	X86_NR_SUBARCHS,
+};
+
+
+
 #endif /* _ASM_X86_BOOTPARAM_H */
diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h
index e55dfc1..b54f6af 100644
--- a/arch/x86/include/asm/cacheflush.h
+++ b/arch/x86/include/asm/cacheflush.h
@@ -43,8 +43,58 @@ static inline void copy_from_user_page(struct vm_area_struct *vma,
 	memcpy(dst, src, len);
 }
 
-#define PG_non_WB				PG_arch_1
-PAGEFLAG(NonWB, non_WB)
+#define PG_WC				PG_arch_1
+PAGEFLAG(WC, WC)
+
+#ifdef CONFIG_X86_PAT
+/*
+ * X86 PAT uses page flags WC and Uncached together to keep track of
+ * memory type of pages that have backing page struct. X86 PAT supports 3
+ * different memory types, _PAGE_CACHE_WB, _PAGE_CACHE_WC and
+ * _PAGE_CACHE_UC_MINUS and fourth state where page's memory type has not
+ * been changed from its default (value of -1 used to denote this).
+ * Note we do not support _PAGE_CACHE_UC here.
+ *
+ * Caller must hold memtype_lock for atomicity.
+ */
+static inline unsigned long get_page_memtype(struct page *pg)
+{
+	if (!PageUncached(pg) && !PageWC(pg))
+		return -1;
+	else if (!PageUncached(pg) && PageWC(pg))
+		return _PAGE_CACHE_WC;
+	else if (PageUncached(pg) && !PageWC(pg))
+		return _PAGE_CACHE_UC_MINUS;
+	else
+		return _PAGE_CACHE_WB;
+}
+
+static inline void set_page_memtype(struct page *pg, unsigned long memtype)
+{
+	switch (memtype) {
+	case _PAGE_CACHE_WC:
+		ClearPageUncached(pg);
+		SetPageWC(pg);
+		break;
+	case _PAGE_CACHE_UC_MINUS:
+		SetPageUncached(pg);
+		ClearPageWC(pg);
+		break;
+	case _PAGE_CACHE_WB:
+		SetPageUncached(pg);
+		SetPageWC(pg);
+		break;
+	default:
+	case -1:
+		ClearPageUncached(pg);
+		ClearPageWC(pg);
+		break;
+	}
+}
+#else
+static inline unsigned long get_page_memtype(struct page *pg) { return -1; }
+static inline void set_page_memtype(struct page *pg, unsigned long memtype) { }
+#endif
 
 /*
  * The set_memory_* API can be used to change various attributes of a virtual
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 847fee6..9cfc88b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -96,6 +96,7 @@
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
 #define X86_FEATURE_EXTD_APICID	(3*32+26) /* has extended APICID (8 bits) */
 #define X86_FEATURE_AMD_DCM     (3*32+27) /* multi-node processor */
+#define X86_FEATURE_APERFMPERF	(3*32+28) /* APERFMPERF */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3	(4*32+ 0) /* "pni" SSE-3 */
diff --git a/arch/x86/include/asm/do_timer.h b/arch/x86/include/asm/do_timer.h
deleted file mode 100644
index 23ecda0..0000000
--- a/arch/x86/include/asm/do_timer.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* defines for inline arch setup functions */
-#include <linux/clockchips.h>
-
-#include <asm/i8259.h>
-#include <asm/i8253.h>
-
-/**
- * do_timer_interrupt_hook - hook into timer tick
- *
- * Call the pit clock event handler. see asm/i8253.h
- **/
-
-static inline void do_timer_interrupt_hook(void)
-{
-	global_clock_event->event_handler(global_clock_event);
-}
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 7ecba4d..40b4e61 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -126,8 +126,6 @@ extern void e820_reserve_resources(void);
 extern void e820_reserve_resources_late(void);
 extern void setup_memory_map(void);
 extern char *default_machine_specific_memory_setup(void);
-extern char *machine_specific_memory_setup(void);
-extern char *memory_setup(void);
 #endif /* __KERNEL__ */
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 83c1bc8..456a304 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -299,6 +299,8 @@ do {									\
 
 #ifdef CONFIG_X86_32
 
+#define STACK_RND_MASK (0x7ff)
+
 #define VDSO_HIGH_BASE		(__fix_to_virt(FIX_VDSO))
 
 #define ARCH_DLINFO		ARCH_DLINFO_IA32(vdso_enabled)
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index ff8cbfa..f5693c8 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
 BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
 BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
 #endif
 
@@ -61,7 +61,7 @@ BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
 BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR)
 #endif
 
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR)
 #endif
 
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 7b2d71d..14f9890 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -132,6 +132,9 @@ enum fixed_addresses {
 #ifdef CONFIG_X86_32
 	FIX_WP_TEST,
 #endif
+#ifdef CONFIG_INTEL_TXT
+	FIX_TBOOT_BASE,
+#endif
 	__end_of_fixed_addresses
 };
 
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index 369f5c5..b78c094 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -20,7 +20,7 @@
 #ifndef ASM_X86__HYPERVISOR_H
 #define ASM_X86__HYPERVISOR_H
 
-extern unsigned long get_hypervisor_tsc_freq(void);
 extern void init_hypervisor(struct cpuinfo_x86 *c);
+extern void init_hypervisor_platform(void);
 
 #endif
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 85232d3..7c7c16c 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -143,6 +143,8 @@ extern int noioapicreroute;
 /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */
 extern int timer_through_8259;
 
+extern void io_apic_disable_legacy(void);
+
 /*
  * If we use the IO-APIC for IRQ routing, disable automatic
  * assignment of PCI IRQ's.
@@ -176,6 +178,7 @@ extern int setup_ioapic_entry(int apic, int irq,
 			      int polarity, int vector, int pin);
 extern void ioapic_write_entry(int apic, int pin,
 			       struct IO_APIC_route_entry e);
+extern void setup_ioapic_ids_from_mpc(void);
 
 struct mp_ioapic_gsi{
 	int gsi_base;
@@ -187,12 +190,14 @@ int mp_find_ioapic_pin(int ioapic, int gsi);
 void __init mp_register_ioapic(int id, u32 address, u32 gsi_base);
 
 #else  /* !CONFIG_X86_IO_APIC */
+
 #define io_apic_assign_pci_irqs 0
+#define setup_ioapic_ids_from_mpc x86_init_noop
 static const int timer_through_8259 = 0;
 static inline void ioapic_init_mappings(void)	{ }
 static inline void ioapic_insert_resources(void) { }
-
 static inline void probe_nr_irqs_gsi(void)	{ }
+
 #endif
 
 #endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
index 0e9fe1d..f35eb45 100644
--- a/arch/x86/include/asm/iomap.h
+++ b/arch/x86/include/asm/iomap.h
@@ -26,13 +26,16 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
-int
-is_io_mapping_possible(resource_size_t base, unsigned long size);
-
 void *
 iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 
 void
 iounmap_atomic(void *kvaddr, enum km_type type);
 
+int
+iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot);
+
+void
+iomap_free(resource_size_t base, unsigned long size);
+
 #endif /* _ASM_X86_IOMAP_H */
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index f38481b..ddda6cb 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -37,7 +37,6 @@ extern void fixup_irqs(void);
 #endif
 
 extern void (*generic_interrupt_extension)(void);
-extern void init_IRQ(void);
 extern void native_init_IRQ(void);
 extern bool handle_irq(unsigned irq, struct pt_regs *regs);
 
@@ -47,4 +46,6 @@ extern unsigned int do_IRQ(struct pt_regs *regs);
 extern DECLARE_BITMAP(used_vectors, NR_VECTORS);
 extern int vector_used_by_percpu_irq(unsigned int vector);
 
+extern void init_ISA_irqs(void);
+
 #endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 5cdd8d1..b608a64 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -9,7 +9,7 @@
  */
 
 #define MCG_BANKCNT_MASK	0xff         /* Number of Banks */
-#define MCG_CTL_P		(1ULL<<8)    /* MCG_CAP register available */
+#define MCG_CTL_P		(1ULL<<8)    /* MCG_CTL register available */
 #define MCG_EXT_P		(1ULL<<9)    /* Extended registers available */
 #define MCG_CMCI_P		(1ULL<<10)   /* CMCI supported */
 #define MCG_EXT_CNT_MASK	0xff0000     /* Number of Extended registers */
@@ -38,6 +38,14 @@
 #define MCM_ADDR_MEM	 3	/* memory address */
 #define MCM_ADDR_GENERIC 7	/* generic */
 
+#define MCJ_CTX_MASK		3
+#define MCJ_CTX(flags)		((flags) & MCJ_CTX_MASK)
+#define MCJ_CTX_RANDOM		0    /* inject context: random */
+#define MCJ_CTX_PROCESS		1    /* inject context: process */
+#define MCJ_CTX_IRQ		2    /* inject context: IRQ */
+#define MCJ_NMI_BROADCAST	4    /* do NMI broadcasting */
+#define MCJ_EXCEPTION		8    /* raise as exception */
+
 /* Fields are zero when not available */
 struct mce {
 	__u64 status;
@@ -48,8 +56,8 @@ struct mce {
 	__u64 tsc;	/* cpu time stamp counter */
 	__u64 time;	/* wall time_t when error was detected */
 	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
-	__u8  pad1;
-	__u16 pad2;
+	__u8  inject_flags;	/* software inject flags */
+	__u16  pad;
 	__u32 cpuid;	/* CPUID 1 EAX */
 	__u8  cs;		/* code segment */
 	__u8  bank;	/* machine check bank */
@@ -115,13 +123,6 @@ void mcheck_init(struct cpuinfo_x86 *c);
 static inline void mcheck_init(struct cpuinfo_x86 *c) {}
 #endif
 
-#ifdef CONFIG_X86_OLD_MCE
-extern int nr_mce_banks;
-void amd_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-#endif
-
 #ifdef CONFIG_X86_ANCIENT_MCE
 void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
 void winchip_mcheck_init(struct cpuinfo_x86 *c);
@@ -137,10 +138,11 @@ void mce_log(struct mce *m);
 DECLARE_PER_CPU(struct sys_device, mce_dev);
 
 /*
- * To support more than 128 would need to escape the predefined
- * Linux defined extended banks first.
+ * Maximum banks number.
+ * This is the limit of the current register layout on
+ * Intel CPUs.
  */
-#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1)
+#define MAX_NR_BANKS 32
 
 #ifdef CONFIG_X86_MCE_INTEL
 extern int mce_cmci_disabled;
@@ -208,11 +210,7 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
 void intel_init_thermal(struct cpuinfo_x86 *c);
 
-#ifdef CONFIG_X86_NEW_MCE
 void mce_log_therm_throt_event(__u64 status);
-#else
-static inline void mce_log_therm_throt_event(__u64 status) {}
-#endif
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index e2a1bb6..79c9450 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -4,6 +4,7 @@
 #include <linux/init.h>
 
 #include <asm/mpspec_def.h>
+#include <asm/x86_init.h>
 
 extern int apic_version[MAX_APICS];
 extern int pic_mode;
@@ -41,9 +42,6 @@ extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
 
 #endif /* CONFIG_X86_64 */
 
-extern void early_find_smp_config(void);
-extern void early_get_smp_config(void);
-
 #if defined(CONFIG_MCA) || defined(CONFIG_EISA)
 extern int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
@@ -52,20 +50,55 @@ extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
 extern unsigned int boot_cpu_physical_apicid;
 extern unsigned int max_physical_apicid;
-extern int smp_found_config;
 extern int mpc_default_type;
 extern unsigned long mp_lapic_addr;
 
-extern void get_smp_config(void);
+#ifdef CONFIG_X86_LOCAL_APIC
+extern int smp_found_config;
+#else
+# define smp_found_config 0
+#endif
+
+static inline void get_smp_config(void)
+{
+	x86_init.mpparse.get_smp_config(0);
+}
+
+static inline void early_get_smp_config(void)
+{
+	x86_init.mpparse.get_smp_config(1);
+}
+
+static inline void find_smp_config(void)
+{
+	x86_init.mpparse.find_smp_config(1);
+}
+
+static inline void early_find_smp_config(void)
+{
+	x86_init.mpparse.find_smp_config(0);
+}
 
 #ifdef CONFIG_X86_MPPARSE
-extern void find_smp_config(void);
 extern void early_reserve_e820_mpc_new(void);
 extern int enable_update_mptable;
+extern int default_mpc_apic_id(struct mpc_cpu *m);
+extern void default_smp_read_mpc_oem(struct mpc_table *mpc);
+# ifdef CONFIG_X86_IO_APIC
+extern void default_mpc_oem_bus_info(struct mpc_bus *m, char *str);
+# else
+#  define default_mpc_oem_bus_info NULL
+# endif
+extern void default_find_smp_config(unsigned int reserve);
+extern void default_get_smp_config(unsigned int early);
 #else
-static inline void find_smp_config(void) { }
 static inline void early_reserve_e820_mpc_new(void) { }
 #define enable_update_mptable 0
+#define default_mpc_apic_id NULL
+#define default_smp_read_mpc_oem NULL
+#define default_mpc_oem_bus_info NULL
+#define default_find_smp_config x86_init_uint_noop
+#define default_get_smp_config x86_init_uint_noop
 #endif
 
 void __cpuinit generic_processor_info(int apicid, int version);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index bd55490..4ffe09b 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -81,8 +81,15 @@
 #define MSR_IA32_MC0_ADDR		0x00000402
 #define MSR_IA32_MC0_MISC		0x00000403
 
+#define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
+#define MSR_IA32_MCx_STATUS(x)		(MSR_IA32_MC0_STATUS + 4*(x))
+#define MSR_IA32_MCx_ADDR(x)		(MSR_IA32_MC0_ADDR + 4*(x))
+#define MSR_IA32_MCx_MISC(x)		(MSR_IA32_MC0_MISC + 4*(x))
+
 /* These are consecutive and not in the normal 4er MCE bank block */
 #define MSR_IA32_MC0_CTL2		0x00000280
+#define MSR_IA32_MCx_CTL2(x)		(MSR_IA32_MC0_CTL2 + (x))
+
 #define CMCI_EN			(1ULL << 30)
 #define CMCI_THRESHOLD_MASK		0xffffULL
 
@@ -215,6 +222,10 @@
 
 #define THERM_STATUS_PROCHOT		(1 << 0)
 
+#define MSR_THERM2_CTL			0x0000019d
+
+#define MSR_THERM2_CTL_TM_SELECT	(1ULL << 16)
+
 #define MSR_IA32_MISC_ENABLE		0x000001a0
 
 /* MISC_ENABLE bits: architectural */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index a51ada8..4365ffd 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -121,6 +121,9 @@ extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
 extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
 extern void mtrr_ap_init(void);
 extern void mtrr_bp_init(void);
+extern void set_mtrr_aps_delayed_init(void);
+extern void mtrr_aps_init(void);
+extern void mtrr_bp_restore(void);
 extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
 extern int amd_special_default_mtrr(void);
 #  else
@@ -161,6 +164,9 @@ static inline void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
 
 #define mtrr_ap_init() do {} while (0)
 #define mtrr_bp_init() do {} while (0)
+#define set_mtrr_aps_delayed_init() do {} while (0)
+#define mtrr_aps_init() do {} while (0)
+#define mtrr_bp_restore() do {} while (0)
 #  endif
 
 #ifdef CONFIG_COMPAT
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index ad2668e..6d8723a 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -65,6 +65,8 @@
    6: osp nopl 0x00(%eax,%eax,1)
    7: nopl 0x00000000(%eax)
    8: nopl 0x00000000(%eax,%eax,1)
+   Note: All the above are assumed to be a single instruction.
+	There is kernel code that depends on this.
 */
 #define P6_NOP1	GENERIC_NOP1
 #define P6_NOP2	".byte 0x66,0x90\n"
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 40d6586..8aebcc4 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -24,22 +24,6 @@ static inline void load_sp0(struct tss_struct *tss,
 	PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
 }
 
-#define ARCH_SETUP			pv_init_ops.arch_setup();
-static inline unsigned long get_wallclock(void)
-{
-	return PVOP_CALL0(unsigned long, pv_time_ops.get_wallclock);
-}
-
-static inline int set_wallclock(unsigned long nowtime)
-{
-	return PVOP_CALL1(int, pv_time_ops.set_wallclock, nowtime);
-}
-
-static inline void (*choose_time_init(void))(void)
-{
-	return pv_time_ops.time_init;
-}
-
 /* The paravirtualized CPUID instruction. */
 static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
 			   unsigned int *ecx, unsigned int *edx)
@@ -245,7 +229,6 @@ static inline unsigned long long paravirt_sched_clock(void)
 {
 	return PVOP_CALL0(unsigned long long, pv_time_ops.sched_clock);
 }
-#define calibrate_tsc() (pv_time_ops.get_tsc_khz())
 
 static inline unsigned long long paravirt_read_pmc(int counter)
 {
@@ -363,34 +346,6 @@ static inline void slow_down_io(void)
 #endif
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-static inline void setup_boot_clock(void)
-{
-	PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
-}
-
-static inline void setup_secondary_clock(void)
-{
-	PVOP_VCALL0(pv_apic_ops.setup_secondary_clock);
-}
-#endif
-
-static inline void paravirt_post_allocator_init(void)
-{
-	if (pv_init_ops.post_allocator_init)
-		(*pv_init_ops.post_allocator_init)();
-}
-
-static inline void paravirt_pagetable_setup_start(pgd_t *base)
-{
-	(*pv_mmu_ops.pagetable_setup_start)(base);
-}
-
-static inline void paravirt_pagetable_setup_done(pgd_t *base)
-{
-	(*pv_mmu_ops.pagetable_setup_done)(base);
-}
-
 #ifdef CONFIG_SMP
 static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
 				    unsigned long start_esp)
@@ -948,6 +903,8 @@ static inline unsigned long __raw_local_irq_save(void)
 #undef PVOP_VCALL4
 #undef PVOP_CALL4
 
+extern void default_banner(void);
+
 #else  /* __ASSEMBLY__ */
 
 #define _PVSITE(ptype, clobbers, ops, word, algn)	\
@@ -1088,5 +1045,7 @@ static inline unsigned long __raw_local_irq_save(void)
 #endif	/* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
-#endif /* CONFIG_PARAVIRT */
+#else  /* CONFIG_PARAVIRT */
+# define default_banner x86_init_noop
+#endif /* !CONFIG_PARAVIRT */
 #endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 25402d0..dd0f5b3 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -78,14 +78,6 @@ struct pv_init_ops {
 	 */
 	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
 			  unsigned long addr, unsigned len);
-
-	/* Basic arch-specific setup */
-	void (*arch_setup)(void);
-	char *(*memory_setup)(void);
-	void (*post_allocator_init)(void);
-
-	/* Print a banner to identify the environment */
-	void (*banner)(void);
 };
 
 
@@ -96,12 +88,6 @@ struct pv_lazy_ops {
 };
 
 struct pv_time_ops {
-	void (*time_init)(void);
-
-	/* Set and set time of day */
-	unsigned long (*get_wallclock)(void);
-	int (*set_wallclock)(unsigned long);
-
 	unsigned long long (*sched_clock)(void);
 	unsigned long (*get_tsc_khz)(void);
 };
@@ -203,8 +189,6 @@ struct pv_cpu_ops {
 };
 
 struct pv_irq_ops {
-	void (*init_IRQ)(void);
-
 	/*
 	 * Get/set interrupt state.  save_fl and restore_fl are only
 	 * expected to use X86_EFLAGS_IF; all other bits
@@ -229,9 +213,6 @@ struct pv_irq_ops {
 
 struct pv_apic_ops {
 #ifdef CONFIG_X86_LOCAL_APIC
-	void (*setup_boot_clock)(void);
-	void (*setup_secondary_clock)(void);
-
 	void (*startup_ipi_hook)(int phys_apicid,
 				 unsigned long start_eip,
 				 unsigned long start_esp);
@@ -239,15 +220,6 @@ struct pv_apic_ops {
 };
 
 struct pv_mmu_ops {
-	/*
-	 * Called before/after init_mm pagetable setup. setup_start
-	 * may reset %cr3, and may pre-install parts of the pagetable;
-	 * pagetable setup is expected to preserve any existing
-	 * mapping.
-	 */
-	void (*pagetable_setup_start)(pgd_t *pgd_base);
-	void (*pagetable_setup_done)(pgd_t *pgd_base);
-
 	unsigned long (*read_cr2)(void);
 	void (*write_cr2)(unsigned long);
 
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 7af14e5..e2c1668 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -19,4 +19,9 @@ extern int free_memtype(u64 start, u64 end);
 extern int kernel_map_sync_memtype(u64 base, unsigned long size,
 		unsigned long flag);
 
+int io_reserve_memtype(resource_size_t start, resource_size_t end,
+			unsigned long *type);
+
+void io_free_memtype(resource_size_t start, resource_size_t end);
+
 #endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 1ff685c..f76a162 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -48,7 +48,6 @@ extern unsigned int pcibios_assign_all_busses(void);
 #else
 #define pcibios_assign_all_busses()	0
 #endif
-#define pcibios_scan_all_fns(a, b)	0
 
 extern unsigned long pci_mem_start;
 #define PCIBIOS_MIN_IO		0x1000
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 04eacef..b65a36d 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -168,15 +168,6 @@ do {							\
 /* We can use this directly for local CPU (faster). */
 DECLARE_PER_CPU(unsigned long, this_cpu_off);
 
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-void *pcpu_lpage_remapped(void *kaddr);
-#else
-static inline void *pcpu_lpage_remapped(void *kaddr)
-{
-	return NULL;
-}
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_event.h
index e7b7c93..ad7ce3f 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -1,8 +1,8 @@
-#ifndef _ASM_X86_PERF_COUNTER_H
-#define _ASM_X86_PERF_COUNTER_H
+#ifndef _ASM_X86_PERF_EVENT_H
+#define _ASM_X86_PERF_EVENT_H
 
 /*
- * Performance counter hw details:
+ * Performance event hw details:
  */
 
 #define X86_PMC_MAX_GENERIC					8
@@ -43,7 +43,7 @@
 union cpuid10_eax {
 	struct {
 		unsigned int version_id:8;
-		unsigned int num_counters:8;
+		unsigned int num_events:8;
 		unsigned int bit_width:8;
 		unsigned int mask_length:8;
 	} split;
@@ -52,7 +52,7 @@ union cpuid10_eax {
 
 union cpuid10_edx {
 	struct {
-		unsigned int num_counters_fixed:4;
+		unsigned int num_events_fixed:4;
 		unsigned int reserved:28;
 	} split;
 	unsigned int full;
@@ -60,7 +60,7 @@ union cpuid10_edx {
 
 
 /*
- * Fixed-purpose performance counters:
+ * Fixed-purpose performance events:
  */
 
 /*
@@ -87,22 +87,22 @@ union cpuid10_edx {
 /*
  * We model BTS tracing as another fixed-mode PMC.
  *
- * We choose a value in the middle of the fixed counter range, since lower
- * values are used by actual fixed counters and higher values are used
+ * We choose a value in the middle of the fixed event range, since lower
+ * values are used by actual fixed events and higher values are used
  * to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
  */
 #define X86_PMC_IDX_FIXED_BTS				(X86_PMC_IDX_FIXED + 16)
 
 
-#ifdef CONFIG_PERF_COUNTERS
-extern void init_hw_perf_counters(void);
-extern void perf_counters_lapic_init(void);
+#ifdef CONFIG_PERF_EVENTS
+extern void init_hw_perf_events(void);
+extern void perf_events_lapic_init(void);
 
-#define PERF_COUNTER_INDEX_OFFSET			0
+#define PERF_EVENT_INDEX_OFFSET			0
 
 #else
-static inline void init_hw_perf_counters(void)		{ }
-static inline void perf_counters_lapic_init(void)	{ }
+static inline void init_hw_perf_events(void)		{ }
+static inline void perf_events_lapic_init(void)	{ }
 #endif
 
-#endif /* _ASM_X86_PERF_COUNTER_H */
+#endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 4c5b51f..af6fd36 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -56,16 +56,6 @@ extern struct list_head pgd_list;
 #define pte_update(mm, addr, ptep)              do { } while (0)
 #define pte_update_defer(mm, addr, ptep)        do { } while (0)
 
-static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
-{
-	native_pagetable_setup_start(base);
-}
-
-static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
-{
-	native_pagetable_setup_done(base);
-}
-
 #define pgd_val(x)	native_pgd_val(x)
 #define __pgd(x)	native_make_pgd(x)
 
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 54cb697..7b467bf 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,8 +299,8 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 extern void native_pagetable_setup_start(pgd_t *base);
 extern void native_pagetable_setup_done(pgd_t *base);
 #else
-static inline void native_pagetable_setup_start(pgd_t *base) {}
-static inline void native_pagetable_setup_done(pgd_t *base) {}
+#define native_pagetable_setup_start x86_init_pgd_noop
+#define native_pagetable_setup_done  x86_init_pgd_noop
 #endif
 
 struct seq_file;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e08ea04..c3429e8 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -27,6 +27,7 @@ struct mm_struct;
 #include <linux/cpumask.h>
 #include <linux/cache.h>
 #include <linux/threads.h>
+#include <linux/math64.h>
 #include <linux/init.h>
 
 /*
@@ -1020,4 +1021,35 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 extern int get_tsc_mode(unsigned long adr);
 extern int set_tsc_mode(unsigned int val);
 
+extern int amd_get_nb_id(int cpu);
+
+struct aperfmperf {
+	u64 aperf, mperf;
+};
+
+static inline void get_aperfmperf(struct aperfmperf *am)
+{
+	WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
+
+	rdmsrl(MSR_IA32_APERF, am->aperf);
+	rdmsrl(MSR_IA32_MPERF, am->mperf);
+}
+
+#define APERFMPERF_SHIFT 10
+
+static inline
+unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
+				    struct aperfmperf *new)
+{
+	u64 aperf = new->aperf - old->aperf;
+	u64 mperf = new->mperf - old->mperf;
+	unsigned long ratio = aperf;
+
+	mperf >>= APERFMPERF_SHIFT;
+	if (mperf)
+		ratio = div64_u64(aperf, mperf);
+
+	return ratio;
+}
+
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index 4093d1e..18e496c 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -5,43 +5,6 @@
 
 #define COMMAND_LINE_SIZE 2048
 
-#ifndef __ASSEMBLY__
-
-/*
- * Any setup quirks to be performed?
- */
-struct mpc_cpu;
-struct mpc_bus;
-struct mpc_oemtable;
-
-struct x86_quirks {
-	int (*arch_pre_time_init)(void);
-	int (*arch_time_init)(void);
-	int (*arch_pre_intr_init)(void);
-	int (*arch_intr_init)(void);
-	int (*arch_trap_init)(void);
-	char * (*arch_memory_setup)(void);
-	int (*mach_get_smp_config)(unsigned int early);
-	int (*mach_find_smp_config)(unsigned int reserve);
-
-	int *mpc_record;
-	int (*mpc_apic_id)(struct mpc_cpu *m);
-	void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
-	void (*mpc_oem_pci_bus)(struct mpc_bus *m);
-	void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable,
-				unsigned short oemsize);
-	int (*setup_ioapic_ids)(void);
-};
-
-extern void x86_quirk_intr_init(void);
-
-extern void x86_quirk_trap_init(void);
-
-extern void x86_quirk_pre_time_init(void);
-extern void x86_quirk_time_init(void);
-
-#endif /* __ASSEMBLY__ */
-
 #ifdef __i386__
 
 #include <linux/pfn.h>
@@ -61,6 +24,7 @@ extern void x86_quirk_time_init(void);
 
 #ifndef __ASSEMBLY__
 #include <asm/bootparam.h>
+#include <asm/x86_init.h>
 
 /* Interrupt control for vSMPowered x86_64 systems */
 #ifdef CONFIG_X86_64
@@ -79,11 +43,16 @@ static inline void visws_early_detect(void) { }
 static inline int is_visws_box(void) { return 0; }
 #endif
 
-extern struct x86_quirks *x86_quirks;
 extern unsigned long saved_video_mode;
 
-#ifndef CONFIG_PARAVIRT
-#define paravirt_post_allocator_init()	do {} while (0)
+extern void reserve_standard_io_resources(void);
+extern void i386_reserve_resources(void);
+extern void setup_default_timer_irq(void);
+
+#ifdef CONFIG_X86_MRST
+extern void x86_mrst_early_setup(void);
+#else
+static inline void x86_mrst_early_setup(void) { }
 #endif
 
 #ifndef _SETUP
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index c86f452..ae907e6 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -65,7 +65,6 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
 	case 4:
 		*(int *)to = *(int *)from;
 		return to;
-
 	case 3:
 		*(short *)to = *(short *)from;
 		*((char *)to + 2) = *((char *)from + 2);
diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h
index 50c733a..7bdec4e 100644
--- a/arch/x86/include/asm/time.h
+++ b/arch/x86/include/asm/time.h
@@ -4,60 +4,7 @@
 extern void hpet_time_init(void);
 
 #include <asm/mc146818rtc.h>
-#ifdef CONFIG_X86_32
-#include <linux/efi.h>
-
-static inline unsigned long native_get_wallclock(void)
-{
-	unsigned long retval;
-
-	if (efi_enabled)
-		retval = efi_get_time();
-	else
-		retval = mach_get_cmos_time();
-
-	return retval;
-}
-
-static inline int native_set_wallclock(unsigned long nowtime)
-{
-	int retval;
-
-	if (efi_enabled)
-		retval = efi_set_rtc_mmss(nowtime);
-	else
-		retval = mach_set_rtc_mmss(nowtime);
-
-	return retval;
-}
-
-#else
-extern void native_time_init_hook(void);
-
-static inline unsigned long native_get_wallclock(void)
-{
-	return mach_get_cmos_time();
-}
-
-static inline int native_set_wallclock(unsigned long nowtime)
-{
-	return mach_set_rtc_mmss(nowtime);
-}
-
-#endif
 
 extern void time_init(void);
 
-#ifdef CONFIG_PARAVIRT
-#include <asm/paravirt.h>
-#else /* !CONFIG_PARAVIRT */
-
-#define get_wallclock() native_get_wallclock()
-#define set_wallclock(x) native_set_wallclock(x)
-#define choose_time_init() hpet_time_init
-
-#endif /* CONFIG_PARAVIRT */
-
-extern unsigned long __init calibrate_cpu(void);
-
 #endif /* _ASM_X86_TIME_H */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 20ca9c4..5469630 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -8,20 +8,16 @@
 #define TICK_SIZE (tick_nsec / 1000)
 
 unsigned long long native_sched_clock(void);
-unsigned long native_calibrate_tsc(void);
+extern int recalibrate_cpu_khz(void);
 
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
 extern int timer_ack;
-extern irqreturn_t timer_interrupt(int irq, void *dev_id);
-#endif /* CONFIG_X86_32 */
-extern int recalibrate_cpu_khz(void);
+#else
+# define timer_ack (0)
+#endif
 
 extern int no_timer_check;
 
-#ifndef CONFIG_PARAVIRT
-#define calibrate_tsc() native_calibrate_tsc()
-#endif
-
 /* Accelerators for sched_clock()
  * convert from cycles(64bits) => nanoseconds (64bits)
  *  basic equation:
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 26d06e0..6f0695d 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -116,15 +116,11 @@ extern unsigned long node_remap_size[];
 
 # define SD_CACHE_NICE_TRIES	1
 # define SD_IDLE_IDX		1
-# define SD_NEWIDLE_IDX		2
-# define SD_FORKEXEC_IDX	0
 
 #else
 
 # define SD_CACHE_NICE_TRIES	2
 # define SD_IDLE_IDX		2
-# define SD_NEWIDLE_IDX		2
-# define SD_FORKEXEC_IDX	1
 
 #endif
 
@@ -137,22 +133,20 @@ extern unsigned long node_remap_size[];
 	.cache_nice_tries	= SD_CACHE_NICE_TRIES,			\
 	.busy_idx		= 3,					\
 	.idle_idx		= SD_IDLE_IDX,				\
-	.newidle_idx		= SD_NEWIDLE_IDX,			\
-	.wake_idx		= 1,					\
-	.forkexec_idx		= SD_FORKEXEC_IDX,			\
+	.newidle_idx		= 0,					\
+	.wake_idx		= 0,					\
+	.forkexec_idx		= 0,					\
 									\
 	.flags			= 1*SD_LOAD_BALANCE			\
 				| 1*SD_BALANCE_NEWIDLE			\
 				| 1*SD_BALANCE_EXEC			\
 				| 1*SD_BALANCE_FORK			\
-				| 0*SD_WAKE_IDLE			\
+				| 0*SD_BALANCE_WAKE			\
 				| 1*SD_WAKE_AFFINE			\
-				| 1*SD_WAKE_BALANCE			\
 				| 0*SD_SHARE_CPUPOWER			\
 				| 0*SD_POWERSAVINGS_BALANCE		\
 				| 0*SD_SHARE_PKG_RESOURCES		\
 				| 1*SD_SERIALIZE			\
-				| 1*SD_WAKE_IDLE_FAR			\
 				| 0*SD_PREFER_SIBLING			\
 				,					\
 	.last_balance		= jiffies,				\
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 38ae163..c042729 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -48,7 +48,8 @@ static __always_inline cycles_t vget_cycles(void)
 extern void tsc_init(void);
 extern void mark_tsc_unstable(char *reason);
 extern int unsynchronized_tsc(void);
-int check_tsc_unstable(void);
+extern int check_tsc_unstable(void);
+extern unsigned long native_calibrate_tsc(void);
 
 /*
  * Boot-time check whether the TSCs are synchronized across
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 5e06259..632fb44 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -33,7 +33,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
  * Copy data from kernel space to user space.  Caller must check
  * the specified block with access_ok() before calling this function.
  * The caller should also make sure he pins the user space address
- * so that the we don't result in page fault and sleep.
+ * so that we don't result in page fault and sleep.
  *
  * Here we special-case 1, 2 and 4-byte copy_*_user invocations.  On a fault
  * we return the initial request size (1, 2 or 4), as copy_*_user should do.
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 8deaada..6fb3c20 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -341,7 +341,7 @@
 #define __NR_preadv		333
 #define __NR_pwritev		334
 #define __NR_rt_tgsigqueueinfo	335
-#define __NR_perf_counter_open	336
+#define __NR_perf_event_open	336
 
 #ifdef __KERNEL__
 
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index b9f3c60..8d3ad0a 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -659,8 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv)
 __SYSCALL(__NR_pwritev, sys_pwritev)
 #define __NR_rt_tgsigqueueinfo			297
 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
-#define __NR_perf_counter_open			298
-__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
+#define __NR_perf_event_open			298
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
 
 #ifndef __NO_STUBS
 #define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 77a6850..04eb6c9 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -15,6 +15,7 @@
 #include <linux/numa.h>
 #include <linux/percpu.h>
 #include <linux/timer.h>
+#include <linux/io.h>
 #include <asm/types.h>
 #include <asm/percpu.h>
 #include <asm/uv/uv_mmrs.h>
@@ -258,13 +259,13 @@ static inline unsigned long *uv_global_mmr32_address(int pnode,
 static inline void uv_write_global_mmr32(int pnode, unsigned long offset,
 				 unsigned long val)
 {
-	*uv_global_mmr32_address(pnode, offset) = val;
+	writeq(val, uv_global_mmr32_address(pnode, offset));
 }
 
 static inline unsigned long uv_read_global_mmr32(int pnode,
 						 unsigned long offset)
 {
-	return *uv_global_mmr32_address(pnode, offset);
+	return readq(uv_global_mmr32_address(pnode, offset));
 }
 
 /*
@@ -281,13 +282,13 @@ static inline unsigned long *uv_global_mmr64_address(int pnode,
 static inline void uv_write_global_mmr64(int pnode, unsigned long offset,
 				unsigned long val)
 {
-	*uv_global_mmr64_address(pnode, offset) = val;
+	writeq(val, uv_global_mmr64_address(pnode, offset));
 }
 
 static inline unsigned long uv_read_global_mmr64(int pnode,
 						 unsigned long offset)
 {
-	return *uv_global_mmr64_address(pnode, offset);
+	return readq(uv_global_mmr64_address(pnode, offset));
 }
 
 /*
@@ -301,22 +302,22 @@ static inline unsigned long *uv_local_mmr_address(unsigned long offset)
 
 static inline unsigned long uv_read_local_mmr(unsigned long offset)
 {
-	return *uv_local_mmr_address(offset);
+	return readq(uv_local_mmr_address(offset));
 }
 
 static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
 {
-	*uv_local_mmr_address(offset) = val;
+	writeq(val, uv_local_mmr_address(offset));
 }
 
 static inline unsigned char uv_read_local_mmr8(unsigned long offset)
 {
-	return *((unsigned char *)uv_local_mmr_address(offset));
+	return readb(uv_local_mmr_address(offset));
 }
 
 static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
 {
-	*((unsigned char *)uv_local_mmr_address(offset)) = val;
+	writeb(val, uv_local_mmr_address(offset));
 }
 
 /*
@@ -422,7 +423,7 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
 	unsigned long val;
 
 	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
-			((apicid & 0x3f) << UVH_IPI_INT_APIC_ID_SHFT) |
+			((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
 			(vector << UVH_IPI_INT_VECTOR_SHFT);
 	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
 }
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index dc27a69..3d61e20 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -21,6 +21,7 @@ struct vsyscall_gtod_data {
 		u32	shift;
 	} clock;
 	struct timespec wall_to_monotonic;
+	struct timespec wall_time_coarse;
 };
 extern struct vsyscall_gtod_data __vsyscall_gtod_data
 __section_vsyscall_gtod_data;
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
index c11b7e1..e49ed6d 100644
--- a/arch/x86/include/asm/vmware.h
+++ b/arch/x86/include/asm/vmware.h
@@ -20,7 +20,7 @@
 #ifndef ASM_X86__VMWARE_H
 #define ASM_X86__VMWARE_H
 
-extern unsigned long vmware_get_tsc_khz(void);
+extern void vmware_platform_setup(void);
 extern int vmware_platform(void);
 extern void vmware_set_feature_bits(struct cpuinfo_x86 *c);
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
new file mode 100644
index 0000000..2c756fd
--- /dev/null
+++ b/arch/x86/include/asm/x86_init.h
@@ -0,0 +1,133 @@
+#ifndef _ASM_X86_PLATFORM_H
+#define _ASM_X86_PLATFORM_H
+
+#include <asm/pgtable_types.h>
+#include <asm/bootparam.h>
+
+struct mpc_bus;
+struct mpc_cpu;
+struct mpc_table;
+
+/**
+ * struct x86_init_mpparse - platform specific mpparse ops
+ * @mpc_record:			platform specific mpc record accounting
+ * @setup_ioapic_ids:		platform specific ioapic id override
+ * @mpc_apic_id:		platform specific mpc apic id assignment
+ * @smp_read_mpc_oem:		platform specific oem mpc table setup
+ * @mpc_oem_pci_bus:		platform specific pci bus setup (default NULL)
+ * @mpc_oem_bus_info:		platform specific mpc bus info
+ * @find_smp_config:		find the smp configuration
+ * @get_smp_config:		get the smp configuration
+ */
+struct x86_init_mpparse {
+	void (*mpc_record)(unsigned int mode);
+	void (*setup_ioapic_ids)(void);
+	int (*mpc_apic_id)(struct mpc_cpu *m);
+	void (*smp_read_mpc_oem)(struct mpc_table *mpc);
+	void (*mpc_oem_pci_bus)(struct mpc_bus *m);
+	void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
+	void (*find_smp_config)(unsigned int reserve);
+	void (*get_smp_config)(unsigned int early);
+};
+
+/**
+ * struct x86_init_resources - platform specific resource related ops
+ * @probe_roms:			probe BIOS roms
+ * @reserve_resources:		reserve the standard resources for the
+ *				platform
+ * @memory_setup:		platform specific memory setup
+ *
+ */
+struct x86_init_resources {
+	void (*probe_roms)(void);
+	void (*reserve_resources)(void);
+	char *(*memory_setup)(void);
+};
+
+/**
+ * struct x86_init_irqs - platform specific interrupt setup
+ * @pre_vector_init:		init code to run before interrupt vectors
+ *				are set up.
+ * @intr_init:			interrupt init code
+ * @trap_init:			platform specific trap setup
+ */
+struct x86_init_irqs {
+	void (*pre_vector_init)(void);
+	void (*intr_init)(void);
+	void (*trap_init)(void);
+};
+
+/**
+ * struct x86_init_oem - oem platform specific customizing functions
+ * @arch_setup:			platform specific architecure setup
+ * @banner:			print a platform specific banner
+ */
+struct x86_init_oem {
+	void (*arch_setup)(void);
+	void (*banner)(void);
+};
+
+/**
+ * struct x86_init_paging - platform specific paging functions
+ * @pagetable_setup_start:	platform specific pre paging_init() call
+ * @pagetable_setup_done:	platform specific post paging_init() call
+ */
+struct x86_init_paging {
+	void (*pagetable_setup_start)(pgd_t *base);
+	void (*pagetable_setup_done)(pgd_t *base);
+};
+
+/**
+ * struct x86_init_timers - platform specific timer setup
+ * @setup_perpcu_clockev:	set up the per cpu clock event device for the
+ *				boot cpu
+ * @tsc_pre_init:		platform function called before TSC init
+ * @timer_init:			initialize the platform timer (default PIT/HPET)
+ */
+struct x86_init_timers {
+	void (*setup_percpu_clockev)(void);
+	void (*tsc_pre_init)(void);
+	void (*timer_init)(void);
+};
+
+/**
+ * struct x86_init_ops - functions for platform specific setup
+ *
+ */
+struct x86_init_ops {
+	struct x86_init_resources	resources;
+	struct x86_init_mpparse		mpparse;
+	struct x86_init_irqs		irqs;
+	struct x86_init_oem		oem;
+	struct x86_init_paging		paging;
+	struct x86_init_timers		timers;
+};
+
+/**
+ * struct x86_cpuinit_ops - platform specific cpu hotplug setups
+ * @setup_percpu_clockev:	set up the per cpu clock event device
+ */
+struct x86_cpuinit_ops {
+	void (*setup_percpu_clockev)(void);
+};
+
+/**
+ * struct x86_platform_ops - platform specific runtime functions
+ * @calibrate_tsc:		calibrate TSC
+ * @get_wallclock:		get time from HW clock like RTC etc.
+ * @set_wallclock:		set time back to HW clock
+ */
+struct x86_platform_ops {
+	unsigned long (*calibrate_tsc)(void);
+	unsigned long (*get_wallclock)(void);
+	int (*set_wallclock)(unsigned long nowtime);
+};
+
+extern struct x86_init_ops x86_init;
+extern struct x86_cpuinit_ops x86_cpuinit;
+extern struct x86_platform_ops x86_platform;
+
+extern void x86_init_noop(void);
+extern void x86_init_uint_noop(unsigned int unused);
+
+#endif
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 430d5b2..4ba419b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -31,8 +31,8 @@ GCOV_PROFILE_paravirt.o		:= n
 
 obj-y			:= process_$(BITS).o signal.o entry_$(BITS).o
 obj-y			+= traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
-obj-y			+= time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y			+= setup.o i8259.o irqinit.o
+obj-y			+= time.o ioport.o ldt.o dumpstack.o
+obj-y			+= setup.o x86_init.o i8259.o irqinit.o
 obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
@@ -52,6 +52,7 @@ obj-$(CONFIG_X86_DS_SELFTEST)		+= ds_selftest.o
 obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
+obj-$(CONFIG_INTEL_TXT)		+= tboot.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
@@ -104,6 +105,7 @@ obj-$(CONFIG_SCx200)		+= scx200.o
 scx200-y			+= scx200_32.o
 
 obj-$(CONFIG_OLPC)		+= olpc.o
+obj-$(CONFIG_X86_MRST)		+= mrst.o
 
 microcode-y				:= microcode_core.o
 microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 159740d..894aa97 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,7 +14,7 @@
  *	Mikael Pettersson	:	PM converted to driver model.
  */
 
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/kernel_stat.h>
 #include <linux/mc146818rtc.h>
 #include <linux/acpi_pmtmr.h>
@@ -35,7 +35,8 @@
 #include <linux/smp.h>
 #include <linux/mm.h>
 
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
+#include <asm/x86_init.h>
 #include <asm/pgalloc.h>
 #include <asm/atomic.h>
 #include <asm/mpspec.h>
@@ -61,7 +62,7 @@ unsigned int boot_cpu_physical_apicid = -1U;
 /*
  * The highest APIC ID seen during enumeration.
  *
- * This determines the messaging protocol we can use: if all APIC IDs
+ * On AMD, this determines the messaging protocol we can use: if all APIC IDs
  * are in the 0 ... 7 range, then we can use logical addressing which
  * has some performance advantages (better broadcasting).
  *
@@ -978,7 +979,7 @@ void lapic_shutdown(void)
 {
 	unsigned long flags;
 
-	if (!cpu_has_apic)
+	if (!cpu_has_apic && !apic_from_smp_config())
 		return;
 
 	local_irq_save(flags);
@@ -1188,7 +1189,7 @@ void __cpuinit setup_local_APIC(void)
 		apic_write(APIC_ESR, 0);
 	}
 #endif
-	perf_counters_lapic_init();
+	perf_events_lapic_init();
 
 	preempt_disable();
 
@@ -1196,8 +1197,7 @@ void __cpuinit setup_local_APIC(void)
 	 * Double-check whether this APIC is really registered.
 	 * This is meaningless in clustered apic mode, so we skip it.
 	 */
-	if (!apic->apic_id_registered())
-		BUG();
+	BUG_ON(!apic->apic_id_registered());
 
 	/*
 	 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1709,7 +1709,7 @@ int __init APIC_init_uniprocessor(void)
 	localise_nmi_watchdog();
 #endif
 
-	setup_boot_clock();
+	x86_init.timers.setup_percpu_clockev();
 #ifdef CONFIG_X86_64
 	check_nmi_watchdog();
 #endif
@@ -1916,24 +1916,14 @@ void __cpuinit generic_processor_info(int apicid, int version)
 		max_physical_apicid = apicid;
 
 #ifdef CONFIG_X86_32
-	/*
-	 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
-	 * but we need to work other dependencies like SMP_SUSPEND etc
-	 * before this can be done without some confusion.
-	 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
-	 *       - Ashok Raj <ashok.raj@intel.com>
-	 */
-	if (max_physical_apicid >= 8) {
-		switch (boot_cpu_data.x86_vendor) {
-		case X86_VENDOR_INTEL:
-			if (!APIC_XAPIC(version)) {
-				def_to_bigsmp = 0;
-				break;
-			}
-			/* If P4 and above fall through */
-		case X86_VENDOR_AMD:
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_INTEL:
+		if (num_processors > 8)
+			def_to_bigsmp = 1;
+		break;
+	case X86_VENDOR_AMD:
+		if (max_physical_apicid >= 8)
 			def_to_bigsmp = 1;
-		}
 	}
 #endif
 
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 676cdac..77a0641 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -112,7 +112,7 @@ static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
 	return physids_promote(0xFFL);
 }
 
-static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
+static int bigsmp_check_phys_apicid_present(int phys_apicid)
 {
 	return 1;
 }
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 3c8f9e7..64970b9 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -96,6 +96,11 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+/* Number of legacy interrupts */
+static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
+/* GSI interrupts */
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
+
 #if defined (CONFIG_MCA) || defined (CONFIG_EISA)
 int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
@@ -173,6 +178,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = {
 	[15] = { .vector = IRQ15_VECTOR, },
 };
 
+void __init io_apic_disable_legacy(void)
+{
+	nr_legacy_irqs = 0;
+	nr_irqs_gsi = 0;
+}
+
 int __init arch_early_irq_init(void)
 {
 	struct irq_cfg *cfg;
@@ -190,7 +201,7 @@ int __init arch_early_irq_init(void)
 		desc->chip_data = &cfg[i];
 		zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
 		zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
-		if (i < NR_IRQS_LEGACY)
+		if (i < nr_legacy_irqs)
 			cpumask_setall(cfg[i].domain);
 	}
 
@@ -867,7 +878,7 @@ static int __init find_isa_irq_apic(int irq, int type)
  */
 static int EISA_ELCR(unsigned int irq)
 {
-	if (irq < NR_IRQS_LEGACY) {
+	if (irq < nr_legacy_irqs) {
 		unsigned int port = 0x4d0 + (irq >> 3);
 		return (inb(port) >> (irq & 7)) & 1;
 	}
@@ -1464,7 +1475,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
 	}
 
 	ioapic_register_intr(irq, desc, trigger);
-	if (irq < NR_IRQS_LEGACY)
+	if (irq < nr_legacy_irqs)
 		disable_8259A_irq(irq);
 
 	ioapic_write_entry(apic_id, pin, entry);
@@ -1831,7 +1842,7 @@ __apicdebuginit(void) print_PIC(void)
 	unsigned int v;
 	unsigned long flags;
 
-	if (apic_verbosity == APIC_QUIET)
+	if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
 		return;
 
 	printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1863,7 +1874,7 @@ __apicdebuginit(int) print_all_ICs(void)
 	print_PIC();
 
 	/* don't print out if apic is not there */
-	if (!cpu_has_apic || disable_apic)
+	if (!cpu_has_apic && !apic_from_smp_config())
 		return 0;
 
 	print_all_local_APICs();
@@ -1894,6 +1905,10 @@ void __init enable_IO_APIC(void)
 		spin_unlock_irqrestore(&ioapic_lock, flags);
 		nr_ioapic_registers[apic] = reg_01.bits.entries+1;
 	}
+
+	if (!nr_legacy_irqs)
+		return;
+
 	for(apic = 0; apic < nr_ioapics; apic++) {
 		int pin;
 		/* See if any of the pins is in ExtINT mode */
@@ -1948,6 +1963,9 @@ void disable_IO_APIC(void)
 	 */
 	clear_IO_APIC();
 
+	if (!nr_legacy_irqs)
+		return;
+
 	/*
 	 * If the i8259 is routed through an IOAPIC
 	 * Put that IOAPIC in virtual wire mode
@@ -1981,7 +1999,7 @@ void disable_IO_APIC(void)
 	/*
 	 * Use virtual wire A mode when interrupt remapping is enabled.
 	 */
-	if (cpu_has_apic)
+	if (cpu_has_apic || apic_from_smp_config())
 		disconnect_bsp_APIC(!intr_remapping_enabled &&
 				ioapic_i8259.pin != -1);
 }
@@ -1994,7 +2012,7 @@ void disable_IO_APIC(void)
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
 
-static void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc(void)
 {
 	union IO_APIC_reg_00 reg_00;
 	physid_mask_t phys_id_present_map;
@@ -2003,9 +2021,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
 	unsigned char old_id;
 	unsigned long flags;
 
-	if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+	if (acpi_ioapic)
 		return;
-
 	/*
 	 * Don't check I/O APIC IDs for xAPIC systems.  They have
 	 * no meaning without the serial APIC bus.
@@ -2179,7 +2196,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
 	struct irq_cfg *cfg;
 
 	spin_lock_irqsave(&ioapic_lock, flags);
-	if (irq < NR_IRQS_LEGACY) {
+	if (irq < nr_legacy_irqs) {
 		disable_8259A_irq(irq);
 		if (i8259A_irq_pending(irq))
 			was_pending = 1;
@@ -2657,7 +2674,7 @@ static inline void init_IO_APIC_traps(void)
 			 * so default to an old-fashioned 8259
 			 * interrupt if we can..
 			 */
-			if (irq < NR_IRQS_LEGACY)
+			if (irq < nr_legacy_irqs)
 				make_8259A_irq(irq);
 			else
 				/* Strange. Oh, well.. */
@@ -2993,7 +3010,7 @@ out:
  * the I/O APIC in all cases now.  No actual device should request
  * it anyway.  --macro
  */
-#define PIC_IRQS	(1 << PIC_CASCADE_IR)
+#define PIC_IRQS	(1UL << PIC_CASCADE_IR)
 
 void __init setup_IO_APIC(void)
 {
@@ -3001,21 +3018,19 @@ void __init setup_IO_APIC(void)
 	/*
 	 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
 	 */
-
-	io_apic_irqs = ~PIC_IRQS;
+	io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
 
 	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
 	/*
          * Set up IO-APIC IRQ routing.
          */
-#ifdef CONFIG_X86_32
-	if (!acpi_ioapic)
-		setup_ioapic_ids_from_mpc();
-#endif
+	x86_init.mpparse.setup_ioapic_ids();
+
 	sync_Arb_IDs();
 	setup_IO_APIC_irqs();
 	init_IO_APIC_traps();
-	check_timer();
+	if (nr_legacy_irqs)
+		check_timer();
 }
 
 /*
@@ -3116,7 +3131,6 @@ static int __init ioapic_init_sysfs(void)
 
 device_initcall(ioapic_init_sysfs);
 
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
@@ -3856,7 +3870,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
-	if (irq >= NR_IRQS_LEGACY) {
+	if (irq >= nr_legacy_irqs) {
 		cfg = desc->chip_data;
 		if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
 			printk(KERN_INFO "can not add pin %d for irq %d\n",
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index db72202..cb66a22 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
 
 static inline int mce_in_progress(void)
 {
-#if defined(CONFIG_X86_NEW_MCE)
+#if defined(CONFIG_X86_MCE)
 	return atomic_read(&mce_entry) > 0;
 #endif
 	return 0;
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
index ca96e68..efa00e2 100644
--- a/arch/x86/kernel/apic/numaq_32.c
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -66,7 +66,6 @@ struct mpc_trans {
 	unsigned short			trans_reserved;
 };
 
-/* x86_quirks member */
 static int				mpc_record;
 
 static struct mpc_trans			*translation_table[MAX_MPC_ENTRY];
@@ -130,10 +129,9 @@ void __cpuinit numaq_tsc_disable(void)
 	}
 }
 
-static int __init numaq_pre_time_init(void)
+static void __init numaq_tsc_init(void)
 {
 	numaq_tsc_disable();
-	return 0;
 }
 
 static inline int generate_logical_apicid(int quad, int phys_apicid)
@@ -177,6 +175,19 @@ static void mpc_oem_pci_bus(struct mpc_bus *m)
 	quad_local_to_mp_bus_id[quad][local] = m->busid;
 }
 
+/*
+ * Called from mpparse code.
+ * mode = 0: prescan
+ * mode = 1: one mpc entry scanned
+ */
+static void numaq_mpc_record(unsigned int mode)
+{
+	if (!mode)
+		mpc_record = 0;
+	else
+		mpc_record++;
+}
+
 static void __init MP_translation_info(struct mpc_trans *m)
 {
 	printk(KERN_INFO
@@ -206,9 +217,9 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 /*
  * Read/parse the MPC oem tables
  */
-static void __init
- smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize)
+static void __init smp_read_mpc_oem(struct mpc_table *mpc)
 {
+	struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr;
 	int count = sizeof(*oemtable);	/* the header size */
 	unsigned char *oemptr = ((unsigned char *)oemtable) + count;
 
@@ -250,29 +261,6 @@ static void __init
 	}
 }
 
-static int __init numaq_setup_ioapic_ids(void)
-{
-	/* so can skip it */
-	return 1;
-}
-
-static struct x86_quirks numaq_x86_quirks __initdata = {
-	.arch_pre_time_init		= numaq_pre_time_init,
-	.arch_time_init			= NULL,
-	.arch_pre_intr_init		= NULL,
-	.arch_memory_setup		= NULL,
-	.arch_intr_init			= NULL,
-	.arch_trap_init			= NULL,
-	.mach_get_smp_config		= NULL,
-	.mach_find_smp_config		= NULL,
-	.mpc_record			= &mpc_record,
-	.mpc_apic_id			= mpc_apic_id,
-	.mpc_oem_bus_info		= mpc_oem_bus_info,
-	.mpc_oem_pci_bus		= mpc_oem_pci_bus,
-	.smp_read_mpc_oem		= smp_read_mpc_oem,
-	.setup_ioapic_ids		= numaq_setup_ioapic_ids,
-};
-
 static __init void early_check_numaq(void)
 {
 	/*
@@ -286,8 +274,15 @@ static __init void early_check_numaq(void)
 	if (smp_found_config)
 		early_get_smp_config();
 
-	if (found_numaq)
-		x86_quirks = &numaq_x86_quirks;
+	if (found_numaq) {
+		x86_init.mpparse.mpc_record = numaq_mpc_record;
+		x86_init.mpparse.setup_ioapic_ids = x86_init_noop;
+		x86_init.mpparse.mpc_apic_id = mpc_apic_id;
+		x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem;
+		x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus;
+		x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info;
+		x86_init.timers.tsc_pre_init = numaq_tsc_init;
+	}
 }
 
 int __init get_memcfg_numaq(void)
@@ -418,7 +413,7 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
 void *xquad_portio;
 
-static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
+static inline int numaq_check_phys_apicid_present(int phys_apicid)
 {
 	return 1;
 }
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 65edc18..c4cbd308 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -64,16 +64,23 @@ void __init default_setup_apic_routing(void)
 			apic = &apic_x2apic_phys;
 		else
 			apic = &apic_x2apic_cluster;
-		printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
 	}
 #endif
 
 	if (apic == &apic_flat) {
-		if (max_physical_apicid >= 8)
-			apic = &apic_physflat;
-		printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_INTEL:
+			if (num_processors > 8)
+				apic = &apic_physflat;
+			break;
+		case X86_VENDOR_AMD:
+			if (max_physical_apicid >= 8)
+				apic = &apic_physflat;
+		}
 	}
 
+	printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
+
 	if (is_vsmp_box()) {
 		/* need to update phys_pkg_id */
 		apic->phys_pkg_id = apicid_phys_pkg_id;
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
index eafdfbd1..645ecc4 100644
--- a/arch/x86/kernel/apic/summit_32.c
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -272,7 +272,7 @@ static physid_mask_t summit_apicid_to_cpu_present(int apicid)
 	return physid_mask_of_physid(0);
 }
 
-static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
+static int summit_check_phys_apicid_present(int physical_apicid)
 {
 	return 1;
 }
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 6011593..f5f5886 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -389,6 +389,16 @@ static __init void map_gru_high(int max_pnode)
 		map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
 }
 
+static __init void map_mmr_high(int max_pnode)
+{
+	union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
+	int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+	if (mmr.s.enable)
+		map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
+}
+
 static __init void map_mmioh_high(int max_pnode)
 {
 	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
@@ -643,6 +653,7 @@ void __init uv_system_init(void)
 	}
 
 	map_gru_high(max_pnode);
+	map_mmr_high(max_pnode);
 	map_mmioh_high(max_pnode);
 
 	uv_cpu_init();
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c1f253d..68537e9 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -13,7 +13,7 @@ CFLAGS_common.o		:= $(nostackp)
 
 obj-y			:= intel_cacheinfo.o addon_cpuid_features.o
 obj-y			+= proc.o capflags.o powerflags.o common.o
-obj-y			+= vmware.o hypervisor.o
+obj-y			+= vmware.o hypervisor.o sched.o
 
 obj-$(CONFIG_X86_32)	+= bugs.o cmpxchg.o
 obj-$(CONFIG_X86_64)	+= bugs_64.o
@@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
 obj-$(CONFIG_CPU_SUP_UMC_32)		+= umc.o
 
-obj-$(CONFIG_PERF_COUNTERS)		+= perf_counter.o
+obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 obj-$(CONFIG_X86_MCE)			+= mcheck/
 obj-$(CONFIG_MTRR)			+= mtrr/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 22a47c8..f32fa71 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
 #endif
 }
 
+int amd_get_nb_id(int cpu)
+{
+	int id = 0;
+#ifdef CONFIG_SMP
+	id = per_cpu(cpu_llc_id, cpu);
+#endif
+	return id;
+}
+EXPORT_SYMBOL_GPL(amd_get_nb_id);
+
 static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2055fc2..cc25c2b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,7 +13,7 @@
 #include <linux/io.h>
 
 #include <asm/stackprotector.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 #include <asm/mmu_context.h>
 #include <asm/hypervisor.h>
 #include <asm/processor.h>
@@ -34,7 +34,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 #include <asm/pat.h>
-#include <linux/smp.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/uv/uv.h>
@@ -870,7 +869,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	init_hw_perf_counters();
+	init_hw_perf_events();
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 6b2a52d..dca325c 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -30,8 +30,8 @@
 #include <asm/apic.h>
 #include <asm/desc.h>
 
-static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
-static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
+static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
+static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
 static DEFINE_PER_CPU(int, cpu_priv_count);
 
 static DEFINE_MUTEX(cpu_debug_lock);
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ae9b5032..7d5c3b0 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
 #include <linux/cpufreq.h>
 #include <linux/compiler.h>
 #include <linux/dmi.h>
-#include <trace/power.h>
+#include <trace/events/power.h>
 
 #include <linux/acpi.h>
 #include <linux/io.h>
@@ -60,7 +60,6 @@ enum {
 };
 
 #define INTEL_MSR_RANGE		(0xffff)
-#define CPUID_6_ECX_APERFMPERF_CAPABILITY	(0x1)
 
 struct acpi_cpufreq_data {
 	struct acpi_processor_performance *acpi_data;
@@ -71,13 +70,7 @@ struct acpi_cpufreq_data {
 
 static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
 
-struct acpi_msr_data {
-	u64 saved_aperf, saved_mperf;
-};
-
-static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
-
-DEFINE_TRACE(power_mark);
+static DEFINE_PER_CPU(struct aperfmperf, old_perf);
 
 /* acpi_perf_data is a pointer to percpu data. */
 static struct acpi_processor_performance *acpi_perf_data;
@@ -244,23 +237,12 @@ static u32 get_cur_val(const struct cpumask *mask)
 	return cmd.val;
 }
 
-struct perf_pair {
-	union {
-		struct {
-			u32 lo;
-			u32 hi;
-		} split;
-		u64 whole;
-	} aperf, mperf;
-};
-
 /* Called via smp_call_function_single(), on the target CPU */
 static void read_measured_perf_ctrs(void *_cur)
 {
-	struct perf_pair *cur = _cur;
+	struct aperfmperf *am = _cur;
 
-	rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi);
-	rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
+	get_aperfmperf(am);
 }
 
 /*
@@ -279,63 +261,17 @@ static void read_measured_perf_ctrs(void *_cur)
 static unsigned int get_measured_perf(struct cpufreq_policy *policy,
 				      unsigned int cpu)
 {
-	struct perf_pair readin, cur;
-	unsigned int perf_percent;
+	struct aperfmperf perf;
+	unsigned long ratio;
 	unsigned int retval;
 
-	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1))
+	if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
 		return 0;
 
-	cur.aperf.whole = readin.aperf.whole -
-				per_cpu(msr_data, cpu).saved_aperf;
-	cur.mperf.whole = readin.mperf.whole -
-				per_cpu(msr_data, cpu).saved_mperf;
-	per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
-	per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
-
-#ifdef __i386__
-	/*
-	 * We dont want to do 64 bit divide with 32 bit kernel
-	 * Get an approximate value. Return failure in case we cannot get
-	 * an approximate value.
-	 */
-	if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
-		int shift_count;
-		u32 h;
-
-		h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
-		shift_count = fls(h);
-
-		cur.aperf.whole >>= shift_count;
-		cur.mperf.whole >>= shift_count;
-	}
-
-	if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
-		int shift_count = 7;
-		cur.aperf.split.lo >>= shift_count;
-		cur.mperf.split.lo >>= shift_count;
-	}
-
-	if (cur.aperf.split.lo && cur.mperf.split.lo)
-		perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
-	else
-		perf_percent = 0;
-
-#else
-	if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
-		int shift_count = 7;
-		cur.aperf.whole >>= shift_count;
-		cur.mperf.whole >>= shift_count;
-	}
-
-	if (cur.aperf.whole && cur.mperf.whole)
-		perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
-	else
-		perf_percent = 0;
-
-#endif
+	ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
+	per_cpu(old_perf, cpu) = perf;
 
-	retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
+	retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
 
 	return retval;
 }
@@ -394,7 +330,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	unsigned int next_perf_state = 0; /* Index into perf table */
 	unsigned int i;
 	int result = 0;
-	struct power_trace it;
 
 	dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
 
@@ -426,7 +361,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		}
 	}
 
-	trace_power_mark(&it, POWER_PSTATE, next_perf_state);
+	trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
 
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
@@ -588,6 +523,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = {
 	},
 	{ }
 };
+
+static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
+{
+	/* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
+	 * AL30: A Machine Check Exception (MCE) Occurring during an
+	 * Enhanced Intel SpeedStep Technology Ratio Change May Cause
+	 * Both Processor Cores to Lock Up when HT is enabled*/
+	if (c->x86_vendor == X86_VENDOR_INTEL) {
+		if ((c->x86 == 15) &&
+		    (c->x86_model == 6) &&
+		    (c->x86_mask == 8) && smt_capable())
+			return -ENODEV;
+		}
+	return 0;
+}
 #endif
 
 static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
@@ -602,6 +552,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 
 	dprintk("acpi_cpufreq_cpu_init\n");
 
+#ifdef CONFIG_SMP
+	result = acpi_cpufreq_blacklist(c);
+	if (result)
+		return result;
+#endif
+
 	data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
@@ -731,12 +687,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	acpi_processor_notify_smm(THIS_MODULE);
 
 	/* Check for APERF/MPERF support in hardware */
-	if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
-		unsigned int ecx;
-		ecx = cpuid_ecx(6);
-		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
-			acpi_cpufreq_driver.getavg = get_measured_perf;
-	}
+	if (cpu_has(c, X86_FEATURE_APERFMPERF))
+		acpi_cpufreq_driver.getavg = get_measured_perf;
 
 	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
 	for (i = 0; i < perf->state_count; i++)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 2a50ef8..6394aa5 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -605,9 +605,10 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst,
 	return 0;
 }
 
-static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry)
+static void invalidate_entry(struct cpufreq_frequency_table *powernow_table,
+		unsigned int entry)
 {
-	data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
+	powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID;
 }
 
 static void print_basics(struct powernow_k8_data *data)
@@ -854,6 +855,10 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 		goto err_out;
 	}
 
+	/* fill in data */
+	data->numps = data->acpi_data.state_count;
+	powernow_k8_acpi_pst_values(data, 0);
+
 	if (cpu_family == CPU_HW_PSTATE)
 		ret_val = fill_powernow_table_pstate(data, powernow_table);
 	else
@@ -866,11 +871,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	powernow_table[data->acpi_data.state_count].index = 0;
 	data->powernow_table = powernow_table;
 
-	/* fill in data */
-	data->numps = data->acpi_data.state_count;
 	if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu)
 		print_basics(data);
-	powernow_k8_acpi_pst_values(data, 0);
 
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
@@ -914,13 +916,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data,
 					"bad value %d.\n", i, index);
 			printk(KERN_ERR PFX "Please report to BIOS "
 					"manufacturer\n");
-			invalidate_entry(data, i);
+			invalidate_entry(powernow_table, i);
 			continue;
 		}
 		rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi);
 		if (!(hi & HW_PSTATE_VALID_MASK)) {
 			dprintk("invalid pstate %d, ignoring\n", index);
-			invalidate_entry(data, i);
+			invalidate_entry(powernow_table, i);
 			continue;
 		}
 
@@ -941,7 +943,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 		struct cpufreq_frequency_table *powernow_table)
 {
 	int i;
-	int cntlofreq = 0;
 
 	for (i = 0; i < data->acpi_data.state_count; i++) {
 		u32 fid;
@@ -970,7 +971,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 		/* verify frequency is OK */
 		if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) {
 			dprintk("invalid freq %u kHz, ignoring\n", freq);
-			invalidate_entry(data, i);
+			invalidate_entry(powernow_table, i);
 			continue;
 		}
 
@@ -978,38 +979,17 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data,
 		 * BIOSs are using "off" to indicate invalid */
 		if (vid == VID_OFF) {
 			dprintk("invalid vid %u, ignoring\n", vid);
-			invalidate_entry(data, i);
+			invalidate_entry(powernow_table, i);
 			continue;
 		}
 
-		/* verify only 1 entry from the lo frequency table */
-		if (fid < HI_FID_TABLE_BOTTOM) {
-			if (cntlofreq) {
-				/* if both entries are the same,
-				 * ignore this one ... */
-				if ((freq != powernow_table[cntlofreq].frequency) ||
-				    (index != powernow_table[cntlofreq].index)) {
-					printk(KERN_ERR PFX
-						"Too many lo freq table "
-						"entries\n");
-					return 1;
-				}
-
-				dprintk("double low frequency table entry, "
-						"ignoring it.\n");
-				invalidate_entry(data, i);
-				continue;
-			} else
-				cntlofreq = i;
-		}
-
 		if (freq != (data->acpi_data.states[i].core_frequency * 1000)) {
 			printk(KERN_INFO PFX "invalid freq entries "
 				"%u kHz vs. %u kHz\n", freq,
 				(unsigned int)
 				(data->acpi_data.states[i].core_frequency
 				 * 1000));
-			invalidate_entry(data, i);
+			invalidate_entry(powernow_table, i);
 			continue;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index 93ba8ee..08be922 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -34,13 +34,6 @@ detect_hypervisor_vendor(struct cpuinfo_x86 *c)
 		c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
 }
 
-unsigned long get_hypervisor_tsc_freq(void)
-{
-	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
-		return vmware_get_tsc_khz();
-	return 0;
-}
-
 static inline void __cpuinit
 hypervisor_set_feature_bits(struct cpuinfo_x86 *c)
 {
@@ -55,3 +48,10 @@ void __cpuinit init_hypervisor(struct cpuinfo_x86 *c)
 	detect_hypervisor_vendor(c);
 	hypervisor_set_feature_bits(c);
 }
+
+void __init init_hypervisor_platform(void)
+{
+	init_hypervisor(&boot_cpu_data);
+	if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE)
+		vmware_platform_setup();
+}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 80a722a..40e1835 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
 	}
 
+	if (c->cpuid_level > 6) {
+		unsigned ecx = cpuid_ecx(6);
+		if (ecx & 0x01)
+			set_cpu_cap(c, X86_FEATURE_APERFMPERF);
+	}
+
 	if (cpu_has_xmm2)
 		set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
 	if (cpu_has_ds) {
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 188a1ca..4ac6d48 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,8 @@
-obj-y				=  mce.o
+obj-y				=  mce.o mce-severity.o
 
-obj-$(CONFIG_X86_NEW_MCE)	+= mce-severity.o
-obj-$(CONFIG_X86_OLD_MCE)	+= k7.o p4.o p6.o
 obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
 obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
-obj-$(CONFIG_X86_MCE_NONFATAL)	+= non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
 obj-$(CONFIG_X86_MCE_INJECT)	+= mce-inject.o
 
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
deleted file mode 100644
index b945d5d..0000000
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Athlon specific Machine Check Exception Reporting
- * (C) Copyright 2002 Dave Jones <davej@redhat.com>
- */
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-
-/* Machine Check Handler For AMD Athlon/Duron: */
-static void k7_machine_check(struct pt_regs *regs, long error_code)
-{
-	u32 alow, ahigh, high, low;
-	u32 mcgstl, mcgsth;
-	int recover = 1;
-	int i;
-
-	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-	if (mcgstl & (1<<0))	/* Recoverable ? */
-		recover = 0;
-
-	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
-		smp_processor_id(), mcgsth, mcgstl);
-
-	for (i = 1; i < nr_mce_banks; i++) {
-		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-		if (high & (1<<31)) {
-			char misc[20];
-			char addr[24];
-
-			misc[0] = '\0';
-			addr[0] = '\0';
-
-			if (high & (1<<29))
-				recover |= 1;
-			if (high & (1<<25))
-				recover |= 2;
-			high &= ~(1<<31);
-
-			if (high & (1<<27)) {
-				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
-			}
-			if (high & (1<<26)) {
-				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
-			}
-
-			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
-				smp_processor_id(), i, high, low, misc, addr);
-
-			/* Clear it: */
-			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-			/* Serialize: */
-			wmb();
-			add_taint(TAINT_MACHINE_CHECK);
-		}
-	}
-
-	if (recover & 2)
-		panic("CPU context corrupt");
-	if (recover & 1)
-		panic("Unable to continue");
-
-	printk(KERN_EMERG "Attempting to continue.\n");
-
-	mcgstl &= ~(1<<2);
-	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-}
-
-
-/* AMD K7 machine check is Intel like: */
-void amd_mcheck_init(struct cpuinfo_x86 *c)
-{
-	u32 l, h;
-	int i;
-
-	if (!cpu_has(c, X86_FEATURE_MCE))
-		return;
-
-	machine_check_vector = k7_machine_check;
-	/* Make sure the vector pointer is visible before we enable MCEs: */
-	wmb();
-
-	printk(KERN_INFO "Intel machine check architecture supported.\n");
-
-	rdmsr(MSR_IA32_MCG_CAP, l, h);
-	if (l & (1<<8))	/* Control register present ? */
-		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-	nr_mce_banks = l & 0xff;
-
-	/*
-	 * Clear status for MC index 0 separately, we don't touch CTL,
-	 * as some K7 Athlons cause spurious MCEs when its enabled:
-	 */
-	if (boot_cpu_data.x86 == 6) {
-		wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
-		i = 1;
-	} else
-		i = 0;
-
-	for (; i < nr_mce_banks; i++) {
-		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-	}
-
-	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
-		smp_processor_id());
-}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index a3a235a..7029f0e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -18,7 +18,12 @@
 #include <linux/string.h>
 #include <linux/fs.h>
 #include <linux/smp.h>
+#include <linux/notifier.h>
+#include <linux/kdebug.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
 #include <asm/mce.h>
+#include <asm/apic.h>
 
 /* Update fake mce registers on current CPU. */
 static void inject_mce(struct mce *m)
@@ -39,44 +44,141 @@ static void inject_mce(struct mce *m)
 	i->finished = 1;
 }
 
-struct delayed_mce {
-	struct timer_list timer;
-	struct mce m;
-};
+static void raise_poll(struct mce *m)
+{
+	unsigned long flags;
+	mce_banks_t b;
 
-/* Inject mce on current CPU */
-static void raise_mce(unsigned long data)
+	memset(&b, 0xff, sizeof(mce_banks_t));
+	local_irq_save(flags);
+	machine_check_poll(0, &b);
+	local_irq_restore(flags);
+	m->finished = 0;
+}
+
+static void raise_exception(struct mce *m, struct pt_regs *pregs)
 {
-	struct delayed_mce *dm = (struct delayed_mce *)data;
-	struct mce *m = &dm->m;
-	int cpu = m->extcpu;
+	struct pt_regs regs;
+	unsigned long flags;
 
-	inject_mce(m);
-	if (m->status & MCI_STATUS_UC) {
-		struct pt_regs regs;
+	if (!pregs) {
 		memset(&regs, 0, sizeof(struct pt_regs));
 		regs.ip = m->ip;
 		regs.cs = m->cs;
+		pregs = &regs;
+	}
+	/* in mcheck exeception handler, irq will be disabled */
+	local_irq_save(flags);
+	do_machine_check(pregs, 0);
+	local_irq_restore(flags);
+	m->finished = 0;
+}
+
+static cpumask_t mce_inject_cpumask;
+
+static int mce_raise_notify(struct notifier_block *self,
+			    unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *)data;
+	int cpu = smp_processor_id();
+	struct mce *m = &__get_cpu_var(injectm);
+	if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
+		return NOTIFY_DONE;
+	cpu_clear(cpu, mce_inject_cpumask);
+	if (m->inject_flags & MCJ_EXCEPTION)
+		raise_exception(m, args->regs);
+	else if (m->status)
+		raise_poll(m);
+	return NOTIFY_STOP;
+}
+
+static struct notifier_block mce_raise_nb = {
+	.notifier_call = mce_raise_notify,
+	.priority = 1000,
+};
+
+/* Inject mce on current CPU */
+static int raise_local(struct mce *m)
+{
+	int context = MCJ_CTX(m->inject_flags);
+	int ret = 0;
+	int cpu = m->extcpu;
+
+	if (m->inject_flags & MCJ_EXCEPTION) {
 		printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
-		do_machine_check(&regs, 0);
+		switch (context) {
+		case MCJ_CTX_IRQ:
+			/*
+			 * Could do more to fake interrupts like
+			 * calling irq_enter, but the necessary
+			 * machinery isn't exported currently.
+			 */
+			/*FALL THROUGH*/
+		case MCJ_CTX_PROCESS:
+			raise_exception(m, NULL);
+			break;
+		default:
+			printk(KERN_INFO "Invalid MCE context\n");
+			ret = -EINVAL;
+		}
 		printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
-	} else {
-		mce_banks_t b;
-		memset(&b, 0xff, sizeof(mce_banks_t));
+	} else if (m->status) {
 		printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
-		machine_check_poll(0, &b);
+		raise_poll(m);
 		mce_notify_irq();
-		printk(KERN_INFO "Finished machine check poll on CPU %d\n",
-		       cpu);
-	}
-	kfree(dm);
+		printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
+	} else
+		m->finished = 0;
+
+	return ret;
+}
+
+static void raise_mce(struct mce *m)
+{
+	int context = MCJ_CTX(m->inject_flags);
+
+	inject_mce(m);
+
+	if (context == MCJ_CTX_RANDOM)
+		return;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (m->inject_flags & MCJ_NMI_BROADCAST) {
+		unsigned long start;
+		int cpu;
+		get_online_cpus();
+		mce_inject_cpumask = cpu_online_map;
+		cpu_clear(get_cpu(), mce_inject_cpumask);
+		for_each_online_cpu(cpu) {
+			struct mce *mcpu = &per_cpu(injectm, cpu);
+			if (!mcpu->finished ||
+			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
+				cpu_clear(cpu, mce_inject_cpumask);
+		}
+		if (!cpus_empty(mce_inject_cpumask))
+			apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
+		start = jiffies;
+		while (!cpus_empty(mce_inject_cpumask)) {
+			if (!time_before(jiffies, start + 2*HZ)) {
+				printk(KERN_ERR
+				"Timeout waiting for mce inject NMI %lx\n",
+					*cpus_addr(mce_inject_cpumask));
+				break;
+			}
+			cpu_relax();
+		}
+		raise_local(m);
+		put_cpu();
+		put_online_cpus();
+	} else
+#endif
+		raise_local(m);
 }
 
 /* Error injection interface */
 static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 			 size_t usize, loff_t *off)
 {
-	struct delayed_mce *dm;
 	struct mce m;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -96,19 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 	if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
 		return -EINVAL;
 
-	dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
-	if (!dm)
-		return -ENOMEM;
-
 	/*
 	 * Need to give user space some time to set everything up,
 	 * so do it a jiffie or two later everywhere.
-	 * Should we use a hrtimer here for better synchronization?
 	 */
-	memcpy(&dm->m, &m, sizeof(struct mce));
-	setup_timer(&dm->timer, raise_mce, (unsigned long)dm);
-	dm->timer.expires = jiffies + 2;
-	add_timer_on(&dm->timer, m.extcpu);
+	schedule_timeout(2);
+	raise_mce(&m);
 	return usize;
 }
 
@@ -116,6 +211,7 @@ static int inject_init(void)
 {
 	printk(KERN_INFO "Machine check injector initialized\n");
 	mce_chrdev_ops.write = mce_write;
+	register_die_notifier(&mce_raise_nb);
 	return 0;
 }
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 54dcb8f..32996f9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,3 +1,4 @@
+#include <linux/sysdev.h>
 #include <asm/mce.h>
 
 enum severity_level {
@@ -10,6 +11,20 @@ enum severity_level {
 	MCE_PANIC_SEVERITY,
 };
 
+#define ATTR_LEN		16
+
+/* One object for each MCE bank, shared by all CPUs */
+struct mce_bank {
+	u64			ctl;			/* subevents to enable */
+	unsigned char init;				/* initialise bank? */
+	struct sysdev_attribute attr;			/* sysdev attribute */
+	char			attrname[ATTR_LEN];	/* attribute name */
+};
+
 int mce_severity(struct mce *a, int tolerant, char **msg);
+struct dentry *mce_get_debugfs_dir(void);
 
 extern int mce_ser;
+
+extern struct mce_bank *mce_banks;
+
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index ff0807f..8a85dd1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
 	}
 }
 
+#ifdef CONFIG_DEBUG_FS
 static void *s_start(struct seq_file *f, loff_t *pos)
 {
 	if (*pos >= ARRAY_SIZE(severities))
@@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void)
 {
 	struct dentry *dmce = NULL, *fseverities_coverage = NULL;
 
-	dmce = debugfs_create_dir("mce", NULL);
+	dmce = mce_get_debugfs_dir();
 	if (dmce == NULL)
 		goto err_out;
 	fseverities_coverage = debugfs_create_file("severities-coverage",
@@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void)
 	return 0;
 
 err_out:
-	if (fseverities_coverage)
-		debugfs_remove(fseverities_coverage);
-	if (dmce)
-		debugfs_remove(dmce);
 	return -ENOMEM;
 }
 late_initcall(severities_debugfs_init);
+#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 9bfe9d2..2f5aab2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -34,6 +34,7 @@
 #include <linux/smp.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/debugfs.h>
 
 #include <asm/processor.h>
 #include <asm/hw_irq.h>
@@ -45,21 +46,8 @@
 
 #include "mce-internal.h"
 
-/* Handle unconfigured int18 (should never happen) */
-static void unexpected_machine_check(struct pt_regs *regs, long error_code)
-{
-	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
-	       smp_processor_id());
-}
-
-/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct pt_regs *, long error_code) =
-						unexpected_machine_check;
-
 int mce_disabled __read_mostly;
 
-#ifdef CONFIG_X86_NEW_MCE
-
 #define MISC_MCELOG_MINOR	227
 
 #define SPINUNIT 100	/* 100ns */
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
  */
 static int			tolerant		__read_mostly = 1;
 static int			banks			__read_mostly;
-static u64			*bank			__read_mostly;
 static int			rip_msr			__read_mostly;
 static int			mce_bootlog		__read_mostly = -1;
 static int			monarch_timeout		__read_mostly = -1;
@@ -87,13 +74,13 @@ int				mce_cmci_disabled	__read_mostly;
 int				mce_ignore_ce		__read_mostly;
 int				mce_ser			__read_mostly;
 
+struct mce_bank                *mce_banks		__read_mostly;
+
 /* User mode helper program triggered by machine check event */
 static unsigned long		mce_need_notify;
 static char			mce_helper[128];
 static char			*mce_helper_argv[2] = { mce_helper, NULL };
 
-static unsigned long		dont_init_banks;
-
 static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static int			cpu_missing;
@@ -104,11 +91,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
 };
 
-static inline int skip_bank_init(int i)
-{
-	return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
-}
-
 static DEFINE_PER_CPU(struct work_struct, mce_work);
 
 /* Do initial initialization of a struct mce */
@@ -232,6 +214,9 @@ static void print_mce_tail(void)
 
 static atomic_t mce_paniced;
 
+static int fake_panic;
+static atomic_t mce_fake_paniced;
+
 /* Panic in progress. Enable interrupts and wait for final IPI */
 static void wait_for_panic(void)
 {
@@ -249,15 +234,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 {
 	int i;
 
-	/*
-	 * Make sure only one CPU runs in machine check panic
-	 */
-	if (atomic_add_return(1, &mce_paniced) > 1)
-		wait_for_panic();
-	barrier();
+	if (!fake_panic) {
+		/*
+		 * Make sure only one CPU runs in machine check panic
+		 */
+		if (atomic_inc_return(&mce_paniced) > 1)
+			wait_for_panic();
+		barrier();
 
-	bust_spinlocks(1);
-	console_verbose();
+		bust_spinlocks(1);
+		console_verbose();
+	} else {
+		/* Don't log too much for fake panic */
+		if (atomic_inc_return(&mce_fake_paniced) > 1)
+			return;
+	}
 	print_mce_head();
 	/* First print corrected ones that are still unlogged */
 	for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -284,9 +275,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 	print_mce_tail();
 	if (exp)
 		printk(KERN_EMERG "Machine check: %s\n", exp);
-	if (panic_timeout == 0)
-		panic_timeout = mce_panic_timeout;
-	panic(msg);
+	if (!fake_panic) {
+		if (panic_timeout == 0)
+			panic_timeout = mce_panic_timeout;
+		panic(msg);
+	} else
+		printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
 }
 
 /* Support code for software error injection */
@@ -296,11 +290,11 @@ static int msr_to_offset(u32 msr)
 	unsigned bank = __get_cpu_var(injectm.bank);
 	if (msr == rip_msr)
 		return offsetof(struct mce, ip);
-	if (msr == MSR_IA32_MC0_STATUS + bank*4)
+	if (msr == MSR_IA32_MCx_STATUS(bank))
 		return offsetof(struct mce, status);
-	if (msr == MSR_IA32_MC0_ADDR + bank*4)
+	if (msr == MSR_IA32_MCx_ADDR(bank))
 		return offsetof(struct mce, addr);
-	if (msr == MSR_IA32_MC0_MISC + bank*4)
+	if (msr == MSR_IA32_MCx_MISC(bank))
 		return offsetof(struct mce, misc);
 	if (msr == MSR_IA32_MCG_STATUS)
 		return offsetof(struct mce, mcgstatus);
@@ -505,7 +499,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 	for (i = 0; i < banks; i++) {
-		if (!bank[i] || !test_bit(i, *b))
+		if (!mce_banks[i].ctl || !test_bit(i, *b))
 			continue;
 
 		m.misc = 0;
@@ -514,7 +508,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		m.tsc = 0;
 
 		barrier();
-		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+		m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
 		if (!(m.status & MCI_STATUS_VAL))
 			continue;
 
@@ -529,9 +523,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 			continue;
 
 		if (m.status & MCI_STATUS_MISCV)
-			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
+			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
 		if (m.status & MCI_STATUS_ADDRV)
-			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
+			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
 
 		if (!(flags & MCP_TIMESTAMP))
 			m.tsc = 0;
@@ -547,7 +541,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		/*
 		 * Clear state for this bank.
 		 */
-		mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+		mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
 	}
 
 	/*
@@ -568,7 +562,7 @@ static int mce_no_way_out(struct mce *m, char **msg)
 	int i;
 
 	for (i = 0; i < banks; i++) {
-		m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+		m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
 		if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
 			return 1;
 	}
@@ -628,7 +622,7 @@ out:
  * This way we prevent any potential data corruption in a unrecoverable case
  * and also makes sure always all CPU's errors are examined.
  *
- * Also this detects the case of an machine check event coming from outer
+ * Also this detects the case of a machine check event coming from outer
  * space (not detected by any CPUs) In this case some external agent wants
  * us to shut down, so panic too.
  *
@@ -681,7 +675,7 @@ static void mce_reign(void)
 	 * No machine check event found. Must be some external
 	 * source or one CPU is hung. Panic.
 	 */
-	if (!m && tolerant < 3)
+	if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
 		mce_panic("Machine check from unknown source", NULL, NULL);
 
 	/*
@@ -715,7 +709,7 @@ static int mce_start(int *no_way_out)
 	 * global_nwo should be updated before mce_callin
 	 */
 	smp_wmb();
-	order = atomic_add_return(1, &mce_callin);
+	order = atomic_inc_return(&mce_callin);
 
 	/*
 	 * Wait for everyone.
@@ -852,7 +846,7 @@ static void mce_clear_state(unsigned long *toclear)
 
 	for (i = 0; i < banks; i++) {
 		if (test_bit(i, toclear))
-			mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+			mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
 	}
 }
 
@@ -905,11 +899,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	mce_setup(&m);
 
 	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
-	no_way_out = mce_no_way_out(&m, &msg);
-
 	final = &__get_cpu_var(mces_seen);
 	*final = m;
 
+	no_way_out = mce_no_way_out(&m, &msg);
+
 	barrier();
 
 	/*
@@ -926,14 +920,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	order = mce_start(&no_way_out);
 	for (i = 0; i < banks; i++) {
 		__clear_bit(i, toclear);
-		if (!bank[i])
+		if (!mce_banks[i].ctl)
 			continue;
 
 		m.misc = 0;
 		m.addr = 0;
 		m.bank = i;
 
-		m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4);
+		m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
 		if ((m.status & MCI_STATUS_VAL) == 0)
 			continue;
 
@@ -974,9 +968,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			kill_it = 1;
 
 		if (m.status & MCI_STATUS_MISCV)
-			m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4);
+			m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
 		if (m.status & MCI_STATUS_ADDRV)
-			m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4);
+			m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
 
 		/*
 		 * Action optional error. Queue address for later processing.
@@ -1101,7 +1095,7 @@ void mce_log_therm_throt_event(__u64 status)
  */
 static int check_interval = 5 * 60; /* 5 minutes */
 
-static DEFINE_PER_CPU(int, next_interval); /* in jiffies */
+static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
 static void mcheck_timer(unsigned long data)
@@ -1120,7 +1114,7 @@ static void mcheck_timer(unsigned long data)
 	 * Alert userspace if needed.  If we logged an MCE, reduce the
 	 * polling interval, otherwise increase the polling interval.
 	 */
-	n = &__get_cpu_var(next_interval);
+	n = &__get_cpu_var(mce_next_interval);
 	if (mce_notify_irq())
 		*n = max(*n/2, HZ/100);
 	else
@@ -1169,10 +1163,25 @@ int mce_notify_irq(void)
 }
 EXPORT_SYMBOL_GPL(mce_notify_irq);
 
+static int mce_banks_init(void)
+{
+	int i;
+
+	mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
+	if (!mce_banks)
+		return -ENOMEM;
+	for (i = 0; i < banks; i++) {
+		struct mce_bank *b = &mce_banks[i];
+		b->ctl = -1ULL;
+		b->init = 1;
+	}
+	return 0;
+}
+
 /*
  * Initialize Machine Checks for a CPU.
  */
-static int mce_cap_init(void)
+static int __cpuinit mce_cap_init(void)
 {
 	unsigned b;
 	u64 cap;
@@ -1192,11 +1201,10 @@ static int mce_cap_init(void)
 	/* Don't support asymmetric configurations today */
 	WARN_ON(banks != 0 && b != banks);
 	banks = b;
-	if (!bank) {
-		bank = kmalloc(banks * sizeof(u64), GFP_KERNEL);
-		if (!bank)
-			return -ENOMEM;
-		memset(bank, 0xff, banks * sizeof(u64));
+	if (!mce_banks) {
+		int err = mce_banks_init();
+		if (err)
+			return err;
 	}
 
 	/* Use accurate RIP reporting if available. */
@@ -1228,15 +1236,16 @@ static void mce_init(void)
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
 	for (i = 0; i < banks; i++) {
-		if (skip_bank_init(i))
+		struct mce_bank *b = &mce_banks[i];
+		if (!b->init)
 			continue;
-		wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
-		wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+		wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
+		wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
 	}
 }
 
 /* Add per CPU specific workarounds here */
-static int mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
 {
 	if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
 		pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1251,7 +1260,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
 			 * trips off incorrectly with the IOMMU & 3ware
 			 * & Cerberus:
 			 */
-			clear_bit(10, (unsigned long *)&bank[4]);
+			clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
 		}
 		if (c->x86 <= 17 && mce_bootlog < 0) {
 			/*
@@ -1265,7 +1274,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
 		 * by default.
 		 */
 		 if (c->x86 == 6 && banks > 0)
-			bank[0] = 0;
+			mce_banks[0].ctl = 0;
 	}
 
 	if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1278,8 +1287,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
 		 * valid event later, merely don't write CTL0.
 		 */
 
-		if (c->x86 == 6 && c->x86_model < 0x1A)
-			__set_bit(0, &dont_init_banks);
+		if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
+			mce_banks[0].init = 0;
 
 		/*
 		 * All newer Intel systems support MCE broadcasting. Enable
@@ -1335,7 +1344,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
 static void mce_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
-	int *n = &__get_cpu_var(next_interval);
+	int *n = &__get_cpu_var(mce_next_interval);
 
 	if (mce_ignore_ce)
 		return;
@@ -1348,6 +1357,17 @@ static void mce_init_timer(void)
 	add_timer_on(t, smp_processor_id());
 }
 
+/* Handle unconfigured int18 (should never happen) */
+static void unexpected_machine_check(struct pt_regs *regs, long error_code)
+{
+	printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
+	       smp_processor_id());
+}
+
+/* Call the installed machine check handler for this CPU setup. */
+void (*machine_check_vector)(struct pt_regs *, long error_code) =
+						unexpected_machine_check;
+
 /*
  * Called for each booted CPU to set up machine checks.
  * Must be called with preempt off:
@@ -1561,8 +1581,10 @@ static struct miscdevice mce_log_device = {
  */
 static int __init mcheck_enable(char *str)
 {
-	if (*str == 0)
+	if (*str == 0) {
 		enable_p5_mce();
+		return 1;
+	}
 	if (*str == '=')
 		str++;
 	if (!strcmp(str, "off"))
@@ -1603,8 +1625,9 @@ static int mce_disable(void)
 	int i;
 
 	for (i = 0; i < banks; i++) {
-		if (!skip_bank_init(i))
-			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+		struct mce_bank *b = &mce_banks[i];
+		if (b->init)
+			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
 	}
 	return 0;
 }
@@ -1679,14 +1702,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
 __cpuinitdata
 void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
 
-static struct sysdev_attribute *bank_attrs;
+static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
+{
+	return container_of(attr, struct mce_bank, attr);
+}
 
 static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
 			 char *buf)
 {
-	u64 b = bank[attr - bank_attrs];
-
-	return sprintf(buf, "%llx\n", b);
+	return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
 }
 
 static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1697,7 +1721,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
 	if (strict_strtoull(buf, 0, &new) < 0)
 		return -EINVAL;
 
-	bank[attr - bank_attrs] = new;
+	attr_to_bank(attr)->ctl = new;
 	mce_restart();
 
 	return size;
@@ -1839,7 +1863,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	}
 	for (j = 0; j < banks; j++) {
 		err = sysdev_create_file(&per_cpu(mce_dev, cpu),
-					&bank_attrs[j]);
+					&mce_banks[j].attr);
 		if (err)
 			goto error2;
 	}
@@ -1848,10 +1872,10 @@ static __cpuinit int mce_create_device(unsigned int cpu)
 	return 0;
 error2:
 	while (--j >= 0)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]);
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
 error:
 	while (--i >= 0)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
 
 	sysdev_unregister(&per_cpu(mce_dev, cpu));
 
@@ -1869,7 +1893,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
 		sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
 
 	for (i = 0; i < banks; i++)
-		sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]);
+		sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
 
 	sysdev_unregister(&per_cpu(mce_dev, cpu));
 	cpumask_clear_cpu(cpu, mce_dev_initialized);
@@ -1886,8 +1910,9 @@ static void mce_disable_cpu(void *h)
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_clear();
 	for (i = 0; i < banks; i++) {
-		if (!skip_bank_init(i))
-			wrmsrl(MSR_IA32_MC0_CTL + i*4, 0);
+		struct mce_bank *b = &mce_banks[i];
+		if (b->init)
+			wrmsrl(MSR_IA32_MCx_CTL(i), 0);
 	}
 }
 
@@ -1902,8 +1927,9 @@ static void mce_reenable_cpu(void *h)
 	if (!(action & CPU_TASKS_FROZEN))
 		cmci_reenable();
 	for (i = 0; i < banks; i++) {
-		if (!skip_bank_init(i))
-			wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]);
+		struct mce_bank *b = &mce_banks[i];
+		if (b->init)
+			wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
 	}
 }
 
@@ -1935,7 +1961,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 		t->expires = round_jiffies(jiffies +
-						__get_cpu_var(next_interval));
+					   __get_cpu_var(mce_next_interval));
 		add_timer_on(t, cpu);
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
 		break;
@@ -1951,35 +1977,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
 	.notifier_call = mce_cpu_callback,
 };
 
-static __init int mce_init_banks(void)
+static __init void mce_init_banks(void)
 {
 	int i;
 
-	bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
-				GFP_KERNEL);
-	if (!bank_attrs)
-		return -ENOMEM;
-
 	for (i = 0; i < banks; i++) {
-		struct sysdev_attribute *a = &bank_attrs[i];
+		struct mce_bank *b = &mce_banks[i];
+		struct sysdev_attribute *a = &b->attr;
 
-		a->attr.name	= kasprintf(GFP_KERNEL, "bank%d", i);
-		if (!a->attr.name)
-			goto nomem;
+		a->attr.name	= b->attrname;
+		snprintf(b->attrname, ATTR_LEN, "bank%d", i);
 
 		a->attr.mode	= 0644;
 		a->show		= show_bank;
 		a->store	= set_bank;
 	}
-	return 0;
-
-nomem:
-	while (--i >= 0)
-		kfree(bank_attrs[i].attr.name);
-	kfree(bank_attrs);
-	bank_attrs = NULL;
-
-	return -ENOMEM;
 }
 
 static __init int mce_init_device(void)
@@ -1992,9 +2004,7 @@ static __init int mce_init_device(void)
 
 	zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
 
-	err = mce_init_banks();
-	if (err)
-		return err;
+	mce_init_banks();
 
 	err = sysdev_class_register(&mce_sysclass);
 	if (err)
@@ -2014,57 +2024,65 @@ static __init int mce_init_device(void)
 
 device_initcall(mce_init_device);
 
-#else /* CONFIG_X86_OLD_MCE: */
-
-int nr_mce_banks;
-EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
+/*
+ * Old style boot options parsing. Only for compatibility.
+ */
+static int __init mcheck_disable(char *str)
+{
+	mce_disabled = 1;
+	return 1;
+}
+__setup("nomce", mcheck_disable);
 
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
+#ifdef CONFIG_DEBUG_FS
+struct dentry *mce_get_debugfs_dir(void)
 {
-	if (mce_disabled)
-		return;
+	static struct dentry *dmce;
 
-	switch (c->x86_vendor) {
-	case X86_VENDOR_AMD:
-		amd_mcheck_init(c);
-		break;
+	if (!dmce)
+		dmce = debugfs_create_dir("mce", NULL);
 
-	case X86_VENDOR_INTEL:
-		if (c->x86 == 5)
-			intel_p5_mcheck_init(c);
-		if (c->x86 == 6)
-			intel_p6_mcheck_init(c);
-		if (c->x86 == 15)
-			intel_p4_mcheck_init(c);
-		break;
+	return dmce;
+}
 
-	case X86_VENDOR_CENTAUR:
-		if (c->x86 == 5)
-			winchip_mcheck_init(c);
-		break;
+static void mce_reset(void)
+{
+	cpu_missing = 0;
+	atomic_set(&mce_fake_paniced, 0);
+	atomic_set(&mce_executing, 0);
+	atomic_set(&mce_callin, 0);
+	atomic_set(&global_nwo, 0);
+}
 
-	default:
-		break;
-	}
-	printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks);
+static int fake_panic_get(void *data, u64 *val)
+{
+	*val = fake_panic;
+	return 0;
 }
 
-static int __init mcheck_enable(char *str)
+static int fake_panic_set(void *data, u64 val)
 {
-	mce_p5_enabled = 1;
-	return 1;
+	mce_reset();
+	fake_panic = val;
+	return 0;
 }
-__setup("mce", mcheck_enable);
 
-#endif /* CONFIG_X86_OLD_MCE */
+DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
+			fake_panic_set, "%llu\n");
 
-/*
- * Old style boot options parsing. Only for compatibility.
- */
-static int __init mcheck_disable(char *str)
+static int __init mce_debugfs_init(void)
 {
-	mce_disabled = 1;
-	return 1;
+	struct dentry *dmce, *ffake_panic;
+
+	dmce = mce_get_debugfs_dir();
+	if (!dmce)
+		return -ENOMEM;
+	ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
+					  &fake_panic_fops);
+	if (!ffake_panic)
+		return -ENOMEM;
+
+	return 0;
 }
-__setup("nomce", mcheck_disable);
+late_initcall(mce_debugfs_init);
+#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 1fecba4..83a3d1f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -69,7 +69,7 @@ struct threshold_bank {
 	struct threshold_block	*blocks;
 	cpumask_var_t		cpus;
 };
-static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
+static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
 
 #ifdef CONFIG_SMP
 static unsigned char shared_bank[NR_BANKS] = {
@@ -489,8 +489,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 	int i, err = 0;
 	struct threshold_bank *b = NULL;
 	char name[32];
+#ifdef CONFIG_SMP
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
-
+#endif
 
 	sprintf(name, "threshold_bank%i", bank);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index e1acec0f..889f665 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot)
 		if (test_bit(i, owned))
 			continue;
 
-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
 		/* Already owned by someone else? */
 		if (val & CMCI_EN) {
@@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot)
 		}
 
 		val |= CMCI_EN | CMCI_THRESHOLD;
-		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
+		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
 		/* Did the enable bit stick? -- the bank supports CMCI */
 		if (val & CMCI_EN) {
@@ -152,9 +152,9 @@ void cmci_clear(void)
 		if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
 			continue;
 		/* Disable CMCI */
-		rdmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 		val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
-		wrmsrl(MSR_IA32_MC0_CTL2 + i, val);
+		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
 		__clear_bit(i, __get_cpu_var(mce_banks_owned));
 	}
 	spin_unlock_irqrestore(&cmci_discover_lock, flags);
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
deleted file mode 100644
index f5f2d6f..0000000
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Non Fatal Machine Check Exception Reporting
- *
- * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
- *
- * This file contains routines to check for non-fatal MCEs every 15s
- *
- */
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
-#include <linux/jiffies.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-
-static int		firstbank;
-
-#define MCE_RATE	(15*HZ)	/* timer rate is 15s */
-
-static void mce_checkregs(void *info)
-{
-	u32 low, high;
-	int i;
-
-	for (i = firstbank; i < nr_mce_banks; i++) {
-		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-
-		if (!(high & (1<<31)))
-			continue;
-
-		printk(KERN_INFO "MCE: The hardware reports a non fatal, "
-			"correctable incident occurred on CPU %d.\n",
-				smp_processor_id());
-
-		printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
-
-		/*
-		 * Scrub the error so we don't pick it up in MCE_RATE
-		 * seconds time:
-		 */
-		wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-
-		/* Serialize: */
-		wmb();
-		add_taint(TAINT_MACHINE_CHECK);
-	}
-}
-
-static void mce_work_fn(struct work_struct *work);
-static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
-
-static void mce_work_fn(struct work_struct *work)
-{
-	on_each_cpu(mce_checkregs, NULL, 1);
-	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
-}
-
-static int __init init_nonfatal_mce_checker(void)
-{
-	struct cpuinfo_x86 *c = &boot_cpu_data;
-
-	/* Check for MCE support */
-	if (!cpu_has(c, X86_FEATURE_MCE))
-		return -ENODEV;
-
-	/* Check for PPro style MCA */
-	if (!cpu_has(c, X86_FEATURE_MCA))
-		return -ENODEV;
-
-	/* Some Athlons misbehave when we frob bank 0 */
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-						boot_cpu_data.x86 == 6)
-		firstbank = 1;
-	else
-		firstbank = 0;
-
-	/*
-	 * Check for non-fatal errors every MCE_RATE s
-	 */
-	schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
-	printk(KERN_INFO "Machine check exception polling timer started.\n");
-
-	return 0;
-}
-module_init(init_nonfatal_mce_checker);
-
-MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
deleted file mode 100644
index 4482aea..0000000
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * P4 specific Machine Check Exception Reporting
- */
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-
-/* as supported by the P4/Xeon family */
-struct intel_mce_extended_msrs {
-	u32 eax;
-	u32 ebx;
-	u32 ecx;
-	u32 edx;
-	u32 esi;
-	u32 edi;
-	u32 ebp;
-	u32 esp;
-	u32 eflags;
-	u32 eip;
-	/* u32 *reserved[]; */
-};
-
-static int mce_num_extended_msrs;
-
-/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
-static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
-{
-	u32 h;
-
-	rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
-	rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
-	rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
-	rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
-	rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
-	rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
-	rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
-	rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
-	rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
-	rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
-}
-
-static void intel_machine_check(struct pt_regs *regs, long error_code)
-{
-	u32 alow, ahigh, high, low;
-	u32 mcgstl, mcgsth;
-	int recover = 1;
-	int i;
-
-	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-	if (mcgstl & (1<<0))	/* Recoverable ? */
-		recover = 0;
-
-	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
-		smp_processor_id(), mcgsth, mcgstl);
-
-	if (mce_num_extended_msrs > 0) {
-		struct intel_mce_extended_msrs dbg;
-
-		intel_get_extended_msrs(&dbg);
-
-		printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
-			"\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
-			"\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
-			smp_processor_id(), dbg.eip, dbg.eflags,
-			dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
-			dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
-	}
-
-	for (i = 0; i < nr_mce_banks; i++) {
-		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-		if (high & (1<<31)) {
-			char misc[20];
-			char addr[24];
-
-			misc[0] = addr[0] = '\0';
-			if (high & (1<<29))
-				recover |= 1;
-			if (high & (1<<25))
-				recover |= 2;
-			high &= ~(1<<31);
-			if (high & (1<<27)) {
-				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
-			}
-			if (high & (1<<26)) {
-				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
-			}
-			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
-				smp_processor_id(), i, high, low, misc, addr);
-		}
-	}
-
-	if (recover & 2)
-		panic("CPU context corrupt");
-	if (recover & 1)
-		panic("Unable to continue");
-
-	printk(KERN_EMERG "Attempting to continue.\n");
-
-	/*
-	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
-	 * recoverable/continuable.This will allow BIOS to look at the MSRs
-	 * for errors if the OS could not log the error.
-	 */
-	for (i = 0; i < nr_mce_banks; i++) {
-		u32 msr;
-		msr = MSR_IA32_MC0_STATUS+i*4;
-		rdmsr(msr, low, high);
-		if (high&(1<<31)) {
-			/* Clear it */
-			wrmsr(msr, 0UL, 0UL);
-			/* Serialize */
-			wmb();
-			add_taint(TAINT_MACHINE_CHECK);
-		}
-	}
-	mcgstl &= ~(1<<2);
-	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-}
-
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
-{
-	u32 l, h;
-	int i;
-
-	machine_check_vector = intel_machine_check;
-	wmb();
-
-	printk(KERN_INFO "Intel machine check architecture supported.\n");
-	rdmsr(MSR_IA32_MCG_CAP, l, h);
-	if (l & (1<<8))	/* Control register present ? */
-		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-	nr_mce_banks = l & 0xff;
-
-	for (i = 0; i < nr_mce_banks; i++) {
-		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-	}
-
-	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
-		smp_processor_id());
-
-	/* Check for P4/Xeon extended MCE MSRs */
-	rdmsr(MSR_IA32_MCG_CAP, l, h);
-	if (l & (1<<9))	{/* MCG_EXT_P */
-		mce_num_extended_msrs = (l >> 16) & 0xff;
-		printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
-				" available\n",
-			smp_processor_id(), mce_num_extended_msrs);
-
-#ifdef CONFIG_X86_MCE_P4THERMAL
-		/* Check for P4/Xeon Thermal monitor */
-		intel_init_thermal(c);
-#endif
-	}
-}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
deleted file mode 100644
index 01e4f81..0000000
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * P6 specific Machine Check Exception Reporting
- * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
- */
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-
-#include <asm/processor.h>
-#include <asm/system.h>
-#include <asm/mce.h>
-#include <asm/msr.h>
-
-/* Machine Check Handler For PII/PIII */
-static void intel_machine_check(struct pt_regs *regs, long error_code)
-{
-	u32 alow, ahigh, high, low;
-	u32 mcgstl, mcgsth;
-	int recover = 1;
-	int i;
-
-	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-	if (mcgstl & (1<<0))	/* Recoverable ? */
-		recover = 0;
-
-	printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
-		smp_processor_id(), mcgsth, mcgstl);
-
-	for (i = 0; i < nr_mce_banks; i++) {
-		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-		if (high & (1<<31)) {
-			char misc[20];
-			char addr[24];
-
-			misc[0] = '\0';
-			addr[0] = '\0';
-
-			if (high & (1<<29))
-				recover |= 1;
-			if (high & (1<<25))
-				recover |= 2;
-			high &= ~(1<<31);
-
-			if (high & (1<<27)) {
-				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
-				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
-			}
-			if (high & (1<<26)) {
-				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
-				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
-			}
-
-			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
-				smp_processor_id(), i, high, low, misc, addr);
-		}
-	}
-
-	if (recover & 2)
-		panic("CPU context corrupt");
-	if (recover & 1)
-		panic("Unable to continue");
-
-	printk(KERN_EMERG "Attempting to continue.\n");
-	/*
-	 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
-	 * recoverable/continuable.This will allow BIOS to look at the MSRs
-	 * for errors if the OS could not log the error:
-	 */
-	for (i = 0; i < nr_mce_banks; i++) {
-		unsigned int msr;
-
-		msr = MSR_IA32_MC0_STATUS+i*4;
-		rdmsr(msr, low, high);
-		if (high & (1<<31)) {
-			/* Clear it: */
-			wrmsr(msr, 0UL, 0UL);
-			/* Serialize: */
-			wmb();
-			add_taint(TAINT_MACHINE_CHECK);
-		}
-	}
-	mcgstl &= ~(1<<2);
-	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
-}
-
-/* Set up machine check reporting for processors with Intel style MCE: */
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
-{
-	u32 l, h;
-	int i;
-
-	/* Check for MCE support */
-	if (!cpu_has(c, X86_FEATURE_MCE))
-		return;
-
-	/* Check for PPro style MCA */
-	if (!cpu_has(c, X86_FEATURE_MCA))
-		return;
-
-	/* Ok machine check is available */
-	machine_check_vector = intel_machine_check;
-	/* Make sure the vector pointer is visible before we enable MCEs: */
-	wmb();
-
-	printk(KERN_INFO "Intel machine check architecture supported.\n");
-	rdmsr(MSR_IA32_MCG_CAP, l, h);
-	if (l & (1<<8))	/* Control register present ? */
-		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
-	nr_mce_banks = l & 0xff;
-
-	/*
-	 * Following the example in IA-32 SDM Vol 3:
-	 * - MC0_CTL should not be written
-	 * - Status registers on all banks should be cleared on reset
-	 */
-	for (i = 1; i < nr_mce_banks; i++)
-		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
-
-	for (i = 0; i < nr_mce_banks; i++)
-		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
-
-	set_in_cr4(X86_CR4_MCE);
-	printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
-		smp_processor_id());
-}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 5957a93..63a56d1 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -260,9 +260,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 		return;
 	}
 
-	if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
-		tm2 = 1;
-
 	/* Check whether a vector already exists */
 	if (h & APIC_VECTOR_MASK) {
 		printk(KERN_DEBUG
@@ -271,6 +268,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 		return;
 	}
 
+	/* early Pentium M models use different method for enabling TM2 */
+	if (cpu_has(c, X86_FEATURE_TM2)) {
+		if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
+			rdmsr(MSR_THERM2_CTL, l, h);
+			if (l & MSR_THERM2_CTL_TM_SELECT)
+				tm2 = 1;
+		} else if (l & MSR_IA32_MISC_ENABLE_TM2)
+			tm2 = 1;
+	}
+
 	/* We'll mask the thermal vector in the lapic till we're ready: */
 	h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
 	apic_write(APIC_LVTTHMR, h);
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 08b6ea4..f04e725 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -126,8 +126,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
 		return -EINVAL;
 
 	base = simple_strtoull(line + 5, &ptr, 0);
-	for (; isspace(*ptr); ++ptr)
-		;
+	while (isspace(*ptr))
+		ptr++;
 
 	if (strncmp(ptr, "size=", 5))
 		return -EINVAL;
@@ -135,14 +135,14 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
 	size = simple_strtoull(ptr + 5, &ptr, 0);
 	if ((base & 0xfff) || (size & 0xfff))
 		return -EINVAL;
-	for (; isspace(*ptr); ++ptr)
-		;
+	while (isspace(*ptr))
+		ptr++;
 
 	if (strncmp(ptr, "type=", 5))
 		return -EINVAL;
 	ptr += 5;
-	for (; isspace(*ptr); ++ptr)
-		;
+	while (isspace(*ptr))
+		ptr++;
 
 	for (i = 0; i < MTRR_NUM_TYPES; ++i) {
 		if (strcmp(ptr, mtrr_strings[i]))
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 7af0f88..84e83de 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
 static DEFINE_MUTEX(mtrr_mutex);
 
 u64 size_or_mask, size_and_mask;
+static bool mtrr_aps_delayed_init;
 
 static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
 
@@ -163,7 +164,10 @@ static void ipi_handler(void *info)
 	if (data->smp_reg != ~0U) {
 		mtrr_if->set(data->smp_reg, data->smp_base,
 			     data->smp_size, data->smp_type);
-	} else {
+	} else if (mtrr_aps_delayed_init) {
+		/*
+		 * Initialize the MTRRs inaddition to the synchronisation.
+		 */
 		mtrr_if->set_all();
 	}
 
@@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
 	 */
 	if (reg != ~0U)
 		mtrr_if->set(reg, base, size, type);
+	else if (!mtrr_aps_delayed_init)
+		mtrr_if->set_all();
 
 	/* Wait for the others */
 	while (atomic_read(&data.count))
@@ -721,9 +727,7 @@ void __init mtrr_bp_init(void)
 
 void mtrr_ap_init(void)
 {
-	unsigned long flags;
-
-	if (!mtrr_if || !use_intel())
+	if (!use_intel() || mtrr_aps_delayed_init)
 		return;
 	/*
 	 * Ideally we should hold mtrr_mutex here to avoid mtrr entries
@@ -738,11 +742,7 @@ void mtrr_ap_init(void)
 	 *   2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
 	 *      lock to prevent mtrr entry changes
 	 */
-	local_irq_save(flags);
-
-	mtrr_if->set_all();
-
-	local_irq_restore(flags);
+	set_mtrr(~0U, 0, 0, 0);
 }
 
 /**
@@ -753,6 +753,34 @@ void mtrr_save_state(void)
 	smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
 }
 
+void set_mtrr_aps_delayed_init(void)
+{
+	if (!use_intel())
+		return;
+
+	mtrr_aps_delayed_init = true;
+}
+
+/*
+ * MTRR initialization for all AP's
+ */
+void mtrr_aps_init(void)
+{
+	if (!use_intel())
+		return;
+
+	set_mtrr(~0U, 0, 0, 0);
+	mtrr_aps_delayed_init = false;
+}
+
+void mtrr_bp_restore(void)
+{
+	if (!use_intel())
+		return;
+
+	mtrr_if->set_all();
+}
+
 static int __init mtrr_init_finialize(void)
 {
 	if (!mtrr_if)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_event.c
index f9cd084..a3c7adb 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1,5 +1,5 @@
 /*
- * Performance counter x86 architecture code
+ * Performance events x86 architecture code
  *
  *  Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
  *  Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
@@ -11,7 +11,7 @@
  *  For licencing details see kernel-base/COPYING
  */
 
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
 #include <linux/capability.h>
 #include <linux/notifier.h>
 #include <linux/hardirq.h>
@@ -27,19 +27,19 @@
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 
-static u64 perf_counter_mask __read_mostly;
+static u64 perf_event_mask __read_mostly;
 
-/* The maximal number of PEBS counters: */
-#define MAX_PEBS_COUNTERS	4
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS	4
 
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
 /* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 1024)
+#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
 
 /* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 64)
+#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
 
 
 /*
@@ -65,11 +65,11 @@ struct debug_store {
 	u64	pebs_index;
 	u64	pebs_absolute_maximum;
 	u64	pebs_interrupt_threshold;
-	u64	pebs_counter_reset[MAX_PEBS_COUNTERS];
+	u64	pebs_event_reset[MAX_PEBS_EVENTS];
 };
 
-struct cpu_hw_counters {
-	struct perf_counter	*counters[X86_PMC_IDX_MAX];
+struct cpu_hw_events {
+	struct perf_event	*events[X86_PMC_IDX_MAX];
 	unsigned long		used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
@@ -86,17 +86,17 @@ struct x86_pmu {
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
 	void		(*enable_all)(void);
-	void		(*enable)(struct hw_perf_counter *, int);
-	void		(*disable)(struct hw_perf_counter *, int);
+	void		(*enable)(struct hw_perf_event *, int);
+	void		(*disable)(struct hw_perf_event *, int);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
 	u64		(*raw_event)(u64);
 	int		max_events;
-	int		num_counters;
-	int		num_counters_fixed;
-	int		counter_bits;
-	u64		counter_mask;
+	int		num_events;
+	int		num_events_fixed;
+	int		event_bits;
+	u64		event_mask;
 	int		apic;
 	u64		max_period;
 	u64		intel_ctrl;
@@ -106,7 +106,7 @@ struct x86_pmu {
 
 static struct x86_pmu x86_pmu __read_mostly;
 
-static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
 	.enabled = 1,
 };
 
@@ -124,35 +124,35 @@ static const u64 p6_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x0062,
 };
 
-static u64 p6_pmu_event_map(int event)
+static u64 p6_pmu_event_map(int hw_event)
 {
-	return p6_perfmon_event_map[event];
+	return p6_perfmon_event_map[hw_event];
 }
 
 /*
- * Counter setting that is specified not to count anything.
+ * Event setting that is specified not to count anything.
  * We use this to effectively disable a counter.
  *
  * L2_RQSTS with 0 MESI unit mask.
  */
-#define P6_NOP_COUNTER			0x0000002EULL
+#define P6_NOP_EVENT			0x0000002EULL
 
-static u64 p6_pmu_raw_event(u64 event)
+static u64 p6_pmu_raw_event(u64 hw_event)
 {
 #define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
 #define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
 #define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
 #define P6_EVNTSEL_INV_MASK		0x00800000ULL
-#define P6_EVNTSEL_COUNTER_MASK		0xFF000000ULL
+#define P6_EVNTSEL_REG_MASK		0xFF000000ULL
 
 #define P6_EVNTSEL_MASK			\
 	(P6_EVNTSEL_EVENT_MASK |	\
 	 P6_EVNTSEL_UNIT_MASK  |	\
 	 P6_EVNTSEL_EDGE_MASK  |	\
 	 P6_EVNTSEL_INV_MASK   |	\
-	 P6_EVNTSEL_COUNTER_MASK)
+	 P6_EVNTSEL_REG_MASK)
 
-	return event & P6_EVNTSEL_MASK;
+	return hw_event & P6_EVNTSEL_MASK;
 }
 
 
@@ -170,16 +170,16 @@ static const u64 intel_perfmon_event_map[] =
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
-static u64 intel_pmu_event_map(int event)
+static u64 intel_pmu_event_map(int hw_event)
 {
-	return intel_perfmon_event_map[event];
+	return intel_perfmon_event_map[hw_event];
 }
 
 /*
- * Generalized hw caching related event table, filled
+ * Generalized hw caching related hw_event table, filled
  * in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'event makes no sense on
- * this CPU', any other value means the raw event
+ * 'not supported', -1 means 'hw_event makes no sense on
+ * this CPU', any other value means the raw hw_event
  * ID.
  */
 
@@ -463,22 +463,22 @@ static const u64 atom_hw_cache_event_ids
  },
 };
 
-static u64 intel_pmu_raw_event(u64 event)
+static u64 intel_pmu_raw_event(u64 hw_event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
 #define CORE_EVNTSEL_UNIT_MASK		0x0000FF00ULL
 #define CORE_EVNTSEL_EDGE_MASK		0x00040000ULL
 #define CORE_EVNTSEL_INV_MASK		0x00800000ULL
-#define CORE_EVNTSEL_COUNTER_MASK	0xFF000000ULL
+#define CORE_EVNTSEL_REG_MASK	0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK		\
 	(CORE_EVNTSEL_EVENT_MASK |	\
 	 CORE_EVNTSEL_UNIT_MASK  |	\
 	 CORE_EVNTSEL_EDGE_MASK  |	\
 	 CORE_EVNTSEL_INV_MASK  |	\
-	 CORE_EVNTSEL_COUNTER_MASK)
+	 CORE_EVNTSEL_REG_MASK)
 
-	return event & CORE_EVNTSEL_MASK;
+	return hw_event & CORE_EVNTSEL_MASK;
 }
 
 static const u64 amd_hw_cache_event_ids
@@ -585,39 +585,39 @@ static const u64 amd_perfmon_event_map[] =
   [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
 };
 
-static u64 amd_pmu_event_map(int event)
+static u64 amd_pmu_event_map(int hw_event)
 {
-	return amd_perfmon_event_map[event];
+	return amd_perfmon_event_map[hw_event];
 }
 
-static u64 amd_pmu_raw_event(u64 event)
+static u64 amd_pmu_raw_event(u64 hw_event)
 {
 #define K7_EVNTSEL_EVENT_MASK	0x7000000FFULL
 #define K7_EVNTSEL_UNIT_MASK	0x00000FF00ULL
 #define K7_EVNTSEL_EDGE_MASK	0x000040000ULL
 #define K7_EVNTSEL_INV_MASK	0x000800000ULL
-#define K7_EVNTSEL_COUNTER_MASK	0x0FF000000ULL
+#define K7_EVNTSEL_REG_MASK	0x0FF000000ULL
 
 #define K7_EVNTSEL_MASK			\
 	(K7_EVNTSEL_EVENT_MASK |	\
 	 K7_EVNTSEL_UNIT_MASK  |	\
 	 K7_EVNTSEL_EDGE_MASK  |	\
 	 K7_EVNTSEL_INV_MASK   |	\
-	 K7_EVNTSEL_COUNTER_MASK)
+	 K7_EVNTSEL_REG_MASK)
 
-	return event & K7_EVNTSEL_MASK;
+	return hw_event & K7_EVNTSEL_MASK;
 }
 
 /*
- * Propagate counter elapsed time into the generic counter.
- * Can only be executed on the CPU where the counter is active.
+ * Propagate event elapsed time into the generic event.
+ * Can only be executed on the CPU where the event is active.
  * Returns the delta events processed.
  */
 static u64
-x86_perf_counter_update(struct perf_counter *counter,
-			struct hw_perf_counter *hwc, int idx)
+x86_perf_event_update(struct perf_event *event,
+			struct hw_perf_event *hwc, int idx)
 {
-	int shift = 64 - x86_pmu.counter_bits;
+	int shift = 64 - x86_pmu.event_bits;
 	u64 prev_raw_count, new_raw_count;
 	s64 delta;
 
@@ -625,15 +625,15 @@ x86_perf_counter_update(struct perf_counter *counter,
 		return 0;
 
 	/*
-	 * Careful: an NMI might modify the previous counter value.
+	 * Careful: an NMI might modify the previous event value.
 	 *
 	 * Our tactic to handle this is to first atomically read and
 	 * exchange a new raw count - then add that new-prev delta
-	 * count to the generic counter atomically:
+	 * count to the generic event atomically:
 	 */
 again:
 	prev_raw_count = atomic64_read(&hwc->prev_count);
-	rdmsrl(hwc->counter_base + idx, new_raw_count);
+	rdmsrl(hwc->event_base + idx, new_raw_count);
 
 	if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
@@ -642,7 +642,7 @@ again:
 	/*
 	 * Now we have the new raw value and have updated the prev
 	 * timestamp already. We can now calculate the elapsed delta
-	 * (counter-)time and add that to the generic counter.
+	 * (event-)time and add that to the generic event.
 	 *
 	 * Careful, not all hw sign-extends above the physical width
 	 * of the count.
@@ -650,13 +650,13 @@ again:
 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
 	delta >>= shift;
 
-	atomic64_add(delta, &counter->count);
+	atomic64_add(delta, &event->count);
 	atomic64_sub(delta, &hwc->period_left);
 
 	return new_raw_count;
 }
 
-static atomic_t active_counters;
+static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
 static bool reserve_pmc_hardware(void)
@@ -667,12 +667,12 @@ static bool reserve_pmc_hardware(void)
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		disable_lapic_nmi_watchdog();
 
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for (i = 0; i < x86_pmu.num_events; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for (i = 0; i < x86_pmu.num_events; i++) {
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
@@ -685,7 +685,7 @@ eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 
-	i = x86_pmu.num_counters;
+	i = x86_pmu.num_events;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
@@ -703,7 +703,7 @@ static void release_pmc_hardware(void)
 #ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
-	for (i = 0; i < x86_pmu.num_counters; i++) {
+	for (i = 0; i < x86_pmu.num_events; i++) {
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
@@ -720,7 +720,7 @@ static inline bool bts_available(void)
 
 static inline void init_debug_store_on_cpu(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 
 	if (!ds)
 		return;
@@ -732,7 +732,7 @@ static inline void init_debug_store_on_cpu(int cpu)
 
 static inline void fini_debug_store_on_cpu(int cpu)
 {
-	if (!per_cpu(cpu_hw_counters, cpu).ds)
+	if (!per_cpu(cpu_hw_events, cpu).ds)
 		return;
 
 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
@@ -751,12 +751,12 @@ static void release_bts_hardware(void)
 		fini_debug_store_on_cpu(cpu);
 
 	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
+		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 
 		if (!ds)
 			continue;
 
-		per_cpu(cpu_hw_counters, cpu).ds = NULL;
+		per_cpu(cpu_hw_events, cpu).ds = NULL;
 
 		kfree((void *)(unsigned long)ds->bts_buffer_base);
 		kfree(ds);
@@ -796,7 +796,7 @@ static int reserve_bts_hardware(void)
 		ds->bts_interrupt_threshold =
 			ds->bts_absolute_maximum - BTS_OVFL_TH;
 
-		per_cpu(cpu_hw_counters, cpu).ds = ds;
+		per_cpu(cpu_hw_events, cpu).ds = ds;
 		err = 0;
 	}
 
@@ -812,9 +812,9 @@ static int reserve_bts_hardware(void)
 	return err;
 }
 
-static void hw_perf_counter_destroy(struct perf_counter *counter)
+static void hw_perf_event_destroy(struct perf_event *event)
 {
-	if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
+	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
 		release_bts_hardware();
 		mutex_unlock(&pmc_reserve_mutex);
@@ -827,7 +827,7 @@ static inline int x86_pmu_initialized(void)
 }
 
 static inline int
-set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 {
 	unsigned int cache_type, cache_op, cache_result;
 	u64 config, val;
@@ -880,7 +880,7 @@ static void intel_pmu_enable_bts(u64 config)
 
 static void intel_pmu_disable_bts(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	unsigned long debugctlmsr;
 
 	if (!cpuc->ds)
@@ -898,10 +898,10 @@ static void intel_pmu_disable_bts(void)
 /*
  * Setup the hardware configuration for a given attr_type
  */
-static int __hw_perf_counter_init(struct perf_counter *counter)
+static int __hw_perf_event_init(struct perf_event *event)
 {
-	struct perf_counter_attr *attr = &counter->attr;
-	struct hw_perf_counter *hwc = &counter->hw;
+	struct perf_event_attr *attr = &event->attr;
+	struct hw_perf_event *hwc = &event->hw;
 	u64 config;
 	int err;
 
@@ -909,21 +909,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		return -ENODEV;
 
 	err = 0;
-	if (!atomic_inc_not_zero(&active_counters)) {
+	if (!atomic_inc_not_zero(&active_events)) {
 		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_counters) == 0) {
+		if (atomic_read(&active_events) == 0) {
 			if (!reserve_pmc_hardware())
 				err = -EBUSY;
 			else
 				err = reserve_bts_hardware();
 		}
 		if (!err)
-			atomic_inc(&active_counters);
+			atomic_inc(&active_events);
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 	if (err)
 		return err;
 
+	event->destroy = hw_perf_event_destroy;
+
 	/*
 	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
@@ -946,17 +948,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		/*
 		 * If we have a PMU initialized but no APIC
 		 * interrupts, we cannot sample hardware
-		 * counters (user-space has to fall back and
-		 * sample via a hrtimer based software counter):
+		 * events (user-space has to fall back and
+		 * sample via a hrtimer based software event):
 		 */
 		if (!x86_pmu.apic)
 			return -EOPNOTSUPP;
 	}
 
-	counter->destroy = hw_perf_counter_destroy;
-
 	/*
-	 * Raw event type provide the config in the event structure
+	 * Raw hw_event type provide the config in the hw_event structure
 	 */
 	if (attr->type == PERF_TYPE_RAW) {
 		hwc->config |= x86_pmu.raw_event(attr->config);
@@ -1001,7 +1001,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 
 static void p6_pmu_disable_all(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	u64 val;
 
 	if (!cpuc->enabled)
@@ -1018,7 +1018,7 @@ static void p6_pmu_disable_all(void)
 
 static void intel_pmu_disable_all(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 	if (!cpuc->enabled)
 		return;
@@ -1034,7 +1034,7 @@ static void intel_pmu_disable_all(void)
 
 static void amd_pmu_disable_all(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
 	if (!cpuc->enabled)
@@ -1043,12 +1043,12 @@ static void amd_pmu_disable_all(void)
 	cpuc->enabled = 0;
 	/*
 	 * ensure we write the disable before we start disabling the
-	 * counters proper, so that amd_pmu_enable_counter() does the
+	 * events proper, so that amd_pmu_enable_event() does the
 	 * right thing.
 	 */
 	barrier();
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -1070,7 +1070,7 @@ void hw_perf_disable(void)
 
 static void p6_pmu_enable_all(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	unsigned long val;
 
 	if (cpuc->enabled)
@@ -1087,7 +1087,7 @@ static void p6_pmu_enable_all(void)
 
 static void intel_pmu_enable_all(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 	if (cpuc->enabled)
 		return;
@@ -1098,19 +1098,19 @@ static void intel_pmu_enable_all(void)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
-		struct perf_counter *counter =
-			cpuc->counters[X86_PMC_IDX_FIXED_BTS];
+		struct perf_event *event =
+			cpuc->events[X86_PMC_IDX_FIXED_BTS];
 
-		if (WARN_ON_ONCE(!counter))
+		if (WARN_ON_ONCE(!event))
 			return;
 
-		intel_pmu_enable_bts(counter->hw.config);
+		intel_pmu_enable_bts(event->hw.config);
 	}
 }
 
 static void amd_pmu_enable_all(void)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
 	if (cpuc->enabled)
@@ -1119,14 +1119,14 @@ static void amd_pmu_enable_all(void)
 	cpuc->enabled = 1;
 	barrier();
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-		struct perf_counter *counter = cpuc->counters[idx];
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+		struct perf_event *event = cpuc->events[idx];
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
-		val = counter->hw.config;
+		val = event->hw.config;
 		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
 	}
@@ -1153,19 +1153,19 @@ static inline void intel_pmu_ack_status(u64 ack)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
-static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	(void)checking_wrmsrl(hwc->config_base + idx,
 			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
 }
 
-static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
 }
 
 static inline void
-intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
+intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
@@ -1178,10 +1178,10 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
 }
 
 static inline void
-p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	u64 val = P6_NOP_COUNTER;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	u64 val = P6_NOP_EVENT;
 
 	if (cpuc->enabled)
 		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1190,7 +1190,7 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
 }
 
 static inline void
-intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
 	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
 		intel_pmu_disable_bts();
@@ -1202,24 +1202,24 @@ intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
 		return;
 	}
 
-	x86_pmu_disable_counter(hwc, idx);
+	x86_pmu_disable_event(hwc, idx);
 }
 
 static inline void
-amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
 {
-	x86_pmu_disable_counter(hwc, idx);
+	x86_pmu_disable_event(hwc, idx);
 }
 
-static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]);
+static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
 
 /*
  * Set the next IRQ period, based on the hwc->period_left value.
- * To be called with the counter disabled in hw:
+ * To be called with the event disabled in hw:
  */
 static int
-x86_perf_counter_set_period(struct perf_counter *counter,
-			     struct hw_perf_counter *hwc, int idx)
+x86_perf_event_set_period(struct perf_event *event,
+			     struct hw_perf_event *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
@@ -1245,7 +1245,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 		ret = 1;
 	}
 	/*
-	 * Quirk: certain CPUs dont like it if just 1 event is left:
+	 * Quirk: certain CPUs dont like it if just 1 hw_event is left:
 	 */
 	if (unlikely(left < 2))
 		left = 2;
@@ -1253,24 +1253,24 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 	if (left > x86_pmu.max_period)
 		left = x86_pmu.max_period;
 
-	per_cpu(prev_left[idx], smp_processor_id()) = left;
+	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
 	/*
-	 * The hw counter starts counting from this counter offset,
+	 * The hw event starts counting from this event offset,
 	 * mark it to be able to extra future deltas:
 	 */
 	atomic64_set(&hwc->prev_count, (u64)-left);
 
-	err = checking_wrmsrl(hwc->counter_base + idx,
-			     (u64)(-left) & x86_pmu.counter_mask);
+	err = checking_wrmsrl(hwc->event_base + idx,
+			     (u64)(-left) & x86_pmu.event_mask);
 
-	perf_counter_update_userpage(counter);
+	perf_event_update_userpage(event);
 
 	return ret;
 }
 
 static inline void
-intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
+intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, bits, mask;
@@ -1295,9 +1295,9 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
 	err = checking_wrmsrl(hwc->config_base, ctrl_val);
 }
 
-static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	u64 val;
 
 	val = hwc->config;
@@ -1308,10 +1308,10 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 }
 
 
-static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
 	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		if (!__get_cpu_var(cpu_hw_counters).enabled)
+		if (!__get_cpu_var(cpu_hw_events).enabled)
 			return;
 
 		intel_pmu_enable_bts(hwc->config);
@@ -1323,134 +1323,134 @@ static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 		return;
 	}
 
-	x86_pmu_enable_counter(hwc, idx);
+	x86_pmu_enable_event(hwc, idx);
 }
 
-static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
 	if (cpuc->enabled)
-		x86_pmu_enable_counter(hwc, idx);
+		x86_pmu_enable_event(hwc, idx);
 }
 
 static int
-fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
+fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
 {
-	unsigned int event;
+	unsigned int hw_event;
 
-	event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+	hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
 
-	if (unlikely((event ==
+	if (unlikely((hw_event ==
 		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
 		     (hwc->sample_period == 1)))
 		return X86_PMC_IDX_FIXED_BTS;
 
-	if (!x86_pmu.num_counters_fixed)
+	if (!x86_pmu.num_events_fixed)
 		return -1;
 
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
 		return X86_PMC_IDX_FIXED_INSTRUCTIONS;
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
 		return X86_PMC_IDX_FIXED_CPU_CYCLES;
-	if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
+	if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
 		return X86_PMC_IDX_FIXED_BUS_CYCLES;
 
 	return -1;
 }
 
 /*
- * Find a PMC slot for the freshly enabled / scheduled in counter:
+ * Find a PMC slot for the freshly enabled / scheduled in event:
  */
-static int x86_pmu_enable(struct perf_counter *counter)
+static int x86_pmu_enable(struct perf_event *event)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	struct hw_perf_counter *hwc = &counter->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	int idx;
 
-	idx = fixed_mode_idx(counter, hwc);
+	idx = fixed_mode_idx(event, hwc);
 	if (idx == X86_PMC_IDX_FIXED_BTS) {
 		/* BTS is already occupied. */
 		if (test_and_set_bit(idx, cpuc->used_mask))
 			return -EAGAIN;
 
 		hwc->config_base	= 0;
-		hwc->counter_base	= 0;
+		hwc->event_base	= 0;
 		hwc->idx		= idx;
 	} else if (idx >= 0) {
 		/*
-		 * Try to get the fixed counter, if that is already taken
-		 * then try to get a generic counter:
+		 * Try to get the fixed event, if that is already taken
+		 * then try to get a generic event:
 		 */
 		if (test_and_set_bit(idx, cpuc->used_mask))
 			goto try_generic;
 
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
 		/*
-		 * We set it so that counter_base + idx in wrmsr/rdmsr maps to
+		 * We set it so that event_base + idx in wrmsr/rdmsr maps to
 		 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
 		 */
-		hwc->counter_base =
+		hwc->event_base =
 			MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
 		hwc->idx = idx;
 	} else {
 		idx = hwc->idx;
-		/* Try to get the previous generic counter again */
+		/* Try to get the previous generic event again */
 		if (test_and_set_bit(idx, cpuc->used_mask)) {
 try_generic:
 			idx = find_first_zero_bit(cpuc->used_mask,
-						  x86_pmu.num_counters);
-			if (idx == x86_pmu.num_counters)
+						  x86_pmu.num_events);
+			if (idx == x86_pmu.num_events)
 				return -EAGAIN;
 
 			set_bit(idx, cpuc->used_mask);
 			hwc->idx = idx;
 		}
 		hwc->config_base  = x86_pmu.eventsel;
-		hwc->counter_base = x86_pmu.perfctr;
+		hwc->event_base = x86_pmu.perfctr;
 	}
 
-	perf_counters_lapic_init();
+	perf_events_lapic_init();
 
 	x86_pmu.disable(hwc, idx);
 
-	cpuc->counters[idx] = counter;
+	cpuc->events[idx] = event;
 	set_bit(idx, cpuc->active_mask);
 
-	x86_perf_counter_set_period(counter, hwc, idx);
+	x86_perf_event_set_period(event, hwc, idx);
 	x86_pmu.enable(hwc, idx);
 
-	perf_counter_update_userpage(counter);
+	perf_event_update_userpage(event);
 
 	return 0;
 }
 
-static void x86_pmu_unthrottle(struct perf_counter *counter)
+static void x86_pmu_unthrottle(struct perf_event *event)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	struct hw_perf_counter *hwc = &counter->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 
 	if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
-				cpuc->counters[hwc->idx] != counter))
+				cpuc->events[hwc->idx] != event))
 		return;
 
 	x86_pmu.enable(hwc, hwc->idx);
 }
 
-void perf_counter_print_debug(void)
+void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
-	struct cpu_hw_counters *cpuc;
+	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
 
-	if (!x86_pmu.num_counters)
+	if (!x86_pmu.num_events)
 		return;
 
 	local_irq_save(flags);
 
 	cpu = smp_processor_id();
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
+	cpuc = &per_cpu(cpu_hw_events, cpu);
 
 	if (x86_pmu.version >= 2) {
 		rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
@@ -1466,11 +1466,11 @@ void perf_counter_print_debug(void)
 	}
 	pr_info("CPU#%d: used:       %016llx\n", cpu, *(u64 *)cpuc->used_mask);
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
 		rdmsrl(x86_pmu.perfctr  + idx, pmc_count);
 
-		prev_left = per_cpu(prev_left[idx], cpu);
+		prev_left = per_cpu(pmc_prev_left[idx], cpu);
 
 		pr_info("CPU#%d:   gen-PMC%d ctrl:  %016llx\n",
 			cpu, idx, pmc_ctrl);
@@ -1479,7 +1479,7 @@ void perf_counter_print_debug(void)
 		pr_info("CPU#%d:   gen-PMC%d left:  %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 
 		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
 	local_irq_restore(flags);
 }
 
-static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
-				       struct perf_sample_data *data)
+static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
 {
 	struct debug_store *ds = cpuc->ds;
 	struct bts_record {
@@ -1497,11 +1496,14 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
 		u64	to;
 		u64	flags;
 	};
-	struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
-	unsigned long orig_ip = data->regs->ip;
+	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
 	struct bts_record *at, *top;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	struct perf_sample_data data;
+	struct pt_regs regs;
 
-	if (!counter)
+	if (!event)
 		return;
 
 	if (!ds)
@@ -1510,26 +1512,45 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
 	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
 	top = (struct bts_record *)(unsigned long)ds->bts_index;
 
+	if (top <= at)
+		return;
+
 	ds->bts_index = ds->bts_buffer_base;
 
+
+	data.period	= event->hw.last_period;
+	data.addr	= 0;
+	regs.ip		= 0;
+
+	/*
+	 * Prepare a generic sample, i.e. fill in the invariant fields.
+	 * We will overwrite the from and to address before we output
+	 * the sample.
+	 */
+	perf_prepare_sample(&header, &data, event, &regs);
+
+	if (perf_output_begin(&handle, event,
+			      header.size * (top - at), 1, 1))
+		return;
+
 	for (; at < top; at++) {
-		data->regs->ip	= at->from;
-		data->addr	= at->to;
+		data.ip		= at->from;
+		data.addr	= at->to;
 
-		perf_counter_output(counter, 1, data);
+		perf_output_sample(&handle, &header, &data, event);
 	}
 
-	data->regs->ip	= orig_ip;
-	data->addr	= 0;
+	perf_output_end(&handle);
 
 	/* There's new data available. */
-	counter->pending_kill = POLL_IN;
+	event->hw.interrupts++;
+	event->pending_kill = POLL_IN;
 }
 
-static void x86_pmu_disable(struct perf_counter *counter)
+static void x86_pmu_disable(struct perf_event *event)
 {
-	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-	struct hw_perf_counter *hwc = &counter->hw;
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
 	/*
@@ -1541,67 +1562,63 @@ static void x86_pmu_disable(struct perf_counter *counter)
 
 	/*
 	 * Make sure the cleared pointer becomes visible before we
-	 * (potentially) free the counter:
+	 * (potentially) free the event:
 	 */
 	barrier();
 
 	/*
-	 * Drain the remaining delta count out of a counter
+	 * Drain the remaining delta count out of a event
 	 * that we are disabling:
 	 */
-	x86_perf_counter_update(counter, hwc, idx);
+	x86_perf_event_update(event, hwc, idx);
 
 	/* Drain the remaining BTS records. */
-	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
-		struct perf_sample_data data;
-		struct pt_regs regs;
+	if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
+		intel_pmu_drain_bts_buffer(cpuc);
 
-		data.regs = &regs;
-		intel_pmu_drain_bts_buffer(cpuc, &data);
-	}
-	cpuc->counters[idx] = NULL;
+	cpuc->events[idx] = NULL;
 	clear_bit(idx, cpuc->used_mask);
 
-	perf_counter_update_userpage(counter);
+	perf_event_update_userpage(event);
 }
 
 /*
- * Save and restart an expired counter. Called by NMI contexts,
- * so it has to be careful about preempting normal counter ops:
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
  */
-static int intel_pmu_save_and_restart(struct perf_counter *counter)
+static int intel_pmu_save_and_restart(struct perf_event *event)
 {
-	struct hw_perf_counter *hwc = &counter->hw;
+	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 	int ret;
 
-	x86_perf_counter_update(counter, hwc, idx);
-	ret = x86_perf_counter_set_period(counter, hwc, idx);
+	x86_perf_event_update(event, hwc, idx);
+	ret = x86_perf_event_set_period(event, hwc, idx);
 
-	if (counter->state == PERF_COUNTER_STATE_ACTIVE)
-		intel_pmu_enable_counter(hwc, idx);
+	if (event->state == PERF_EVENT_STATE_ACTIVE)
+		intel_pmu_enable_event(hwc, idx);
 
 	return ret;
 }
 
 static void intel_pmu_reset(void)
 {
-	struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds;
+	struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
 	unsigned long flags;
 	int idx;
 
-	if (!x86_pmu.num_counters)
+	if (!x86_pmu.num_events)
 		return;
 
 	local_irq_save(flags);
 
 	printk("clearing PMU state on CPU#%d\n", smp_processor_id());
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
 		checking_wrmsrl(x86_pmu.perfctr  + idx, 0ull);
 	}
-	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
 		checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
 	}
 	if (ds)
@@ -1613,39 +1630,38 @@ static void intel_pmu_reset(void)
 static int p6_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
-	struct cpu_hw_counters *cpuc;
-	struct perf_counter *counter;
-	struct hw_perf_counter *hwc;
+	struct cpu_hw_events *cpuc;
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
 	int idx, handled = 0;
 	u64 val;
 
-	data.regs = regs;
 	data.addr = 0;
 
-	cpuc = &__get_cpu_var(cpu_hw_counters);
+	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
-		counter = cpuc->counters[idx];
-		hwc = &counter->hw;
+		event = cpuc->events[idx];
+		hwc = &event->hw;
 
-		val = x86_perf_counter_update(counter, hwc, idx);
-		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+		val = x86_perf_event_update(event, hwc, idx);
+		if (val & (1ULL << (x86_pmu.event_bits - 1)))
 			continue;
 
 		/*
-		 * counter overflow
+		 * event overflow
 		 */
 		handled		= 1;
-		data.period	= counter->hw.last_period;
+		data.period	= event->hw.last_period;
 
-		if (!x86_perf_counter_set_period(counter, hwc, idx))
+		if (!x86_perf_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, &data))
-			p6_pmu_disable_counter(hwc, idx);
+		if (perf_event_overflow(event, 1, &data, regs))
+			p6_pmu_disable_event(hwc, idx);
 	}
 
 	if (handled)
@@ -1661,17 +1677,16 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
 static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
-	struct cpu_hw_counters *cpuc;
+	struct cpu_hw_events *cpuc;
 	int bit, loops;
 	u64 ack, status;
 
-	data.regs = regs;
 	data.addr = 0;
 
-	cpuc = &__get_cpu_var(cpu_hw_counters);
+	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	perf_disable();
-	intel_pmu_drain_bts_buffer(cpuc, &data);
+	intel_pmu_drain_bts_buffer(cpuc);
 	status = intel_pmu_get_status();
 	if (!status) {
 		perf_enable();
@@ -1681,8 +1696,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 	loops = 0;
 again:
 	if (++loops > 100) {
-		WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
-		perf_counter_print_debug();
+		WARN_ONCE(1, "perfevents: irq loop stuck!\n");
+		perf_event_print_debug();
 		intel_pmu_reset();
 		perf_enable();
 		return 1;
@@ -1691,19 +1706,19 @@ again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 	for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
-		struct perf_counter *counter = cpuc->counters[bit];
+		struct perf_event *event = cpuc->events[bit];
 
 		clear_bit(bit, (unsigned long *) &status);
 		if (!test_bit(bit, cpuc->active_mask))
 			continue;
 
-		if (!intel_pmu_save_and_restart(counter))
+		if (!intel_pmu_save_and_restart(event))
 			continue;
 
-		data.period = counter->hw.last_period;
+		data.period = event->hw.last_period;
 
-		if (perf_counter_overflow(counter, 1, &data))
-			intel_pmu_disable_counter(&counter->hw, bit);
+		if (perf_event_overflow(event, 1, &data, regs))
+			intel_pmu_disable_event(&event->hw, bit);
 	}
 
 	intel_pmu_ack_status(ack);
@@ -1723,39 +1738,38 @@ again:
 static int amd_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
-	struct cpu_hw_counters *cpuc;
-	struct perf_counter *counter;
-	struct hw_perf_counter *hwc;
+	struct cpu_hw_events *cpuc;
+	struct perf_event *event;
+	struct hw_perf_event *hwc;
 	int idx, handled = 0;
 	u64 val;
 
-	data.regs = regs;
 	data.addr = 0;
 
-	cpuc = &__get_cpu_var(cpu_hw_counters);
+	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
-		counter = cpuc->counters[idx];
-		hwc = &counter->hw;
+		event = cpuc->events[idx];
+		hwc = &event->hw;
 
-		val = x86_perf_counter_update(counter, hwc, idx);
-		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+		val = x86_perf_event_update(event, hwc, idx);
+		if (val & (1ULL << (x86_pmu.event_bits - 1)))
 			continue;
 
 		/*
-		 * counter overflow
+		 * event overflow
 		 */
 		handled		= 1;
-		data.period	= counter->hw.last_period;
+		data.period	= event->hw.last_period;
 
-		if (!x86_perf_counter_set_period(counter, hwc, idx))
+		if (!x86_perf_event_set_period(event, hwc, idx))
 			continue;
 
-		if (perf_counter_overflow(counter, 1, &data))
-			amd_pmu_disable_counter(hwc, idx);
+		if (perf_event_overflow(event, 1, &data, regs))
+			amd_pmu_disable_event(hwc, idx);
 	}
 
 	if (handled)
@@ -1769,18 +1783,18 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
 	irq_enter();
 	ack_APIC_irq();
 	inc_irq_stat(apic_pending_irqs);
-	perf_counter_do_pending();
+	perf_event_do_pending();
 	irq_exit();
 }
 
-void set_perf_counter_pending(void)
+void set_perf_event_pending(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
 	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
 #endif
 }
 
-void perf_counters_lapic_init(void)
+void perf_events_lapic_init(void)
 {
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1794,13 +1808,13 @@ void perf_counters_lapic_init(void)
 }
 
 static int __kprobes
-perf_counter_nmi_handler(struct notifier_block *self,
+perf_event_nmi_handler(struct notifier_block *self,
 			 unsigned long cmd, void *__args)
 {
 	struct die_args *args = __args;
 	struct pt_regs *regs;
 
-	if (!atomic_read(&active_counters))
+	if (!atomic_read(&active_events))
 		return NOTIFY_DONE;
 
 	switch (cmd) {
@@ -1819,7 +1833,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
 #endif
 	/*
 	 * Can't rely on the handled return value to say it was our NMI, two
-	 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
+	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
 	 *
 	 * If the first NMI handles both, the latter will be empty and daze
 	 * the CPU.
@@ -1829,8 +1843,8 @@ perf_counter_nmi_handler(struct notifier_block *self,
 	return NOTIFY_STOP;
 }
 
-static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
-	.notifier_call		= perf_counter_nmi_handler,
+static __read_mostly struct notifier_block perf_event_nmi_notifier = {
+	.notifier_call		= perf_event_nmi_handler,
 	.next			= NULL,
 	.priority		= 1
 };
@@ -1840,8 +1854,8 @@ static struct x86_pmu p6_pmu = {
 	.handle_irq		= p6_pmu_handle_irq,
 	.disable_all		= p6_pmu_disable_all,
 	.enable_all		= p6_pmu_enable_all,
-	.enable			= p6_pmu_enable_counter,
-	.disable		= p6_pmu_disable_counter,
+	.enable			= p6_pmu_enable_event,
+	.disable		= p6_pmu_disable_event,
 	.eventsel		= MSR_P6_EVNTSEL0,
 	.perfctr		= MSR_P6_PERFCTR0,
 	.event_map		= p6_pmu_event_map,
@@ -1850,16 +1864,16 @@ static struct x86_pmu p6_pmu = {
 	.apic			= 1,
 	.max_period		= (1ULL << 31) - 1,
 	.version		= 0,
-	.num_counters		= 2,
+	.num_events		= 2,
 	/*
-	 * Counters have 40 bits implemented. However they are designed such
+	 * Events have 40 bits implemented. However they are designed such
 	 * that bits [32-39] are sign extensions of bit 31. As such the
-	 * effective width of a counter for P6-like PMU is 32 bits only.
+	 * effective width of a event for P6-like PMU is 32 bits only.
 	 *
 	 * See IA-32 Intel Architecture Software developer manual Vol 3B
 	 */
-	.counter_bits		= 32,
-	.counter_mask		= (1ULL << 32) - 1,
+	.event_bits		= 32,
+	.event_mask		= (1ULL << 32) - 1,
 };
 
 static struct x86_pmu intel_pmu = {
@@ -1867,8 +1881,8 @@ static struct x86_pmu intel_pmu = {
 	.handle_irq		= intel_pmu_handle_irq,
 	.disable_all		= intel_pmu_disable_all,
 	.enable_all		= intel_pmu_enable_all,
-	.enable			= intel_pmu_enable_counter,
-	.disable		= intel_pmu_disable_counter,
+	.enable			= intel_pmu_enable_event,
+	.disable		= intel_pmu_disable_event,
 	.eventsel		= MSR_ARCH_PERFMON_EVENTSEL0,
 	.perfctr		= MSR_ARCH_PERFMON_PERFCTR0,
 	.event_map		= intel_pmu_event_map,
@@ -1878,7 +1892,7 @@ static struct x86_pmu intel_pmu = {
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
-	 * the generic counter period:
+	 * the generic event period:
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.enable_bts		= intel_pmu_enable_bts,
@@ -1890,16 +1904,16 @@ static struct x86_pmu amd_pmu = {
 	.handle_irq		= amd_pmu_handle_irq,
 	.disable_all		= amd_pmu_disable_all,
 	.enable_all		= amd_pmu_enable_all,
-	.enable			= amd_pmu_enable_counter,
-	.disable		= amd_pmu_disable_counter,
+	.enable			= amd_pmu_enable_event,
+	.disable		= amd_pmu_disable_event,
 	.eventsel		= MSR_K7_EVNTSEL0,
 	.perfctr		= MSR_K7_PERFCTR0,
 	.event_map		= amd_pmu_event_map,
 	.raw_event		= amd_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
-	.num_counters		= 4,
-	.counter_bits		= 48,
-	.counter_mask		= (1ULL << 48) - 1,
+	.num_events		= 4,
+	.event_bits		= 48,
+	.event_mask		= (1ULL << 48) - 1,
 	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
@@ -1956,7 +1970,7 @@ static int intel_pmu_init(void)
 
 	/*
 	 * Check whether the Architectural PerfMon supports
-	 * Branch Misses Retired Event or not.
+	 * Branch Misses Retired hw_event or not.
 	 */
 	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
 	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
@@ -1968,15 +1982,15 @@ static int intel_pmu_init(void)
 
 	x86_pmu				= intel_pmu;
 	x86_pmu.version			= version;
-	x86_pmu.num_counters		= eax.split.num_counters;
-	x86_pmu.counter_bits		= eax.split.bit_width;
-	x86_pmu.counter_mask		= (1ULL << eax.split.bit_width) - 1;
+	x86_pmu.num_events		= eax.split.num_events;
+	x86_pmu.event_bits		= eax.split.bit_width;
+	x86_pmu.event_mask		= (1ULL << eax.split.bit_width) - 1;
 
 	/*
-	 * Quirk: v2 perfmon does not report fixed-purpose counters, so
-	 * assume at least 3 counters:
+	 * Quirk: v2 perfmon does not report fixed-purpose events, so
+	 * assume at least 3 events:
 	 */
-	x86_pmu.num_counters_fixed	= max((int)edx.split.num_counters_fixed, 3);
+	x86_pmu.num_events_fixed	= max((int)edx.split.num_events_fixed, 3);
 
 	/*
 	 * Install the hw-cache-events table:
@@ -2023,11 +2037,11 @@ static int amd_pmu_init(void)
 	return 0;
 }
 
-void __init init_hw_perf_counters(void)
+void __init init_hw_perf_events(void)
 {
 	int err;
 
-	pr_info("Performance Counters: ");
+	pr_info("Performance Events: ");
 
 	switch (boot_cpu_data.x86_vendor) {
 	case X86_VENDOR_INTEL:
@@ -2040,45 +2054,45 @@ void __init init_hw_perf_counters(void)
 		return;
 	}
 	if (err != 0) {
-		pr_cont("no PMU driver, software counters only.\n");
+		pr_cont("no PMU driver, software events only.\n");
 		return;
 	}
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
-		WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
-		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
-		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
+	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
+		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
 	}
-	perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
-	perf_max_counters = x86_pmu.num_counters;
+	perf_event_mask = (1 << x86_pmu.num_events) - 1;
+	perf_max_events = x86_pmu.num_events;
 
-	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
-		WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
-		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
-		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
+	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
+		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
 	}
 
-	perf_counter_mask |=
-		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
-	x86_pmu.intel_ctrl = perf_counter_mask;
+	perf_event_mask |=
+		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
+	x86_pmu.intel_ctrl = perf_event_mask;
 
-	perf_counters_lapic_init();
-	register_die_notifier(&perf_counter_nmi_notifier);
+	perf_events_lapic_init();
+	register_die_notifier(&perf_event_nmi_notifier);
 
-	pr_info("... version:                 %d\n",     x86_pmu.version);
-	pr_info("... bit width:               %d\n",     x86_pmu.counter_bits);
-	pr_info("... generic counters:        %d\n",     x86_pmu.num_counters);
-	pr_info("... value mask:              %016Lx\n", x86_pmu.counter_mask);
-	pr_info("... max period:              %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose counters:  %d\n",     x86_pmu.num_counters_fixed);
-	pr_info("... counter mask:            %016Lx\n", perf_counter_mask);
+	pr_info("... version:                %d\n",     x86_pmu.version);
+	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
+	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
+	pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
+	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
+	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
+	pr_info("... event mask:             %016Lx\n", perf_event_mask);
 }
 
-static inline void x86_pmu_read(struct perf_counter *counter)
+static inline void x86_pmu_read(struct perf_event *event)
 {
-	x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+	x86_perf_event_update(event, &event->hw, event->hw.idx);
 }
 
 static const struct pmu pmu = {
@@ -2088,13 +2102,16 @@ static const struct pmu pmu = {
 	.unthrottle	= x86_pmu_unthrottle,
 };
 
-const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_event_init(struct perf_event *event)
 {
 	int err;
 
-	err = __hw_perf_counter_init(counter);
-	if (err)
+	err = __hw_perf_event_init(event);
+	if (err) {
+		if (event->destroy)
+			event->destroy(event);
 		return ERR_PTR(err);
+	}
 
 	return &pmu;
 }
@@ -2110,8 +2127,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 		entry->ip[entry->nr++] = ip;
 }
 
-static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
+static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
 static DEFINE_PER_CPU(int, in_nmi_frame);
 
 
@@ -2264,9 +2281,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	struct perf_callchain_entry *entry;
 
 	if (in_nmi())
-		entry = &__get_cpu_var(nmi_entry);
+		entry = &__get_cpu_var(pmc_nmi_entry);
 	else
-		entry = &__get_cpu_var(irq_entry);
+		entry = &__get_cpu_var(pmc_irq_entry);
 
 	entry->nr = 0;
 
@@ -2275,7 +2292,7 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 	return entry;
 }
 
-void hw_perf_counter_setup_online(int cpu)
+void hw_perf_event_setup_online(int cpu)
 {
 	init_debug_store_on_cpu(cpu);
 }
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 392bea4..fab786f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -20,7 +20,7 @@
 #include <linux/kprobes.h>
 
 #include <asm/apic.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 
 struct nmi_watchdog_ctlblk {
 	unsigned int cccr_msr;
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
new file mode 100644
index 0000000..a640ae5
--- /dev/null
+++ b/arch/x86/kernel/cpu/sched.c
@@ -0,0 +1,55 @@
+#include <linux/sched.h>
+#include <linux/math64.h>
+#include <linux/percpu.h>
+#include <linux/irqflags.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+#ifdef CONFIG_SMP
+
+static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched);
+
+static unsigned long scale_aperfmperf(void)
+{
+	struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched);
+	unsigned long ratio, flags;
+
+	local_irq_save(flags);
+	get_aperfmperf(&val);
+	local_irq_restore(flags);
+
+	ratio = calc_aperfmperf_ratio(old, &val);
+	*old = val;
+
+	return ratio;
+}
+
+unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+	/*
+	 * do aperf/mperf on the cpu level because it includes things
+	 * like turbo mode, which are relevant to full cores.
+	 */
+	if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+		return scale_aperfmperf();
+
+	/*
+	 * maybe have something cpufreq here
+	 */
+
+	return default_scale_freq_power(sd, cpu);
+}
+
+unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
+{
+	/*
+	 * aperf/mperf already includes the smt gain
+	 */
+	if (boot_cpu_has(X86_FEATURE_APERFMPERF))
+		return SCHED_LOAD_SCALE;
+
+	return default_scale_smt_power(sd, cpu);
+}
+
+#endif
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index bc24f51..1cbed97 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -24,6 +24,7 @@
 #include <linux/dmi.h>
 #include <asm/div64.h>
 #include <asm/vmware.h>
+#include <asm/x86_init.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
 #define VMWARE_HYPERVISOR_MAGIC	0x564D5868
@@ -47,21 +48,35 @@ static inline int __vmware_platform(void)
 	return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
 }
 
-static unsigned long __vmware_get_tsc_khz(void)
+static unsigned long vmware_get_tsc_khz(void)
 {
 	uint64_t tsc_hz;
 	uint32_t eax, ebx, ecx, edx;
 
 	VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
 
-	if (ebx == UINT_MAX)
-		return 0;
 	tsc_hz = eax | (((uint64_t)ebx) << 32);
 	do_div(tsc_hz, 1000);
 	BUG_ON(tsc_hz >> 32);
+	printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+			 (unsigned long) tsc_hz / 1000,
+			 (unsigned long) tsc_hz % 1000);
 	return tsc_hz;
 }
 
+void __init vmware_platform_setup(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
+
+	if (ebx != UINT_MAX)
+		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
+	else
+		printk(KERN_WARNING
+		       "Failed to get TSC freq from the hypervisor\n");
+}
+
 /*
  * While checking the dmi string infomation, just checking the product
  * serial key should be enough, as this will always have a VMware
@@ -87,12 +102,6 @@ int vmware_platform(void)
 	return 0;
 }
 
-unsigned long vmware_get_tsc_khz(void)
-{
-	BUG_ON(!vmware_platform());
-	return __vmware_get_tsc_khz();
-}
-
 /*
  * VMware hypervisor takes care of exporting a reliable TSC to the guest.
  * Still, due to timing difference when running on virtual cpus, the TSC can
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index b07af88..6a52d4b 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -182,7 +182,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
 	.notifier_call = cpuid_class_cpu_callback,
 };
 
-static char *cpuid_nodename(struct device *dev)
+static char *cpuid_devnode(struct device *dev, mode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
 }
@@ -203,7 +203,7 @@ static int __init cpuid_init(void)
 		err = PTR_ERR(cpuid_class);
 		goto out_chrdev;
 	}
-	cpuid_class->nodename = cpuid_nodename;
+	cpuid_class->devnode = cpuid_devnode;
 	for_each_online_cpu(i) {
 		err = cpuid_device_create(i);
 		if (err != 0)
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 147005a..85419bb 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1331,7 +1331,7 @@ void __init e820_reserve_resources(void)
 	struct resource *res;
 	u64 end;
 
-	res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+	res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
 	e820_res = res;
 	for (i = 0; i < e820.nr_map; i++) {
 		end = e820.map[i].addr + e820.map[i].size - 1;
@@ -1455,28 +1455,11 @@ char *__init default_machine_specific_memory_setup(void)
 	return who;
 }
 
-char *__init __attribute__((weak)) machine_specific_memory_setup(void)
-{
-	if (x86_quirks->arch_memory_setup) {
-		char *who = x86_quirks->arch_memory_setup();
-
-		if (who)
-			return who;
-	}
-	return default_machine_specific_memory_setup();
-}
-
-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
-char * __init __attribute__((weak)) memory_setup(void)
-{
-	return machine_specific_memory_setup();
-}
-
 void __init setup_memory_map(void)
 {
 	char *who;
 
-	who = memory_setup();
+	who = x86_init.resources.memory_setup();
 	memcpy(&e820_saved, &e820, sizeof(struct e820map));
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
 	e820_print_map(who);
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 335f049..b11cab3 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -624,7 +624,7 @@ try_next_port:
 		return -1;
 	}
 
-	loop = 10;
+	loop = 100000;
 	/* Reset the EHCI controller */
 	cmd = readl(&ehci_regs->command);
 	cmd |= CMD_RESET;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index fe26ba3..ad5bd98 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -42,6 +42,7 @@
 #include <asm/time.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/x86_init.h>
 
 #define EFI_DEBUG	1
 #define PFX 		"EFI: "
@@ -453,6 +454,9 @@ void __init efi_init(void)
 	if (add_efi_memmap)
 		do_add_efi_memmap();
 
+	x86_platform.get_wallclock = efi_get_time;
+	x86_platform.set_wallclock = efi_set_rtc_mmss;
+
 	/* Setup for EFI runtime service */
 	reboot_type = BOOT_EFI;
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c251be7..681c3fd 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -146,7 +146,7 @@ ENTRY(ftrace_graph_caller)
 END(ftrace_graph_caller)
 
 GLOBAL(return_to_handler)
-	subq  $80, %rsp
+	subq  $24, %rsp
 
 	/* Save the return values */
 	movq %rax, (%rsp)
@@ -155,10 +155,10 @@ GLOBAL(return_to_handler)
 
 	call ftrace_return_to_handler
 
-	movq %rax, 72(%rsp)
+	movq %rax, 16(%rsp)
 	movq 8(%rsp), %rdx
 	movq (%rsp), %rax
-	addq $72, %rsp
+	addq $16, %rsp
 	retq
 #endif
 
@@ -1021,7 +1021,7 @@ apicinterrupt ERROR_APIC_VECTOR \
 apicinterrupt SPURIOUS_APIC_VECTOR \
 	spurious_interrupt smp_spurious_interrupt
 
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
 apicinterrupt LOCAL_PENDING_VECTOR \
 	perf_pending_interrupt smp_perf_pending_interrupt
 #endif
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 3f8579f..4f8e250 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -11,8 +11,21 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/e820.h>
-#include <asm/bios_ebda.h>
+#include <asm/page.h>
 #include <asm/trampoline.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/bios_ebda.h>
+
+static void __init i386_default_early_setup(void)
+{
+	/* Initilize 32bit specific setup functions */
+	x86_init.resources.probe_roms = probe_roms;
+	x86_init.resources.reserve_resources = i386_reserve_resources;
+	x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
+
+	reserve_ebda_region();
+}
 
 void __init i386_start_kernel(void)
 {
@@ -29,7 +42,16 @@ void __init i386_start_kernel(void)
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 #endif
-	reserve_ebda_region();
+
+	/* Call the subarch specific early setup function */
+	switch (boot_params.hdr.hardware_subarch) {
+	case X86_SUBARCH_MRST:
+		x86_mrst_early_setup();
+		break;
+	default:
+		i386_default_early_setup();
+		break;
+	}
 
 	/*
 	 * At this point everything still needed from the boot loader
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 70eaa85..0b06cd7 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -23,8 +23,8 @@
 #include <asm/sections.h>
 #include <asm/kdebug.h>
 #include <asm/e820.h>
-#include <asm/bios_ebda.h>
 #include <asm/trampoline.h>
+#include <asm/bios_ebda.h>
 
 static void __init zap_identity_mappings(void)
 {
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 7ffec6b..b766e8c 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -157,6 +157,7 @@ subarch_entries:
 	.long default_entry		/* normal x86/PC */
 	.long lguest_entry		/* lguest hypervisor */
 	.long xen_entry			/* Xen hypervisor */
+	.long default_entry		/* Moorestown MID */
 num_subarch_entries = (. - subarch_entries) / 4
 .previous
 #endif /* CONFIG_PARAVIRT */
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c
index 5cf36c0..23c1679 100644
--- a/arch/x86/kernel/i8253.c
+++ b/arch/x86/kernel/i8253.c
@@ -19,12 +19,6 @@
 DEFINE_SPINLOCK(i8253_lock);
 EXPORT_SYMBOL(i8253_lock);
 
-#ifdef CONFIG_X86_32
-static void pit_disable_clocksource(void);
-#else
-static inline void pit_disable_clocksource(void) { }
-#endif
-
 /*
  * HPET replaces the PIT, when enabled. So we need to know, which of
  * the two timers is used
@@ -57,12 +51,10 @@ static void init_pit_timer(enum clock_event_mode mode,
 			outb_pit(0, PIT_CH0);
 			outb_pit(0, PIT_CH0);
 		}
-		pit_disable_clocksource();
 		break;
 
 	case CLOCK_EVT_MODE_ONESHOT:
 		/* One shot setup */
-		pit_disable_clocksource();
 		outb_pit(0x38, PIT_MODE);
 		break;
 
@@ -200,17 +192,6 @@ static struct clocksource pit_cs = {
 	.shift		= 20,
 };
 
-static void pit_disable_clocksource(void)
-{
-	/*
-	 * Use mult to check whether it is registered or not
-	 */
-	if (pit_cs.mult) {
-		clocksource_unregister(&pit_cs);
-		pit_cs.mult = 0;
-	}
-}
-
 static int __init init_pit_clocksource(void)
 {
 	 /*
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index b0cdde6..74656d1 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
 	seq_printf(p, "  Threshold APIC interrupts\n");
 # endif
 #endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 	seq_printf(p, "%*s: ", prec, "MCE");
 	for_each_online_cpu(j)
 		seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += irq_stats(cpu)->irq_threshold_count;
 # endif
 #endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 	sum += per_cpu(mce_exception_count, cpu);
 	sum += per_cpu(mce_poll_count, cpu);
 #endif
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 92b7703..40f3077 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -116,7 +116,7 @@ int vector_used_by_percpu_irq(unsigned int vector)
 	return 0;
 }
 
-static void __init init_ISA_irqs(void)
+void __init init_ISA_irqs(void)
 {
 	int i;
 
@@ -140,8 +140,10 @@ static void __init init_ISA_irqs(void)
 	}
 }
 
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
+void __init init_IRQ(void)
+{
+	x86_init.irqs.intr_init();
+}
 
 static void __init smp_intr_init(void)
 {
@@ -190,7 +192,7 @@ static void __init apic_intr_init(void)
 #ifdef CONFIG_X86_MCE_THRESHOLD
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
-#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC)
+#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
 	alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
 #endif
 
@@ -206,39 +208,19 @@ static void __init apic_intr_init(void)
 	alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
 
 	/* Performance monitoring interrupts: */
-# ifdef CONFIG_PERF_COUNTERS
+# ifdef CONFIG_PERF_EVENTS
 	alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
 # endif
 
 #endif
 }
 
-/**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *	Perform any necessary interrupt initialisation prior to setting up
- *	the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *	interrupts should be initialised here if the machine emulates a PC
- *	in any way.
- **/
-static void __init x86_quirk_pre_intr_init(void)
-{
-#ifdef CONFIG_X86_32
-	if (x86_quirks->arch_pre_intr_init) {
-		if (x86_quirks->arch_pre_intr_init())
-			return;
-	}
-#endif
-	init_ISA_irqs();
-}
-
 void __init native_init_IRQ(void)
 {
 	int i;
 
 	/* Execute any quirks before the call gates are initialised: */
-	x86_quirk_pre_intr_init();
+	x86_init.irqs.pre_vector_init();
 
 	apic_intr_init();
 
@@ -258,12 +240,6 @@ void __init native_init_IRQ(void)
 
 #ifdef CONFIG_X86_32
 	/*
-	 * Call quirks after call gates are initialised (usually add in
-	 * the architecture specific gates):
-	 */
-	x86_quirk_intr_init();
-
-	/*
 	 * External FPU? Set up irq13 if so, for
 	 * original braindamaged IBM FERR coupling.
 	 */
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index e5efcdc..feaeb0d 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -22,6 +22,8 @@
 #include <asm/msr.h>
 #include <asm/apic.h>
 #include <linux/percpu.h>
+
+#include <asm/x86_init.h>
 #include <asm/reboot.h>
 
 #define KVM_SCALE 22
@@ -182,12 +184,13 @@ void __init kvmclock_init(void)
 	if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
 		if (kvm_register_clock("boot clock"))
 			return;
-		pv_time_ops.get_wallclock = kvm_get_wallclock;
-		pv_time_ops.set_wallclock = kvm_set_wallclock;
 		pv_time_ops.sched_clock = kvm_clock_read;
-		pv_time_ops.get_tsc_khz = kvm_get_tsc_khz;
+		x86_platform.calibrate_tsc = kvm_get_tsc_khz;
+		x86_platform.get_wallclock = kvm_get_wallclock;
+		x86_platform.set_wallclock = kvm_set_wallclock;
 #ifdef CONFIG_X86_LOCAL_APIC
-		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
+		x86_cpuinit.setup_percpu_clockev =
+			kvm_setup_secondary_clock;
 #endif
 #ifdef CONFIG_SMP
 		smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9371448..378e9a8 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -210,8 +210,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
 {
 	ssize_t ret = -EINVAL;
 
-	if ((len >> PAGE_SHIFT) > num_physpages) {
-		pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+	if ((len >> PAGE_SHIFT) > totalram_pages) {
+		pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
 		return ret;
 	}
 
@@ -236,7 +236,7 @@ static const struct file_operations microcode_fops = {
 static struct miscdevice microcode_dev = {
 	.minor			= MICROCODE_MINOR,
 	.name			= "microcode",
-	.devnode		= "cpu/microcode",
+	.nodename		= "cpu/microcode",
 	.fops			= &microcode_fops,
 };
 
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index fcd513b..5be95ef 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -45,6 +45,11 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 	return sum & 0xFF;
 }
 
+int __init default_mpc_apic_id(struct mpc_cpu *m)
+{
+	return m->apicid;
+}
+
 static void __init MP_processor_info(struct mpc_cpu *m)
 {
 	int apicid;
@@ -55,10 +60,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 		return;
 	}
 
-	if (x86_quirks->mpc_apic_id)
-		apicid = x86_quirks->mpc_apic_id(m);
-	else
-		apicid = m->apicid;
+	apicid = x86_init.mpparse.mpc_apic_id(m);
 
 	if (m->cpuflag & CPU_BOOTPROCESSOR) {
 		bootup_cpu = " (Bootup-CPU)";
@@ -70,16 +72,18 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 }
 
 #ifdef CONFIG_X86_IO_APIC
-static void __init MP_bus_info(struct mpc_bus *m)
+void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str)
 {
-	char str[7];
 	memcpy(str, m->bustype, 6);
 	str[6] = 0;
+	apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+}
 
-	if (x86_quirks->mpc_oem_bus_info)
-		x86_quirks->mpc_oem_bus_info(m, str);
-	else
-		apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+static void __init MP_bus_info(struct mpc_bus *m)
+{
+	char str[7];
+
+	x86_init.mpparse.mpc_oem_bus_info(m, str);
 
 #if MAX_MP_BUSSES < 256
 	if (m->busid >= MAX_MP_BUSSES) {
@@ -96,8 +100,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
 		mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
-		if (x86_quirks->mpc_oem_pci_bus)
-			x86_quirks->mpc_oem_pci_bus(m);
+		if (x86_init.mpparse.mpc_oem_pci_bus)
+			x86_init.mpparse.mpc_oem_pci_bus(m);
 
 		clear_bit(m->busid, mp_bus_not_pci);
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
@@ -291,6 +295,8 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
 			1, mpc, mpc->length, 1);
 }
 
+void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
+
 static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 {
 	char str[16];
@@ -312,16 +318,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 	if (early)
 		return 1;
 
-	if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
-		struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
-		x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
-	}
+	if (mpc->oemptr)
+		x86_init.mpparse.smp_read_mpc_oem(mpc);
 
 	/*
 	 *      Now process the configuration blocks.
 	 */
-	if (x86_quirks->mpc_record)
-		*x86_quirks->mpc_record = 0;
+	x86_init.mpparse.mpc_record(0);
 
 	while (count < mpc->length) {
 		switch (*mpt) {
@@ -353,8 +356,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
 			count = mpc->length;
 			break;
 		}
-		if (x86_quirks->mpc_record)
-			(*x86_quirks->mpc_record)++;
+		x86_init.mpparse.mpc_record(1);
 	}
 
 #ifdef CONFIG_X86_BIGSMP
@@ -608,7 +610,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
 /*
  * Scan the memory blocks for an SMP configuration block.
  */
-static void __init __get_smp_config(unsigned int early)
+void __init default_get_smp_config(unsigned int early)
 {
 	struct mpf_intel *mpf = mpf_found;
 
@@ -625,11 +627,6 @@ static void __init __get_smp_config(unsigned int early)
 	if (acpi_lapic && acpi_ioapic)
 		return;
 
-	if (x86_quirks->mach_get_smp_config) {
-		if (x86_quirks->mach_get_smp_config(early))
-			return;
-	}
-
 	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
 	       mpf->specification);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
@@ -670,16 +667,6 @@ static void __init __get_smp_config(unsigned int early)
 	 */
 }
 
-void __init early_get_smp_config(void)
-{
-	__get_smp_config(1);
-}
-
-void __init get_smp_config(void)
-{
-	__get_smp_config(0);
-}
-
 static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
 {
 	unsigned long size = get_mpc_size(mpf->physptr);
@@ -745,14 +732,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
 	return 0;
 }
 
-static void __init __find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(unsigned int reserve)
 {
 	unsigned int address;
 
-	if (x86_quirks->mach_find_smp_config) {
-		if (x86_quirks->mach_find_smp_config(reserve))
-			return;
-	}
 	/*
 	 * FIXME: Linux assumes you have 640K of base ram..
 	 * this continues the error...
@@ -787,16 +770,6 @@ static void __init __find_smp_config(unsigned int reserve)
 		smp_scan_config(address, 0x400, reserve);
 }
 
-void __init early_find_smp_config(void)
-{
-	__find_smp_config(0);
-}
-
-void __init find_smp_config(void)
-{
-	__find_smp_config(1);
-}
-
 #ifdef CONFIG_X86_IO_APIC
 static u8 __initdata irq_used[MAX_IRQ_SOURCES];
 
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
new file mode 100644
index 0000000..3b7078a
--- /dev/null
+++ b/arch/x86/kernel/mrst.c
@@ -0,0 +1,24 @@
+/*
+ * mrst.c: Intel Moorestown platform specific setup code
+ *
+ * (C) Copyright 2008 Intel Corporation
+ * Author: Jacob Pan (jacob.jun.pan@intel.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+#include <linux/init.h>
+
+#include <asm/setup.h>
+
+/*
+ * Moorestown specific x86_init function overrides and early setup
+ * calls.
+ */
+void __init x86_mrst_early_setup(void)
+{
+	x86_init.resources.probe_roms = x86_init_noop;
+	x86_init.resources.reserve_resources = x86_init_noop;
+}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7dd9500..6a3cefc 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -241,7 +241,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
 	.notifier_call = msr_class_cpu_callback,
 };
 
-static char *msr_nodename(struct device *dev)
+static char *msr_devnode(struct device *dev, mode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
 }
@@ -262,7 +262,7 @@ static int __init msr_init(void)
 		err = PTR_ERR(msr_class);
 		goto out_chrdev;
 	}
-	msr_class->nodename = msr_nodename;
+	msr_class->devnode = msr_devnode;
 	for_each_online_cpu(i) {
 		err = msr_device_create(i);
 		if (err != 0)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f5b0b4a..1b1739d 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -54,17 +54,12 @@ u64 _paravirt_ident_64(u64 x)
 	return x;
 }
 
-static void __init default_banner(void)
+void __init default_banner(void)
 {
 	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
 	       pv_info.name);
 }
 
-char *memory_setup(void)
-{
-	return pv_init_ops.memory_setup();
-}
-
 /* Simple instruction patching code. */
 #define DEF_NATIVE(ops, name, code)					\
 	extern const char start_##ops##_##name[], end_##ops##_##name[];	\
@@ -188,11 +183,6 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 	return insn_len;
 }
 
-void init_IRQ(void)
-{
-	pv_irq_ops.init_IRQ();
-}
-
 static void native_flush_tlb(void)
 {
 	__native_flush_tlb();
@@ -218,13 +208,6 @@ extern void native_irq_enable_sysexit(void);
 extern void native_usergs_sysret32(void);
 extern void native_usergs_sysret64(void);
 
-static int __init print_banner(void)
-{
-	pv_init_ops.banner();
-	return 0;
-}
-core_initcall(print_banner);
-
 static struct resource reserve_ioports = {
 	.start = 0,
 	.end = IO_SPACE_LIMIT,
@@ -320,21 +303,13 @@ struct pv_info pv_info = {
 
 struct pv_init_ops pv_init_ops = {
 	.patch = native_patch,
-	.banner = default_banner,
-	.arch_setup = paravirt_nop,
-	.memory_setup = machine_specific_memory_setup,
 };
 
 struct pv_time_ops pv_time_ops = {
-	.time_init = hpet_time_init,
-	.get_wallclock = native_get_wallclock,
-	.set_wallclock = native_set_wallclock,
 	.sched_clock = native_sched_clock,
-	.get_tsc_khz = native_calibrate_tsc,
 };
 
 struct pv_irq_ops pv_irq_ops = {
-	.init_IRQ = native_init_IRQ,
 	.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
 	.restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
 	.irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
@@ -409,8 +384,6 @@ struct pv_cpu_ops pv_cpu_ops = {
 
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
-	.setup_boot_clock = setup_boot_APIC_clock,
-	.setup_secondary_clock = setup_secondary_APIC_clock,
 	.startup_ipi_hook = paravirt_nop,
 #endif
 };
@@ -424,13 +397,6 @@ struct pv_apic_ops pv_apic_ops = {
 #endif
 
 struct pv_mmu_ops pv_mmu_ops = {
-#ifndef CONFIG_X86_64
-	.pagetable_setup_start = native_pagetable_setup_start,
-	.pagetable_setup_done = native_pagetable_setup_done,
-#else
-	.pagetable_setup_start = paravirt_nop,
-	.pagetable_setup_done = paravirt_nop,
-#endif
 
 	.read_cr2 = native_read_cr2,
 	.write_cr2 = native_write_cr2,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index d71c865..64b838e 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -225,10 +225,8 @@ static __init int iommu_setup(char *p)
 		if (!strncmp(p, "soft", 4))
 			swiotlb = 1;
 #endif
-		if (!strncmp(p, "pt", 2)) {
+		if (!strncmp(p, "pt", 2))
 			iommu_pass_through = 1;
-			return 1;
-		}
 
 		gart_parse_options(p);
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 071166a..847ab41 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,7 +9,7 @@
 #include <linux/pm.h>
 #include <linux/clockchips.h>
 #include <linux/random.h>
-#include <trace/power.h>
+#include <trace/events/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
 #include <asm/syscalls.h>
@@ -25,9 +25,6 @@ EXPORT_SYMBOL(idle_nomwait);
 
 struct kmem_cache *task_xstate_cachep;
 
-DEFINE_TRACE(power_start);
-DEFINE_TRACE(power_end);
-
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
 	*dst = *src;
@@ -299,9 +296,7 @@ static inline int hlt_use_halt(void)
 void default_idle(void)
 {
 	if (hlt_use_halt()) {
-		struct power_trace it;
-
-		trace_power_start(&it, POWER_CSTATE, 1);
+		trace_power_start(POWER_CSTATE, 1);
 		current_thread_info()->status &= ~TS_POLLING;
 		/*
 		 * TS_POLLING-cleared state must be visible before we
@@ -314,7 +309,6 @@ void default_idle(void)
 		else
 			local_irq_enable();
 		current_thread_info()->status |= TS_POLLING;
-		trace_power_end(&it);
 	} else {
 		local_irq_enable();
 		/* loop is done by the caller */
@@ -372,9 +366,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
-	struct power_trace it;
-
-	trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
+	trace_power_start(POWER_CSTATE, (ax>>4)+1);
 	if (!need_resched()) {
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
@@ -384,15 +376,13 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 		if (!need_resched())
 			__mwait(ax, cx);
 	}
-	trace_power_end(&it);
 }
 
 /* Default MONITOR/MWAIT with no hints, used for default C1 state */
 static void mwait_idle(void)
 {
-	struct power_trace it;
 	if (!need_resched()) {
-		trace_power_start(&it, POWER_CSTATE, 1);
+		trace_power_start(POWER_CSTATE, 1);
 		if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
 			clflush((void *)&current_thread_info()->flags);
 
@@ -402,7 +392,6 @@ static void mwait_idle(void)
 			__sti_mwait(0, 0);
 		else
 			local_irq_enable();
-		trace_power_end(&it);
 	} else
 		local_irq_enable();
 }
@@ -414,13 +403,11 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-	struct power_trace it;
-
-	trace_power_start(&it, POWER_CSTATE, 0);
+	trace_power_start(POWER_CSTATE, 0);
 	local_irq_enable();
 	while (!need_resched())
 		cpu_relax();
-	trace_power_end(&it);
+	trace_power_end(0);
 }
 
 /*
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index af71d06..6c3b2c6 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -508,7 +508,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev)
 
 	pci_read_config_dword(nb_ht, 0x60, &val);
 	set_dev_node(&dev->dev, val & 7);
-	pci_dev_put(dev);
+	pci_dev_put(nb_ht);
 }
 
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index a06e8d1..27349f9 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
 #include <linux/pm.h>
 #include <linux/efi.h>
 #include <linux/dmi.h>
+#include <linux/tboot.h>
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
@@ -508,6 +509,8 @@ static void native_machine_emergency_restart(void)
 	if (reboot_emergency)
 		emergency_vmx_disable_all();
 
+	tboot_shutdown(TB_SHUTDOWN_REBOOT);
+
 	/* Tell the BIOS if we want cold or warm reboot */
 	*((unsigned short *)__va(0x472)) = reboot_mode;
 
@@ -634,6 +637,8 @@ static void native_machine_halt(void)
 	/* stop other cpus and apics */
 	machine_shutdown();
 
+	tboot_shutdown(TB_SHUTDOWN_HALT);
+
 	/* stop this cpu */
 	stop_this_cpu(NULL);
 }
@@ -645,6 +650,8 @@ static void native_machine_power_off(void)
 			machine_shutdown();
 		pm_power_off();
 	}
+	/* a fallback in case there is no PM info available */
+	tboot_shutdown(TB_SHUTDOWN_HALT);
 }
 
 struct machine_ops machine_ops = {
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 5d465b2..1cfbbfc 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -8,6 +8,7 @@
 #include <linux/pnp.h>
 
 #include <asm/vsyscall.h>
+#include <asm/x86_init.h>
 #include <asm/time.h>
 
 #ifdef CONFIG_X86_32
@@ -165,33 +166,29 @@ void rtc_cmos_write(unsigned char val, unsigned char addr)
 }
 EXPORT_SYMBOL(rtc_cmos_write);
 
-static int set_rtc_mmss(unsigned long nowtime)
+int update_persistent_clock(struct timespec now)
 {
 	unsigned long flags;
 	int retval;
 
 	spin_lock_irqsave(&rtc_lock, flags);
-	retval = set_wallclock(nowtime);
+	retval = x86_platform.set_wallclock(now.tv_sec);
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
 	return retval;
 }
 
 /* not static: needed by APM */
-unsigned long read_persistent_clock(void)
+void read_persistent_clock(struct timespec *ts)
 {
 	unsigned long retval, flags;
 
 	spin_lock_irqsave(&rtc_lock, flags);
-	retval = get_wallclock();
+	retval = x86_platform.get_wallclock();
 	spin_unlock_irqrestore(&rtc_lock, flags);
 
-	return retval;
-}
-
-int update_persistent_clock(struct timespec now)
-{
-	return set_rtc_mmss(now.tv_sec);
+	ts->tv_sec = retval;
+	ts->tv_nsec = 0;
 }
 
 unsigned long long native_read_tsc(void)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 63f32d2..f327bcc 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -66,6 +66,7 @@
 
 #include <linux/percpu.h>
 #include <linux/crash_dump.h>
+#include <linux/tboot.h>
 
 #include <video/edid.h>
 
@@ -108,10 +109,6 @@
 #include <asm/numa_64.h>
 #endif
 
-#ifndef ARCH_SETUP
-#define ARCH_SETUP
-#endif
-
 /*
  * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
@@ -133,9 +130,9 @@ int default_cpu_present_to_apicid(int mps_cpu)
 	return __default_cpu_present_to_apicid(mps_cpu);
 }
 
-int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+int default_check_phys_apicid_present(int phys_apicid)
 {
-	return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+	return __default_check_phys_apicid_present(phys_apicid);
 }
 #endif
 
@@ -171,13 +168,6 @@ static struct resource bss_resource = {
 
 
 #ifdef CONFIG_X86_32
-static struct resource video_ram_resource = {
-	.name	= "Video RAM area",
-	.start	= 0xa0000,
-	.end	= 0xbffff,
-	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
 /* cpu data as detected by the assembly code in head.S */
 struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
 /* common cpu data for all cpus */
@@ -605,7 +595,7 @@ static struct resource standard_io_resources[] = {
 		.flags = IORESOURCE_BUSY | IORESOURCE_IO }
 };
 
-static void __init reserve_standard_io_resources(void)
+void __init reserve_standard_io_resources(void)
 {
 	int i;
 
@@ -637,10 +627,6 @@ static int __init setup_elfcorehdr(char *arg)
 early_param("elfcorehdr", setup_elfcorehdr);
 #endif
 
-static struct x86_quirks default_x86_quirks __initdata;
-
-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
-
 #ifdef CONFIG_X86_RESERVE_LOW_64K
 static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
 {
@@ -757,7 +743,7 @@ void __init setup_arch(char **cmdline_p)
 	}
 #endif
 
-	ARCH_SETUP
+	x86_init.oem.arch_setup();
 
 	setup_memory_map();
 	parse_setup_data();
@@ -796,6 +782,16 @@ void __init setup_arch(char **cmdline_p)
 	strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
 	*cmdline_p = command_line;
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Must call this twice: Once just to detect whether hardware doesn't
+	 * support NX (so that the early EHCI debug console setup can safely
+	 * call set_fixmap(), and then again after parsing early parameters to
+	 * honor the respective command line option.
+	 */
+	check_efer();
+#endif
+
 	parse_early_param();
 
 #ifdef CONFIG_X86_64
@@ -833,11 +829,9 @@ void __init setup_arch(char **cmdline_p)
 	 * VMware detection requires dmi to be available, so this
 	 * needs to be done after dmi_scan_machine, for the BP.
 	 */
-	init_hypervisor(&boot_cpu_data);
+	init_hypervisor_platform();
 
-#ifdef CONFIG_X86_32
-	probe_roms();
-#endif
+	x86_init.resources.probe_roms();
 
 	/* after parse_early_param, so could debug it */
 	insert_resource(&iomem_resource, &code_resource);
@@ -972,10 +966,11 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
-	paravirt_pagetable_setup_start(swapper_pg_dir);
+	x86_init.paging.pagetable_setup_start(swapper_pg_dir);
 	paging_init();
-	paravirt_pagetable_setup_done(swapper_pg_dir);
-	paravirt_post_allocator_init();
+	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+
+	tboot_probe();
 
 #ifdef CONFIG_X86_64
 	map_vsyscall();
@@ -990,13 +985,11 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_init();
 
-#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
 	/*
 	 * get boot-time SMP configuration:
 	 */
 	if (smp_found_config)
 		get_smp_config();
-#endif
 
 	prefill_possible_map();
 
@@ -1015,10 +1008,7 @@ void __init setup_arch(char **cmdline_p)
 	e820_reserve_resources();
 	e820_mark_nosave_regions(max_low_pfn);
 
-#ifdef CONFIG_X86_32
-	request_resource(&iomem_resource, &video_ram_resource);
-#endif
-	reserve_standard_io_resources();
+	x86_init.resources.reserve_resources();
 
 	e820_setup_gap();
 
@@ -1030,78 +1020,22 @@ void __init setup_arch(char **cmdline_p)
 	conswitchp = &dummy_con;
 #endif
 #endif
+	x86_init.oem.banner();
 }
 
 #ifdef CONFIG_X86_32
 
-/**
- * x86_quirk_intr_init - post gate setup interrupt initialisation
- *
- * Description:
- *	Fill in any interrupts that may have been left out by the general
- *	init_IRQ() routine.  interrupts having to do with the machine rather
- *	than the devices on the I/O bus (like APIC interrupts in intel MP
- *	systems) are started here.
- **/
-void __init x86_quirk_intr_init(void)
-{
-	if (x86_quirks->arch_intr_init) {
-		if (x86_quirks->arch_intr_init())
-			return;
-	}
-}
-
-/**
- * x86_quirk_trap_init - initialise system specific traps
- *
- * Description:
- *	Called as the final act of trap_init().  Used in VISWS to initialise
- *	the various board specific APIC traps.
- **/
-void __init x86_quirk_trap_init(void)
-{
-	if (x86_quirks->arch_trap_init) {
-		if (x86_quirks->arch_trap_init())
-			return;
-	}
-}
-
-static struct irqaction irq0  = {
-	.handler = timer_interrupt,
-	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
-	.name = "timer"
+static struct resource video_ram_resource = {
+	.name	= "Video RAM area",
+	.start	= 0xa0000,
+	.end	= 0xbffff,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
-/**
- * x86_quirk_pre_time_init - do any specific initialisations before.
- *
- **/
-void __init x86_quirk_pre_time_init(void)
+void __init i386_reserve_resources(void)
 {
-	if (x86_quirks->arch_pre_time_init)
-		x86_quirks->arch_pre_time_init();
+	request_resource(&iomem_resource, &video_ram_resource);
+	reserve_standard_io_resources();
 }
 
-/**
- * x86_quirk_time_init - do any specific initialisations for the system timer.
- *
- * Description:
- *	Must plug the system timer interrupt source at HZ into the IRQ listed
- *	in irq_vectors.h:TIMER_IRQ
- **/
-void __init x86_quirk_time_init(void)
-{
-	if (x86_quirks->arch_time_init) {
-		/*
-		 * A nonzero return code does not mean failure, it means
-		 * that the architecture quirk does not want any
-		 * generic (timer) setup to be performed after this:
-		 */
-		if (x86_quirks->arch_time_init())
-			return;
-	}
-
-	irq0.mask = cpumask_of_cpu(0);
-	setup_irq(0, &irq0);
-}
 #endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 07d8191..d559af9 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
 #define PERCPU_FIRST_CHUNK_RESERVE	0
 #endif
 
+#ifdef CONFIG_X86_32
 /**
  * pcpu_need_numa - determine percpu allocation needs to consider NUMA
  *
@@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void)
 #endif
 	return false;
 }
+#endif
 
 /**
  * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
@@ -124,308 +126,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
 }
 
 /*
- * Large page remap allocator
- *
- * This allocator uses PMD page as unit.  A PMD page is allocated for
- * each cpu and each is remapped into vmalloc area using PMD mapping.
- * As PMD page is quite large, only part of it is used for the first
- * chunk.  Unused part is returned to the bootmem allocator.
- *
- * So, the PMD pages are mapped twice - once to the physical mapping
- * and to the vmalloc area for the first percpu chunk.  The double
- * mapping does add one more PMD TLB entry pressure but still is much
- * better than only using 4k mappings while still being NUMA friendly.
+ * Helpers for first chunk memory allocation
  */
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-struct pcpul_ent {
-	unsigned int	cpu;
-	void		*ptr;
-};
-
-static size_t pcpul_size;
-static struct pcpul_ent *pcpul_map;
-static struct vm_struct pcpul_vm;
-
-static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
+static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
 {
-	size_t off = (size_t)pageno << PAGE_SHIFT;
-
-	if (off >= pcpul_size)
-		return NULL;
-
-	return virt_to_page(pcpul_map[cpu].ptr + off);
+	return pcpu_alloc_bootmem(cpu, size, align);
 }
 
-static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
+static void __init pcpu_fc_free(void *ptr, size_t size)
 {
-	size_t map_size, dyn_size;
-	unsigned int cpu;
-	int i, j;
-	ssize_t ret;
-
-	if (!chosen) {
-		size_t vm_size = VMALLOC_END - VMALLOC_START;
-		size_t tot_size = nr_cpu_ids * PMD_SIZE;
-
-		/* on non-NUMA, embedding is better */
-		if (!pcpu_need_numa())
-			return -EINVAL;
-
-		/* don't consume more than 20% of vmalloc area */
-		if (tot_size > vm_size / 5) {
-			pr_info("PERCPU: too large chunk size %zuMB for "
-				"large page remap\n", tot_size >> 20);
-			return -EINVAL;
-		}
-	}
-
-	/* need PSE */
-	if (!cpu_has_pse) {
-		pr_warning("PERCPU: lpage allocator requires PSE\n");
-		return -EINVAL;
-	}
-
-	/*
-	 * Currently supports only single page.  Supporting multiple
-	 * pages won't be too difficult if it ever becomes necessary.
-	 */
-	pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
-			       PERCPU_DYNAMIC_RESERVE);
-	if (pcpul_size > PMD_SIZE) {
-		pr_warning("PERCPU: static data is larger than large page, "
-			   "can't use large page\n");
-		return -EINVAL;
-	}
-	dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
-
-	/* allocate pointer array and alloc large pages */
-	map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
-	pcpul_map = alloc_bootmem(map_size);
-
-	for_each_possible_cpu(cpu) {
-		pcpul_map[cpu].cpu = cpu;
-		pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
-							PMD_SIZE);
-		if (!pcpul_map[cpu].ptr) {
-			pr_warning("PERCPU: failed to allocate large page "
-				   "for cpu%u\n", cpu);
-			goto enomem;
-		}
-
-		/*
-		 * Only use pcpul_size bytes and give back the rest.
-		 *
-		 * Ingo: The 2MB up-rounding bootmem is needed to make
-		 * sure the partial 2MB page is still fully RAM - it's
-		 * not well-specified to have a PAT-incompatible area
-		 * (unmapped RAM, device memory, etc.) in that hole.
-		 */
-		free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
-			     PMD_SIZE - pcpul_size);
-
-		memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
-	}
-
-	/* allocate address and map */
-	pcpul_vm.flags = VM_ALLOC;
-	pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
-	vm_area_register_early(&pcpul_vm, PMD_SIZE);
-
-	for_each_possible_cpu(cpu) {
-		pmd_t *pmd, pmd_v;
-
-		pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
-					 cpu * PMD_SIZE);
-		pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
-				PAGE_KERNEL_LARGE);
-		set_pmd(pmd, pmd_v);
-	}
-
-	/* we're ready, commit */
-	pr_info("PERCPU: Remapped at %p with large pages, static data "
-		"%zu bytes\n", pcpul_vm.addr, static_size);
-
-	ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
-				     PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
-				     PMD_SIZE, pcpul_vm.addr, NULL);
-
-	/* sort pcpul_map array for pcpu_lpage_remapped() */
-	for (i = 0; i < nr_cpu_ids - 1; i++)
-		for (j = i + 1; j < nr_cpu_ids; j++)
-			if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
-				struct pcpul_ent tmp = pcpul_map[i];
-				pcpul_map[i] = pcpul_map[j];
-				pcpul_map[j] = tmp;
-			}
-
-	return ret;
-
-enomem:
-	for_each_possible_cpu(cpu)
-		if (pcpul_map[cpu].ptr)
-			free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
-	free_bootmem(__pa(pcpul_map), map_size);
-	return -ENOMEM;
+	free_bootmem(__pa(ptr), size);
 }
 
-/**
- * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
- * @kaddr: the kernel address in question
- *
- * Determine whether @kaddr falls in the pcpul recycled area.  This is
- * used by pageattr to detect VM aliases and break up the pcpu PMD
- * mapping such that the same physical page is not mapped under
- * different attributes.
- *
- * The recycled area is always at the tail of a partially used PMD
- * page.
- *
- * RETURNS:
- * Address of corresponding remapped pcpu address if match is found;
- * otherwise, NULL.
- */
-void *pcpu_lpage_remapped(void *kaddr)
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
-	void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
-	unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
-	int left = 0, right = nr_cpu_ids - 1;
-	int pos;
-
-	/* pcpul in use at all? */
-	if (!pcpul_map)
-		return NULL;
-
-	/* okay, perform binary search */
-	while (left <= right) {
-		pos = (left + right) / 2;
-
-		if (pcpul_map[pos].ptr < pmd_addr)
-			left = pos + 1;
-		else if (pcpul_map[pos].ptr > pmd_addr)
-			right = pos - 1;
-		else {
-			/* it shouldn't be in the area for the first chunk */
-			WARN_ON(offset < pcpul_size);
-
-			return pcpul_vm.addr +
-				pcpul_map[pos].cpu * PMD_SIZE + offset;
-		}
-	}
-
-	return NULL;
-}
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+	if (early_cpu_to_node(from) == early_cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
 #else
-static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen)
-{
-	return -EINVAL;
-}
+	return LOCAL_DISTANCE;
 #endif
-
-/*
- * Embedding allocator
- *
- * The first chunk is sized to just contain the static area plus
- * module and dynamic reserves and embedded into linear physical
- * mapping so that it can use PMD mapping without additional TLB
- * pressure.
- */
-static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
-{
-	size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
-
-	/*
-	 * If large page isn't supported, there's no benefit in doing
-	 * this.  Also, embedding allocation doesn't play well with
-	 * NUMA.
-	 */
-	if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
-		return -EINVAL;
-
-	return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
-				      reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
 }
 
-/*
- * 4k page allocator
- *
- * This is the basic allocator.  Static percpu area is allocated
- * page-by-page and most of initialization is done by the generic
- * setup function.
- */
-static struct page **pcpu4k_pages __initdata;
-static int pcpu4k_nr_static_pages __initdata;
-
-static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
-{
-	if (pageno < pcpu4k_nr_static_pages)
-		return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
-	return NULL;
-}
-
-static void __init pcpu4k_populate_pte(unsigned long addr)
+static void __init pcpup_populate_pte(unsigned long addr)
 {
 	populate_extra_pte(addr);
 }
 
-static ssize_t __init setup_pcpu_4k(size_t static_size)
-{
-	size_t pages_size;
-	unsigned int cpu;
-	int i, j;
-	ssize_t ret;
-
-	pcpu4k_nr_static_pages = PFN_UP(static_size);
-
-	/* unaligned allocations can't be freed, round up to page size */
-	pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
-			       * sizeof(pcpu4k_pages[0]));
-	pcpu4k_pages = alloc_bootmem(pages_size);
-
-	/* allocate and copy */
-	j = 0;
-	for_each_possible_cpu(cpu)
-		for (i = 0; i < pcpu4k_nr_static_pages; i++) {
-			void *ptr;
-
-			ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
-			if (!ptr) {
-				pr_warning("PERCPU: failed to allocate "
-					   "4k page for cpu%u\n", cpu);
-				goto enomem;
-			}
-
-			memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
-			pcpu4k_pages[j++] = virt_to_page(ptr);
-		}
-
-	/* we're ready, commit */
-	pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
-		pcpu4k_nr_static_pages, static_size);
-
-	ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
-				     PERCPU_FIRST_CHUNK_RESERVE, -1,
-				     -1, NULL, pcpu4k_populate_pte);
-	goto out_free_ar;
-
-enomem:
-	while (--j >= 0)
-		free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE);
-	ret = -ENOMEM;
-out_free_ar:
-	free_bootmem(__pa(pcpu4k_pages), pages_size);
-	return ret;
-}
-
-/* for explicit first chunk allocator selection */
-static char pcpu_chosen_alloc[16] __initdata;
-
-static int __init percpu_alloc_setup(char *str)
-{
-	strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
-	return 0;
-}
-early_param("percpu_alloc", percpu_alloc_setup);
-
 static inline void setup_percpu_segment(int cpu)
 {
 #ifdef CONFIG_X86_32
@@ -441,52 +170,49 @@ static inline void setup_percpu_segment(int cpu)
 
 void __init setup_per_cpu_areas(void)
 {
-	size_t static_size = __per_cpu_end - __per_cpu_start;
 	unsigned int cpu;
 	unsigned long delta;
-	size_t pcpu_unit_size;
-	ssize_t ret;
+	int rc;
 
 	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
 		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
 
 	/*
-	 * Allocate percpu area.  If PSE is supported, try to make use
-	 * of large page mappings.  Please read comments on top of
-	 * each allocator for details.
+	 * Allocate percpu area.  Embedding allocator is our favorite;
+	 * however, on NUMA configurations, it can result in very
+	 * sparse unit mapping and vmalloc area isn't spacious enough
+	 * on 32bit.  Use page in that case.
 	 */
-	ret = -EINVAL;
-	if (strlen(pcpu_chosen_alloc)) {
-		if (strcmp(pcpu_chosen_alloc, "4k")) {
-			if (!strcmp(pcpu_chosen_alloc, "lpage"))
-				ret = setup_pcpu_lpage(static_size, true);
-			else if (!strcmp(pcpu_chosen_alloc, "embed"))
-				ret = setup_pcpu_embed(static_size, true);
-			else
-				pr_warning("PERCPU: unknown allocator %s "
-					   "specified\n", pcpu_chosen_alloc);
-			if (ret < 0)
-				pr_warning("PERCPU: %s allocator failed (%zd), "
-					   "falling back to 4k\n",
-					   pcpu_chosen_alloc, ret);
-		}
-	} else {
-		ret = setup_pcpu_lpage(static_size, false);
-		if (ret < 0)
-			ret = setup_pcpu_embed(static_size, false);
+#ifdef CONFIG_X86_32
+	if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
+		pcpu_chosen_fc = PCPU_FC_PAGE;
+#endif
+	rc = -EINVAL;
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE;
+		const size_t dyn_size = PERCPU_MODULE_RESERVE +
+			PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
+
+		rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
+					    dyn_size, atom_size,
+					    pcpu_cpu_distance,
+					    pcpu_fc_alloc, pcpu_fc_free);
+		if (rc < 0)
+			pr_warning("PERCPU: %s allocator failed (%d), "
+				   "falling back to page size\n",
+				   pcpu_fc_names[pcpu_chosen_fc], rc);
 	}
-	if (ret < 0)
-		ret = setup_pcpu_4k(static_size);
-	if (ret < 0)
-		panic("cannot allocate static percpu area (%zu bytes, err=%zd)",
-		      static_size, ret);
-
-	pcpu_unit_size = ret;
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
+					   pcpu_fc_alloc, pcpu_fc_free,
+					   pcpup_populate_pte);
+	if (rc < 0)
+		panic("cannot initialize percpu area (err=%d)", rc);
 
 	/* alrighty, percpu areas up and running */
 	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
 	for_each_possible_cpu(cpu) {
-		per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size;
+		per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
 		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
 		per_cpu(cpu_number, cpu) = cpu;
 		setup_percpu_segment(cpu);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 81e58238..6a44a76 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -856,7 +856,7 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
 		mce_notify_process();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c36cc14..09c5e07 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -47,6 +47,7 @@
 #include <linux/bootmem.h>
 #include <linux/err.h>
 #include <linux/nmi.h>
+#include <linux/tboot.h>
 
 #include <asm/acpi.h>
 #include <asm/desc.h>
@@ -323,7 +324,7 @@ notrace static void __cpuinit start_secondary(void *unused)
 	/* enable local interrupts */
 	local_irq_enable();
 
-	setup_secondary_clock();
+	x86_cpuinit.setup_percpu_clockev();
 
 	wmb();
 	cpu_idle();
@@ -1113,13 +1114,26 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	printk(KERN_INFO "CPU%d: ", 0);
 	print_cpu_info(&cpu_data(0));
-	setup_boot_clock();
+	x86_init.timers.setup_percpu_clockev();
 
 	if (is_uv_system())
 		uv_system_init();
+
+	set_mtrr_aps_delayed_init();
 out:
 	preempt_enable();
 }
+
+void arch_enable_nonboot_cpus_begin(void)
+{
+	set_mtrr_aps_delayed_init();
+}
+
+void arch_enable_nonboot_cpus_end(void)
+{
+	mtrr_aps_init();
+}
+
 /*
  * Early setup to make printk work.
  */
@@ -1141,6 +1155,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	setup_ioapic_dest();
 #endif
 	check_nmi_watchdog();
+	mtrr_aps_init();
 }
 
 static int __initdata setup_possible_cpus = -1;
@@ -1318,6 +1333,7 @@ void play_dead_common(void)
 void native_play_dead(void)
 {
 	play_dead_common();
+	tboot_shutdown(TB_SHUTDOWN_WFS);
 	wbinvd_halt();
 }
 
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d51321d..0157cd2 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -335,4 +335,4 @@ ENTRY(sys_call_table)
 	.long sys_preadv
 	.long sys_pwritev
 	.long sys_rt_tgsigqueueinfo	/* 335 */
-	.long sys_perf_counter_open
+	.long sys_perf_event_open
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
new file mode 100644
index 0000000..86c9f91
--- /dev/null
+++ b/arch/x86/kernel/tboot.c
@@ -0,0 +1,447 @@
+/*
+ * tboot.c: main implementation of helper functions used by kernel for
+ *          runtime support of Intel(R) Trusted Execution Technology
+ *
+ * Copyright (c) 2006-2009, Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/dma_remapping.h>
+#include <linux/init_task.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/dmar.h>
+#include <linux/cpu.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+#include <linux/tboot.h>
+
+#include <asm/trampoline.h>
+#include <asm/processor.h>
+#include <asm/bootparam.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/fixmap.h>
+#include <asm/proto.h>
+#include <asm/setup.h>
+#include <asm/e820.h>
+#include <asm/io.h>
+
+#include "acpi/realmode/wakeup.h"
+
+/* Global pointer to shared data; NULL means no measured launch. */
+struct tboot *tboot __read_mostly;
+
+/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
+#define AP_WAIT_TIMEOUT		1
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"tboot: " fmt
+
+static u8 tboot_uuid[16] __initdata = TBOOT_UUID;
+
+void __init tboot_probe(void)
+{
+	/* Look for valid page-aligned address for shared page. */
+	if (!boot_params.tboot_addr)
+		return;
+	/*
+	 * also verify that it is mapped as we expect it before calling
+	 * set_fixmap(), to reduce chance of garbage value causing crash
+	 */
+	if (!e820_any_mapped(boot_params.tboot_addr,
+			     boot_params.tboot_addr, E820_RESERVED)) {
+		pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n");
+		return;
+	}
+
+	/* only a natively booted kernel should be using TXT */
+	if (paravirt_enabled()) {
+		pr_warning("non-0 tboot_addr but pv_ops is enabled\n");
+		return;
+	}
+
+	/* Map and check for tboot UUID. */
+	set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
+	tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
+	if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) {
+		pr_warning("tboot at 0x%llx is invalid\n",
+			   boot_params.tboot_addr);
+		tboot = NULL;
+		return;
+	}
+	if (tboot->version < 5) {
+		pr_warning("tboot version is invalid: %u\n", tboot->version);
+		tboot = NULL;
+		return;
+	}
+
+	pr_info("found shared page at phys addr 0x%llx:\n",
+		boot_params.tboot_addr);
+	pr_debug("version: %d\n", tboot->version);
+	pr_debug("log_addr: 0x%08x\n", tboot->log_addr);
+	pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry);
+	pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base);
+	pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
+}
+
+static pgd_t *tboot_pg_dir;
+static struct mm_struct tboot_mm = {
+	.mm_rb          = RB_ROOT,
+	.pgd            = swapper_pg_dir,
+	.mm_users       = ATOMIC_INIT(2),
+	.mm_count       = ATOMIC_INIT(1),
+	.mmap_sem       = __RWSEM_INITIALIZER(init_mm.mmap_sem),
+	.page_table_lock =  __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
+	.mmlist         = LIST_HEAD_INIT(init_mm.mmlist),
+	.cpu_vm_mask    = CPU_MASK_ALL,
+};
+
+static inline void switch_to_tboot_pt(void)
+{
+	write_cr3(virt_to_phys(tboot_pg_dir));
+}
+
+static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
+			  pgprot_t prot)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_offset(&tboot_mm, vaddr);
+	pud = pud_alloc(&tboot_mm, pgd, vaddr);
+	if (!pud)
+		return -1;
+	pmd = pmd_alloc(&tboot_mm, pud, vaddr);
+	if (!pmd)
+		return -1;
+	pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
+	if (!pte)
+		return -1;
+	set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
+	pte_unmap(pte);
+	return 0;
+}
+
+static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
+			   unsigned long nr)
+{
+	/* Reuse the original kernel mapping */
+	tboot_pg_dir = pgd_alloc(&tboot_mm);
+	if (!tboot_pg_dir)
+		return -1;
+
+	for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
+		if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
+			return -1;
+	}
+
+	return 0;
+}
+
+static void tboot_create_trampoline(void)
+{
+	u32 map_base, map_size;
+
+	/* Create identity map for tboot shutdown code. */
+	map_base = PFN_DOWN(tboot->tboot_base);
+	map_size = PFN_UP(tboot->tboot_size);
+	if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
+		panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
+		      map_base, map_size);
+}
+
+#ifdef CONFIG_ACPI_SLEEP
+
+static void add_mac_region(phys_addr_t start, unsigned long size)
+{
+	struct tboot_mac_region *mr;
+	phys_addr_t end = start + size;
+
+	if (start && size) {
+		mr = &tboot->mac_regions[tboot->num_mac_regions++];
+		mr->start = round_down(start, PAGE_SIZE);
+		mr->size  = round_up(end, PAGE_SIZE) - mr->start;
+	}
+}
+
+static int tboot_setup_sleep(void)
+{
+	tboot->num_mac_regions = 0;
+
+	/* S3 resume code */
+	add_mac_region(acpi_wakeup_address, WAKEUP_SIZE);
+
+#ifdef CONFIG_X86_TRAMPOLINE
+	/* AP trampoline code */
+	add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE);
+#endif
+
+	/* kernel code + data + bss */
+	add_mac_region(virt_to_phys(_text), _end - _text);
+
+	tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
+
+	return 0;
+}
+
+#else /* no CONFIG_ACPI_SLEEP */
+
+static int tboot_setup_sleep(void)
+{
+	/* S3 shutdown requested, but S3 not supported by the kernel... */
+	BUG();
+	return -1;
+}
+
+#endif
+
+void tboot_shutdown(u32 shutdown_type)
+{
+	void (*shutdown)(void);
+
+	if (!tboot_enabled())
+		return;
+
+	/*
+	 * if we're being called before the 1:1 mapping is set up then just
+	 * return and let the normal shutdown happen; this should only be
+	 * due to very early panic()
+	 */
+	if (!tboot_pg_dir)
+		return;
+
+	/* if this is S3 then set regions to MAC */
+	if (shutdown_type == TB_SHUTDOWN_S3)
+		if (tboot_setup_sleep())
+			return;
+
+	tboot->shutdown_type = shutdown_type;
+
+	switch_to_tboot_pt();
+
+	shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry;
+	shutdown();
+
+	/* should not reach here */
+	while (1)
+		halt();
+}
+
+static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
+{
+#define TB_COPY_GAS(tbg, g)			\
+	tbg.space_id     = g.space_id;		\
+	tbg.bit_width    = g.bit_width;		\
+	tbg.bit_offset   = g.bit_offset;	\
+	tbg.access_width = g.access_width;	\
+	tbg.address      = g.address;
+
+	TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block);
+	TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block);
+	TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block);
+	TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block);
+
+	/*
+	 * We need phys addr of waking vector, but can't use virt_to_phys() on
+	 * &acpi_gbl_FACS because it is ioremap'ed, so calc from FACS phys
+	 * addr.
+	 */
+	tboot->acpi_sinfo.wakeup_vector = fadt->facs +
+		offsetof(struct acpi_table_facs, firmware_waking_vector);
+}
+
+void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
+{
+	static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
+		/* S0,1,2: */ -1, -1, -1,
+		/* S3: */ TB_SHUTDOWN_S3,
+		/* S4: */ TB_SHUTDOWN_S4,
+		/* S5: */ TB_SHUTDOWN_S5 };
+
+	if (!tboot_enabled())
+		return;
+
+	tboot_copy_fadt(&acpi_gbl_FADT);
+	tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
+	tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control;
+	/* we always use the 32b wakeup vector */
+	tboot->acpi_sinfo.vector_width = 32;
+
+	if (sleep_state >= ACPI_S_STATE_COUNT ||
+	    acpi_shutdown_map[sleep_state] == -1) {
+		pr_warning("unsupported sleep state 0x%x\n", sleep_state);
+		return;
+	}
+
+	tboot_shutdown(acpi_shutdown_map[sleep_state]);
+}
+
+static atomic_t ap_wfs_count;
+
+static int tboot_wait_for_aps(int num_aps)
+{
+	unsigned long timeout;
+
+	timeout = AP_WAIT_TIMEOUT*HZ;
+	while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps &&
+	       timeout) {
+		mdelay(1);
+		timeout--;
+	}
+
+	if (timeout)
+		pr_warning("tboot wait for APs timeout\n");
+
+	return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
+}
+
+static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb,
+			unsigned long action, void *hcpu)
+{
+	switch (action) {
+	case CPU_DYING:
+		atomic_inc(&ap_wfs_count);
+		if (num_online_cpus() == 1)
+			if (tboot_wait_for_aps(atomic_read(&ap_wfs_count)))
+				return NOTIFY_BAD;
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block tboot_cpu_notifier __cpuinitdata =
+{
+	.notifier_call = tboot_cpu_callback,
+};
+
+static __init int tboot_late_init(void)
+{
+	if (!tboot_enabled())
+		return 0;
+
+	tboot_create_trampoline();
+
+	atomic_set(&ap_wfs_count, 0);
+	register_hotcpu_notifier(&tboot_cpu_notifier);
+	return 0;
+}
+
+late_initcall(tboot_late_init);
+
+/*
+ * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE)
+ */
+
+#define TXT_PUB_CONFIG_REGS_BASE       0xfed30000
+#define TXT_PRIV_CONFIG_REGS_BASE      0xfed20000
+
+/* # pages for each config regs space - used by fixmap */
+#define NR_TXT_CONFIG_PAGES     ((TXT_PUB_CONFIG_REGS_BASE -                \
+				  TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT)
+
+/* offsets from pub/priv config space */
+#define TXTCR_HEAP_BASE             0x0300
+#define TXTCR_HEAP_SIZE             0x0308
+
+#define SHA1_SIZE      20
+
+struct sha1_hash {
+	u8 hash[SHA1_SIZE];
+};
+
+struct sinit_mle_data {
+	u32               version;             /* currently 6 */
+	struct sha1_hash  bios_acm_id;
+	u32               edx_senter_flags;
+	u64               mseg_valid;
+	struct sha1_hash  sinit_hash;
+	struct sha1_hash  mle_hash;
+	struct sha1_hash  stm_hash;
+	struct sha1_hash  lcp_policy_hash;
+	u32               lcp_policy_control;
+	u32               rlp_wakeup_addr;
+	u32               reserved;
+	u32               num_mdrs;
+	u32               mdrs_off;
+	u32               num_vtd_dmars;
+	u32               vtd_dmars_off;
+} __packed;
+
+struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl)
+{
+	void *heap_base, *heap_ptr, *config;
+
+	if (!tboot_enabled())
+		return dmar_tbl;
+
+	/*
+	 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
+	 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
+	 */
+
+	/* map config space in order to get heap addr */
+	config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES *
+			 PAGE_SIZE);
+	if (!config)
+		return NULL;
+
+	/* now map TXT heap */
+	heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE),
+			    *(u64 *)(config + TXTCR_HEAP_SIZE));
+	iounmap(config);
+	if (!heap_base)
+		return NULL;
+
+	/* walk heap to SinitMleData */
+	/* skip BiosData */
+	heap_ptr = heap_base + *(u64 *)heap_base;
+	/* skip OsMleData */
+	heap_ptr += *(u64 *)heap_ptr;
+	/* skip OsSinitData */
+	heap_ptr += *(u64 *)heap_ptr;
+	/* now points to SinitMleDataSize; set to SinitMleData */
+	heap_ptr += sizeof(u64);
+	/* get addr of DMAR table */
+	dmar_tbl = (struct acpi_table_header *)(heap_ptr +
+		   ((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off -
+		   sizeof(u64));
+
+	/* don't unmap heap because dmar.c needs access to this */
+
+	return dmar_tbl;
+}
+
+int tboot_force_iommu(void)
+{
+	if (!tboot_enabled())
+		return 0;
+
+	if (no_iommu || swiotlb || dmar_disabled)
+		pr_warning("Forcing Intel-IOMMU to enabled\n");
+
+	dmar_disabled = 0;
+#ifdef CONFIG_SWIOTLB
+	swiotlb = 0;
+#endif
+	no_iommu = 0;
+
+	return 1;
+}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
new file mode 100644
index 0000000..e293ac5
--- /dev/null
+++ b/arch/x86/kernel/time.c
@@ -0,0 +1,121 @@
+/*
+ *  Copyright (c) 1991,1992,1995  Linus Torvalds
+ *  Copyright (c) 1994  Alan Modra
+ *  Copyright (c) 1995  Markus Kuhn
+ *  Copyright (c) 1996  Ingo Molnar
+ *  Copyright (c) 1998  Andrea Arcangeli
+ *  Copyright (c) 2002,2006  Vojtech Pavlik
+ *  Copyright (c) 2003  Andi Kleen
+ *
+ */
+
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/time.h>
+#include <linux/mca.h>
+
+#include <asm/vsyscall.h>
+#include <asm/x86_init.h>
+#include <asm/i8259.h>
+#include <asm/i8253.h>
+#include <asm/timer.h>
+#include <asm/hpet.h>
+#include <asm/time.h>
+
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
+int timer_ack;
+#endif
+
+#ifdef CONFIG_X86_64
+volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
+#endif
+
+unsigned long profile_pc(struct pt_regs *regs)
+{
+	unsigned long pc = instruction_pointer(regs);
+
+	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+#ifdef CONFIG_FRAME_POINTER
+		return *(unsigned long *)(regs->bp + sizeof(long));
+#else
+		unsigned long *sp = (unsigned long *)regs->sp;
+		/*
+		 * Return address is either directly at stack pointer
+		 * or above a saved flags. Eflags has bits 22-31 zero,
+		 * kernel addresses don't.
+		 */
+		if (sp[0] >> 22)
+			return sp[0];
+		if (sp[1] >> 22)
+			return sp[1];
+#endif
+	}
+	return pc;
+}
+EXPORT_SYMBOL(profile_pc);
+
+/*
+ * Default timer interrupt handler for PIT/HPET
+ */
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
+{
+	/* Keep nmi watchdog up to date */
+	inc_irq_stat(irq0_irqs);
+
+	/* Optimized out for !IO_APIC and x86_64 */
+	if (timer_ack) {
+		/*
+		 * Subtle, when I/O APICs are used we have to ack timer IRQ
+		 * manually to deassert NMI lines for the watchdog if run
+		 * on an 82489DX-based system.
+		 */
+		spin_lock(&i8259A_lock);
+		outb(0x0c, PIC_MASTER_OCW3);
+		/* Ack the IRQ; AEOI will end it automatically. */
+		inb(PIC_MASTER_POLL);
+		spin_unlock(&i8259A_lock);
+	}
+
+	global_clock_event->event_handler(global_clock_event);
+
+	/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
+	if (MCA_bus)
+		outb_p(inb_p(0x61)| 0x80, 0x61);
+
+	return IRQ_HANDLED;
+}
+
+static struct irqaction irq0  = {
+	.handler = timer_interrupt,
+	.flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
+	.name = "timer"
+};
+
+void __init setup_default_timer_irq(void)
+{
+	irq0.mask = cpumask_of_cpu(0);
+	setup_irq(0, &irq0);
+}
+
+/* Default timer init function */
+void __init hpet_time_init(void)
+{
+	if (!hpet_enable())
+		setup_pit_timer();
+	setup_default_timer_irq();
+}
+
+static __init void x86_late_time_init(void)
+{
+	x86_init.timers.timer_init();
+	tsc_init();
+}
+
+/*
+ * Initialize TSC and delay the periodic timer init to
+ * late x86_late_time_init() so ioremap works.
+ */
+void __init time_init(void)
+{
+	late_time_init = x86_late_time_init;
+}
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
deleted file mode 100644
index 5c5d87f..0000000
--- a/arch/x86/kernel/time_32.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- *  Copyright (C) 1991, 1992, 1995  Linus Torvalds
- *
- * This file contains the PC-specific time handling details:
- * reading the RTC at bootup, etc..
- * 1994-07-02    Alan Modra
- *	fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26    Markus Kuhn
- *      fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- *      precision CMOS clock update
- * 1996-05-03    Ingo Molnar
- *      fixed time warps in do_[slow|fast]_gettimeoffset()
- * 1997-09-10	Updated NTP code according to technical memorandum Jan '96
- *		"A Kernel Model for Precision Timekeeping" by Dave Mills
- * 1998-09-05    (Various)
- *	More robust do_fast_gettimeoffset() algorithm implemented
- *	(works with APM, Cyrix 6x86MX and Centaur C6),
- *	monotonic gettimeofday() with fast_get_timeoffset(),
- *	drift-proof precision TSC calibration on boot
- *	(C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
- *	Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
- *	ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
- * 1998-12-16    Andrea Arcangeli
- *	Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
- *	because was not accounting lost_ticks.
- * 1998-12-24 Copyright (C) 1998  Andrea Arcangeli
- *	Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
- *	serialize accesses to xtime/lost_ticks).
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/time.h>
-#include <linux/mca.h>
-
-#include <asm/setup.h>
-#include <asm/hpet.h>
-#include <asm/time.h>
-#include <asm/timer.h>
-
-#include <asm/do_timer.h>
-
-int timer_ack;
-
-unsigned long profile_pc(struct pt_regs *regs)
-{
-	unsigned long pc = instruction_pointer(regs);
-
-#ifdef CONFIG_SMP
-	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
-#ifdef CONFIG_FRAME_POINTER
-		return *(unsigned long *)(regs->bp + sizeof(long));
-#else
-		unsigned long *sp = (unsigned long *)&regs->sp;
-
-		/* Return address is either directly at stack pointer
-		   or above a saved flags. Eflags has bits 22-31 zero,
-		   kernel addresses don't. */
-		if (sp[0] >> 22)
-			return sp[0];
-		if (sp[1] >> 22)
-			return sp[1];
-#endif
-	}
-#endif
-	return pc;
-}
-EXPORT_SYMBOL(profile_pc);
-
-/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
- */
-irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
-	/* Keep nmi watchdog up to date */
-	inc_irq_stat(irq0_irqs);
-
-#ifdef CONFIG_X86_IO_APIC
-	if (timer_ack) {
-		/*
-		 * Subtle, when I/O APICs are used we have to ack timer IRQ
-		 * manually to deassert NMI lines for the watchdog if run
-		 * on an 82489DX-based system.
-		 */
-		spin_lock(&i8259A_lock);
-		outb(0x0c, PIC_MASTER_OCW3);
-		/* Ack the IRQ; AEOI will end it automatically. */
-		inb(PIC_MASTER_POLL);
-		spin_unlock(&i8259A_lock);
-	}
-#endif
-
-	do_timer_interrupt_hook();
-
-#ifdef CONFIG_MCA
-	if (MCA_bus) {
-		/* The PS/2 uses level-triggered interrupts.  You can't
-		turn them off, nor would you want to (any attempt to
-		enable edge-triggered interrupts usually gets intercepted by a
-		special hardware circuit).  Hence we have to acknowledge
-		the timer interrupt.  Through some incredibly stupid
-		design idea, the reset for IRQ 0 is done by setting the
-		high bit of the PPI port B (0x61).  Note that some PS/2s,
-		notably the 55SX, work fine if this is removed.  */
-
-		u8 irq_v = inb_p(0x61);		/* read the current state */
-		outb_p(irq_v | 0x80, 0x61);	/* reset the IRQ */
-	}
-#endif
-
-	return IRQ_HANDLED;
-}
-
-/* Duplicate of time_init() below, with hpet_enable part added */
-void __init hpet_time_init(void)
-{
-	if (!hpet_enable())
-		setup_pit_timer();
-	x86_quirk_time_init();
-}
-
-/*
- * This is called directly from init code; we must delay timer setup in the
- * HPET case as we can't make the decision to turn on HPET this early in the
- * boot process.
- *
- * The chosen time_init function will usually be hpet_time_init, above, but
- * in the case of virtual hardware, an alternative function may be substituted.
- */
-void __init time_init(void)
-{
-	x86_quirk_pre_time_init();
-	tsc_init();
-	late_time_init = choose_time_init();
-}
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
deleted file mode 100644
index 5ba343e..0000000
--- a/arch/x86/kernel/time_64.c
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- *  "High Precision Event Timer" based timekeeping.
- *
- *  Copyright (c) 1991,1992,1995  Linus Torvalds
- *  Copyright (c) 1994  Alan Modra
- *  Copyright (c) 1995  Markus Kuhn
- *  Copyright (c) 1996  Ingo Molnar
- *  Copyright (c) 1998  Andrea Arcangeli
- *  Copyright (c) 2002,2006  Vojtech Pavlik
- *  Copyright (c) 2003  Andi Kleen
- *  RTC support code taken from arch/i386/kernel/timers/time_hpet.c
- */
-
-#include <linux/clockchips.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/mca.h>
-#include <linux/nmi.h>
-
-#include <asm/i8253.h>
-#include <asm/hpet.h>
-#include <asm/vgtod.h>
-#include <asm/time.h>
-#include <asm/timer.h>
-
-volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
-
-unsigned long profile_pc(struct pt_regs *regs)
-{
-	unsigned long pc = instruction_pointer(regs);
-
-	/* Assume the lock function has either no stack frame or a copy
-	   of flags from PUSHF
-	   Eflags always has bits 22 and up cleared unlike kernel addresses. */
-	if (!user_mode_vm(regs) && in_lock_functions(pc)) {
-#ifdef CONFIG_FRAME_POINTER
-		return *(unsigned long *)(regs->bp + sizeof(long));
-#else
-		unsigned long *sp = (unsigned long *)regs->sp;
-		if (sp[0] >> 22)
-			return sp[0];
-		if (sp[1] >> 22)
-			return sp[1];
-#endif
-	}
-	return pc;
-}
-EXPORT_SYMBOL(profile_pc);
-
-static irqreturn_t timer_interrupt(int irq, void *dev_id)
-{
-	inc_irq_stat(irq0_irqs);
-
-	global_clock_event->event_handler(global_clock_event);
-
-#ifdef CONFIG_MCA
-	if (MCA_bus) {
-		u8 irq_v = inb_p(0x61);       /* read the current state */
-		outb_p(irq_v|0x80, 0x61);     /* reset the IRQ */
-	}
-#endif
-
-	return IRQ_HANDLED;
-}
-
-/* calibrate_cpu is used on systems with fixed rate TSCs to determine
- * processor frequency */
-#define TICK_COUNT 100000000
-unsigned long __init calibrate_cpu(void)
-{
-	int tsc_start, tsc_now;
-	int i, no_ctr_free;
-	unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
-	unsigned long flags;
-
-	for (i = 0; i < 4; i++)
-		if (avail_to_resrv_perfctr_nmi_bit(i))
-			break;
-	no_ctr_free = (i == 4);
-	if (no_ctr_free) {
-		WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
-		     "cpu_khz value may be incorrect.\n");
-		i = 3;
-		rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
-		wrmsrl(MSR_K7_EVNTSEL3, 0);
-		rdmsrl(MSR_K7_PERFCTR3, pmc3);
-	} else {
-		reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-		reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-	}
-	local_irq_save(flags);
-	/* start measuring cycles, incrementing from 0 */
-	wrmsrl(MSR_K7_PERFCTR0 + i, 0);
-	wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
-	rdtscl(tsc_start);
-	do {
-		rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
-		tsc_now = get_cycles();
-	} while ((tsc_now - tsc_start) < TICK_COUNT);
-
-	local_irq_restore(flags);
-	if (no_ctr_free) {
-		wrmsrl(MSR_K7_EVNTSEL3, 0);
-		wrmsrl(MSR_K7_PERFCTR3, pmc3);
-		wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
-	} else {
-		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
-		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
-	}
-
-	return pmc_now * tsc_khz / (tsc_now - tsc_start);
-}
-
-static struct irqaction irq0 = {
-	.handler	= timer_interrupt,
-	.flags		= IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER,
-	.name		= "timer"
-};
-
-void __init hpet_time_init(void)
-{
-	if (!hpet_enable())
-		setup_pit_timer();
-
-	setup_irq(0, &irq0);
-}
-
-void __init time_init(void)
-{
-	tsc_init();
-
-	late_time_init = choose_time_init();
-}
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 808031a..699f7ee 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -4,7 +4,7 @@
 #include <asm/e820.h>
 
 /* ready for x86_64 and x86 */
-unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__cpuinitdata trampoline_base = __va(TRAMPOLINE_BASE);
 
 void __init reserve_trampoline_memory(void)
 {
@@ -26,7 +26,7 @@ void __init reserve_trampoline_memory(void)
  * bootstrap into the page concerned. The caller
  * has made sure it's suitably aligned.
  */
-unsigned long setup_trampoline(void)
+unsigned long __cpuinit setup_trampoline(void)
 {
 	memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
 	return virt_to_phys(trampoline_base);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 66d874e..8508237 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -28,16 +28,12 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/init.h>
 #include <asm/segment.h>
 #include <asm/page_types.h>
 
 /* We can free up trampoline after bootup if cpu hotplug is not supported. */
-#ifndef CONFIG_HOTPLUG_CPU
-.section ".cpuinit.data","aw",@progbits
-#else
-.section .rodata,"a",@progbits
-#endif
-
+__CPUINITRODATA
 .code16
 
 ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index cddfb8d..596d54c 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -25,14 +25,15 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/init.h>
 #include <asm/pgtable_types.h>
 #include <asm/page_types.h>
 #include <asm/msr.h>
 #include <asm/segment.h>
 #include <asm/processor-flags.h>
 
-.section .rodata, "a", @progbits
-
+/* We can free up the trampoline after bootup if cpu hotplug is not supported. */
+__CPUINITRODATA
 .code16
 
 ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8326492..9346e10 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,12 +59,12 @@
 #include <asm/mach_traps.h>
 
 #ifdef CONFIG_X86_64
+#include <asm/x86_init.h>
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
 #else
 #include <asm/processor-flags.h>
 #include <asm/setup.h>
-#include <asm/traps.h>
 
 asmlinkage int system_call(void);
 
@@ -972,7 +972,5 @@ void __init trap_init(void)
 	 */
 	cpu_init();
 
-#ifdef CONFIG_X86_32
-	x86_quirk_trap_init();
-#endif
+	x86_init.irqs.trap_init();
 }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 71f4368..cd982f4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -17,6 +17,8 @@
 #include <asm/time.h>
 #include <asm/delay.h>
 #include <asm/hypervisor.h>
+#include <asm/nmi.h>
+#include <asm/x86_init.h>
 
 unsigned int __read_mostly cpu_khz;	/* TSC clocks / usec, not used here */
 EXPORT_SYMBOL(cpu_khz);
@@ -400,15 +402,9 @@ unsigned long native_calibrate_tsc(void)
 {
 	u64 tsc1, tsc2, delta, ref1, ref2;
 	unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
-	unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz;
+	unsigned long flags, latch, ms, fast_calibrate;
 	int hpet = is_hpet_enabled(), i, loopmin;
 
-	hv_tsc_khz = get_hypervisor_tsc_freq();
-	if (hv_tsc_khz) {
-		printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
-		return hv_tsc_khz;
-	}
-
 	local_irq_save(flags);
 	fast_calibrate = quick_pit_calibrate();
 	local_irq_restore(flags);
@@ -566,7 +562,7 @@ int recalibrate_cpu_khz(void)
 	unsigned long cpu_khz_old = cpu_khz;
 
 	if (cpu_has_tsc) {
-		tsc_khz = calibrate_tsc();
+		tsc_khz = x86_platform.calibrate_tsc();
 		cpu_khz = tsc_khz;
 		cpu_data(0).loops_per_jiffy =
 			cpufreq_scale(cpu_data(0).loops_per_jiffy,
@@ -670,7 +666,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
 	if ((val == CPUFREQ_PRECHANGE  && freq->old < freq->new) ||
 			(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
 			(val == CPUFREQ_RESUMECHANGE)) {
-		*lpj = 	cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+		*lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
 
 		tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
 		if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -744,10 +740,16 @@ static cycle_t __vsyscall_fn vread_tsc(void)
 }
 #endif
 
+static void resume_tsc(void)
+{
+	clocksource_tsc.cycle_last = 0;
+}
+
 static struct clocksource clocksource_tsc = {
 	.name                   = "tsc",
 	.rating                 = 300,
 	.read                   = read_tsc,
+	.resume			= resume_tsc,
 	.mask                   = CLOCKSOURCE_MASK(64),
 	.shift                  = 22,
 	.flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
@@ -761,12 +763,14 @@ void mark_tsc_unstable(char *reason)
 {
 	if (!tsc_unstable) {
 		tsc_unstable = 1;
-		printk("Marking TSC unstable due to %s\n", reason);
+		printk(KERN_INFO "Marking TSC unstable due to %s\n", reason);
 		/* Change only the rating, when not registered */
 		if (clocksource_tsc.mult)
-			clocksource_change_rating(&clocksource_tsc, 0);
-		else
+			clocksource_mark_unstable(&clocksource_tsc);
+		else {
+			clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE;
 			clocksource_tsc.rating = 0;
+		}
 	}
 }
 
@@ -852,15 +856,71 @@ static void __init init_tsc_clocksource(void)
 	clocksource_register(&clocksource_tsc);
 }
 
+#ifdef CONFIG_X86_64
+/*
+ * calibrate_cpu is used on systems with fixed rate TSCs to determine
+ * processor frequency
+ */
+#define TICK_COUNT 100000000
+static unsigned long __init calibrate_cpu(void)
+{
+	int tsc_start, tsc_now;
+	int i, no_ctr_free;
+	unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
+	unsigned long flags;
+
+	for (i = 0; i < 4; i++)
+		if (avail_to_resrv_perfctr_nmi_bit(i))
+			break;
+	no_ctr_free = (i == 4);
+	if (no_ctr_free) {
+		WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... "
+		     "cpu_khz value may be incorrect.\n");
+		i = 3;
+		rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
+		wrmsrl(MSR_K7_EVNTSEL3, 0);
+		rdmsrl(MSR_K7_PERFCTR3, pmc3);
+	} else {
+		reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+		reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+	}
+	local_irq_save(flags);
+	/* start measuring cycles, incrementing from 0 */
+	wrmsrl(MSR_K7_PERFCTR0 + i, 0);
+	wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
+	rdtscl(tsc_start);
+	do {
+		rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
+		tsc_now = get_cycles();
+	} while ((tsc_now - tsc_start) < TICK_COUNT);
+
+	local_irq_restore(flags);
+	if (no_ctr_free) {
+		wrmsrl(MSR_K7_EVNTSEL3, 0);
+		wrmsrl(MSR_K7_PERFCTR3, pmc3);
+		wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
+	} else {
+		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+	}
+
+	return pmc_now * tsc_khz / (tsc_now - tsc_start);
+}
+#else
+static inline unsigned long calibrate_cpu(void) { return cpu_khz; }
+#endif
+
 void __init tsc_init(void)
 {
 	u64 lpj;
 	int cpu;
 
+	x86_init.timers.tsc_pre_init();
+
 	if (!cpu_has_tsc)
 		return;
 
-	tsc_khz = calibrate_tsc();
+	tsc_khz = x86_platform.calibrate_tsc();
 	cpu_khz = tsc_khz;
 
 	if (!tsc_khz) {
@@ -868,11 +928,9 @@ void __init tsc_init(void)
 		return;
 	}
 
-#ifdef CONFIG_X86_64
 	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
 			(boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
 		cpu_khz = calibrate_cpu();
-#endif
 
 	printk("Detected %lu.%03lu MHz processor.\n",
 			(unsigned long)cpu_khz / 1000,
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 31ffc24..f068553 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -30,6 +30,7 @@
 #include <asm/setup.h>
 #include <asm/apic.h>
 #include <asm/e820.h>
+#include <asm/time.h>
 #include <asm/io.h>
 
 #include <linux/kernel_stat.h>
@@ -53,7 +54,7 @@ int is_visws_box(void)
 	return visws_board_type >= 0;
 }
 
-static int __init visws_time_init(void)
+static void __init visws_time_init(void)
 {
 	printk(KERN_INFO "Starting Cobalt Timer system clock\n");
 
@@ -66,21 +67,13 @@ static int __init visws_time_init(void)
 	/* Enable (unmask) the timer interrupt */
 	co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
 
-	/*
-	 * Zero return means the generic timer setup code will set up
-	 * the standard vector:
-	 */
-	return 0;
+	setup_default_timer_irq();
 }
 
-static int __init visws_pre_intr_init(void)
+/* Replaces the default init_ISA_irqs in the generic setup */
+static void __init visws_pre_intr_init(void)
 {
 	init_VISWS_APIC_irqs();
-
-	/*
-	 * We dont want ISA irqs to be set up by the generic code:
-	 */
-	return 1;
 }
 
 /* Quirk for machine specific memory setup. */
@@ -156,12 +149,8 @@ static void visws_machine_power_off(void)
 	outl(PIIX_SPECIAL_STOP, 0xCFC);
 }
 
-static int __init visws_get_smp_config(unsigned int early)
+static void __init visws_get_smp_config(unsigned int early)
 {
-	/*
-	 * Prevent MP-table parsing by the generic code:
-	 */
-	return 1;
 }
 
 /*
@@ -208,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
 	apic_version[m->apicid] = ver;
 }
 
-static int __init visws_find_smp_config(unsigned int reserve)
+static void __init visws_find_smp_config(unsigned int reserve)
 {
 	struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS);
 	unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
@@ -230,21 +219,9 @@ static int __init visws_find_smp_config(unsigned int reserve)
 		MP_processor_info(mp++);
 
 	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-	return 1;
 }
 
-static int visws_trap_init(void);
-
-static struct x86_quirks visws_x86_quirks __initdata = {
-	.arch_time_init		= visws_time_init,
-	.arch_pre_intr_init	= visws_pre_intr_init,
-	.arch_memory_setup	= visws_memory_setup,
-	.arch_intr_init		= NULL,
-	.arch_trap_init		= visws_trap_init,
-	.mach_get_smp_config	= visws_get_smp_config,
-	.mach_find_smp_config	= visws_find_smp_config,
-};
+static void visws_trap_init(void);
 
 void __init visws_early_detect(void)
 {
@@ -257,11 +234,14 @@ void __init visws_early_detect(void)
 		return;
 
 	/*
-	 * Install special quirks for timer, interrupt and memory setup:
-	 * Fall back to generic behavior for traps:
-	 * Override generic MP-table parsing:
+	 * Override the default platform setup functions
 	 */
-	x86_quirks = &visws_x86_quirks;
+	x86_init.resources.memory_setup = visws_memory_setup;
+	x86_init.mpparse.get_smp_config = visws_get_smp_config;
+	x86_init.mpparse.find_smp_config = visws_find_smp_config;
+	x86_init.irqs.pre_vector_init = visws_pre_intr_init;
+	x86_init.irqs.trap_init = visws_trap_init;
+	x86_init.timers.timer_init = visws_time_init;
 
 	/*
 	 * Install reboot quirks:
@@ -400,12 +380,10 @@ static __init void cobalt_init(void)
 		co_apic_read(CO_APIC_ID));
 }
 
-static int __init visws_trap_init(void)
+static void __init visws_trap_init(void)
 {
 	lithium_init();
 	cobalt_init();
-
-	return 1;
 }
 
 /*
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 95a7289..31e6f6c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -817,15 +817,15 @@ static inline int __init activate_vmi(void)
 		vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm);
 		vmi_timer_ops.cancel_alarm =
 			 vmi_get_function(VMI_CALL_CancelAlarm);
-		pv_time_ops.time_init = vmi_time_init;
-		pv_time_ops.get_wallclock = vmi_get_wallclock;
-		pv_time_ops.set_wallclock = vmi_set_wallclock;
+		x86_init.timers.timer_init = vmi_time_init;
 #ifdef CONFIG_X86_LOCAL_APIC
-		pv_apic_ops.setup_boot_clock = vmi_time_bsp_init;
-		pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
+		x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init;
+		x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init;
 #endif
 		pv_time_ops.sched_clock = vmi_sched_clock;
-		pv_time_ops.get_tsc_khz = vmi_tsc_khz;
+		x86_platform.calibrate_tsc = vmi_tsc_khz;
+		x86_platform.get_wallclock = vmi_get_wallclock;
+		x86_platform.set_wallclock = vmi_set_wallclock;
 
 		/* We have true wallclock functions; disable CMOS clock sync */
 		no_sync_cmos_clock = 1;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index 2b3eb82..611b9e2 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -68,7 +68,7 @@ unsigned long long vmi_sched_clock(void)
 	return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
 }
 
-/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */
+/* x86_platform.calibrate_tsc = vmi_tsc_khz */
 unsigned long vmi_tsc_khz(void)
 {
 	unsigned long long khz;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9fc1782..a46accc 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -45,9 +45,9 @@ PHDRS {
 	text PT_LOAD FLAGS(5);          /* R_E */
 	data PT_LOAD FLAGS(7);          /* RWE */
 #ifdef CONFIG_X86_64
-	user PT_LOAD FLAGS(7);          /* RWE */
+	user PT_LOAD FLAGS(5);          /* R_E */
 #ifdef CONFIG_SMP
-	percpu PT_LOAD FLAGS(7);        /* RWE */
+	percpu PT_LOAD FLAGS(6);        /* RW_ */
 #endif
 	init PT_LOAD FLAGS(7);          /* RWE */
 #endif
@@ -348,15 +348,12 @@ SECTIONS
 		_end = .;
 	}
 
-	/* Sections to be discarded */
-	/DISCARD/ : {
-		*(.exitcall.exit)
-		*(.eh_frame)
-		*(.discard)
-	}
-
         STABS_DEBUG
         DWARF_DEBUG
+
+	/* Sections to be discarded */
+	DISCARDS
+	/DISCARD/ : { *(.eh_frame) }
 }
 
 
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 25ee06a..cf53a78 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
 	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
 	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
 	vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
+	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
new file mode 100644
index 0000000..4449a4a
--- /dev/null
+++ b/arch/x86/kernel/x86_init.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de>
+ *
+ *  For licencing details see kernel-base/COPYING
+ */
+#include <linux/init.h>
+
+#include <asm/bios_ebda.h>
+#include <asm/paravirt.h>
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+#include <asm/time.h>
+#include <asm/irq.h>
+#include <asm/tsc.h>
+
+void __cpuinit x86_init_noop(void) { }
+void __init x86_init_uint_noop(unsigned int unused) { }
+void __init x86_init_pgd_noop(pgd_t *unused) { }
+
+/*
+ * The platform setup functions are preset with the default functions
+ * for standard PC hardware.
+ */
+struct x86_init_ops x86_init __initdata = {
+
+	.resources = {
+		.probe_roms		= x86_init_noop,
+		.reserve_resources	= reserve_standard_io_resources,
+		.memory_setup		= default_machine_specific_memory_setup,
+	},
+
+	.mpparse = {
+		.mpc_record		= x86_init_uint_noop,
+		.setup_ioapic_ids	= x86_init_noop,
+		.mpc_apic_id		= default_mpc_apic_id,
+		.smp_read_mpc_oem	= default_smp_read_mpc_oem,
+		.mpc_oem_bus_info	= default_mpc_oem_bus_info,
+		.find_smp_config	= default_find_smp_config,
+		.get_smp_config		= default_get_smp_config,
+	},
+
+	.irqs = {
+		.pre_vector_init	= init_ISA_irqs,
+		.intr_init		= native_init_IRQ,
+		.trap_init		= x86_init_noop,
+	},
+
+	.oem = {
+		.arch_setup		= x86_init_noop,
+		.banner			= default_banner,
+	},
+
+	.paging = {
+		.pagetable_setup_start	= native_pagetable_setup_start,
+		.pagetable_setup_done	= native_pagetable_setup_done,
+	},
+
+	.timers = {
+		.setup_percpu_clockev	= setup_boot_APIC_clock,
+		.tsc_pre_init		= x86_init_noop,
+		.timer_init		= hpet_time_init,
+	},
+};
+
+struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
+	.setup_percpu_clockev		= setup_secondary_APIC_clock,
+};
+
+struct x86_platform_ops x86_platform = {
+	.calibrate_tsc			= native_calibrate_tsc,
+	.get_wallclock			= mach_get_cmos_time,
+	.set_wallclock			= mach_set_rtc_mmss,
+};
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d677fa9..4cb7d5d 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1262,7 +1262,6 @@ __init void lguest_init(void)
 	 */
 
 	/* Interrupt-related operations */
-	pv_irq_ops.init_IRQ = lguest_init_IRQ;
 	pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl);
 	pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl);
 	pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable);
@@ -1270,7 +1269,6 @@ __init void lguest_init(void)
 	pv_irq_ops.safe_halt = lguest_safe_halt;
 
 	/* Setup operations */
-	pv_init_ops.memory_setup = lguest_memory_setup;
 	pv_init_ops.patch = lguest_patch;
 
 	/* Intercepts of various CPU instructions */
@@ -1320,10 +1318,11 @@ __init void lguest_init(void)
 	set_lguest_basic_apic_ops();
 #endif
 
-	/* Time operations */
-	pv_time_ops.get_wallclock = lguest_get_wallclock;
-	pv_time_ops.time_init = lguest_time_init;
-	pv_time_ops.get_tsc_khz = lguest_tsc_khz;
+	x86_init.resources.memory_setup = lguest_memory_setup;
+	x86_init.irqs.intr_init = lguest_init_IRQ;
+	x86_init.timers.timer_init = lguest_time_init;
+	x86_platform.calibrate_tsc = lguest_tsc_khz;
+	x86_platform.get_wallclock =  lguest_get_wallclock;
 
 	/*
 	 * Now is a good time to look at the implementations of these functions
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 775a020..82728f2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,7 +10,7 @@
 #include <linux/bootmem.h>		/* max_low_pfn			*/
 #include <linux/kprobes.h>		/* __kprobes, ...		*/
 #include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
-#include <linux/perf_counter.h>		/* perf_swcounter_event		*/
+#include <linux/perf_event.h>		/* perf_sw_event		*/
 
 #include <asm/traps.h>			/* dotraplinkage, ...		*/
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
@@ -1017,7 +1017,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1114,11 +1114,11 @@ good_area:
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
 				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
 				     regs, address);
 	}
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3cd7711..b49b4f6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 #ifdef CONFIG_X86_PAE
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		if (after_bootmem)
-			pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+			pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
 		else
 			pmd_table = (pmd_t *)alloc_low_page();
 		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 #endif
 			if (!page_table)
 				page_table =
-				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+				(pte_t *)alloc_bootmem_pages(PAGE_SIZE);
 		} else
 			page_table = (pte_t *)alloc_low_page();
 
@@ -892,7 +892,7 @@ void __init mem_init(void)
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
 			"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		num_physpages << (PAGE_SHIFT-10),
 		codesize >> 10,
 		reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ea56b8c..810bd31 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -687,7 +687,7 @@ void __init mem_init(void)
 
 	printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
 			 "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
-		(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+		nr_free_pages() << (PAGE_SHIFT-10),
 		max_pfn << (PAGE_SHIFT-10),
 		codesize >> 10,
 		absent_pages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index fe6f84c..84e236c 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -21,7 +21,7 @@
 #include <linux/module.h>
 #include <linux/highmem.h>
 
-int is_io_mapping_possible(resource_size_t base, unsigned long size)
+static int is_io_mapping_possible(resource_size_t base, unsigned long size)
 {
 #if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
 	/* There is no way to map greater than 1 << 32 address without PAE */
@@ -30,7 +30,30 @@ int is_io_mapping_possible(resource_size_t base, unsigned long size)
 #endif
 	return 1;
 }
-EXPORT_SYMBOL_GPL(is_io_mapping_possible);
+
+int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
+{
+	unsigned long flag = _PAGE_CACHE_WC;
+	int ret;
+
+	if (!is_io_mapping_possible(base, size))
+		return -EINVAL;
+
+	ret = io_reserve_memtype(base, base + size, &flag);
+	if (ret)
+		return ret;
+
+	*prot = __pgprot(__PAGE_KERNEL | flag);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_create_wc);
+
+void
+iomap_free(resource_size_t base, unsigned long size)
+{
+	io_free_memtype(base, base + size);
+}
+EXPORT_SYMBOL_GPL(iomap_free);
 
 void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
 {
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 04e1ad6..334e63c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -158,24 +158,14 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
 	retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
 						prot_val, &new_prot_val);
 	if (retval) {
-		pr_debug("Warning: reserve_memtype returned %d\n", retval);
+		printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
 		return NULL;
 	}
 
 	if (prot_val != new_prot_val) {
-		/*
-		 * Do not fallback to certain memory types with certain
-		 * requested type:
-		 * - request is uc-, return cannot be write-back
-		 * - request is uc-, return cannot be write-combine
-		 * - request is write-combine, return cannot be write-back
-		 */
-		if ((prot_val == _PAGE_CACHE_UC_MINUS &&
-		     (new_prot_val == _PAGE_CACHE_WB ||
-		      new_prot_val == _PAGE_CACHE_WC)) ||
-		    (prot_val == _PAGE_CACHE_WC &&
-		     new_prot_val == _PAGE_CACHE_WB)) {
-			pr_debug(
+		if (!is_new_memtype_allowed(phys_addr, size,
+					    prot_val, new_prot_val)) {
+			printk(KERN_ERR
 		"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
 				(unsigned long long)phys_addr,
 				(unsigned long long)(phys_addr + size),
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index e773b6b..3f66b82 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -1,7 +1,6 @@
 #include <linux/kmemcheck.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/module.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 1658296..c8191de 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -29,13 +29,26 @@
 #include <linux/random.h>
 #include <linux/limits.h>
 #include <linux/sched.h>
+#include <asm/elf.h>
+
+static unsigned int stack_maxrandom_size(void)
+{
+	unsigned int max = 0;
+	if ((current->flags & PF_RANDOMIZE) &&
+		!(current->personality & ADDR_NO_RANDOMIZE)) {
+		max = ((-1U) & STACK_RND_MASK) << PAGE_SHIFT;
+	}
+
+	return max;
+}
+
 
 /*
  * Top of mmap area (just below the process stack).
  *
- * Leave an at least ~128 MB hole.
+ * Leave an at least ~128 MB hole with possible stack randomization.
  */
-#define MIN_GAP (128*1024*1024)
+#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
 #define MAX_GAP (TASK_SIZE/6*5)
 
 /*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 7e600c1..24952fd 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -12,6 +12,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugfs.h>
 #include <linux/pfn.h>
+#include <linux/percpu.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -686,7 +687,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
 	unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
-	unsigned long vaddr, remapped;
+	unsigned long vaddr;
 	int ret;
 
 	if (cpa->pfn >= max_pfn_mapped)
@@ -744,24 +745,6 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	}
 #endif
 
-	/*
-	 * If the PMD page was partially used for per-cpu remapping,
-	 * the recycled area needs to be split and modified.  Because
-	 * the area is always proper subset of a PMD page
-	 * cpa->numpages is guaranteed to be 1 for these areas, so
-	 * there's no need to loop over and check for further remaps.
-	 */
-	remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
-	if (remapped) {
-		WARN_ON(cpa->numpages > 1);
-		alias_cpa = *cpa;
-		alias_cpa.vaddr = &remapped;
-		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-		ret = __change_page_attr_set_clr(&alias_cpa, 0);
-		if (ret)
-			return ret;
-	}
-
 	return 0;
 }
 
@@ -822,6 +805,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 {
 	struct cpa_data cpa;
 	int ret, cache, checkalias;
+	unsigned long baddr = 0;
 
 	/*
 	 * Check, if we are requested to change a not supported
@@ -853,6 +837,11 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 			 */
 			WARN_ON_ONCE(1);
 		}
+		/*
+		 * Save address for cache flush. *addr is modified in the call
+		 * to __change_page_attr_set_clr() below.
+		 */
+		baddr = *addr;
 	}
 
 	/* Must avoid aliasing mappings in the highmem code */
@@ -900,7 +889,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 			cpa_flush_array(addr, numpages, cache,
 					cpa.flags, pages);
 		} else
-			cpa_flush_range(*addr, numpages, cache);
+			cpa_flush_range(baddr, numpages, cache);
 	} else
 		cpa_flush_all(cache);
 
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index b2f7d3e..7257cf3 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -15,6 +15,7 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/rbtree.h>
 
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
@@ -148,11 +149,10 @@ static char *cattr_name(unsigned long flags)
  * areas). All the aliases have the same cache attributes of course.
  * Zero attributes are represented as holes.
  *
- * Currently the data structure is a list because the number of mappings
- * are expected to be relatively small. If this should be a problem
- * it could be changed to a rbtree or similar.
+ * The data structure is a list that is also organized as an rbtree
+ * sorted on the start address of memtype range.
  *
- * memtype_lock protects the whole list.
+ * memtype_lock protects both the linear list and rbtree.
  */
 
 struct memtype {
@@ -160,11 +160,53 @@ struct memtype {
 	u64			end;
 	unsigned long		type;
 	struct list_head	nd;
+	struct rb_node		rb;
 };
 
+static struct rb_root memtype_rbroot = RB_ROOT;
 static LIST_HEAD(memtype_list);
 static DEFINE_SPINLOCK(memtype_lock);	/* protects memtype list */
 
+static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
+{
+	struct rb_node *node = root->rb_node;
+	struct memtype *last_lower = NULL;
+
+	while (node) {
+		struct memtype *data = container_of(node, struct memtype, rb);
+
+		if (data->start < start) {
+			last_lower = data;
+			node = node->rb_right;
+		} else if (data->start > start) {
+			node = node->rb_left;
+		} else
+			return data;
+	}
+
+	/* Will return NULL if there is no entry with its start <= start */
+	return last_lower;
+}
+
+static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
+{
+	struct rb_node **new = &(root->rb_node);
+	struct rb_node *parent = NULL;
+
+	while (*new) {
+		struct memtype *this = container_of(*new, struct memtype, rb);
+
+		parent = *new;
+		if (data->start <= this->start)
+			new = &((*new)->rb_left);
+		else if (data->start > this->start)
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&data->rb, parent, new);
+	rb_insert_color(&data->rb, root);
+}
+
 /*
  * Does intersection of PAT memory type and MTRR memory type and returns
  * the resulting memory type as PAT understands it.
@@ -218,9 +260,6 @@ chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
 	return -EBUSY;
 }
 
-static struct memtype *cached_entry;
-static u64 cached_start;
-
 static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
 {
 	int ram_page = 0, not_rampage = 0;
@@ -249,63 +288,61 @@ static int pat_pagerange_is_ram(unsigned long start, unsigned long end)
 }
 
 /*
- * For RAM pages, mark the pages as non WB memory type using
- * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
- * set_memory_wc() on a RAM page at a time before marking it as WB again.
- * This is ok, because only one driver will be owning the page and
- * doing set_memory_*() calls.
+ * For RAM pages, we use page flags to mark the pages with appropriate type.
+ * Here we do two pass:
+ * - Find the memtype of all the pages in the range, look for any conflicts
+ * - In case of no conflicts, set the new memtype for pages in the range
  *
- * For now, we use PageNonWB to track that the RAM page is being mapped
- * as non WB. In future, we will have to use one more flag
- * (or some other mechanism in page_struct) to distinguish between
- * UC and WC mapping.
+ * Caller must hold memtype_lock for atomicity.
  */
 static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
 				  unsigned long *new_type)
 {
 	struct page *page;
-	u64 pfn, end_pfn;
+	u64 pfn;
+
+	if (req_type == _PAGE_CACHE_UC) {
+		/* We do not support strong UC */
+		WARN_ON_ONCE(1);
+		req_type = _PAGE_CACHE_UC_MINUS;
+	}
 
 	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
-		page = pfn_to_page(pfn);
-		if (page_mapped(page) || PageNonWB(page))
-			goto out;
+		unsigned long type;
 
-		SetPageNonWB(page);
+		page = pfn_to_page(pfn);
+		type = get_page_memtype(page);
+		if (type != -1) {
+			printk(KERN_INFO "reserve_ram_pages_type failed "
+				"0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
+				start, end, type, req_type);
+			if (new_type)
+				*new_type = type;
+
+			return -EBUSY;
+		}
 	}
-	return 0;
 
-out:
-	end_pfn = pfn;
-	for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
+	if (new_type)
+		*new_type = req_type;
+
+	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
 		page = pfn_to_page(pfn);
-		ClearPageNonWB(page);
+		set_page_memtype(page, req_type);
 	}
-
-	return -EINVAL;
+	return 0;
 }
 
 static int free_ram_pages_type(u64 start, u64 end)
 {
 	struct page *page;
-	u64 pfn, end_pfn;
+	u64 pfn;
 
 	for (pfn = (start >> PAGE_SHIFT); pfn < (end >> PAGE_SHIFT); ++pfn) {
 		page = pfn_to_page(pfn);
-		if (page_mapped(page) || !PageNonWB(page))
-			goto out;
-
-		ClearPageNonWB(page);
+		set_page_memtype(page, -1);
 	}
 	return 0;
-
-out:
-	end_pfn = pfn;
-	for (pfn = (start >> PAGE_SHIFT); pfn < end_pfn; ++pfn) {
-		page = pfn_to_page(pfn);
-		SetPageNonWB(page);
-	}
-	return -EINVAL;
 }
 
 /*
@@ -339,6 +376,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		if (new_type) {
 			if (req_type == -1)
 				*new_type = _PAGE_CACHE_WB;
+			else if (req_type == _PAGE_CACHE_WC)
+				*new_type = _PAGE_CACHE_UC_MINUS;
 			else
 				*new_type = req_type & _PAGE_CACHE_MASK;
 		}
@@ -364,11 +403,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		*new_type = actual_type;
 
 	is_range_ram = pat_pagerange_is_ram(start, end);
-	if (is_range_ram == 1)
-		return reserve_ram_pages_type(start, end, req_type,
-					      new_type);
-	else if (is_range_ram < 0)
+	if (is_range_ram == 1) {
+
+		spin_lock(&memtype_lock);
+		err = reserve_ram_pages_type(start, end, req_type, new_type);
+		spin_unlock(&memtype_lock);
+
+		return err;
+	} else if (is_range_ram < 0) {
 		return -EINVAL;
+	}
 
 	new  = kmalloc(sizeof(struct memtype), GFP_KERNEL);
 	if (!new)
@@ -380,17 +424,11 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 
 	spin_lock(&memtype_lock);
 
-	if (cached_entry && start >= cached_start)
-		entry = cached_entry;
-	else
-		entry = list_entry(&memtype_list, struct memtype, nd);
-
 	/* Search for existing mapping that overlaps the current range */
 	where = NULL;
-	list_for_each_entry_continue(entry, &memtype_list, nd) {
+	list_for_each_entry(entry, &memtype_list, nd) {
 		if (end <= entry->start) {
 			where = entry->nd.prev;
-			cached_entry = list_entry(where, struct memtype, nd);
 			break;
 		} else if (start <= entry->start) { /* end > entry->start */
 			err = chk_conflict(new, entry, new_type);
@@ -398,8 +436,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 				dprintk("Overlap at 0x%Lx-0x%Lx\n",
 					entry->start, entry->end);
 				where = entry->nd.prev;
-				cached_entry = list_entry(where,
-							struct memtype, nd);
 			}
 			break;
 		} else if (start < entry->end) { /* start > entry->start */
@@ -407,8 +443,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 			if (!err) {
 				dprintk("Overlap at 0x%Lx-0x%Lx\n",
 					entry->start, entry->end);
-				cached_entry = list_entry(entry->nd.prev,
-							struct memtype, nd);
 
 				/*
 				 * Move to right position in the linked
@@ -436,13 +470,13 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		return err;
 	}
 
-	cached_start = start;
-
 	if (where)
 		list_add(&new->nd, where);
 	else
 		list_add_tail(&new->nd, &memtype_list);
 
+	memtype_rb_insert(&memtype_rbroot, new);
+
 	spin_unlock(&memtype_lock);
 
 	dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -454,7 +488,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 
 int free_memtype(u64 start, u64 end)
 {
-	struct memtype *entry;
+	struct memtype *entry, *saved_entry;
 	int err = -EINVAL;
 	int is_range_ram;
 
@@ -466,23 +500,58 @@ int free_memtype(u64 start, u64 end)
 		return 0;
 
 	is_range_ram = pat_pagerange_is_ram(start, end);
-	if (is_range_ram == 1)
-		return free_ram_pages_type(start, end);
-	else if (is_range_ram < 0)
+	if (is_range_ram == 1) {
+
+		spin_lock(&memtype_lock);
+		err = free_ram_pages_type(start, end);
+		spin_unlock(&memtype_lock);
+
+		return err;
+	} else if (is_range_ram < 0) {
 		return -EINVAL;
+	}
 
 	spin_lock(&memtype_lock);
-	list_for_each_entry(entry, &memtype_list, nd) {
+
+	entry = memtype_rb_search(&memtype_rbroot, start);
+	if (unlikely(entry == NULL))
+		goto unlock_ret;
+
+	/*
+	 * Saved entry points to an entry with start same or less than what
+	 * we searched for. Now go through the list in both directions to look
+	 * for the entry that matches with both start and end, with list stored
+	 * in sorted start address
+	 */
+	saved_entry = entry;
+	list_for_each_entry_from(entry, &memtype_list, nd) {
 		if (entry->start == start && entry->end == end) {
-			if (cached_entry == entry || cached_start == start)
-				cached_entry = NULL;
+			rb_erase(&entry->rb, &memtype_rbroot);
+			list_del(&entry->nd);
+			kfree(entry);
+			err = 0;
+			break;
+		} else if (entry->start > start) {
+			break;
+		}
+	}
+
+	if (!err)
+		goto unlock_ret;
 
+	entry = saved_entry;
+	list_for_each_entry_reverse(entry, &memtype_list, nd) {
+		if (entry->start == start && entry->end == end) {
+			rb_erase(&entry->rb, &memtype_rbroot);
 			list_del(&entry->nd);
 			kfree(entry);
 			err = 0;
 			break;
+		} else if (entry->start < start) {
+			break;
 		}
 	}
+unlock_ret:
 	spin_unlock(&memtype_lock);
 
 	if (err) {
@@ -496,6 +565,101 @@ int free_memtype(u64 start, u64 end)
 }
 
 
+/**
+ * lookup_memtype - Looksup the memory type for a physical address
+ * @paddr: physical address of which memory type needs to be looked up
+ *
+ * Only to be called when PAT is enabled
+ *
+ * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
+ * _PAGE_CACHE_UC
+ */
+static unsigned long lookup_memtype(u64 paddr)
+{
+	int rettype = _PAGE_CACHE_WB;
+	struct memtype *entry;
+
+	if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+		return rettype;
+
+	if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
+		struct page *page;
+		spin_lock(&memtype_lock);
+		page = pfn_to_page(paddr >> PAGE_SHIFT);
+		rettype = get_page_memtype(page);
+		spin_unlock(&memtype_lock);
+		/*
+		 * -1 from get_page_memtype() implies RAM page is in its
+		 * default state and not reserved, and hence of type WB
+		 */
+		if (rettype == -1)
+			rettype = _PAGE_CACHE_WB;
+
+		return rettype;
+	}
+
+	spin_lock(&memtype_lock);
+
+	entry = memtype_rb_search(&memtype_rbroot, paddr);
+	if (entry != NULL)
+		rettype = entry->type;
+	else
+		rettype = _PAGE_CACHE_UC_MINUS;
+
+	spin_unlock(&memtype_lock);
+	return rettype;
+}
+
+/**
+ * io_reserve_memtype - Request a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ * @type: A pointer to memtype, with requested type. On success, requested
+ * or any other compatible type that was available for the region is returned
+ *
+ * On success, returns 0
+ * On failure, returns non-zero
+ */
+int io_reserve_memtype(resource_size_t start, resource_size_t end,
+			unsigned long *type)
+{
+	resource_size_t size = end - start;
+	unsigned long req_type = *type;
+	unsigned long new_type;
+	int ret;
+
+	WARN_ON_ONCE(iomem_map_sanity_check(start, size));
+
+	ret = reserve_memtype(start, end, req_type, &new_type);
+	if (ret)
+		goto out_err;
+
+	if (!is_new_memtype_allowed(start, size, req_type, new_type))
+		goto out_free;
+
+	if (kernel_map_sync_memtype(start, size, new_type) < 0)
+		goto out_free;
+
+	*type = new_type;
+	return 0;
+
+out_free:
+	free_memtype(start, end);
+	ret = -EBUSY;
+out_err:
+	return ret;
+}
+
+/**
+ * io_free_memtype - Release a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ */
+void io_free_memtype(resource_size_t start, resource_size_t end)
+{
+	free_memtype(start, end);
+}
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t vma_prot)
 {
@@ -577,7 +741,7 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
 {
 	unsigned long id_sz;
 
-	if (!pat_enabled || base >= __pa(high_memory))
+	if (base >= __pa(high_memory))
 		return 0;
 
 	id_sz = (__pa(high_memory) < base + size) ?
@@ -612,11 +776,29 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
 	is_ram = pat_pagerange_is_ram(paddr, paddr + size);
 
 	/*
-	 * reserve_pfn_range() doesn't support RAM pages. Maintain the current
-	 * behavior with RAM pages by returning success.
+	 * reserve_pfn_range() for RAM pages. We do not refcount to keep
+	 * track of number of mappings of RAM pages. We can assert that
+	 * the type requested matches the type of first page in the range.
 	 */
-	if (is_ram != 0)
+	if (is_ram) {
+		if (!pat_enabled)
+			return 0;
+
+		flags = lookup_memtype(paddr);
+		if (want_flags != flags) {
+			printk(KERN_WARNING
+			"%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
+				current->comm, current->pid,
+				cattr_name(want_flags),
+				(unsigned long long)paddr,
+				(unsigned long long)(paddr + size),
+				cattr_name(flags));
+			*vma_prot = __pgprot((pgprot_val(*vma_prot) &
+					      (~_PAGE_CACHE_MASK)) |
+					     flags);
+		}
 		return 0;
+	}
 
 	ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
 	if (ret)
@@ -678,14 +860,6 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
 
-	if (!pat_enabled)
-		return 0;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/*
 		 * reserve the whole chunk covered by vma. We need the
@@ -713,23 +887,24 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 			unsigned long pfn, unsigned long size)
 {
+	unsigned long flags;
 	resource_size_t paddr;
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (!pat_enabled)
-		return 0;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/* reserve the whole chunk starting from vm_pgoff */
 		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
 		return reserve_pfn_range(paddr, vma_size, prot, 0);
 	}
 
+	if (!pat_enabled)
+		return 0;
+
+	/* for vm_insert_pfn and friends, we set prot based on lookup */
+	flags = lookup_memtype(pfn << PAGE_SHIFT);
+	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+			 flags);
+
 	return 0;
 }
 
@@ -744,14 +919,6 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 	resource_size_t paddr;
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (!pat_enabled)
-		return;
-
-	/*
-	 * For now, only handle remap_pfn_range() vmas where
-	 * is_linear_pfn_mapping() == TRUE. Handling of
-	 * vm_insert_pfn() is TBD.
-	 */
 	if (is_linear_pfn_mapping(vma)) {
 		/* free the whole chunk starting from vm_pgoff */
 		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 4899215..8eb0587 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void)
 	if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
 		current_cpu_data.x86_model == 15) {
 		eax.split.version_id = 2;
-		eax.split.num_counters = 2;
+		eax.split.num_events = 2;
 		eax.split.bit_width = 40;
 	}
 
-	num_counters = eax.split.num_counters;
+	num_counters = eax.split.num_events;
 
 	op_arch_perfmon_spec.num_counters = num_counters;
 	op_arch_perfmon_spec.num_controls = num_counters;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index b837761..7b8e75d 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -13,7 +13,7 @@
 #define OP_X86_MODEL_H
 
 #include <asm/types.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
 
 struct op_msr {
 	unsigned long	addr;
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 3ffa10d..572ee97 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -15,63 +15,6 @@
  * also get peer root bus resource for io,mmio
  */
 
-#ifdef CONFIG_NUMA
-
-#define BUS_NR 256
-
-#ifdef CONFIG_X86_64
-
-static int mp_bus_to_node[BUS_NR];
-
-void set_mp_bus_to_node(int busnum, int node)
-{
-	if (busnum >= 0 &&  busnum < BUS_NR)
-		mp_bus_to_node[busnum] = node;
-}
-
-int get_mp_bus_to_node(int busnum)
-{
-	int node = -1;
-
-	if (busnum < 0 || busnum > (BUS_NR - 1))
-		return node;
-
-	node = mp_bus_to_node[busnum];
-
-	/*
-	 * let numa_node_id to decide it later in dma_alloc_pages
-	 * if there is no ram on that node
-	 */
-	if (node != -1 && !node_online(node))
-		node = -1;
-
-	return node;
-}
-
-#else /* CONFIG_X86_32 */
-
-static unsigned char mp_bus_to_node[BUS_NR];
-
-void set_mp_bus_to_node(int busnum, int node)
-{
-	if (busnum >= 0 &&  busnum < BUS_NR)
-	mp_bus_to_node[busnum] = (unsigned char) node;
-}
-
-int get_mp_bus_to_node(int busnum)
-{
-	int node;
-
-	if (busnum < 0 || busnum > (BUS_NR - 1))
-		return 0;
-	node = mp_bus_to_node[busnum];
-	return node;
-}
-
-#endif /* CONFIG_X86_32 */
-
-#endif /* CONFIG_NUMA */
-
 #ifdef CONFIG_X86_64
 
 /*
@@ -301,11 +244,6 @@ static int __init early_fill_mp_bus_info(void)
 	u64 val;
 	u32 address;
 
-#ifdef CONFIG_NUMA
-	for (i = 0; i < BUS_NR; i++)
-		mp_bus_to_node[i] = -1;
-#endif
-
 	if (!early_pci_allowed())
 		return -1;
 
@@ -346,7 +284,7 @@ static int __init early_fill_mp_bus_info(void)
 		node = (reg >> 4) & 0x07;
 #ifdef CONFIG_NUMA
 		for (j = min_bus; j <= max_bus; j++)
-			mp_bus_to_node[j] = (unsigned char) node;
+			set_mp_bus_to_node(j, node);
 #endif
 		link = (reg >> 8) & 0x03;
 
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 2202b62..5db96d43 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -600,3 +600,72 @@ struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno)
 {
 	return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
 }
+
+/*
+ * NUMA info for PCI busses
+ *
+ * Early arch code is responsible for filling in reasonable values here.
+ * A node id of "-1" means "use current node".  In other words, if a bus
+ * has a -1 node id, it's not tightly coupled to any particular chunk
+ * of memory (as is the case on some Nehalem systems).
+ */
+#ifdef CONFIG_NUMA
+
+#define BUS_NR 256
+
+#ifdef CONFIG_X86_64
+
+static int mp_bus_to_node[BUS_NR] = {
+	[0 ... BUS_NR - 1] = -1
+};
+
+void set_mp_bus_to_node(int busnum, int node)
+{
+	if (busnum >= 0 &&  busnum < BUS_NR)
+		mp_bus_to_node[busnum] = node;
+}
+
+int get_mp_bus_to_node(int busnum)
+{
+	int node = -1;
+
+	if (busnum < 0 || busnum > (BUS_NR - 1))
+		return node;
+
+	node = mp_bus_to_node[busnum];
+
+	/*
+	 * let numa_node_id to decide it later in dma_alloc_pages
+	 * if there is no ram on that node
+	 */
+	if (node != -1 && !node_online(node))
+		node = -1;
+
+	return node;
+}
+
+#else /* CONFIG_X86_32 */
+
+static unsigned char mp_bus_to_node[BUS_NR] = {
+	[0 ... BUS_NR - 1] = -1
+};
+
+void set_mp_bus_to_node(int busnum, int node)
+{
+	if (busnum >= 0 &&  busnum < BUS_NR)
+	mp_bus_to_node[busnum] = (unsigned char) node;
+}
+
+int get_mp_bus_to_node(int busnum)
+{
+	int node;
+
+	if (busnum < 0 || busnum > (BUS_NR - 1))
+		return 0;
+	node = mp_bus_to_node[busnum];
+	return node;
+}
+
+#endif /* CONFIG_X86_32 */
+
+#endif /* CONFIG_NUMA */
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index b3d20b9..8aa85f1 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -242,11 +242,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
 	fix_processor_context();
 
 	do_fpu_end();
-	mtrr_ap_init();
-
-#ifdef CONFIG_X86_OLD_MCE
-	mcheck_init(&boot_cpu_data);
-#endif
+	mtrr_bp_restore();
 }
 
 /* Needed by apm.c */
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 6a40b78..ee55754 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -86,14 +86,47 @@ notrace static noinline int do_monotonic(struct timespec *ts)
 	return 0;
 }
 
+notrace static noinline int do_realtime_coarse(struct timespec *ts)
+{
+	unsigned long seq;
+	do {
+		seq = read_seqbegin(&gtod->lock);
+		ts->tv_sec = gtod->wall_time_coarse.tv_sec;
+		ts->tv_nsec = gtod->wall_time_coarse.tv_nsec;
+	} while (unlikely(read_seqretry(&gtod->lock, seq)));
+	return 0;
+}
+
+notrace static noinline int do_monotonic_coarse(struct timespec *ts)
+{
+	unsigned long seq, ns, secs;
+	do {
+		seq = read_seqbegin(&gtod->lock);
+		secs = gtod->wall_time_coarse.tv_sec;
+		ns = gtod->wall_time_coarse.tv_nsec;
+		secs += gtod->wall_to_monotonic.tv_sec;
+		ns += gtod->wall_to_monotonic.tv_nsec;
+	} while (unlikely(read_seqretry(&gtod->lock, seq)));
+	vset_normalized_timespec(ts, secs, ns);
+	return 0;
+}
+
 notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
 {
-	if (likely(gtod->sysctl_enabled && gtod->clock.vread))
+	if (likely(gtod->sysctl_enabled))
 		switch (clock) {
 		case CLOCK_REALTIME:
-			return do_realtime(ts);
+			if (likely(gtod->clock.vread))
+				return do_realtime(ts);
+			break;
 		case CLOCK_MONOTONIC:
-			return do_monotonic(ts);
+			if (likely(gtod->clock.vread))
+				return do_monotonic(ts);
+			break;
+		case CLOCK_REALTIME_COARSE:
+			return do_realtime_coarse(ts);
+		case CLOCK_MONOTONIC_COARSE:
+			return do_monotonic_coarse(ts);
 		}
 	return vdso_fallback_gettime(clock, ts);
 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 0dd0c2c..544eb74 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -912,19 +912,9 @@ static const struct pv_info xen_info __initdata = {
 
 static const struct pv_init_ops xen_init_ops __initdata = {
 	.patch = xen_patch,
-
-	.banner = xen_banner,
-	.memory_setup = xen_memory_setup,
-	.arch_setup = xen_arch_setup,
-	.post_allocator_init = xen_post_allocator_init,
 };
 
 static const struct pv_time_ops xen_time_ops __initdata = {
-	.time_init = xen_time_init,
-
-	.set_wallclock = xen_set_wallclock,
-	.get_wallclock = xen_get_wallclock,
-	.get_tsc_khz = xen_tsc_khz,
 	.sched_clock = xen_sched_clock,
 };
 
@@ -990,8 +980,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
-	.setup_boot_clock = paravirt_nop,
-	.setup_secondary_clock = paravirt_nop,
 	.startup_ipi_hook = paravirt_nop,
 #endif
 };
@@ -1070,7 +1058,18 @@ asmlinkage void __init xen_start_kernel(void)
 	pv_time_ops = xen_time_ops;
 	pv_cpu_ops = xen_cpu_ops;
 	pv_apic_ops = xen_apic_ops;
-	pv_mmu_ops = xen_mmu_ops;
+
+	x86_init.resources.memory_setup = xen_memory_setup;
+	x86_init.oem.arch_setup = xen_arch_setup;
+	x86_init.oem.banner = xen_banner;
+
+	x86_init.timers.timer_init = xen_time_init;
+	x86_init.timers.setup_percpu_clockev = x86_init_noop;
+	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
+
+	x86_platform.calibrate_tsc = xen_tsc_khz;
+	x86_platform.get_wallclock = xen_get_wallclock;
+	x86_platform.set_wallclock = xen_set_wallclock;
 
 	/*
 	 * Set up some pagetable state before starting to set any ptes.
@@ -1095,6 +1094,7 @@ asmlinkage void __init xen_start_kernel(void)
 	 */
 	xen_setup_stackprotector();
 
+	xen_init_mmu_ops();
 	xen_init_irq_ops();
 	xen_init_cpuid_mask();
 
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index cfd1779..9d30105 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -1,5 +1,7 @@
 #include <linux/hardirq.h>
 
+#include <asm/x86_init.h>
+
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
 #include <xen/interface/vcpu.h>
@@ -112,8 +114,6 @@ static void xen_halt(void)
 }
 
 static const struct pv_irq_ops xen_irq_ops __initdata = {
-	.init_IRQ = xen_init_IRQ,
-
 	.save_fl = PV_CALLEE_SAVE(xen_save_fl),
 	.restore_fl = PV_CALLEE_SAVE(xen_restore_fl),
 	.irq_disable = PV_CALLEE_SAVE(xen_irq_disable),
@@ -129,4 +129,5 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
 void __init xen_init_irq_ops()
 {
 	pv_irq_ops = xen_irq_ops;
+	x86_init.irqs.intr_init = xen_init_IRQ;
 }
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4ceb285..093dd59 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1229,9 +1229,12 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
 {
 }
 
+static void xen_post_allocator_init(void);
+
 static __init void xen_pagetable_setup_done(pgd_t *base)
 {
 	xen_setup_shared_info();
+	xen_post_allocator_init();
 }
 
 static void xen_write_cr2(unsigned long cr2)
@@ -1841,7 +1844,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
 #endif
 }
 
-__init void xen_post_allocator_init(void)
+static __init void xen_post_allocator_init(void)
 {
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
@@ -1875,10 +1878,7 @@ static void xen_leave_lazy_mmu(void)
 	preempt_enable();
 }
 
-const struct pv_mmu_ops xen_mmu_ops __initdata = {
-	.pagetable_setup_start = xen_pagetable_setup_start,
-	.pagetable_setup_done = xen_pagetable_setup_done,
-
+static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.read_cr2 = xen_read_cr2,
 	.write_cr2 = xen_write_cr2,
 
@@ -1954,6 +1954,12 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.set_fixmap = xen_set_fixmap,
 };
 
+void __init xen_init_mmu_ops(void)
+{
+	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
+	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
+	pv_mmu_ops = xen_mmu_ops;
+}
 
 #ifdef CONFIG_XEN_DEBUG_FS
 
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index da73026..5fe6bc7 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -59,5 +59,5 @@ void  xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
 
 unsigned long xen_read_cr2_direct(void);
 
-extern const struct pv_mmu_ops xen_mmu_ops;
+extern void xen_init_mmu_ops(void);
 #endif	/* _XEN_MMU_H */
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 22494fd..355fa6b 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -30,8 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
 void xen_reserve_top(void);
 
-void xen_post_allocator_init(void);
-
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);