summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2009-12-01 15:16:22 +0800
committerHerbert Xu <herbert@gondor.apana.org.au>2009-12-01 15:16:22 +0800
commit838632438145ac6863377eb12d8b8eef9c55d288 (patch)
treefbb0757df837f3c75a99c518a3596c38daef162d /arch/x86
parent9996508b3353063f2d6c48c1a28a84543d72d70b (diff)
parent29e553631b2a0d4eebd23db630572e1027a9967a (diff)
downloadop-kernel-dev-838632438145ac6863377eb12d8b8eef9c55d288.zip
op-kernel-dev-838632438145ac6863377eb12d8b8eef9c55d288.tar.gz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig42
-rw-r--r--arch/x86/Kconfig.cpu3
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/boot/compressed/head_32.S3
-rw-r--r--arch/x86/boot/compressed/head_64.S3
-rw-r--r--arch/x86/boot/compressed/vmlinux.lds.S6
-rw-r--r--arch/x86/boot/install.sh4
-rw-r--r--arch/x86/boot/setup.ld3
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c10
-rw-r--r--arch/x86/ia32/ia32entry.S43
-rw-r--r--arch/x86/include/asm/acpi.h1
-rw-r--r--arch/x86/include/asm/amd_iommu.h1
-rw-r--r--arch/x86/include/asm/apic.h13
-rw-r--r--arch/x86/include/asm/cache.h4
-rw-r--r--arch/x86/include/asm/checksum_32.h3
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h30
-rw-r--r--arch/x86/include/asm/desc.h2
-rw-r--r--arch/x86/include/asm/dma-mapping.h10
-rw-r--r--arch/x86/include/asm/entry_arch.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mce.h2
-rw-r--r--arch/x86/include/asm/mmu_context.h6
-rw-r--r--arch/x86/include/asm/nmi.h3
-rw-r--r--arch/x86/include/asm/paravirt.h28
-rw-r--r--arch/x86/include/asm/paravirt_types.h10
-rw-r--r--arch/x86/include/asm/pci.h6
-rw-r--r--arch/x86/include/asm/perf_event.h (renamed from arch/x86/include/asm/perf_counter.h)30
-rw-r--r--arch/x86/include/asm/pgtable_types.h1
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/string_32.h1
-rw-r--r--arch/x86/include/asm/syscall.h14
-rw-r--r--arch/x86/include/asm/topology.h11
-rw-r--r--arch/x86/include/asm/uaccess_32.h2
-rw-r--r--arch/x86/include/asm/unistd_32.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h42
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/cstate.c2
-rw-r--r--arch/x86/kernel/acpi/processor.c3
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.lds.S3
-rw-r--r--arch/x86/kernel/amd_iommu.c2
-rw-r--r--arch/x86/kernel/amd_iommu_init.c24
-rw-r--r--arch/x86/kernel/apic/apic.c37
-rw-r--r--arch/x86/kernel/apic/io_apic.c11
-rw-r--r--arch/x86/kernel/apic/nmi.c4
-rw-r--r--arch/x86/kernel/apic/probe_64.c15
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c19
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c5
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c30
-rw-r--r--arch/x86/kernel/cpu/cpufreq/longhaul.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/powernow-k8.c2
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-ich.c19
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c87
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c3
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c67
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c29
-rw-r--r--arch/x86/kernel/cpu/perf_event.c (renamed from arch/x86/kernel/cpu/perf_counter.c)604
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
-rw-r--r--arch/x86/kernel/cpu/vmware.c6
-rw-r--r--arch/x86/kernel/cpuid.c4
-rw-r--r--arch/x86/kernel/crash_dump_32.c19
-rw-r--r--arch/x86/kernel/dumpstack_32.c1
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/e820.c6
-rw-r--r--arch/x86/kernel/early_printk.c788
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/entry_64.S24
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/head_64.S4
-rw-r--r--arch/x86/kernel/i386_ksyms_32.c10
-rw-r--r--arch/x86/kernel/init_task.c5
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/irqinit.c2
-rw-r--r--arch/x86/kernel/ldt.c4
-rw-r--r--arch/x86/kernel/microcode_amd.c6
-rw-r--r--arch/x86/kernel/microcode_core.c6
-rw-r--r--arch/x86/kernel/msr.c4
-rw-r--r--arch/x86/kernel/pci-dma.c10
-rw-r--r--arch/x86/kernel/pci-gart_64.c1
-rw-r--r--arch/x86/kernel/pci-swiotlb.c5
-rw-r--r--arch/x86/kernel/process.c31
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/ptrace.c21
-rw-r--r--arch/x86/kernel/reboot.c9
-rw-r--r--arch/x86/kernel/setup.c40
-rw-r--r--arch/x86/kernel/sfi.c122
-rw-r--r--arch/x86/kernel/smpboot.c9
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/kernel/time.c4
-rw-r--r--arch/x86/kernel/tlb_uv.c9
-rw-r--r--arch/x86/kernel/trampoline.c12
-rw-r--r--arch/x86/kernel/trampoline_32.S8
-rw-r--r--arch/x86/kernel/trampoline_64.S7
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/tsc_sync.c2
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S86
-rw-r--r--arch/x86/kernel/vsyscall_64.c10
-rw-r--r--arch/x86/kvm/i8254.c2
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/mmu.c86
-rw-r--r--arch/x86/kvm/paging_tmpl.h18
-rw-r--r--arch/x86/kvm/svm.c25
-rw-r--r--arch/x86/kvm/vmx.c2
-rw-r--r--arch/x86/kvm/x86.c6
-rw-r--r--arch/x86/lguest/boot.c10
-rw-r--r--arch/x86/lib/Makefile4
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S57
-rw-r--r--arch/x86/mm/Makefile3
-rw-r--r--arch/x86/mm/fault.c27
-rw-r--r--arch/x86/mm/init.c63
-rw-r--r--arch/x86/mm/init_32.c12
-rw-r--r--arch/x86/mm/init_64.c12
-rw-r--r--arch/x86/mm/ioremap.c24
-rw-r--r--arch/x86/mm/kmemcheck/kmemcheck.c3
-rw-r--r--arch/x86/mm/kmemcheck/shadow.c1
-rw-r--r--arch/x86/mm/pageattr.c1
-rw-r--r--arch/x86/mm/pat.c7
-rw-r--r--arch/x86/mm/setup_nx.c69
-rw-r--r--arch/x86/mm/tlb.c15
-rw-r--r--arch/x86/oprofile/op_model_ppro.c4
-rw-r--r--arch/x86/oprofile/op_x86_model.h2
-rw-r--r--arch/x86/pci/common.c2
-rw-r--r--arch/x86/pci/i386.c2
-rw-r--r--arch/x86/pci/mmconfig-shared.c8
-rw-r--r--arch/x86/pci/mmconfig_32.c2
-rw-r--r--arch/x86/power/cpu.c4
-rw-r--r--arch/x86/vdso/Makefile2
-rw-r--r--arch/x86/xen/debugfs.c2
-rw-r--r--arch/x86/xen/enlighten.c24
-rw-r--r--arch/x86/xen/mmu.c4
138 files changed, 1434 insertions, 1719 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 51c5901..72ace95 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
- select HAVE_PERF_COUNTERS if (!M386 && !M486)
+ select HAVE_PERF_EVENTS if (!M386 && !M486)
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -86,10 +86,6 @@ config STACKTRACE_SUPPORT
config HAVE_LATENCYTOP_SUPPORT
def_bool y
-config FAST_CMPXCHG_LOCAL
- bool
- default y
-
config MMU
def_bool y
@@ -432,6 +428,17 @@ config X86_NUMAQ
of Flat Logical. You will need a new lynxer.elf file to flash your
firmware with - send email to <Martin.Bligh@us.ibm.com>.
+config X86_SUPPORTS_MEMORY_FAILURE
+ bool
+ # MCE code calls memory_failure():
+ depends on X86_MCE
+ # On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
+ depends on !X86_NUMAQ
+ # On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
+ depends on X86_64 || !SPARSEMEM
+ select ARCH_SUPPORTS_MEMORY_FAILURE
+ default y
+
config X86_VISWS
bool "SGI 320/540 (Visual Workstation)"
depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
@@ -484,7 +491,7 @@ if PARAVIRT_GUEST
source "arch/x86/xen/Kconfig"
config VMI
- bool "VMI Guest support"
+ bool "VMI Guest support (DEPRECATED)"
select PARAVIRT
depends on X86_32
---help---
@@ -493,6 +500,15 @@ config VMI
at the moment), by linking the kernel to a GPL-ed ROM module
provided by the hypervisor.
+ As of September 2009, VMware has started a phased retirement
+ of this feature from VMware's products. Please see
+ feature-removal-schedule.txt for details. If you are
+ planning to enable this option, please note that you cannot
+ live migrate a VMI enabled VM to a future VMware product,
+ which doesn't support VMI. So if you expect your kernel to
+ seamlessly migrate to newer VMware products, keep this
+ disabled.
+
config KVM_CLOCK
bool "KVM paravirtualized clock"
select PARAVIRT
@@ -1204,6 +1220,10 @@ config ARCH_DISCONTIGMEM_DEFAULT
def_bool y
depends on NUMA && X86_32
+config ARCH_PROC_KCORE_TEXT
+ def_bool y
+ depends on X86_64 && PROC_KCORE
+
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on X86_64
@@ -1423,12 +1443,8 @@ config SECCOMP
If unsure, say Y. Only embedded should say N here.
-config CC_STACKPROTECTOR_ALL
- bool
-
config CC_STACKPROTECTOR
bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
- select CC_STACKPROTECTOR_ALL
---help---
This option turns on the -fstack-protector GCC feature. This
feature puts, at the beginning of functions, a canary value on
@@ -1662,6 +1678,8 @@ source "kernel/power/Kconfig"
source "drivers/acpi/Kconfig"
+source "drivers/sfi/Kconfig"
+
config X86_APM_BOOT
bool
default y
@@ -1857,7 +1875,7 @@ config PCI_DIRECT
config PCI_MMCONFIG
def_bool y
- depends on X86_32 && PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY)
+ depends on X86_32 && PCI && (ACPI || SFI) && (PCI_GOMMCONFIG || PCI_GOANY)
config PCI_OLPC
def_bool y
@@ -1895,7 +1913,7 @@ config DMAR_DEFAULT_ON
config DMAR_BROKEN_GFX_WA
def_bool n
prompt "Workaround broken graphics drivers (going away soon)"
- depends on DMAR
+ depends on DMAR && BROKEN
---help---
Current Graphics drivers tend to use physical address
for DMA and avoid using DMA APIs. Setting this config
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 527519b..2649840 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -400,7 +400,7 @@ config X86_TSC
config X86_CMPXCHG64
def_bool y
- depends on X86_PAE || X86_64
+ depends on !M386 && !M486
# this should be set for all -march=.. options where the compiler
# generates cmov.
@@ -412,6 +412,7 @@ config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
default "6" if X86_32 && X86_P6_NOP
+ default "5" if X86_32 && X86_CMPXCHG64
default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
default "3"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 7983c42..d2d24c9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -76,7 +76,6 @@ ifdef CONFIG_CC_STACKPROTECTOR
cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y)
stackp-y := -fstack-protector
- stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all
KBUILD_CFLAGS += $(stackp-y)
else
$(warning stack protector enabled but no compiler support)
@@ -179,8 +178,8 @@ archclean:
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
echo ' install - Install kernel using'
- echo ' (your) ~/bin/installkernel or'
- echo ' (distribution) /sbin/installkernel or'
+ echo ' (your) ~/bin/$(INSTALLKERNEL) or'
+ echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH) and run lilo'
echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)'
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 75e4f00..f543b70 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -23,13 +23,14 @@
*/
.text
+#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/asm-offsets.h>
- .section ".text.head","ax",@progbits
+ __HEAD
ENTRY(startup_32)
cld
/*
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index f62c284..077e1b6 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -24,6 +24,7 @@
.code32
.text
+#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/segment.h>
#include <asm/pgtable_types.h>
@@ -33,7 +34,7 @@
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
- .section ".text.head"
+ __HEAD
.code32
ENTRY(startup_32)
cld
diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S
index cc353e1..f4193bb 100644
--- a/arch/x86/boot/compressed/vmlinux.lds.S
+++ b/arch/x86/boot/compressed/vmlinux.lds.S
@@ -1,3 +1,5 @@
+#include <asm-generic/vmlinux.lds.h>
+
OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
#undef i386
@@ -18,9 +20,9 @@ SECTIONS
* address 0.
*/
. = 0;
- .text.head : {
+ .head.text : {
_head = . ;
- *(.text.head)
+ HEAD_TEXT
_ehead = . ;
}
.rodata.compressed : {
diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh
index 8d60ee1..d13ec1c 100644
--- a/arch/x86/boot/install.sh
+++ b/arch/x86/boot/install.sh
@@ -33,8 +33,8 @@ verify "$3"
# User may have a custom install script
-if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi
-if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi
+if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi
+if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi
# Default install - same as make zlilo
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 0f6ec455..03c0683 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -53,6 +53,9 @@ SECTIONS
/DISCARD/ : { *(.note*) }
+ /*
+ * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+ */
. = ASSERT(_end <= 0x8000, "Setup too big!");
. = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
/* Necessary for the very-old-loader check to work... */
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 585edeb..49c552c 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -82,7 +82,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
return -EINVAL;
}
- if (irq_fpu_usable())
+ if (!irq_fpu_usable())
err = crypto_aes_expand_key(ctx, in_key, key_len);
else {
kernel_fpu_begin();
@@ -103,7 +103,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
- if (irq_fpu_usable())
+ if (!irq_fpu_usable())
crypto_aes_encrypt_x86(ctx, dst, src);
else {
kernel_fpu_begin();
@@ -116,7 +116,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
- if (irq_fpu_usable())
+ if (!irq_fpu_usable())
crypto_aes_decrypt_x86(ctx, dst, src);
else {
kernel_fpu_begin();
@@ -342,7 +342,7 @@ static int ablk_encrypt(struct ablkcipher_request *req)
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- if (irq_fpu_usable()) {
+ if (!irq_fpu_usable()) {
struct ablkcipher_request *cryptd_req =
ablkcipher_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
@@ -363,7 +363,7 @@ static int ablk_decrypt(struct ablkcipher_request *req)
struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
- if (irq_fpu_usable()) {
+ if (!irq_fpu_usable()) {
struct ablkcipher_request *cryptd_req =
ablkcipher_request_ctx(req);
memcpy(cryptd_req, req, sizeof(*req));
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ba331bf..581b056 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -21,8 +21,8 @@
#define __AUDIT_ARCH_LE 0x40000000
#ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
+#define sysexit_audit ia32_ret_from_sys_call
+#define sysretl_audit ia32_ret_from_sys_call
#endif
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
@@ -39,12 +39,12 @@
.endm
/* clobbers %eax */
- .macro CLEAR_RREGS _r9=rax
+ .macro CLEAR_RREGS offset=0, _r9=rax
xorl %eax,%eax
- movq %rax,R11(%rsp)
- movq %rax,R10(%rsp)
- movq %\_r9,R9(%rsp)
- movq %rax,R8(%rsp)
+ movq %rax,\offset+R11(%rsp)
+ movq %rax,\offset+R10(%rsp)
+ movq %\_r9,\offset+R9(%rsp)
+ movq %rax,\offset+R8(%rsp)
.endm
/*
@@ -172,6 +172,10 @@ sysexit_from_sys_call:
movl RIP-R11(%rsp),%edx /* User %eip */
CFI_REGISTER rip,rdx
RESTORE_ARGS 1,24,1,1,1,1
+ xorq %r8,%r8
+ xorq %r9,%r9
+ xorq %r10,%r10
+ xorq %r11,%r11
popfq
CFI_ADJUST_CFA_OFFSET -8
/*CFI_RESTORE rflags*/
@@ -200,9 +204,9 @@ sysexit_from_sys_call:
movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
.endm
- .macro auditsys_exit exit,ebpsave=RBP
+ .macro auditsys_exit exit
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
- jnz int_ret_from_sys_call
+ jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
sti
movl %eax,%esi /* second arg, syscall return value */
@@ -213,13 +217,13 @@ sysexit_from_sys_call:
call audit_syscall_exit
GET_THREAD_INFO(%r10)
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */
- movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
cli
TRACE_IRQS_OFF
testl %edi,TI_flags(%r10)
- jnz int_with_check
- jmp \exit
+ jz \exit
+ CLEAR_RREGS -ARGOFFSET
+ jmp int_with_check
.endm
sysenter_auditsys:
@@ -329,6 +333,9 @@ sysretl_from_sys_call:
CFI_REGISTER rip,rcx
movl EFLAGS-ARGOFFSET(%rsp),%r11d
/*CFI_REGISTER rflags,r11*/
+ xorq %r10,%r10
+ xorq %r9,%r9
+ xorq %r8,%r8
TRACE_IRQS_ON
movl RSP-ARGOFFSET(%rsp),%esp
CFI_RESTORE rsp
@@ -343,7 +350,7 @@ cstar_auditsys:
jmp cstar_dispatch
sysretl_audit:
- auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
+ auditsys_exit sysretl_from_sys_call
#endif
cstar_tracesys:
@@ -353,7 +360,7 @@ cstar_tracesys:
#endif
xchgl %r9d,%ebp
SAVE_REST
- CLEAR_RREGS r9
+ CLEAR_RREGS 0, r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
@@ -425,6 +432,8 @@ ia32_do_call:
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp)
+ia32_ret_from_sys_call:
+ CLEAR_RREGS -ARGOFFSET
jmp int_ret_from_sys_call
ia32_tracesys:
@@ -442,8 +451,8 @@ END(ia32_syscall)
ia32_badsys:
movq $0,ORIG_RAX-ARGOFFSET(%rsp)
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp int_ret_from_sys_call
+ movq $-ENOSYS,%rax
+ jmp ia32_sysret
quiet_ni_syscall:
movq $-ENOSYS,%rax
@@ -831,5 +840,5 @@ ia32_sys_call_table:
.quad compat_sys_preadv
.quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
- .quad sys_perf_counter_open
+ .quad sys_perf_event_open
ia32_syscall_end:
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 20d1465..4518dc5 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -144,7 +144,6 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
#else /* !CONFIG_ACPI */
-#define acpi_disabled 1
#define acpi_lapic 0
#define acpi_ioapic 0
static inline void acpi_noirq_set(void) { }
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h
index ac95995..4b18089 100644
--- a/arch/x86/include/asm/amd_iommu.h
+++ b/arch/x86/include/asm/amd_iommu.h
@@ -31,6 +31,7 @@ extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
extern void amd_iommu_flush_all_domains(void);
extern void amd_iommu_flush_all_devices(void);
extern void amd_iommu_shutdown(void);
+extern void amd_iommu_apply_erratum_63(u16 devid);
#else
static inline int amd_iommu_init(void) { return -ENODEV; }
static inline void amd_iommu_detect(void) { }
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index c6d21b1..474d80d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -66,6 +66,19 @@ static inline void default_inquire_remote_apic(int apicid)
}
/*
+ * With 82489DX we can't rely on apic feature bit
+ * retrieved via cpuid but still have to deal with
+ * such an apic chip so we assume that SMP configuration
+ * is found from MP table (64bit case uses ACPI mostly
+ * which set smp presence flag as well so we are safe
+ * to use this helper too).
+ */
+static inline bool apic_from_smp_config(void)
+{
+ return smp_found_config && !disable_apic;
+}
+
+/*
* Basic functions accessing APICs.
*/
#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/include/asm/cache.h b/arch/x86/include/asm/cache.h
index 5d367ca..549860d 100644
--- a/arch/x86/include/asm/cache.h
+++ b/arch/x86/include/asm/cache.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_CACHE_H
#define _ASM_X86_CACHE_H
+#include <linux/linkage.h>
+
/* L1 cache line size */
#define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT)
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
@@ -13,7 +15,7 @@
#ifdef CONFIG_SMP
#define __cacheline_aligned_in_smp \
__attribute__((__aligned__(1 << (INTERNODE_CACHE_SHIFT)))) \
- __attribute__((__section__(".data.page_aligned")))
+ __page_aligned_data
#endif
#endif
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 7c5ef8b..46fc474 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -161,7 +161,8 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
"adcl $0, %0 ;\n"
: "=&r" (sum)
: "r" (saddr), "r" (daddr),
- "r" (htonl(len)), "r" (htonl(proto)), "0" (sum));
+ "r" (htonl(len)), "r" (htonl(proto)), "0" (sum)
+ : "memory");
return csum_fold(sum);
}
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 82ceb78..ee1931b 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -312,19 +312,23 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64);
-#define cmpxchg64(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- if (likely(boot_cpu_data.x86 > 4)) \
- __ret = (__typeof__(*(ptr)))__cmpxchg64((ptr), \
- (unsigned long long)(o), \
- (unsigned long long)(n)); \
- else \
- __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \
- (unsigned long long)(o), \
- (unsigned long long)(n)); \
- __ret; \
-})
+#define cmpxchg64(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __typeof__(*(ptr)) __old = (o); \
+ __typeof__(*(ptr)) __new = (n); \
+ alternative_io("call cmpxchg8b_emu", \
+ "lock; cmpxchg8b (%%esi)" , \
+ X86_FEATURE_CX8, \
+ "=A" (__ret), \
+ "S" ((ptr)), "0" (__old), \
+ "b" ((unsigned int)__new), \
+ "c" ((unsigned int)(__new>>32)) \
+ : "memory"); \
+ __ret; })
+
+
+
#define cmpxchg64_local(ptr, o, n) \
({ \
__typeof__(*(ptr)) __ret; \
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index e8de2f6..617bd56 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -288,7 +288,7 @@ static inline void load_LDT(mm_context_t *pc)
static inline unsigned long get_desc_base(const struct desc_struct *desc)
{
- return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
+ return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
}
static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 0ee770d..6a25d5d 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -14,6 +14,12 @@
#include <asm/swiotlb.h>
#include <asm-generic/dma-coherent.h>
+#ifdef CONFIG_ISA
+# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
+#else
+# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32)
+#endif
+
extern dma_addr_t bad_dma_address;
extern int iommu_merge;
extern struct device x86_dma_fallback_dev;
@@ -124,10 +130,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
return memory;
- if (!dev) {
+ if (!dev)
dev = &x86_dma_fallback_dev;
- gfp |= GFP_DMA;
- }
if (!is_device_dma_capable(dev))
return NULL;
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h
index 5e3f204..f5693c8 100644
--- a/arch/x86/include/asm/entry_arch.h
+++ b/arch/x86/include/asm/entry_arch.h
@@ -49,7 +49,7 @@ BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR)
BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR)
BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR)
#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3be0004..d838922 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -796,6 +796,7 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index b608a64..f1363b7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
static inline void enable_p5_mce(void) {}
#endif
+extern void (*x86_mce_decode_callback)(struct mce *m);
+
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, mce_dev);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index f923203..4a2d4e0 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -37,12 +37,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (likely(prev != next)) {
/* stop flush ipis for the previous mm */
- cpu_clear(cpu, prev->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
#ifdef CONFIG_SMP
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
/* Re-load page tables */
load_cr3(next->pgd);
@@ -58,7 +58,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index e63cf7d..139d4c1 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -40,8 +40,7 @@ extern unsigned int nmi_watchdog;
#define NMI_INVALID 3
struct ctl_table;
-struct file;
-extern int proc_nmi_enabled(struct ctl_table *, int , struct file *,
+extern int proc_nmi_enabled(struct ctl_table *, int ,
void __user *, size_t *, loff_t *);
extern int unknown_nmi_panic;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 8aebcc4..efb3899 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -840,42 +840,22 @@ static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock)
static inline unsigned long __raw_local_save_flags(void)
{
- unsigned long f;
-
- asm volatile(paravirt_alt(PARAVIRT_CALL)
- : "=a"(f)
- : paravirt_type(pv_irq_ops.save_fl),
- paravirt_clobber(CLBR_EAX)
- : "memory", "cc");
- return f;
+ return PVOP_CALLEE0(unsigned long, pv_irq_ops.save_fl);
}
static inline void raw_local_irq_restore(unsigned long f)
{
- asm volatile(paravirt_alt(PARAVIRT_CALL)
- : "=a"(f)
- : PV_FLAGS_ARG(f),
- paravirt_type(pv_irq_ops.restore_fl),
- paravirt_clobber(CLBR_EAX)
- : "memory", "cc");
+ PVOP_VCALLEE1(pv_irq_ops.restore_fl, f);
}
static inline void raw_local_irq_disable(void)
{
- asm volatile(paravirt_alt(PARAVIRT_CALL)
- :
- : paravirt_type(pv_irq_ops.irq_disable),
- paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc");
+ PVOP_VCALLEE0(pv_irq_ops.irq_disable);
}
static inline void raw_local_irq_enable(void)
{
- asm volatile(paravirt_alt(PARAVIRT_CALL)
- :
- : paravirt_type(pv_irq_ops.irq_enable),
- paravirt_clobber(CLBR_EAX)
- : "memory", "eax", "cc");
+ PVOP_VCALLEE0(pv_irq_ops.irq_enable);
}
static inline unsigned long __raw_local_irq_save(void)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index dd0f5b3..9357473 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -494,10 +494,11 @@ int paravirt_disable_iospace(void);
#define EXTRA_CLOBBERS
#define VEXTRA_CLOBBERS
#else /* CONFIG_X86_64 */
+/* [re]ax isn't an arg, but the return val */
#define PVOP_VCALL_ARGS \
unsigned long __edi = __edi, __esi = __esi, \
- __edx = __edx, __ecx = __ecx
-#define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax
+ __edx = __edx, __ecx = __ecx, __eax = __eax
+#define PVOP_CALL_ARGS PVOP_VCALL_ARGS
#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x))
#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x))
@@ -509,6 +510,7 @@ int paravirt_disable_iospace(void);
"=c" (__ecx)
#define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax)
+/* void functions are still allowed [re]ax for scratch */
#define PVOP_VCALLEE_CLOBBERS "=a" (__eax)
#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS
@@ -583,8 +585,8 @@ int paravirt_disable_iospace(void);
VEXTRA_CLOBBERS, \
pre, post, ##__VA_ARGS__)
-#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \
- ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \
+#define __PVOP_VCALLEESAVE(op, pre, post, ...) \
+ ____PVOP_VCALL(op.func, CLBR_RET_REG, \
PVOP_VCALLEE_CLOBBERS, , \
pre, post, ##__VA_ARGS__)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index f76a162..ada8c20 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -143,7 +143,11 @@ static inline int __pcibus_to_node(const struct pci_bus *bus)
static inline const struct cpumask *
cpumask_of_pcibus(const struct pci_bus *bus)
{
- return cpumask_of_node(__pcibus_to_node(bus));
+ int node;
+
+ node = __pcibus_to_node(bus);
+ return (node == -1) ? cpu_online_mask :
+ cpumask_of_node(node);
}
#endif
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_event.h
index e7b7c93..ad7ce3f 100644
--- a/arch/x86/include/asm/perf_counter.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -1,8 +1,8 @@
-#ifndef _ASM_X86_PERF_COUNTER_H
-#define _ASM_X86_PERF_COUNTER_H
+#ifndef _ASM_X86_PERF_EVENT_H
+#define _ASM_X86_PERF_EVENT_H
/*
- * Performance counter hw details:
+ * Performance event hw details:
*/
#define X86_PMC_MAX_GENERIC 8
@@ -43,7 +43,7 @@
union cpuid10_eax {
struct {
unsigned int version_id:8;
- unsigned int num_counters:8;
+ unsigned int num_events:8;
unsigned int bit_width:8;
unsigned int mask_length:8;
} split;
@@ -52,7 +52,7 @@ union cpuid10_eax {
union cpuid10_edx {
struct {
- unsigned int num_counters_fixed:4;
+ unsigned int num_events_fixed:4;
unsigned int reserved:28;
} split;
unsigned int full;
@@ -60,7 +60,7 @@ union cpuid10_edx {
/*
- * Fixed-purpose performance counters:
+ * Fixed-purpose performance events:
*/
/*
@@ -87,22 +87,22 @@ union cpuid10_edx {
/*
* We model BTS tracing as another fixed-mode PMC.
*
- * We choose a value in the middle of the fixed counter range, since lower
- * values are used by actual fixed counters and higher values are used
+ * We choose a value in the middle of the fixed event range, since lower
+ * values are used by actual fixed events and higher values are used
* to indicate other overflow conditions in the PERF_GLOBAL_STATUS msr.
*/
#define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16)
-#ifdef CONFIG_PERF_COUNTERS
-extern void init_hw_perf_counters(void);
-extern void perf_counters_lapic_init(void);
+#ifdef CONFIG_PERF_EVENTS
+extern void init_hw_perf_events(void);
+extern void perf_events_lapic_init(void);
-#define PERF_COUNTER_INDEX_OFFSET 0
+#define PERF_EVENT_INDEX_OFFSET 0
#else
-static inline void init_hw_perf_counters(void) { }
-static inline void perf_counters_lapic_init(void) { }
+static inline void init_hw_perf_events(void) { }
+static inline void perf_events_lapic_init(void) { }
#endif
-#endif /* _ASM_X86_PERF_COUNTER_H */
+#endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 7b467bf..d1f4a76 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -277,6 +277,7 @@ static inline pteval_t pte_flags(pte_t pte)
typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
+extern void set_nx(void);
extern int nx_enabled;
#define pgprot_writecombine pgprot_writecombine
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c3429e8..c978648 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -1000,7 +1000,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define thread_saved_pc(t) (*(unsigned long *)((t)->thread.sp - 8))
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
-#define KSTK_ESP(tsk) -1 /* sorry. doesn't work for syscall. */
+extern unsigned long KSTK_ESP(struct task_struct *task);
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 6a84ed1..1e79678 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -121,7 +121,6 @@ static inline void arch_send_call_function_single_ipi(int cpu)
smp_ops.send_call_func_single_ipi(cpu);
}
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
smp_ops.send_call_func_ipi(mask);
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h
index c86f452..ae907e6 100644
--- a/arch/x86/include/asm/string_32.h
+++ b/arch/x86/include/asm/string_32.h
@@ -65,7 +65,6 @@ static __always_inline void *__constant_memcpy(void *to, const void *from,
case 4:
*(int *)to = *(int *)from;
return to;
-
case 3:
*(short *)to = *(short *)from;
*((char *)to + 2) = *((char *)from + 2);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d82f39b..8d33bc5 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -1,7 +1,7 @@
/*
* Access to user system call parameters and results
*
- * Copyright (C) 2008 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
*
* This copyrighted material is made available to anyone wishing to use,
* modify, copy, or redistribute it subject to the terms and conditions
@@ -16,13 +16,13 @@
#include <linux/sched.h>
#include <linux/err.h>
-static inline long syscall_get_nr(struct task_struct *task,
- struct pt_regs *regs)
+/*
+ * Only the low 32 bits of orig_ax are meaningful, so we return int.
+ * This importantly ignores the high bits on 64-bit, so comparisons
+ * sign-extend the low 32 bits.
+ */
+static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
- /*
- * We always sign-extend a -1 value being set here,
- * so this is always either -1L or a syscall number.
- */
return regs->orig_ax;
}
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 6f0695d..40e37b10 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -143,6 +143,7 @@ extern unsigned long node_remap_size[];
| 1*SD_BALANCE_FORK \
| 0*SD_BALANCE_WAKE \
| 1*SD_WAKE_AFFINE \
+ | 0*SD_PREFER_LOCAL \
| 0*SD_SHARE_CPUPOWER \
| 0*SD_POWERSAVINGS_BALANCE \
| 0*SD_SHARE_PKG_RESOURCES \
@@ -165,21 +166,11 @@ static inline int numa_node_id(void)
return 0;
}
-static inline int cpu_to_node(int cpu)
-{
- return 0;
-}
-
static inline int early_cpu_to_node(int cpu)
{
return 0;
}
-static inline const struct cpumask *cpumask_of_node(int node)
-{
- return cpu_online_mask;
-}
-
static inline void setup_node_to_cpumask_map(void) { }
#endif
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h
index 5e06259..632fb44 100644
--- a/arch/x86/include/asm/uaccess_32.h
+++ b/arch/x86/include/asm/uaccess_32.h
@@ -33,7 +33,7 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero
* Copy data from kernel space to user space. Caller must check
* the specified block with access_ok() before calling this function.
* The caller should also make sure he pins the user space address
- * so that the we don't result in page fault and sleep.
+ * so that we don't result in page fault and sleep.
*
* Here we special-case 1, 2 and 4-byte copy_*_user invocations. On a fault
* we return the initial request size (1, 2 or 4), as copy_*_user should do.
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
index 8deaada..6fb3c20 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
@@ -341,7 +341,7 @@
#define __NR_preadv 333
#define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335
-#define __NR_perf_counter_open 336
+#define __NR_perf_event_open 336
#ifdef __KERNEL__
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index b9f3c60..8d3ad0a 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -659,8 +659,8 @@ __SYSCALL(__NR_preadv, sys_preadv)
__SYSCALL(__NR_pwritev, sys_pwritev)
#define __NR_rt_tgsigqueueinfo 297
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
-#define __NR_perf_counter_open 298
-__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open)
+#define __NR_perf_event_open 298
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 77a6850..d1414af 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -15,9 +15,12 @@
#include <linux/numa.h>
#include <linux/percpu.h>
#include <linux/timer.h>
+#include <linux/io.h>
#include <asm/types.h>
#include <asm/percpu.h>
#include <asm/uv/uv_mmrs.h>
+#include <asm/irq_vectors.h>
+#include <asm/io_apic.h>
/*
@@ -113,7 +116,7 @@
/*
* The largest possible NASID of a C or M brick (+ 2)
*/
-#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2)
+#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_BLADES * 2)
struct uv_scir_s {
struct timer_list timer;
@@ -229,6 +232,20 @@ static inline unsigned long uv_gpa(void *v)
return uv_soc_phys_ram_to_gpa(__pa(v));
}
+/* gnode -> pnode */
+static inline unsigned long uv_gpa_to_gnode(unsigned long gpa)
+{
+ return gpa >> uv_hub_info->m_val;
+}
+
+/* gpa -> pnode */
+static inline int uv_gpa_to_pnode(unsigned long gpa)
+{
+ unsigned long n_mask = (1UL << uv_hub_info->n_val) - 1;
+
+ return uv_gpa_to_gnode(gpa) & n_mask;
+}
+
/* pnode, offset --> socket virtual */
static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
{
@@ -258,13 +275,13 @@ static inline unsigned long *uv_global_mmr32_address(int pnode,
static inline void uv_write_global_mmr32(int pnode, unsigned long offset,
unsigned long val)
{
- *uv_global_mmr32_address(pnode, offset) = val;
+ writeq(val, uv_global_mmr32_address(pnode, offset));
}
static inline unsigned long uv_read_global_mmr32(int pnode,
unsigned long offset)
{
- return *uv_global_mmr32_address(pnode, offset);
+ return readq(uv_global_mmr32_address(pnode, offset));
}
/*
@@ -281,13 +298,13 @@ static inline unsigned long *uv_global_mmr64_address(int pnode,
static inline void uv_write_global_mmr64(int pnode, unsigned long offset,
unsigned long val)
{
- *uv_global_mmr64_address(pnode, offset) = val;
+ writeq(val, uv_global_mmr64_address(pnode, offset));
}
static inline unsigned long uv_read_global_mmr64(int pnode,
unsigned long offset)
{
- return *uv_global_mmr64_address(pnode, offset);
+ return readq(uv_global_mmr64_address(pnode, offset));
}
/*
@@ -301,22 +318,22 @@ static inline unsigned long *uv_local_mmr_address(unsigned long offset)
static inline unsigned long uv_read_local_mmr(unsigned long offset)
{
- return *uv_local_mmr_address(offset);
+ return readq(uv_local_mmr_address(offset));
}
static inline void uv_write_local_mmr(unsigned long offset, unsigned long val)
{
- *uv_local_mmr_address(offset) = val;
+ writeq(val, uv_local_mmr_address(offset));
}
static inline unsigned char uv_read_local_mmr8(unsigned long offset)
{
- return *((unsigned char *)uv_local_mmr_address(offset));
+ return readb(uv_local_mmr_address(offset));
}
static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val)
{
- *((unsigned char *)uv_local_mmr_address(offset)) = val;
+ writeb(val, uv_local_mmr_address(offset));
}
/*
@@ -420,9 +437,14 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value)
static inline void uv_hub_send_ipi(int pnode, int apicid, int vector)
{
unsigned long val;
+ unsigned long dmode = dest_Fixed;
+
+ if (vector == NMI_VECTOR)
+ dmode = dest_NMI;
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
- ((apicid & 0x3f) << UVH_IPI_INT_APIC_ID_SHFT) |
+ ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) |
+ (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) |
(vector << UVH_IPI_INT_VECTOR_SHFT);
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4ba419b..d8e5d0cd 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_INTEL_TXT) += tboot.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-y += cpu/
obj-y += acpi/
+obj-$(CONFIG_SFI) += sfi.o
obj-y += reboot.o
obj-$(CONFIG_MCA) += mca_32.o
obj-$(CONFIG_X86_MSR) += msr.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 8c44c23..59cdfa4 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
* P4, Core and beyond CPUs
*/
if (c->x86_vendor == X86_VENDOR_INTEL &&
- (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)))
+ (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14)))
flags->bm_control = 0;
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index d296f4a..d85d1b2 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -79,7 +79,8 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr)
struct cpuinfo_x86 *c = &cpu_data(pr->id);
pr->pdc = NULL;
- if (c->x86_vendor == X86_VENDOR_INTEL)
+ if (c->x86_vendor == X86_VENDOR_INTEL ||
+ c->x86_vendor == X86_VENDOR_CENTAUR)
init_intel_pdc(pr, c);
return;
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.lds.S b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
index 7da00b7..060fff8 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.lds.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.lds.S
@@ -57,5 +57,8 @@ SECTIONS
*(.note*)
}
+ /*
+ * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+ */
. = ASSERT(_end <= WAKEUP_SIZE, "Wakeup too big!");
}
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 98f230f..0285521 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -1220,6 +1220,8 @@ static void __detach_device(struct protection_domain *domain, u16 devid)
amd_iommu_dev_table[devid].data[1] = 0;
amd_iommu_dev_table[devid].data[2] = 0;
+ amd_iommu_apply_erratum_63(devid);
+
/* decrease reference counter */
domain->dev_cnt -= 1;
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index b4b61d4..c20001e 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -240,7 +240,7 @@ static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET);
}
-static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
{
u32 ctrl;
@@ -519,6 +519,26 @@ static void set_dev_entry_bit(u16 devid, u8 bit)
amd_iommu_dev_table[devid].data[i] |= (1 << _bit);
}
+static int get_dev_entry_bit(u16 devid, u8 bit)
+{
+ int i = (bit >> 5) & 0x07;
+ int _bit = bit & 0x1f;
+
+ return (amd_iommu_dev_table[devid].data[i] & (1 << _bit)) >> _bit;
+}
+
+
+void amd_iommu_apply_erratum_63(u16 devid)
+{
+ int sysmgt;
+
+ sysmgt = get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT1) |
+ (get_dev_entry_bit(devid, DEV_ENTRY_SYSMGT2) << 1);
+
+ if (sysmgt == 0x01)
+ set_dev_entry_bit(devid, DEV_ENTRY_IW);
+}
+
/* Writes the specific IOMMU for a device into the rlookup table */
static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid)
{
@@ -547,6 +567,8 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu,
if (flags & ACPI_DEVFLAG_LINT1)
set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS);
+ amd_iommu_apply_erratum_63(devid);
+
set_iommu_for_device(iommu, devid);
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index a34601f..894aa97 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -14,7 +14,7 @@
* Mikael Pettersson : PM converted to driver model.
*/
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/acpi_pmtmr.h>
@@ -35,7 +35,7 @@
#include <linux/smp.h>
#include <linux/mm.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/atomic.h>
@@ -62,7 +62,7 @@ unsigned int boot_cpu_physical_apicid = -1U;
/*
* The highest APIC ID seen during enumeration.
*
- * This determines the messaging protocol we can use: if all APIC IDs
+ * On AMD, this determines the messaging protocol we can use: if all APIC IDs
* are in the 0 ... 7 range, then we can use logical addressing which
* has some performance advantages (better broadcasting).
*
@@ -979,7 +979,7 @@ void lapic_shutdown(void)
{
unsigned long flags;
- if (!cpu_has_apic)
+ if (!cpu_has_apic && !apic_from_smp_config())
return;
local_irq_save(flags);
@@ -1189,7 +1189,7 @@ void __cpuinit setup_local_APIC(void)
apic_write(APIC_ESR, 0);
}
#endif
- perf_counters_lapic_init();
+ perf_events_lapic_init();
preempt_disable();
@@ -1197,8 +1197,7 @@ void __cpuinit setup_local_APIC(void)
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
*/
- if (!apic->apic_id_registered())
- BUG();
+ BUG_ON(!apic->apic_id_registered());
/*
* Intel recommends to set DFR, LDR and TPR before enabling
@@ -1917,24 +1916,14 @@ void __cpuinit generic_processor_info(int apicid, int version)
max_physical_apicid = apicid;
#ifdef CONFIG_X86_32
- /*
- * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
- * but we need to work other dependencies like SMP_SUSPEND etc
- * before this can be done without some confusion.
- * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
- * - Ashok Raj <ashok.raj@intel.com>
- */
- if (max_physical_apicid >= 8) {
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
- if (!APIC_XAPIC(version)) {
- def_to_bigsmp = 0;
- break;
- }
- /* If P4 and above fall through */
- case X86_VENDOR_AMD:
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (num_processors > 8)
+ def_to_bigsmp = 1;
+ break;
+ case X86_VENDOR_AMD:
+ if (max_physical_apicid >= 8)
def_to_bigsmp = 1;
- }
}
#endif
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 809e1cf..dc69f28 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -227,17 +227,14 @@ static struct irq_cfg *get_one_free_irq_cfg(int node)
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
- if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+ if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
kfree(cfg);
cfg = NULL;
- } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+ } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
GFP_ATOMIC, node)) {
free_cpumask_var(cfg->domain);
kfree(cfg);
cfg = NULL;
- } else {
- cpumask_clear(cfg->domain);
- cpumask_clear(cfg->old_domain);
}
}
@@ -1874,7 +1871,7 @@ __apicdebuginit(int) print_all_ICs(void)
print_PIC();
/* don't print out if apic is not there */
- if (!cpu_has_apic || disable_apic)
+ if (!cpu_has_apic && !apic_from_smp_config())
return 0;
print_all_local_APICs();
@@ -1999,7 +1996,7 @@ void disable_IO_APIC(void)
/*
* Use virtual wire A mode when interrupt remapping is enabled.
*/
- if (cpu_has_apic)
+ if (cpu_has_apic || apic_from_smp_config())
disconnect_bsp_APIC(!intr_remapping_enabled &&
ioapic_i8259.pin != -1);
}
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index cb66a22..7ff61d6 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -508,14 +508,14 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
/*
* proc handler for /proc/sys/kernel/nmi
*/
-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
+int proc_nmi_enabled(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int old_state;
nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
old_state = nmi_watchdog_enabled;
- proc_dointvec(table, write, file, buffer, length, ppos);
+ proc_dointvec(table, write, buffer, length, ppos);
if (!!old_state == !!nmi_watchdog_enabled)
return 0;
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index 65edc18..c4cbd308 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -64,16 +64,23 @@ void __init default_setup_apic_routing(void)
apic = &apic_x2apic_phys;
else
apic = &apic_x2apic_cluster;
- printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
}
#endif
if (apic == &apic_flat) {
- if (max_physical_apicid >= 8)
- apic = &apic_physflat;
- printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ if (num_processors > 8)
+ apic = &apic_physflat;
+ break;
+ case X86_VENDOR_AMD:
+ if (max_physical_apicid >= 8)
+ apic = &apic_physflat;
+ }
}
+ printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
+
if (is_vsmp_box()) {
/* need to update phys_pkg_id */
apic->phys_pkg_id = apicid_phys_pkg_id;
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 6011593..326c254 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -352,14 +352,14 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
alias.v = uv_read_local_mmr(redir_addrs[i].alias);
- if (alias.s.base == 0) {
+ if (alias.s.enable && alias.s.base == 0) {
*size = (1UL << alias.s.m_alias);
redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
return;
}
}
- BUG();
+ *base = *size = 0;
}
enum map_type {map_wb, map_uc};
@@ -389,6 +389,16 @@ static __init void map_gru_high(int max_pnode)
map_high("GRU", gru.s.base, shift, max_pnode, map_wb);
}
+static __init void map_mmr_high(int max_pnode)
+{
+ union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
+ int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+ mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+ if (mmr.s.enable)
+ map_high("MMR", mmr.s.base, shift, max_pnode, map_uc);
+}
+
static __init void map_mmioh_high(int max_pnode)
{
union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
@@ -609,12 +619,12 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
uv_cpu_hub_info(cpu)->m_val = m_val;
- uv_cpu_hub_info(cpu)->n_val = m_val;
+ uv_cpu_hub_info(cpu)->n_val = n_val;
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
uv_cpu_hub_info(cpu)->pnode = pnode;
uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
- uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
+ uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
@@ -643,6 +653,7 @@ void __init uv_system_init(void)
}
map_gru_high(max_pnode);
+ map_mmr_high(max_pnode);
map_mmioh_high(max_pnode);
uv_cpu_init();
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 8dd3063..68537e9 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -27,7 +27,7 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o
obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
-obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o
+obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_X86_MCE) += mcheck/
obj-$(CONFIG_MTRR) += mtrr/
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f32fa71..c910a716 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -184,7 +184,7 @@ static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c)
* approved Athlon
*/
WARN_ONCE(1, "WARNING: This combination of AMD"
- "processors is not suitable for SMP.\n");
+ " processors is not suitable for SMP.\n");
if (!test_taint(TAINT_UNSAFE_SMP))
add_taint(TAINT_UNSAFE_SMP);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2055fc2..cc25c2b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -13,7 +13,7 @@
#include <linux/io.h>
#include <asm/stackprotector.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@@ -34,7 +34,6 @@
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/pat.h>
-#include <linux/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -870,7 +869,7 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
- init_hw_perf_counters();
+ init_hw_perf_events();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 7bb676c..8b581d3 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -33,7 +33,7 @@
#include <linux/cpufreq.h>
#include <linux/compiler.h>
#include <linux/dmi.h>
-#include <trace/power.h>
+#include <trace/events/power.h>
#include <linux/acpi.h>
#include <linux/io.h>
@@ -72,8 +72,6 @@ static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
static DEFINE_PER_CPU(struct aperfmperf, old_perf);
-DEFINE_TRACE(power_mark);
-
/* acpi_perf_data is a pointer to percpu data. */
static struct acpi_processor_performance *acpi_perf_data;
@@ -332,7 +330,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
unsigned int next_perf_state = 0; /* Index into perf table */
unsigned int i;
int result = 0;
- struct power_trace it;
dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu);
@@ -364,7 +361,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
}
}
- trace_power_mark(&it, POWER_PSTATE, next_perf_state);
+ trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
switch (data->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
@@ -529,15 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = {
static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c)
{
- /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf
+ /* Intel Xeon Processor 7100 Series Specification Update
+ * http://www.intel.com/Assets/PDF/specupdate/314554.pdf
* AL30: A Machine Check Exception (MCE) Occurring during an
* Enhanced Intel SpeedStep Technology Ratio Change May Cause
- * Both Processor Cores to Lock Up when HT is enabled*/
+ * Both Processor Cores to Lock Up. */
if (c->x86_vendor == X86_VENDOR_INTEL) {
if ((c->x86 == 15) &&
(c->x86_model == 6) &&
- (c->x86_mask == 8) && smt_capable())
+ (c->x86_mask == 8)) {
+ printk(KERN_INFO "acpi-cpufreq: Intel(R) "
+ "Xeon(R) 7100 Errata AL30, processors may "
+ "lock up on frequency changes: disabling "
+ "acpi-cpufreq.\n");
return -ENODEV;
+ }
}
return 0;
}
@@ -552,13 +555,18 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
unsigned int result = 0;
struct cpuinfo_x86 *c = &cpu_data(policy->cpu);
struct acpi_processor_performance *perf;
+#ifdef CONFIG_SMP
+ static int blacklisted;
+#endif
dprintk("acpi_cpufreq_cpu_init\n");
#ifdef CONFIG_SMP
- result = acpi_cpufreq_blacklist(c);
- if (result)
- return result;
+ if (blacklisted)
+ return blacklisted;
+ blacklisted = acpi_cpufreq_blacklist(c);
+ if (blacklisted)
+ return blacklisted;
#endif
data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL);
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c
index ce2ed3e..cabd2fa 100644
--- a/arch/x86/kernel/cpu/cpufreq/longhaul.c
+++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c
@@ -813,7 +813,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
memcpy(eblcr, samuel2_eblcr, sizeof(samuel2_eblcr));
break;
case 1 ... 15:
- longhaul_version = TYPE_LONGHAUL_V1;
+ longhaul_version = TYPE_LONGHAUL_V2;
if (c->x86_mask < 8) {
cpu_model = CPU_SAMUEL2;
cpuname = "C3 'Samuel 2' [C5B]";
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 6394aa5..3f12dab 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -1022,7 +1022,7 @@ static int get_transition_latency(struct powernow_k8_data *data)
* set it to 1 to avoid problems in the future.
* For all others it's a BIOS bug.
*/
- if (!boot_cpu_data.x86 == 0x11)
+ if (boot_cpu_data.x86 != 0x11)
printk(KERN_ERR FW_WARN PFX "Invalid zero transition "
"latency\n");
max_latency = 1;
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
index 6911e91..3ae5a7a 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c
@@ -232,28 +232,23 @@ static unsigned int speedstep_detect_chipset(void)
return 0;
}
-struct get_freq_data {
- unsigned int speed;
- unsigned int processor;
-};
-
-static void get_freq_data(void *_data)
+static void get_freq_data(void *_speed)
{
- struct get_freq_data *data = _data;
+ unsigned int *speed = _speed;
- data->speed = speedstep_get_frequency(data->processor);
+ *speed = speedstep_get_frequency(speedstep_processor);
}
static unsigned int speedstep_get(unsigned int cpu)
{
- struct get_freq_data data = { .processor = cpu };
+ unsigned int speed;
/* You're supposed to ensure CPU is online. */
- if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0)
+ if (smp_call_function_single(cpu, get_freq_data, &speed, 1) != 0)
BUG();
- dprintk("detected %u kHz as current frequency\n", data.speed);
- return data.speed;
+ dprintk("detected %u kHz as current frequency\n", speed);
+ return speed;
}
/**
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 7029f0e..472763d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -98,8 +98,9 @@ static struct notifier_block mce_raise_nb = {
};
/* Inject mce on current CPU */
-static int raise_local(struct mce *m)
+static int raise_local(void)
{
+ struct mce *m = &__get_cpu_var(injectm);
int context = MCJ_CTX(m->inject_flags);
int ret = 0;
int cpu = m->extcpu;
@@ -167,12 +168,12 @@ static void raise_mce(struct mce *m)
}
cpu_relax();
}
- raise_local(m);
+ raise_local();
put_cpu();
put_online_cpus();
} else
#endif
- raise_local(m);
+ raise_local();
}
/* Error injection interface */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2f5aab2..721a77c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
+static void default_decode_mce(struct mce *m)
+{
+ pr_emerg("No human readable MCE decoding support on this CPU type.\n");
+ pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+}
+
+/*
+ * CPU/chipset specific EDAC code can register a callback here to print
+ * MCE errors in a human-readable form:
+ */
+void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
+EXPORT_SYMBOL(x86_mce_decode_callback);
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -165,49 +177,46 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
-void __weak decode_mce(struct mce *m)
-{
- return;
-}
-
static void print_mce(struct mce *m)
{
- printk(KERN_EMERG
- "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+ pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
m->extcpu, m->mcgstatus, m->bank, m->status);
+
if (m->ip) {
- printk(KERN_EMERG "RIP%s %02x:<%016Lx> ",
- !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
- m->cs, m->ip);
+ pr_emerg("RIP%s %02x:<%016Lx> ",
+ !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
+ m->cs, m->ip);
+
if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip);
- printk(KERN_CONT "\n");
+ pr_cont("\n");
}
- printk(KERN_EMERG "TSC %llx ", m->tsc);
+
+ pr_emerg("TSC %llx ", m->tsc);
if (m->addr)
- printk(KERN_CONT "ADDR %llx ", m->addr);
+ pr_cont("ADDR %llx ", m->addr);
if (m->misc)
- printk(KERN_CONT "MISC %llx ", m->misc);
- printk(KERN_CONT "\n");
- printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
- m->cpuvendor, m->cpuid, m->time, m->socketid,
- m->apicid);
+ pr_cont("MISC %llx ", m->misc);
- decode_mce(m);
+ pr_cont("\n");
+ pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
+ m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
+
+ /*
+ * Print out human-readable details about the MCE error,
+ * (if the CPU has an implementation for that):
+ */
+ x86_mce_decode_callback(m);
}
static void print_mce_head(void)
{
- printk(KERN_EMERG "\nHARDWARE ERROR\n");
+ pr_emerg("\nHARDWARE ERROR\n");
}
static void print_mce_tail(void)
{
- printk(KERN_EMERG "This is not a software problem!\n"
-#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
- "Run through mcelog --ascii to decode and contact your hardware vendor\n"
-#endif
- );
+ pr_emerg("This is not a software problem!\n");
}
#define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -221,6 +230,7 @@ static atomic_t mce_fake_paniced;
static void wait_for_panic(void)
{
long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
+
preempt_disable();
local_irq_enable();
while (timeout-- > 0)
@@ -288,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr)
{
unsigned bank = __get_cpu_var(injectm.bank);
+
if (msr == rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
@@ -305,13 +316,25 @@ static int msr_to_offset(u32 msr)
static u64 mce_rdmsrl(u32 msr)
{
u64 v;
+
if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr);
+
if (offset < 0)
return 0;
return *(u64 *)((char *)&__get_cpu_var(injectm) + offset);
}
- rdmsrl(msr, v);
+
+ if (rdmsrl_safe(msr, &v)) {
+ WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
+ /*
+ * Return zero in case the access faulted. This should
+ * not happen normally but can happen if the CPU does
+ * something weird, or if the code is buggy.
+ */
+ v = 0;
+ }
+
return v;
}
@@ -319,6 +342,7 @@ static void mce_wrmsrl(u32 msr, u64 v)
{
if (__get_cpu_var(injectm).finished) {
int offset = msr_to_offset(msr);
+
if (offset >= 0)
*(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v;
return;
@@ -415,7 +439,7 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
m->ip = mce_rdmsrl(rip_msr);
}
-#ifdef CONFIG_X86_LOCAL_APIC
+#ifdef CONFIG_X86_LOCAL_APIC
/*
* Called after interrupts have been reenabled again
* when a MCE happened during an interrupts off region
@@ -1172,6 +1196,7 @@ static int mce_banks_init(void)
return -ENOMEM;
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
b->ctl = -1ULL;
b->init = 1;
}
@@ -1189,7 +1214,8 @@ static int __cpuinit mce_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
- printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
+ if (!banks)
+ printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b);
if (b > MAX_NR_BANKS) {
printk(KERN_WARNING
@@ -1203,6 +1229,7 @@ static int __cpuinit mce_cap_init(void)
banks = b;
if (!mce_banks) {
int err = mce_banks_init();
+
if (err)
return err;
}
@@ -1237,6 +1264,7 @@ static void mce_init(void)
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (!b->init)
continue;
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
@@ -1626,6 +1654,7 @@ static int mce_disable(void)
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
@@ -1911,6 +1940,7 @@ static void mce_disable_cpu(void *h)
cmci_clear();
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), 0);
}
@@ -1928,6 +1958,7 @@ static void mce_reenable_cpu(void *h)
cmci_reenable();
for (i = 0; i < banks; i++) {
struct mce_bank *b = &mce_banks[i];
+
if (b->init)
wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 8cd5224..83a3d1f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -489,8 +489,9 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int i, err = 0;
struct threshold_bank *b = NULL;
char name[32];
+#ifdef CONFIG_SMP
struct cpuinfo_x86 *c = &cpu_data(cpu);
-
+#endif
sprintf(name, "threshold_bank%i", bank);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 889f665..7c78563 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
+#include <linux/sched.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 63a56d1..b3a1dba 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -34,20 +34,31 @@
/* How long to wait between reporting thermal events */
#define CHECK_INTERVAL (300 * HZ)
-static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
-static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
-static DEFINE_PER_CPU(bool, thermal_throttle_active);
+/*
+ * Current thermal throttling state:
+ */
+struct thermal_state {
+ bool is_throttled;
+
+ u64 next_check;
+ unsigned long throttle_count;
+ unsigned long last_throttle_count;
+};
+
+static DEFINE_PER_CPU(struct thermal_state, thermal_state);
-static atomic_t therm_throt_en = ATOMIC_INIT(0);
+static atomic_t therm_throt_en = ATOMIC_INIT(0);
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
#define define_therm_throt_sysdev_show_func(name) \
-static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
- struct sysdev_attribute *attr, \
- char *buf) \
+ \
+static ssize_t therm_throt_sysdev_show_##name( \
+ struct sys_device *dev, \
+ struct sysdev_attribute *attr, \
+ char *buf) \
{ \
unsigned int cpu = dev->id; \
ssize_t ret; \
@@ -55,7 +66,7 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
preempt_disable(); /* CPU hotplug */ \
if (cpu_online(cpu)) \
ret = sprintf(buf, "%lu\n", \
- per_cpu(thermal_throttle_##name, cpu)); \
+ per_cpu(thermal_state, cpu).name); \
else \
ret = 0; \
preempt_enable(); \
@@ -63,11 +74,11 @@ static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
return ret; \
}
-define_therm_throt_sysdev_show_func(count);
-define_therm_throt_sysdev_one_ro(count);
+define_therm_throt_sysdev_show_func(throttle_count);
+define_therm_throt_sysdev_one_ro(throttle_count);
static struct attribute *thermal_throttle_attrs[] = {
- &attr_count.attr,
+ &attr_throttle_count.attr,
NULL
};
@@ -93,33 +104,39 @@ static struct attribute_group thermal_throttle_attr_group = {
* 1 : Event should be logged further, and a message has been
* printed to the syslog.
*/
-static int therm_throt_process(int curr)
+static int therm_throt_process(bool is_throttled)
{
- unsigned int cpu = smp_processor_id();
- __u64 tmp_jiffs = get_jiffies_64();
- bool was_throttled = __get_cpu_var(thermal_throttle_active);
- bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
+ struct thermal_state *state;
+ unsigned int this_cpu;
+ bool was_throttled;
+ u64 now;
+
+ this_cpu = smp_processor_id();
+ now = get_jiffies_64();
+ state = &per_cpu(thermal_state, this_cpu);
+
+ was_throttled = state->is_throttled;
+ state->is_throttled = is_throttled;
if (is_throttled)
- __get_cpu_var(thermal_throttle_count)++;
+ state->throttle_count++;
- if (!(was_throttled ^ is_throttled) &&
- time_before64(tmp_jiffs, __get_cpu_var(next_check)))
+ if (time_before64(now, state->next_check) &&
+ state->throttle_count != state->last_throttle_count)
return 0;
- __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
+ state->next_check = now + CHECK_INTERVAL;
+ state->last_throttle_count = state->throttle_count;
/* if we just entered the thermal event */
if (is_throttled) {
- printk(KERN_CRIT "CPU%d: Temperature above threshold, "
- "cpu clock throttled (total events = %lu)\n",
- cpu, __get_cpu_var(thermal_throttle_count));
+ printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count);
add_taint(TAINT_MACHINE_CHECK);
return 1;
}
if (was_throttled) {
- printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+ printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu);
return 1;
}
@@ -213,7 +230,7 @@ static void intel_thermal_interrupt(void)
__u64 msr_val;
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
- if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT))
+ if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0))
mce_log_therm_throt_event(msr_val);
}
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 315738c..73c86db 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -846,7 +846,7 @@ int __init mtrr_cleanup(unsigned address_bits)
sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
range_sums = sum_ranges(range, nr_range);
- printk(KERN_INFO "total RAM coverred: %ldM\n",
+ printk(KERN_INFO "total RAM covered: %ldM\n",
range_sums >> (20 - PAGE_SHIFT));
if (mtrr_chunk_size && mtrr_gran_size) {
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index 08b6ea4..3c1b12d 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
unsigned long long base, size;
char *ptr;
char line[LINE_SIZE];
+ int length;
size_t linelen;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!len)
- return -EINVAL;
memset(line, 0, LINE_SIZE);
- if (len > LINE_SIZE)
- len = LINE_SIZE;
- if (copy_from_user(line, buf, len - 1))
+
+ length = len;
+ length--;
+
+ if (length > LINE_SIZE - 1)
+ length = LINE_SIZE - 1;
+
+ if (length < 0)
+ return -EINVAL;
+
+ if (copy_from_user(line, buf, length))
return -EFAULT;
linelen = strlen(line);
@@ -126,8 +133,8 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
return -EINVAL;
base = simple_strtoull(line + 5, &ptr, 0);
- for (; isspace(*ptr); ++ptr)
- ;
+ while (isspace(*ptr))
+ ptr++;
if (strncmp(ptr, "size=", 5))
return -EINVAL;
@@ -135,14 +142,14 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
size = simple_strtoull(ptr + 5, &ptr, 0);
if ((base & 0xfff) || (size & 0xfff))
return -EINVAL;
- for (; isspace(*ptr); ++ptr)
- ;
+ while (isspace(*ptr))
+ ptr++;
if (strncmp(ptr, "type=", 5))
return -EINVAL;
ptr += 5;
- for (; isspace(*ptr); ++ptr)
- ;
+ while (isspace(*ptr))
+ ptr++;
for (i = 0; i < MTRR_NUM_TYPES; ++i) {
if (strcmp(ptr, mtrr_strings[i]))
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_event.c
index 2732e2c..b5801c3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1,5 +1,5 @@
/*
- * Performance counter x86 architecture code
+ * Performance events x86 architecture code
*
* Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
* Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
@@ -11,7 +11,7 @@
* For licencing details see kernel-base/COPYING
*/
-#include <linux/perf_counter.h>
+#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/notifier.h>
#include <linux/hardirq.h>
@@ -27,19 +27,19 @@
#include <asm/stacktrace.h>
#include <asm/nmi.h>
-static u64 perf_counter_mask __read_mostly;
+static u64 perf_event_mask __read_mostly;
-/* The maximal number of PEBS counters: */
-#define MAX_PEBS_COUNTERS 4
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS 4
/* The size of a BTS record in bytes: */
#define BTS_RECORD_SIZE 24
/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024)
+#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048)
/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH (BTS_RECORD_SIZE * 64)
+#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128)
/*
@@ -65,11 +65,11 @@ struct debug_store {
u64 pebs_index;
u64 pebs_absolute_maximum;
u64 pebs_interrupt_threshold;
- u64 pebs_counter_reset[MAX_PEBS_COUNTERS];
+ u64 pebs_event_reset[MAX_PEBS_EVENTS];
};
-struct cpu_hw_counters {
- struct perf_counter *counters[X86_PMC_IDX_MAX];
+struct cpu_hw_events {
+ struct perf_event *events[X86_PMC_IDX_MAX];
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
unsigned long interrupts;
@@ -86,17 +86,17 @@ struct x86_pmu {
int (*handle_irq)(struct pt_regs *);
void (*disable_all)(void);
void (*enable_all)(void);
- void (*enable)(struct hw_perf_counter *, int);
- void (*disable)(struct hw_perf_counter *, int);
+ void (*enable)(struct hw_perf_event *, int);
+ void (*disable)(struct hw_perf_event *, int);
unsigned eventsel;
unsigned perfctr;
u64 (*event_map)(int);
u64 (*raw_event)(u64);
int max_events;
- int num_counters;
- int num_counters_fixed;
- int counter_bits;
- u64 counter_mask;
+ int num_events;
+ int num_events_fixed;
+ int event_bits;
+ u64 event_mask;
int apic;
u64 max_period;
u64 intel_ctrl;
@@ -106,7 +106,7 @@ struct x86_pmu {
static struct x86_pmu x86_pmu __read_mostly;
-static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
+static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
.enabled = 1,
};
@@ -124,35 +124,35 @@ static const u64 p6_perfmon_event_map[] =
[PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
};
-static u64 p6_pmu_event_map(int event)
+static u64 p6_pmu_event_map(int hw_event)
{
- return p6_perfmon_event_map[event];
+ return p6_perfmon_event_map[hw_event];
}
/*
- * Counter setting that is specified not to count anything.
+ * Event setting that is specified not to count anything.
* We use this to effectively disable a counter.
*
* L2_RQSTS with 0 MESI unit mask.
*/
-#define P6_NOP_COUNTER 0x0000002EULL
+#define P6_NOP_EVENT 0x0000002EULL
-static u64 p6_pmu_raw_event(u64 event)
+static u64 p6_pmu_raw_event(u64 hw_event)
{
#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
#define P6_EVNTSEL_INV_MASK 0x00800000ULL
-#define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL
+#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
#define P6_EVNTSEL_MASK \
(P6_EVNTSEL_EVENT_MASK | \
P6_EVNTSEL_UNIT_MASK | \
P6_EVNTSEL_EDGE_MASK | \
P6_EVNTSEL_INV_MASK | \
- P6_EVNTSEL_COUNTER_MASK)
+ P6_EVNTSEL_REG_MASK)
- return event & P6_EVNTSEL_MASK;
+ return hw_event & P6_EVNTSEL_MASK;
}
@@ -170,16 +170,16 @@ static const u64 intel_perfmon_event_map[] =
[PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
};
-static u64 intel_pmu_event_map(int event)
+static u64 intel_pmu_event_map(int hw_event)
{
- return intel_perfmon_event_map[event];
+ return intel_perfmon_event_map[hw_event];
}
/*
- * Generalized hw caching related event table, filled
+ * Generalized hw caching related hw_event table, filled
* in on a per model basis. A value of 0 means
- * 'not supported', -1 means 'event makes no sense on
- * this CPU', any other value means the raw event
+ * 'not supported', -1 means 'hw_event makes no sense on
+ * this CPU', any other value means the raw hw_event
* ID.
*/
@@ -463,22 +463,22 @@ static const u64 atom_hw_cache_event_ids
},
};
-static u64 intel_pmu_raw_event(u64 event)
+static u64 intel_pmu_raw_event(u64 hw_event)
{
#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
-#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL
+#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
#define CORE_EVNTSEL_MASK \
(CORE_EVNTSEL_EVENT_MASK | \
CORE_EVNTSEL_UNIT_MASK | \
CORE_EVNTSEL_EDGE_MASK | \
CORE_EVNTSEL_INV_MASK | \
- CORE_EVNTSEL_COUNTER_MASK)
+ CORE_EVNTSEL_REG_MASK)
- return event & CORE_EVNTSEL_MASK;
+ return hw_event & CORE_EVNTSEL_MASK;
}
static const u64 amd_hw_cache_event_ids
@@ -585,39 +585,39 @@ static const u64 amd_perfmon_event_map[] =
[PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
};
-static u64 amd_pmu_event_map(int event)
+static u64 amd_pmu_event_map(int hw_event)
{
- return amd_perfmon_event_map[event];
+ return amd_perfmon_event_map[hw_event];
}
-static u64 amd_pmu_raw_event(u64 event)
+static u64 amd_pmu_raw_event(u64 hw_event)
{
#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
#define K7_EVNTSEL_INV_MASK 0x000800000ULL
-#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL
+#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
#define K7_EVNTSEL_MASK \
(K7_EVNTSEL_EVENT_MASK | \
K7_EVNTSEL_UNIT_MASK | \
K7_EVNTSEL_EDGE_MASK | \
K7_EVNTSEL_INV_MASK | \
- K7_EVNTSEL_COUNTER_MASK)
+ K7_EVNTSEL_REG_MASK)
- return event & K7_EVNTSEL_MASK;
+ return hw_event & K7_EVNTSEL_MASK;
}
/*
- * Propagate counter elapsed time into the generic counter.
- * Can only be executed on the CPU where the counter is active.
+ * Propagate event elapsed time into the generic event.
+ * Can only be executed on the CPU where the event is active.
* Returns the delta events processed.
*/
static u64
-x86_perf_counter_update(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int idx)
+x86_perf_event_update(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx)
{
- int shift = 64 - x86_pmu.counter_bits;
+ int shift = 64 - x86_pmu.event_bits;
u64 prev_raw_count, new_raw_count;
s64 delta;
@@ -625,15 +625,15 @@ x86_perf_counter_update(struct perf_counter *counter,
return 0;
/*
- * Careful: an NMI might modify the previous counter value.
+ * Careful: an NMI might modify the previous event value.
*
* Our tactic to handle this is to first atomically read and
* exchange a new raw count - then add that new-prev delta
- * count to the generic counter atomically:
+ * count to the generic event atomically:
*/
again:
prev_raw_count = atomic64_read(&hwc->prev_count);
- rdmsrl(hwc->counter_base + idx, new_raw_count);
+ rdmsrl(hwc->event_base + idx, new_raw_count);
if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
@@ -642,7 +642,7 @@ again:
/*
* Now we have the new raw value and have updated the prev
* timestamp already. We can now calculate the elapsed delta
- * (counter-)time and add that to the generic counter.
+ * (event-)time and add that to the generic event.
*
* Careful, not all hw sign-extends above the physical width
* of the count.
@@ -650,13 +650,13 @@ again:
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- atomic64_add(delta, &counter->count);
+ atomic64_add(delta, &event->count);
atomic64_sub(delta, &hwc->period_left);
return new_raw_count;
}
-static atomic_t active_counters;
+static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);
static bool reserve_pmc_hardware(void)
@@ -667,12 +667,12 @@ static bool reserve_pmc_hardware(void)
if (nmi_watchdog == NMI_LOCAL_APIC)
disable_lapic_nmi_watchdog();
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for (i = 0; i < x86_pmu.num_events; i++) {
if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
goto perfctr_fail;
}
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for (i = 0; i < x86_pmu.num_events; i++) {
if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
goto eventsel_fail;
}
@@ -685,7 +685,7 @@ eventsel_fail:
for (i--; i >= 0; i--)
release_evntsel_nmi(x86_pmu.eventsel + i);
- i = x86_pmu.num_counters;
+ i = x86_pmu.num_events;
perfctr_fail:
for (i--; i >= 0; i--)
@@ -703,7 +703,7 @@ static void release_pmc_hardware(void)
#ifdef CONFIG_X86_LOCAL_APIC
int i;
- for (i = 0; i < x86_pmu.num_counters; i++) {
+ for (i = 0; i < x86_pmu.num_events; i++) {
release_perfctr_nmi(x86_pmu.perfctr + i);
release_evntsel_nmi(x86_pmu.eventsel + i);
}
@@ -720,7 +720,7 @@ static inline bool bts_available(void)
static inline void init_debug_store_on_cpu(int cpu)
{
- struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds)
return;
@@ -732,7 +732,7 @@ static inline void init_debug_store_on_cpu(int cpu)
static inline void fini_debug_store_on_cpu(int cpu)
{
- if (!per_cpu(cpu_hw_counters, cpu).ds)
+ if (!per_cpu(cpu_hw_events, cpu).ds)
return;
wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
@@ -751,12 +751,12 @@ static void release_bts_hardware(void)
fini_debug_store_on_cpu(cpu);
for_each_possible_cpu(cpu) {
- struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds;
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
if (!ds)
continue;
- per_cpu(cpu_hw_counters, cpu).ds = NULL;
+ per_cpu(cpu_hw_events, cpu).ds = NULL;
kfree((void *)(unsigned long)ds->bts_buffer_base);
kfree(ds);
@@ -796,7 +796,7 @@ static int reserve_bts_hardware(void)
ds->bts_interrupt_threshold =
ds->bts_absolute_maximum - BTS_OVFL_TH;
- per_cpu(cpu_hw_counters, cpu).ds = ds;
+ per_cpu(cpu_hw_events, cpu).ds = ds;
err = 0;
}
@@ -812,9 +812,9 @@ static int reserve_bts_hardware(void)
return err;
}
-static void hw_perf_counter_destroy(struct perf_counter *counter)
+static void hw_perf_event_destroy(struct perf_event *event)
{
- if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) {
+ if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
release_pmc_hardware();
release_bts_hardware();
mutex_unlock(&pmc_reserve_mutex);
@@ -827,7 +827,7 @@ static inline int x86_pmu_initialized(void)
}
static inline int
-set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
{
unsigned int cache_type, cache_op, cache_result;
u64 config, val;
@@ -880,7 +880,7 @@ static void intel_pmu_enable_bts(u64 config)
static void intel_pmu_disable_bts(void)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
unsigned long debugctlmsr;
if (!cpuc->ds)
@@ -898,10 +898,10 @@ static void intel_pmu_disable_bts(void)
/*
* Setup the hardware configuration for a given attr_type
*/
-static int __hw_perf_counter_init(struct perf_counter *counter)
+static int __hw_perf_event_init(struct perf_event *event)
{
- struct perf_counter_attr *attr = &counter->attr;
- struct hw_perf_counter *hwc = &counter->hw;
+ struct perf_event_attr *attr = &event->attr;
+ struct hw_perf_event *hwc = &event->hw;
u64 config;
int err;
@@ -909,21 +909,23 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
return -ENODEV;
err = 0;
- if (!atomic_inc_not_zero(&active_counters)) {
+ if (!atomic_inc_not_zero(&active_events)) {
mutex_lock(&pmc_reserve_mutex);
- if (atomic_read(&active_counters) == 0) {
+ if (atomic_read(&active_events) == 0) {
if (!reserve_pmc_hardware())
err = -EBUSY;
else
err = reserve_bts_hardware();
}
if (!err)
- atomic_inc(&active_counters);
+ atomic_inc(&active_events);
mutex_unlock(&pmc_reserve_mutex);
}
if (err)
return err;
+ event->destroy = hw_perf_event_destroy;
+
/*
* Generate PMC IRQs:
* (keep 'enabled' bit clear for now)
@@ -946,17 +948,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
/*
* If we have a PMU initialized but no APIC
* interrupts, we cannot sample hardware
- * counters (user-space has to fall back and
- * sample via a hrtimer based software counter):
+ * events (user-space has to fall back and
+ * sample via a hrtimer based software event):
*/
if (!x86_pmu.apic)
return -EOPNOTSUPP;
}
- counter->destroy = hw_perf_counter_destroy;
-
/*
- * Raw event type provide the config in the event structure
+ * Raw hw_event type provide the config in the hw_event structure
*/
if (attr->type == PERF_TYPE_RAW) {
hwc->config |= x86_pmu.raw_event(attr->config);
@@ -1001,7 +1001,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
static void p6_pmu_disable_all(void)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
u64 val;
if (!cpuc->enabled)
@@ -1018,7 +1018,7 @@ static void p6_pmu_disable_all(void)
static void intel_pmu_disable_all(void)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (!cpuc->enabled)
return;
@@ -1034,7 +1034,7 @@ static void intel_pmu_disable_all(void)
static void amd_pmu_disable_all(void)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
if (!cpuc->enabled)
@@ -1043,12 +1043,12 @@ static void amd_pmu_disable_all(void)
cpuc->enabled = 0;
/*
* ensure we write the disable before we start disabling the
- * counters proper, so that amd_pmu_enable_counter() does the
+ * events proper, so that amd_pmu_enable_event() does the
* right thing.
*/
barrier();
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for (idx = 0; idx < x86_pmu.num_events; idx++) {
u64 val;
if (!test_bit(idx, cpuc->active_mask))
@@ -1070,7 +1070,7 @@ void hw_perf_disable(void)
static void p6_pmu_enable_all(void)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
unsigned long val;
if (cpuc->enabled)
@@ -1087,7 +1087,7 @@ static void p6_pmu_enable_all(void)
static void intel_pmu_enable_all(void)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (cpuc->enabled)
return;
@@ -1098,19 +1098,19 @@ static void intel_pmu_enable_all(void)
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
- struct perf_counter *counter =
- cpuc->counters[X86_PMC_IDX_FIXED_BTS];
+ struct perf_event *event =
+ cpuc->events[X86_PMC_IDX_FIXED_BTS];
- if (WARN_ON_ONCE(!counter))
+ if (WARN_ON_ONCE(!event))
return;
- intel_pmu_enable_bts(counter->hw.config);
+ intel_pmu_enable_bts(event->hw.config);
}
}
static void amd_pmu_enable_all(void)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
if (cpuc->enabled)
@@ -1119,14 +1119,14 @@ static void amd_pmu_enable_all(void)
cpuc->enabled = 1;
barrier();
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
- struct perf_counter *counter = cpuc->counters[idx];
+ for (idx = 0; idx < x86_pmu.num_events; idx++) {
+ struct perf_event *event = cpuc->events[idx];
u64 val;
if (!test_bit(idx, cpuc->active_mask))
continue;
- val = counter->hw.config;
+ val = event->hw.config;
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
}
@@ -1153,19 +1153,19 @@ static inline void intel_pmu_ack_status(u64 ack)
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
}
-static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
(void)checking_wrmsrl(hwc->config_base + idx,
hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
}
-static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
}
static inline void
-intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
+intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, mask;
@@ -1178,10 +1178,10 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
}
static inline void
-p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
- u64 val = P6_NOP_COUNTER;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ u64 val = P6_NOP_EVENT;
if (cpuc->enabled)
val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1190,7 +1190,7 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
}
static inline void
-intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
intel_pmu_disable_bts();
@@ -1202,24 +1202,24 @@ intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
return;
}
- x86_pmu_disable_counter(hwc, idx);
+ x86_pmu_disable_event(hwc, idx);
}
static inline void
-amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
{
- x86_pmu_disable_counter(hwc, idx);
+ x86_pmu_disable_event(hwc, idx);
}
static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
/*
* Set the next IRQ period, based on the hwc->period_left value.
- * To be called with the counter disabled in hw:
+ * To be called with the event disabled in hw:
*/
static int
-x86_perf_counter_set_period(struct perf_counter *counter,
- struct hw_perf_counter *hwc, int idx)
+x86_perf_event_set_period(struct perf_event *event,
+ struct hw_perf_event *hwc, int idx)
{
s64 left = atomic64_read(&hwc->period_left);
s64 period = hwc->sample_period;
@@ -1245,7 +1245,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
ret = 1;
}
/*
- * Quirk: certain CPUs dont like it if just 1 event is left:
+ * Quirk: certain CPUs dont like it if just 1 hw_event is left:
*/
if (unlikely(left < 2))
left = 2;
@@ -1256,21 +1256,21 @@ x86_perf_counter_set_period(struct perf_counter *counter,
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
/*
- * The hw counter starts counting from this counter offset,
+ * The hw event starts counting from this event offset,
* mark it to be able to extra future deltas:
*/
atomic64_set(&hwc->prev_count, (u64)-left);
- err = checking_wrmsrl(hwc->counter_base + idx,
- (u64)(-left) & x86_pmu.counter_mask);
+ err = checking_wrmsrl(hwc->event_base + idx,
+ (u64)(-left) & x86_pmu.event_mask);
- perf_counter_update_userpage(counter);
+ perf_event_update_userpage(event);
return ret;
}
static inline void
-intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
+intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
{
int idx = __idx - X86_PMC_IDX_FIXED;
u64 ctrl_val, bits, mask;
@@ -1295,9 +1295,9 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
err = checking_wrmsrl(hwc->config_base, ctrl_val);
}
-static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
u64 val;
val = hwc->config;
@@ -1308,10 +1308,10 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
}
-static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
- if (!__get_cpu_var(cpu_hw_counters).enabled)
+ if (!__get_cpu_var(cpu_hw_events).enabled)
return;
intel_pmu_enable_bts(hwc->config);
@@ -1323,134 +1323,134 @@ static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
return;
}
- x86_pmu_enable_counter(hwc, idx);
+ x86_pmu_enable_event(hwc, idx);
}
-static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (cpuc->enabled)
- x86_pmu_enable_counter(hwc, idx);
+ x86_pmu_enable_event(hwc, idx);
}
static int
-fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc)
+fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
{
- unsigned int event;
+ unsigned int hw_event;
- event = hwc->config & ARCH_PERFMON_EVENT_MASK;
+ hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
- if (unlikely((event ==
+ if (unlikely((hw_event ==
x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
(hwc->sample_period == 1)))
return X86_PMC_IDX_FIXED_BTS;
- if (!x86_pmu.num_counters_fixed)
+ if (!x86_pmu.num_events_fixed)
return -1;
- if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
+ if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
return X86_PMC_IDX_FIXED_INSTRUCTIONS;
- if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
+ if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
return X86_PMC_IDX_FIXED_CPU_CYCLES;
- if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
+ if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
return X86_PMC_IDX_FIXED_BUS_CYCLES;
return -1;
}
/*
- * Find a PMC slot for the freshly enabled / scheduled in counter:
+ * Find a PMC slot for the freshly enabled / scheduled in event:
*/
-static int x86_pmu_enable(struct perf_counter *counter)
+static int x86_pmu_enable(struct perf_event *event)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
- struct hw_perf_counter *hwc = &counter->hw;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
int idx;
- idx = fixed_mode_idx(counter, hwc);
+ idx = fixed_mode_idx(event, hwc);
if (idx == X86_PMC_IDX_FIXED_BTS) {
/* BTS is already occupied. */
if (test_and_set_bit(idx, cpuc->used_mask))
return -EAGAIN;
hwc->config_base = 0;
- hwc->counter_base = 0;
+ hwc->event_base = 0;
hwc->idx = idx;
} else if (idx >= 0) {
/*
- * Try to get the fixed counter, if that is already taken
- * then try to get a generic counter:
+ * Try to get the fixed event, if that is already taken
+ * then try to get a generic event:
*/
if (test_and_set_bit(idx, cpuc->used_mask))
goto try_generic;
hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
/*
- * We set it so that counter_base + idx in wrmsr/rdmsr maps to
+ * We set it so that event_base + idx in wrmsr/rdmsr maps to
* MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
*/
- hwc->counter_base =
+ hwc->event_base =
MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
hwc->idx = idx;
} else {
idx = hwc->idx;
- /* Try to get the previous generic counter again */
+ /* Try to get the previous generic event again */
if (test_and_set_bit(idx, cpuc->used_mask)) {
try_generic:
idx = find_first_zero_bit(cpuc->used_mask,
- x86_pmu.num_counters);
- if (idx == x86_pmu.num_counters)
+ x86_pmu.num_events);
+ if (idx == x86_pmu.num_events)
return -EAGAIN;
set_bit(idx, cpuc->used_mask);
hwc->idx = idx;
}
hwc->config_base = x86_pmu.eventsel;
- hwc->counter_base = x86_pmu.perfctr;
+ hwc->event_base = x86_pmu.perfctr;
}
- perf_counters_lapic_init();
+ perf_events_lapic_init();
x86_pmu.disable(hwc, idx);
- cpuc->counters[idx] = counter;
+ cpuc->events[idx] = event;
set_bit(idx, cpuc->active_mask);
- x86_perf_counter_set_period(counter, hwc, idx);
+ x86_perf_event_set_period(event, hwc, idx);
x86_pmu.enable(hwc, idx);
- perf_counter_update_userpage(counter);
+ perf_event_update_userpage(event);
return 0;
}
-static void x86_pmu_unthrottle(struct perf_counter *counter)
+static void x86_pmu_unthrottle(struct perf_event *event)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
- struct hw_perf_counter *hwc = &counter->hw;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
- cpuc->counters[hwc->idx] != counter))
+ cpuc->events[hwc->idx] != event))
return;
x86_pmu.enable(hwc, hwc->idx);
}
-void perf_counter_print_debug(void)
+void perf_event_print_debug(void)
{
u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
- struct cpu_hw_counters *cpuc;
+ struct cpu_hw_events *cpuc;
unsigned long flags;
int cpu, idx;
- if (!x86_pmu.num_counters)
+ if (!x86_pmu.num_events)
return;
local_irq_save(flags);
cpu = smp_processor_id();
- cpuc = &per_cpu(cpu_hw_counters, cpu);
+ cpuc = &per_cpu(cpu_hw_events, cpu);
if (x86_pmu.version >= 2) {
rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
@@ -1466,7 +1466,7 @@ void perf_counter_print_debug(void)
}
pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for (idx = 0; idx < x86_pmu.num_events; idx++) {
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
rdmsrl(x86_pmu.perfctr + idx, pmc_count);
@@ -1479,7 +1479,7 @@ void perf_counter_print_debug(void)
pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
cpu, idx, prev_left);
}
- for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+ for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1488,8 +1488,7 @@ void perf_counter_print_debug(void)
local_irq_restore(flags);
}
-static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
- struct perf_sample_data *data)
+static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
{
struct debug_store *ds = cpuc->ds;
struct bts_record {
@@ -1497,11 +1496,14 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
u64 to;
u64 flags;
};
- struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS];
- unsigned long orig_ip = data->regs->ip;
+ struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
struct bts_record *at, *top;
+ struct perf_output_handle handle;
+ struct perf_event_header header;
+ struct perf_sample_data data;
+ struct pt_regs regs;
- if (!counter)
+ if (!event)
return;
if (!ds)
@@ -1510,26 +1512,45 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc,
at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
top = (struct bts_record *)(unsigned long)ds->bts_index;
+ if (top <= at)
+ return;
+
ds->bts_index = ds->bts_buffer_base;
+
+ data.period = event->hw.last_period;
+ data.addr = 0;
+ regs.ip = 0;
+
+ /*
+ * Prepare a generic sample, i.e. fill in the invariant fields.
+ * We will overwrite the from and to address before we output
+ * the sample.
+ */
+ perf_prepare_sample(&header, &data, event, &regs);
+
+ if (perf_output_begin(&handle, event,
+ header.size * (top - at), 1, 1))
+ return;
+
for (; at < top; at++) {
- data->regs->ip = at->from;
- data->addr = at->to;
+ data.ip = at->from;
+ data.addr = at->to;
- perf_counter_output(counter, 1, data);
+ perf_output_sample(&handle, &header, &data, event);
}
- data->regs->ip = orig_ip;
- data->addr = 0;
+ perf_output_end(&handle);
/* There's new data available. */
- counter->pending_kill = POLL_IN;
+ event->hw.interrupts++;
+ event->pending_kill = POLL_IN;
}
-static void x86_pmu_disable(struct perf_counter *counter)
+static void x86_pmu_disable(struct perf_event *event)
{
- struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
- struct hw_perf_counter *hwc = &counter->hw;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
/*
@@ -1541,67 +1562,63 @@ static void x86_pmu_disable(struct perf_counter *counter)
/*
* Make sure the cleared pointer becomes visible before we
- * (potentially) free the counter:
+ * (potentially) free the event:
*/
barrier();
/*
- * Drain the remaining delta count out of a counter
+ * Drain the remaining delta count out of a event
* that we are disabling:
*/
- x86_perf_counter_update(counter, hwc, idx);
+ x86_perf_event_update(event, hwc, idx);
/* Drain the remaining BTS records. */
- if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
- struct perf_sample_data data;
- struct pt_regs regs;
+ if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
+ intel_pmu_drain_bts_buffer(cpuc);
- data.regs = &regs;
- intel_pmu_drain_bts_buffer(cpuc, &data);
- }
- cpuc->counters[idx] = NULL;
+ cpuc->events[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
- perf_counter_update_userpage(counter);
+ perf_event_update_userpage(event);
}
/*
- * Save and restart an expired counter. Called by NMI contexts,
- * so it has to be careful about preempting normal counter ops:
+ * Save and restart an expired event. Called by NMI contexts,
+ * so it has to be careful about preempting normal event ops:
*/
-static int intel_pmu_save_and_restart(struct perf_counter *counter)
+static int intel_pmu_save_and_restart(struct perf_event *event)
{
- struct hw_perf_counter *hwc = &counter->hw;
+ struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
int ret;
- x86_perf_counter_update(counter, hwc, idx);
- ret = x86_perf_counter_set_period(counter, hwc, idx);
+ x86_perf_event_update(event, hwc, idx);
+ ret = x86_perf_event_set_period(event, hwc, idx);
- if (counter->state == PERF_COUNTER_STATE_ACTIVE)
- intel_pmu_enable_counter(hwc, idx);
+ if (event->state == PERF_EVENT_STATE_ACTIVE)
+ intel_pmu_enable_event(hwc, idx);
return ret;
}
static void intel_pmu_reset(void)
{
- struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds;
+ struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
unsigned long flags;
int idx;
- if (!x86_pmu.num_counters)
+ if (!x86_pmu.num_events)
return;
local_irq_save(flags);
printk("clearing PMU state on CPU#%d\n", smp_processor_id());
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for (idx = 0; idx < x86_pmu.num_events; idx++) {
checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
}
- for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
+ for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
}
if (ds)
@@ -1613,39 +1630,38 @@ static void intel_pmu_reset(void)
static int p6_pmu_handle_irq(struct pt_regs *regs)
{
struct perf_sample_data data;
- struct cpu_hw_counters *cpuc;
- struct perf_counter *counter;
- struct hw_perf_counter *hwc;
+ struct cpu_hw_events *cpuc;
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
int idx, handled = 0;
u64 val;
- data.regs = regs;
data.addr = 0;
- cpuc = &__get_cpu_var(cpu_hw_counters);
+ cpuc = &__get_cpu_var(cpu_hw_events);
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for (idx = 0; idx < x86_pmu.num_events; idx++) {
if (!test_bit(idx, cpuc->active_mask))
continue;
- counter = cpuc->counters[idx];
- hwc = &counter->hw;
+ event = cpuc->events[idx];
+ hwc = &event->hw;
- val = x86_perf_counter_update(counter, hwc, idx);
- if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+ val = x86_perf_event_update(event, hwc, idx);
+ if (val & (1ULL << (x86_pmu.event_bits - 1)))
continue;
/*
- * counter overflow
+ * event overflow
*/
handled = 1;
- data.period = counter->hw.last_period;
+ data.period = event->hw.last_period;
- if (!x86_perf_counter_set_period(counter, hwc, idx))
+ if (!x86_perf_event_set_period(event, hwc, idx))
continue;
- if (perf_counter_overflow(counter, 1, &data))
- p6_pmu_disable_counter(hwc, idx);
+ if (perf_event_overflow(event, 1, &data, regs))
+ p6_pmu_disable_event(hwc, idx);
}
if (handled)
@@ -1661,17 +1677,16 @@ static int p6_pmu_handle_irq(struct pt_regs *regs)
static int intel_pmu_handle_irq(struct pt_regs *regs)
{
struct perf_sample_data data;
- struct cpu_hw_counters *cpuc;
+ struct cpu_hw_events *cpuc;
int bit, loops;
u64 ack, status;
- data.regs = regs;
data.addr = 0;
- cpuc = &__get_cpu_var(cpu_hw_counters);
+ cpuc = &__get_cpu_var(cpu_hw_events);
perf_disable();
- intel_pmu_drain_bts_buffer(cpuc, &data);
+ intel_pmu_drain_bts_buffer(cpuc);
status = intel_pmu_get_status();
if (!status) {
perf_enable();
@@ -1681,8 +1696,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
loops = 0;
again:
if (++loops > 100) {
- WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
- perf_counter_print_debug();
+ WARN_ONCE(1, "perfevents: irq loop stuck!\n");
+ perf_event_print_debug();
intel_pmu_reset();
perf_enable();
return 1;
@@ -1691,19 +1706,19 @@ again:
inc_irq_stat(apic_perf_irqs);
ack = status;
for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
- struct perf_counter *counter = cpuc->counters[bit];
+ struct perf_event *event = cpuc->events[bit];
clear_bit(bit, (unsigned long *) &status);
if (!test_bit(bit, cpuc->active_mask))
continue;
- if (!intel_pmu_save_and_restart(counter))
+ if (!intel_pmu_save_and_restart(event))
continue;
- data.period = counter->hw.last_period;
+ data.period = event->hw.last_period;
- if (perf_counter_overflow(counter, 1, &data))
- intel_pmu_disable_counter(&counter->hw, bit);
+ if (perf_event_overflow(event, 1, &data, regs))
+ intel_pmu_disable_event(&event->hw, bit);
}
intel_pmu_ack_status(ack);
@@ -1723,39 +1738,38 @@ again:
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
struct perf_sample_data data;
- struct cpu_hw_counters *cpuc;
- struct perf_counter *counter;
- struct hw_perf_counter *hwc;
+ struct cpu_hw_events *cpuc;
+ struct perf_event *event;
+ struct hw_perf_event *hwc;
int idx, handled = 0;
u64 val;
- data.regs = regs;
data.addr = 0;
- cpuc = &__get_cpu_var(cpu_hw_counters);
+ cpuc = &__get_cpu_var(cpu_hw_events);
- for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+ for (idx = 0; idx < x86_pmu.num_events; idx++) {
if (!test_bit(idx, cpuc->active_mask))
continue;
- counter = cpuc->counters[idx];
- hwc = &counter->hw;
+ event = cpuc->events[idx];
+ hwc = &event->hw;
- val = x86_perf_counter_update(counter, hwc, idx);
- if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+ val = x86_perf_event_update(event, hwc, idx);
+ if (val & (1ULL << (x86_pmu.event_bits - 1)))
continue;
/*
- * counter overflow
+ * event overflow
*/
handled = 1;
- data.period = counter->hw.last_period;
+ data.period = event->hw.last_period;
- if (!x86_perf_counter_set_period(counter, hwc, idx))
+ if (!x86_perf_event_set_period(event, hwc, idx))
continue;
- if (perf_counter_overflow(counter, 1, &data))
- amd_pmu_disable_counter(hwc, idx);
+ if (perf_event_overflow(event, 1, &data, regs))
+ amd_pmu_disable_event(hwc, idx);
}
if (handled)
@@ -1769,18 +1783,21 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
irq_enter();
ack_APIC_irq();
inc_irq_stat(apic_pending_irqs);
- perf_counter_do_pending();
+ perf_event_do_pending();
irq_exit();
}
-void set_perf_counter_pending(void)
+void set_perf_event_pending(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
+ if (!x86_pmu.apic || !x86_pmu_initialized())
+ return;
+
apic->send_IPI_self(LOCAL_PENDING_VECTOR);
#endif
}
-void perf_counters_lapic_init(void)
+void perf_events_lapic_init(void)
{
#ifdef CONFIG_X86_LOCAL_APIC
if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1794,13 +1811,13 @@ void perf_counters_lapic_init(void)
}
static int __kprobes
-perf_counter_nmi_handler(struct notifier_block *self,
+perf_event_nmi_handler(struct notifier_block *self,
unsigned long cmd, void *__args)
{
struct die_args *args = __args;
struct pt_regs *regs;
- if (!atomic_read(&active_counters))
+ if (!atomic_read(&active_events))
return NOTIFY_DONE;
switch (cmd) {
@@ -1819,7 +1836,7 @@ perf_counter_nmi_handler(struct notifier_block *self,
#endif
/*
* Can't rely on the handled return value to say it was our NMI, two
- * counters could trigger 'simultaneously' raising two back-to-back NMIs.
+ * events could trigger 'simultaneously' raising two back-to-back NMIs.
*
* If the first NMI handles both, the latter will be empty and daze
* the CPU.
@@ -1829,8 +1846,8 @@ perf_counter_nmi_handler(struct notifier_block *self,
return NOTIFY_STOP;
}
-static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
- .notifier_call = perf_counter_nmi_handler,
+static __read_mostly struct notifier_block perf_event_nmi_notifier = {
+ .notifier_call = perf_event_nmi_handler,
.next = NULL,
.priority = 1
};
@@ -1840,8 +1857,8 @@ static struct x86_pmu p6_pmu = {
.handle_irq = p6_pmu_handle_irq,
.disable_all = p6_pmu_disable_all,
.enable_all = p6_pmu_enable_all,
- .enable = p6_pmu_enable_counter,
- .disable = p6_pmu_disable_counter,
+ .enable = p6_pmu_enable_event,
+ .disable = p6_pmu_disable_event,
.eventsel = MSR_P6_EVNTSEL0,
.perfctr = MSR_P6_PERFCTR0,
.event_map = p6_pmu_event_map,
@@ -1850,16 +1867,16 @@ static struct x86_pmu p6_pmu = {
.apic = 1,
.max_period = (1ULL << 31) - 1,
.version = 0,
- .num_counters = 2,
+ .num_events = 2,
/*
- * Counters have 40 bits implemented. However they are designed such
+ * Events have 40 bits implemented. However they are designed such
* that bits [32-39] are sign extensions of bit 31. As such the
- * effective width of a counter for P6-like PMU is 32 bits only.
+ * effective width of a event for P6-like PMU is 32 bits only.
*
* See IA-32 Intel Architecture Software developer manual Vol 3B
*/
- .counter_bits = 32,
- .counter_mask = (1ULL << 32) - 1,
+ .event_bits = 32,
+ .event_mask = (1ULL << 32) - 1,
};
static struct x86_pmu intel_pmu = {
@@ -1867,8 +1884,8 @@ static struct x86_pmu intel_pmu = {
.handle_irq = intel_pmu_handle_irq,
.disable_all = intel_pmu_disable_all,
.enable_all = intel_pmu_enable_all,
- .enable = intel_pmu_enable_counter,
- .disable = intel_pmu_disable_counter,
+ .enable = intel_pmu_enable_event,
+ .disable = intel_pmu_disable_event,
.eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
.event_map = intel_pmu_event_map,
@@ -1878,7 +1895,7 @@ static struct x86_pmu intel_pmu = {
/*
* Intel PMCs cannot be accessed sanely above 32 bit width,
* so we install an artificial 1<<31 period regardless of
- * the generic counter period:
+ * the generic event period:
*/
.max_period = (1ULL << 31) - 1,
.enable_bts = intel_pmu_enable_bts,
@@ -1890,16 +1907,16 @@ static struct x86_pmu amd_pmu = {
.handle_irq = amd_pmu_handle_irq,
.disable_all = amd_pmu_disable_all,
.enable_all = amd_pmu_enable_all,
- .enable = amd_pmu_enable_counter,
- .disable = amd_pmu_disable_counter,
+ .enable = amd_pmu_enable_event,
+ .disable = amd_pmu_disable_event,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
.event_map = amd_pmu_event_map,
.raw_event = amd_pmu_raw_event,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
- .num_counters = 4,
- .counter_bits = 48,
- .counter_mask = (1ULL << 48) - 1,
+ .num_events = 4,
+ .event_bits = 48,
+ .event_mask = (1ULL << 48) - 1,
.apic = 1,
/* use highest bit to detect overflow */
.max_period = (1ULL << 47) - 1,
@@ -1956,7 +1973,7 @@ static int intel_pmu_init(void)
/*
* Check whether the Architectural PerfMon supports
- * Branch Misses Retired Event or not.
+ * Branch Misses Retired hw_event or not.
*/
cpuid(10, &eax.full, &ebx, &unused, &edx.full);
if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
@@ -1968,15 +1985,15 @@ static int intel_pmu_init(void)
x86_pmu = intel_pmu;
x86_pmu.version = version;
- x86_pmu.num_counters = eax.split.num_counters;
- x86_pmu.counter_bits = eax.split.bit_width;
- x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
+ x86_pmu.num_events = eax.split.num_events;
+ x86_pmu.event_bits = eax.split.bit_width;
+ x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
/*
- * Quirk: v2 perfmon does not report fixed-purpose counters, so
- * assume at least 3 counters:
+ * Quirk: v2 perfmon does not report fixed-purpose events, so
+ * assume at least 3 events:
*/
- x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3);
+ x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
/*
* Install the hw-cache-events table:
@@ -2023,11 +2040,11 @@ static int amd_pmu_init(void)
return 0;
}
-void __init init_hw_perf_counters(void)
+void __init init_hw_perf_events(void)
{
int err;
- pr_info("Performance Counters: ");
+ pr_info("Performance Events: ");
switch (boot_cpu_data.x86_vendor) {
case X86_VENDOR_INTEL:
@@ -2040,45 +2057,45 @@ void __init init_hw_perf_counters(void)
return;
}
if (err != 0) {
- pr_cont("no PMU driver, software counters only.\n");
+ pr_cont("no PMU driver, software events only.\n");
return;
}
pr_cont("%s PMU driver.\n", x86_pmu.name);
- if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
- WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!",
- x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
- x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
+ if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+ WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
+ x86_pmu.num_events, X86_PMC_MAX_GENERIC);
+ x86_pmu.num_events = X86_PMC_MAX_GENERIC;
}
- perf_counter_mask = (1 << x86_pmu.num_counters) - 1;
- perf_max_counters = x86_pmu.num_counters;
+ perf_event_mask = (1 << x86_pmu.num_events) - 1;
+ perf_max_events = x86_pmu.num_events;
- if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
- WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!",
- x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
- x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
+ if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+ WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
+ x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
+ x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
}
- perf_counter_mask |=
- ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
- x86_pmu.intel_ctrl = perf_counter_mask;
+ perf_event_mask |=
+ ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
+ x86_pmu.intel_ctrl = perf_event_mask;
- perf_counters_lapic_init();
- register_die_notifier(&perf_counter_nmi_notifier);
+ perf_events_lapic_init();
+ register_die_notifier(&perf_event_nmi_notifier);
- pr_info("... version: %d\n", x86_pmu.version);
- pr_info("... bit width: %d\n", x86_pmu.counter_bits);
- pr_info("... generic counters: %d\n", x86_pmu.num_counters);
- pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask);
- pr_info("... max period: %016Lx\n", x86_pmu.max_period);
- pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed);
- pr_info("... counter mask: %016Lx\n", perf_counter_mask);
+ pr_info("... version: %d\n", x86_pmu.version);
+ pr_info("... bit width: %d\n", x86_pmu.event_bits);
+ pr_info("... generic registers: %d\n", x86_pmu.num_events);
+ pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
+ pr_info("... max period: %016Lx\n", x86_pmu.max_period);
+ pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
+ pr_info("... event mask: %016Lx\n", perf_event_mask);
}
-static inline void x86_pmu_read(struct perf_counter *counter)
+static inline void x86_pmu_read(struct perf_event *event)
{
- x86_perf_counter_update(counter, &counter->hw, counter->hw.idx);
+ x86_perf_event_update(event, &event->hw, event->hw.idx);
}
static const struct pmu pmu = {
@@ -2088,13 +2105,16 @@ static const struct pmu pmu = {
.unthrottle = x86_pmu_unthrottle,
};
-const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
+const struct pmu *hw_perf_event_init(struct perf_event *event)
{
int err;
- err = __hw_perf_counter_init(counter);
- if (err)
+ err = __hw_perf_event_init(event);
+ if (err) {
+ if (event->destroy)
+ event->destroy(event);
return ERR_PTR(err);
+ }
return &pmu;
}
@@ -2275,7 +2295,7 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}
-void hw_perf_counter_setup_online(int cpu)
+void hw_perf_event_setup_online(int cpu)
{
init_debug_store_on_cpu(cpu);
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 392bea4..fab786f 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -20,7 +20,7 @@
#include <linux/kprobes.h>
#include <asm/apic.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
struct nmi_watchdog_ctlblk {
unsigned int cccr_msr;
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 0a46b4d..1cbed97 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -58,6 +58,9 @@ static unsigned long vmware_get_tsc_khz(void)
tsc_hz = eax | (((uint64_t)ebx) << 32);
do_div(tsc_hz, 1000);
BUG_ON(tsc_hz >> 32);
+ printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+ (unsigned long) tsc_hz / 1000,
+ (unsigned long) tsc_hz % 1000);
return tsc_hz;
}
@@ -69,6 +72,9 @@ void __init vmware_platform_setup(void)
if (ebx != UINT_MAX)
x86_platform.calibrate_tsc = vmware_get_tsc_khz;
+ else
+ printk(KERN_WARNING
+ "Failed to get TSC freq from the hypervisor\n");
}
/*
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index b07af88..6a52d4b 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -182,7 +182,7 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier =
.notifier_call = cpuid_class_cpu_callback,
};
-static char *cpuid_nodename(struct device *dev)
+static char *cpuid_devnode(struct device *dev, mode_t *mode)
{
return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
}
@@ -203,7 +203,7 @@ static int __init cpuid_init(void)
err = PTR_ERR(cpuid_class);
goto out_chrdev;
}
- cpuid_class->nodename = cpuid_nodename;
+ cpuid_class->devnode = cpuid_devnode;
for_each_online_cpu(i) {
err = cpuid_device_create(i);
if (err != 0)
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index f7cdb3b..cd97ce1 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -16,6 +16,22 @@ static void *kdump_buf_page;
/* Stores the physical address of elf header of crash image. */
unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+static inline bool is_crashed_pfn_valid(unsigned long pfn)
+{
+#ifndef CONFIG_X86_PAE
+ /*
+ * non-PAE kdump kernel executed from a PAE one will crop high pte
+ * bits and poke unwanted space counting again from address 0, we
+ * don't want that. pte must fit into unsigned long. In fact the
+ * test checks high 12 bits for being zero (pfn will be shifted left
+ * by PAGE_SHIFT).
+ */
+ return pte_pfn(pfn_pte(pfn, __pgprot(0))) == pfn;
+#else
+ return true;
+#endif
+}
+
/**
* copy_oldmem_page - copy one page from "oldmem"
* @pfn: page frame number to be copied
@@ -41,6 +57,9 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
if (!csize)
return 0;
+ if (!is_crashed_pfn_valid(pfn))
+ return -EFAULT;
+
vaddr = kmap_atomic_pfn(pfn, KM_PTE0);
if (!userbuf) {
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index bca5fba..f7dd2a7 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -5,7 +5,6 @@
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 54b0a32..a071e6b 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -5,7 +5,6 @@
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
#include <linux/module.h>
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index a3210ce..d17d482 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1331,7 +1331,7 @@ void __init e820_reserve_resources(void)
struct resource *res;
u64 end;
- res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+ res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
e820_res = res;
for (i = 0; i < e820.nr_map; i++) {
end = e820.map[i].addr + e820.map[i].size - 1;
@@ -1378,8 +1378,8 @@ static unsigned long ram_alignment(resource_size_t pos)
if (mb < 16)
return 1024*1024;
- /* To 32MB for anything above that */
- return 32*1024*1024;
+ /* To 64MB for anything above that */
+ return 64*1024*1024;
}
#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 335f049..b9c830c 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -160,721 +160,6 @@ static struct console early_serial_console = {
.index = -1,
};
-#ifdef CONFIG_EARLY_PRINTK_DBGP
-
-static struct ehci_caps __iomem *ehci_caps;
-static struct ehci_regs __iomem *ehci_regs;
-static struct ehci_dbg_port __iomem *ehci_debug;
-static unsigned int dbgp_endpoint_out;
-
-struct ehci_dev {
- u32 bus;
- u32 slot;
- u32 func;
-};
-
-static struct ehci_dev ehci_dev;
-
-#define USB_DEBUG_DEVNUM 127
-
-#define DBGP_DATA_TOGGLE 0x8800
-
-static inline u32 dbgp_pid_update(u32 x, u32 tok)
-{
- return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
-}
-
-static inline u32 dbgp_len_update(u32 x, u32 len)
-{
- return (x & ~0x0f) | (len & 0x0f);
-}
-
-/*
- * USB Packet IDs (PIDs)
- */
-
-/* token */
-#define USB_PID_OUT 0xe1
-#define USB_PID_IN 0x69
-#define USB_PID_SOF 0xa5
-#define USB_PID_SETUP 0x2d
-/* handshake */
-#define USB_PID_ACK 0xd2
-#define USB_PID_NAK 0x5a
-#define USB_PID_STALL 0x1e
-#define USB_PID_NYET 0x96
-/* data */
-#define USB_PID_DATA0 0xc3
-#define USB_PID_DATA1 0x4b
-#define USB_PID_DATA2 0x87
-#define USB_PID_MDATA 0x0f
-/* Special */
-#define USB_PID_PREAMBLE 0x3c
-#define USB_PID_ERR 0x3c
-#define USB_PID_SPLIT 0x78
-#define USB_PID_PING 0xb4
-#define USB_PID_UNDEF_0 0xf0
-
-#define USB_PID_DATA_TOGGLE 0x88
-#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
-
-#define PCI_CAP_ID_EHCI_DEBUG 0xa
-
-#define HUB_ROOT_RESET_TIME 50 /* times are in msec */
-#define HUB_SHORT_RESET_TIME 10
-#define HUB_LONG_RESET_TIME 200
-#define HUB_RESET_TIMEOUT 500
-
-#define DBGP_MAX_PACKET 8
-
-static int dbgp_wait_until_complete(void)
-{
- u32 ctrl;
- int loop = 0x100000;
-
- do {
- ctrl = readl(&ehci_debug->control);
- /* Stop when the transaction is finished */
- if (ctrl & DBGP_DONE)
- break;
- } while (--loop > 0);
-
- if (!loop)
- return -1;
-
- /*
- * Now that we have observed the completed transaction,
- * clear the done bit.
- */
- writel(ctrl | DBGP_DONE, &ehci_debug->control);
- return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
-}
-
-static void __init dbgp_mdelay(int ms)
-{
- int i;
-
- while (ms--) {
- for (i = 0; i < 1000; i++)
- outb(0x1, 0x80);
- }
-}
-
-static void dbgp_breath(void)
-{
- /* Sleep to give the debug port a chance to breathe */
-}
-
-static int dbgp_wait_until_done(unsigned ctrl)
-{
- u32 pids, lpid;
- int ret;
- int loop = 3;
-
-retry:
- writel(ctrl | DBGP_GO, &ehci_debug->control);
- ret = dbgp_wait_until_complete();
- pids = readl(&ehci_debug->pids);
- lpid = DBGP_PID_GET(pids);
-
- if (ret < 0)
- return ret;
-
- /*
- * If the port is getting full or it has dropped data
- * start pacing ourselves, not necessary but it's friendly.
- */
- if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
- dbgp_breath();
-
- /* If I get a NACK reissue the transmission */
- if (lpid == USB_PID_NAK) {
- if (--loop > 0)
- goto retry;
- }
-
- return ret;
-}
-
-static void dbgp_set_data(const void *buf, int size)
-{
- const unsigned char *bytes = buf;
- u32 lo, hi;
- int i;
-
- lo = hi = 0;
- for (i = 0; i < 4 && i < size; i++)
- lo |= bytes[i] << (8*i);
- for (; i < 8 && i < size; i++)
- hi |= bytes[i] << (8*(i - 4));
- writel(lo, &ehci_debug->data03);
- writel(hi, &ehci_debug->data47);
-}
-
-static void __init dbgp_get_data(void *buf, int size)
-{
- unsigned char *bytes = buf;
- u32 lo, hi;
- int i;
-
- lo = readl(&ehci_debug->data03);
- hi = readl(&ehci_debug->data47);
- for (i = 0; i < 4 && i < size; i++)
- bytes[i] = (lo >> (8*i)) & 0xff;
- for (; i < 8 && i < size; i++)
- bytes[i] = (hi >> (8*(i - 4))) & 0xff;
-}
-
-static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
- const char *bytes, int size)
-{
- u32 pids, addr, ctrl;
- int ret;
-
- if (size > DBGP_MAX_PACKET)
- return -1;
-
- addr = DBGP_EPADDR(devnum, endpoint);
-
- pids = readl(&ehci_debug->pids);
- pids = dbgp_pid_update(pids, USB_PID_OUT);
-
- ctrl = readl(&ehci_debug->control);
- ctrl = dbgp_len_update(ctrl, size);
- ctrl |= DBGP_OUT;
- ctrl |= DBGP_GO;
-
- dbgp_set_data(bytes, size);
- writel(addr, &ehci_debug->address);
- writel(pids, &ehci_debug->pids);
-
- ret = dbgp_wait_until_done(ctrl);
- if (ret < 0)
- return ret;
-
- return ret;
-}
-
-static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
- int size)
-{
- u32 pids, addr, ctrl;
- int ret;
-
- if (size > DBGP_MAX_PACKET)
- return -1;
-
- addr = DBGP_EPADDR(devnum, endpoint);
-
- pids = readl(&ehci_debug->pids);
- pids = dbgp_pid_update(pids, USB_PID_IN);
-
- ctrl = readl(&ehci_debug->control);
- ctrl = dbgp_len_update(ctrl, size);
- ctrl &= ~DBGP_OUT;
- ctrl |= DBGP_GO;
-
- writel(addr, &ehci_debug->address);
- writel(pids, &ehci_debug->pids);
- ret = dbgp_wait_until_done(ctrl);
- if (ret < 0)
- return ret;
-
- if (size > ret)
- size = ret;
- dbgp_get_data(data, size);
- return ret;
-}
-
-static int __init dbgp_control_msg(unsigned devnum, int requesttype,
- int request, int value, int index, void *data, int size)
-{
- u32 pids, addr, ctrl;
- struct usb_ctrlrequest req;
- int read;
- int ret;
-
- read = (requesttype & USB_DIR_IN) != 0;
- if (size > (read ? DBGP_MAX_PACKET:0))
- return -1;
-
- /* Compute the control message */
- req.bRequestType = requesttype;
- req.bRequest = request;
- req.wValue = cpu_to_le16(value);
- req.wIndex = cpu_to_le16(index);
- req.wLength = cpu_to_le16(size);
-
- pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
- addr = DBGP_EPADDR(devnum, 0);
-
- ctrl = readl(&ehci_debug->control);
- ctrl = dbgp_len_update(ctrl, sizeof(req));
- ctrl |= DBGP_OUT;
- ctrl |= DBGP_GO;
-
- /* Send the setup message */
- dbgp_set_data(&req, sizeof(req));
- writel(addr, &ehci_debug->address);
- writel(pids, &ehci_debug->pids);
- ret = dbgp_wait_until_done(ctrl);
- if (ret < 0)
- return ret;
-
- /* Read the result */
- return dbgp_bulk_read(devnum, 0, data, size);
-}
-
-
-/* Find a PCI capability */
-static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
-{
- u8 pos;
- int bytes;
-
- if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
- PCI_STATUS_CAP_LIST))
- return 0;
-
- pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
- for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
- u8 id;
-
- pos &= ~3;
- id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
- if (id == 0xff)
- break;
- if (id == cap)
- return pos;
-
- pos = read_pci_config_byte(num, slot, func,
- pos+PCI_CAP_LIST_NEXT);
- }
- return 0;
-}
-
-static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
-{
- u32 class;
-
- class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
- if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
- return 0;
-
- return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
-}
-
-static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
-{
- u32 bus, slot, func;
-
- for (bus = 0; bus < 256; bus++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- unsigned cap;
-
- cap = __find_dbgp(bus, slot, func);
-
- if (!cap)
- continue;
- if (ehci_num-- != 0)
- continue;
- *rbus = bus;
- *rslot = slot;
- *rfunc = func;
- return cap;
- }
- }
- }
- return 0;
-}
-
-static int __init ehci_reset_port(int port)
-{
- u32 portsc;
- u32 delay_time, delay;
- int loop;
-
- /* Reset the usb debug port */
- portsc = readl(&ehci_regs->port_status[port - 1]);
- portsc &= ~PORT_PE;
- portsc |= PORT_RESET;
- writel(portsc, &ehci_regs->port_status[port - 1]);
-
- delay = HUB_ROOT_RESET_TIME;
- for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
- delay_time += delay) {
- dbgp_mdelay(delay);
-
- portsc = readl(&ehci_regs->port_status[port - 1]);
- if (portsc & PORT_RESET) {
- /* force reset to complete */
- loop = 2;
- writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
- &ehci_regs->port_status[port - 1]);
- do {
- portsc = readl(&ehci_regs->port_status[port-1]);
- } while ((portsc & PORT_RESET) && (--loop > 0));
- }
-
- /* Device went away? */
- if (!(portsc & PORT_CONNECT))
- return -ENOTCONN;
-
- /* bomb out completely if something weird happend */
- if ((portsc & PORT_CSC))
- return -EINVAL;
-
- /* If we've finished resetting, then break out of the loop */
- if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
- return 0;
- }
- return -EBUSY;
-}
-
-static int __init ehci_wait_for_port(int port)
-{
- u32 status;
- int ret, reps;
-
- for (reps = 0; reps < 3; reps++) {
- dbgp_mdelay(100);
- status = readl(&ehci_regs->status);
- if (status & STS_PCD) {
- ret = ehci_reset_port(port);
- if (ret == 0)
- return 0;
- }
- }
- return -ENOTCONN;
-}
-
-#ifdef DBGP_DEBUG
-# define dbgp_printk early_printk
-#else
-static inline void dbgp_printk(const char *fmt, ...) { }
-#endif
-
-typedef void (*set_debug_port_t)(int port);
-
-static void __init default_set_debug_port(int port)
-{
-}
-
-static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
-
-static void __init nvidia_set_debug_port(int port)
-{
- u32 dword;
- dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
- 0x74);
- dword &= ~(0x0f<<12);
- dword |= ((port & 0x0f)<<12);
- write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
- dword);
- dbgp_printk("set debug port to %d\n", port);
-}
-
-static void __init detect_set_debug_port(void)
-{
- u32 vendorid;
-
- vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
- 0x00);
-
- if ((vendorid & 0xffff) == 0x10de) {
- dbgp_printk("using nvidia set_debug_port\n");
- set_debug_port = nvidia_set_debug_port;
- }
-}
-
-static int __init ehci_setup(void)
-{
- struct usb_debug_descriptor dbgp_desc;
- u32 cmd, ctrl, status, portsc, hcs_params;
- u32 debug_port, new_debug_port = 0, n_ports;
- u32 devnum;
- int ret, i;
- int loop;
- int port_map_tried;
- int playtimes = 3;
-
-try_next_time:
- port_map_tried = 0;
-
-try_next_port:
-
- hcs_params = readl(&ehci_caps->hcs_params);
- debug_port = HCS_DEBUG_PORT(hcs_params);
- n_ports = HCS_N_PORTS(hcs_params);
-
- dbgp_printk("debug_port: %d\n", debug_port);
- dbgp_printk("n_ports: %d\n", n_ports);
-
- for (i = 1; i <= n_ports; i++) {
- portsc = readl(&ehci_regs->port_status[i-1]);
- dbgp_printk("portstatus%d: %08x\n", i, portsc);
- }
-
- if (port_map_tried && (new_debug_port != debug_port)) {
- if (--playtimes) {
- set_debug_port(new_debug_port);
- goto try_next_time;
- }
- return -1;
- }
-
- loop = 10;
- /* Reset the EHCI controller */
- cmd = readl(&ehci_regs->command);
- cmd |= CMD_RESET;
- writel(cmd, &ehci_regs->command);
- do {
- cmd = readl(&ehci_regs->command);
- } while ((cmd & CMD_RESET) && (--loop > 0));
-
- if (!loop) {
- dbgp_printk("can not reset ehci\n");
- return -1;
- }
- dbgp_printk("ehci reset done\n");
-
- /* Claim ownership, but do not enable yet */
- ctrl = readl(&ehci_debug->control);
- ctrl |= DBGP_OWNER;
- ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
- writel(ctrl, &ehci_debug->control);
-
- /* Start the ehci running */
- cmd = readl(&ehci_regs->command);
- cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
- cmd |= CMD_RUN;
- writel(cmd, &ehci_regs->command);
-
- /* Ensure everything is routed to the EHCI */
- writel(FLAG_CF, &ehci_regs->configured_flag);
-
- /* Wait until the controller is no longer halted */
- loop = 10;
- do {
- status = readl(&ehci_regs->status);
- } while ((status & STS_HALT) && (--loop > 0));
-
- if (!loop) {
- dbgp_printk("ehci can be started\n");
- return -1;
- }
- dbgp_printk("ehci started\n");
-
- /* Wait for a device to show up in the debug port */
- ret = ehci_wait_for_port(debug_port);
- if (ret < 0) {
- dbgp_printk("No device found in debug port\n");
- goto next_debug_port;
- }
- dbgp_printk("ehci wait for port done\n");
-
- /* Enable the debug port */
- ctrl = readl(&ehci_debug->control);
- ctrl |= DBGP_CLAIM;
- writel(ctrl, &ehci_debug->control);
- ctrl = readl(&ehci_debug->control);
- if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
- dbgp_printk("No device in debug port\n");
- writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
- goto err;
- }
- dbgp_printk("debug ported enabled\n");
-
- /* Completely transfer the debug device to the debug controller */
- portsc = readl(&ehci_regs->port_status[debug_port - 1]);
- portsc &= ~PORT_PE;
- writel(portsc, &ehci_regs->port_status[debug_port - 1]);
-
- dbgp_mdelay(100);
-
- /* Find the debug device and make it device number 127 */
- for (devnum = 0; devnum <= 127; devnum++) {
- ret = dbgp_control_msg(devnum,
- USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
- USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
- &dbgp_desc, sizeof(dbgp_desc));
- if (ret > 0)
- break;
- }
- if (devnum > 127) {
- dbgp_printk("Could not find attached debug device\n");
- goto err;
- }
- if (ret < 0) {
- dbgp_printk("Attached device is not a debug device\n");
- goto err;
- }
- dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
-
- /* Move the device to 127 if it isn't already there */
- if (devnum != USB_DEBUG_DEVNUM) {
- ret = dbgp_control_msg(devnum,
- USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
- USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
- if (ret < 0) {
- dbgp_printk("Could not move attached device to %d\n",
- USB_DEBUG_DEVNUM);
- goto err;
- }
- devnum = USB_DEBUG_DEVNUM;
- dbgp_printk("debug device renamed to 127\n");
- }
-
- /* Enable the debug interface */
- ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
- USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
- USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
- if (ret < 0) {
- dbgp_printk(" Could not enable the debug device\n");
- goto err;
- }
- dbgp_printk("debug interface enabled\n");
-
- /* Perform a small write to get the even/odd data state in sync
- */
- ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
- if (ret < 0) {
- dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
- goto err;
- }
- dbgp_printk("small write doned\n");
-
- return 0;
-err:
- /* Things didn't work so remove my claim */
- ctrl = readl(&ehci_debug->control);
- ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
- writel(ctrl, &ehci_debug->control);
- return -1;
-
-next_debug_port:
- port_map_tried |= (1<<(debug_port - 1));
- new_debug_port = ((debug_port-1+1)%n_ports) + 1;
- if (port_map_tried != ((1<<n_ports) - 1)) {
- set_debug_port(new_debug_port);
- goto try_next_port;
- }
- if (--playtimes) {
- set_debug_port(new_debug_port);
- goto try_next_time;
- }
-
- return -1;
-}
-
-static int __init early_dbgp_init(char *s)
-{
- u32 debug_port, bar, offset;
- u32 bus, slot, func, cap;
- void __iomem *ehci_bar;
- u32 dbgp_num;
- u32 bar_val;
- char *e;
- int ret;
- u8 byte;
-
- if (!early_pci_allowed())
- return -1;
-
- dbgp_num = 0;
- if (*s)
- dbgp_num = simple_strtoul(s, &e, 10);
- dbgp_printk("dbgp_num: %d\n", dbgp_num);
-
- cap = find_dbgp(dbgp_num, &bus, &slot, &func);
- if (!cap)
- return -1;
-
- dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
- func);
-
- debug_port = read_pci_config(bus, slot, func, cap);
- bar = (debug_port >> 29) & 0x7;
- bar = (bar * 4) + 0xc;
- offset = (debug_port >> 16) & 0xfff;
- dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
- if (bar != PCI_BASE_ADDRESS_0) {
- dbgp_printk("only debug ports on bar 1 handled.\n");
-
- return -1;
- }
-
- bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
- dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
- if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
- dbgp_printk("only simple 32bit mmio bars supported\n");
-
- return -1;
- }
-
- /* double check if the mem space is enabled */
- byte = read_pci_config_byte(bus, slot, func, 0x04);
- if (!(byte & 0x2)) {
- byte |= 0x02;
- write_pci_config_byte(bus, slot, func, 0x04, byte);
- dbgp_printk("mmio for ehci enabled\n");
- }
-
- /*
- * FIXME I don't have the bar size so just guess PAGE_SIZE is more
- * than enough. 1K is the biggest I have seen.
- */
- set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
- ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
- ehci_bar += bar_val & ~PAGE_MASK;
- dbgp_printk("ehci_bar: %p\n", ehci_bar);
-
- ehci_caps = ehci_bar;
- ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
- ehci_debug = ehci_bar + offset;
- ehci_dev.bus = bus;
- ehci_dev.slot = slot;
- ehci_dev.func = func;
-
- detect_set_debug_port();
-
- ret = ehci_setup();
- if (ret < 0) {
- dbgp_printk("ehci_setup failed\n");
- ehci_debug = NULL;
-
- return -1;
- }
-
- return 0;
-}
-
-static void early_dbgp_write(struct console *con, const char *str, u32 n)
-{
- int chunk, ret;
-
- if (!ehci_debug)
- return;
- while (n > 0) {
- chunk = n;
- if (chunk > DBGP_MAX_PACKET)
- chunk = DBGP_MAX_PACKET;
- ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
- dbgp_endpoint_out, str, chunk);
- str += chunk;
- n -= chunk;
- }
-}
-
-static struct console early_dbgp_console = {
- .name = "earlydbg",
- .write = early_dbgp_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-#endif
-
/* Direct interface for emergencies */
static struct console *early_console = &early_vga_console;
static int __initdata early_console_initialized;
@@ -891,10 +176,24 @@ asmlinkage void early_printk(const char *fmt, ...)
va_end(ap);
}
+static inline void early_console_register(struct console *con, int keep_early)
+{
+ if (early_console->index != -1) {
+ printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
+ con->name);
+ return;
+ }
+ early_console = con;
+ if (keep_early)
+ early_console->flags &= ~CON_BOOT;
+ else
+ early_console->flags |= CON_BOOT;
+ register_console(early_console);
+}
static int __init setup_early_printk(char *buf)
{
- int keep_early;
+ int keep;
if (!buf)
return 0;
@@ -903,42 +202,37 @@ static int __init setup_early_printk(char *buf)
return 0;
early_console_initialized = 1;
- keep_early = (strstr(buf, "keep") != NULL);
-
- if (!strncmp(buf, "serial", 6)) {
- early_serial_init(buf + 6);
- early_console = &early_serial_console;
- } else if (!strncmp(buf, "ttyS", 4)) {
- early_serial_init(buf);
- early_console = &early_serial_console;
- } else if (!strncmp(buf, "vga", 3)
- && boot_params.screen_info.orig_video_isVGA == 1) {
- max_xpos = boot_params.screen_info.orig_video_cols;
- max_ypos = boot_params.screen_info.orig_video_lines;
- current_ypos = boot_params.screen_info.orig_y;
- early_console = &early_vga_console;
+ keep = (strstr(buf, "keep") != NULL);
+
+ while (*buf != '\0') {
+ if (!strncmp(buf, "serial", 6)) {
+ buf += 6;
+ early_serial_init(buf);
+ early_console_register(&early_serial_console, keep);
+ if (!strncmp(buf, ",ttyS", 5))
+ buf += 5;
+ }
+ if (!strncmp(buf, "ttyS", 4)) {
+ early_serial_init(buf + 4);
+ early_console_register(&early_serial_console, keep);
+ }
+ if (!strncmp(buf, "vga", 3) &&
+ boot_params.screen_info.orig_video_isVGA == 1) {
+ max_xpos = boot_params.screen_info.orig_video_cols;
+ max_ypos = boot_params.screen_info.orig_video_lines;
+ current_ypos = boot_params.screen_info.orig_y;
+ early_console_register(&early_vga_console, keep);
+ }
#ifdef CONFIG_EARLY_PRINTK_DBGP
- } else if (!strncmp(buf, "dbgp", 4)) {
- if (early_dbgp_init(buf+4) < 0)
- return 0;
- early_console = &early_dbgp_console;
- /*
- * usb subsys will reset ehci controller, so don't keep
- * that early console
- */
- keep_early = 0;
+ if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4))
+ early_console_register(&early_dbgp_console, keep);
#endif
#ifdef CONFIG_HVC_XEN
- } else if (!strncmp(buf, "xen", 3)) {
- early_console = &xenboot_console;
+ if (!strncmp(buf, "xen", 3))
+ early_console_register(&xenboot_console, keep);
#endif
+ buf++;
}
-
- if (keep_early)
- early_console->flags &= ~CON_BOOT;
- else
- early_console->flags |= CON_BOOT;
- register_console(early_console);
return 0;
}
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index ad5bd98..cdcfb12 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -454,8 +454,10 @@ void __init efi_init(void)
if (add_efi_memmap)
do_add_efi_memmap();
+#ifdef CONFIG_X86_32
x86_platform.get_wallclock = efi_get_time;
x86_platform.set_wallclock = efi_set_rtc_mmss;
+#endif
/* Setup for EFI runtime service */
reboot_type = BOOT_EFI;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index d59fe32..b5c061f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -536,20 +536,13 @@ sysret_signal:
bt $TIF_SYSCALL_AUDIT,%edx
jc sysret_audit
#endif
- /* edx: work flags (arg3) */
- leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
- xorl %esi,%esi # oldset -> arg2
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call do_notify_resume
- RESTORE_TOP_OF_STACK %r11
- RESTORE_REST
- movl $_TIF_WORK_MASK,%edi
- /* Use IRET because user could have changed frame. This
- works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- jmp int_with_check
+ /*
+ * We have a signal, or exit tracing or single-step.
+ * These all wind up with the iret return path anyway,
+ * so just join that path right now.
+ */
+ FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+ jmp int_check_syscall_exit_work
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -654,6 +647,7 @@ int_careful:
int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+int_check_syscall_exit_work:
SAVE_REST
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
@@ -1021,7 +1015,7 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
perf_pending_interrupt smp_perf_pending_interrupt
#endif
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b766e8c..050c278 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -79,7 +79,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
* any particular GDT layout, because we load our own as soon as we
* can.
*/
-.section .text.head,"ax",@progbits
+__HEAD
ENTRY(startup_32)
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
@@ -608,7 +608,7 @@ ENTRY(initial_code)
/*
* BSS section
*/
-.section ".bss.page_aligned","wa"
+__PAGE_ALIGNED_BSS
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
swapper_pg_pmd:
@@ -626,7 +626,7 @@ ENTRY(empty_zero_page)
* This starts the data section.
*/
#ifdef CONFIG_X86_PAE
-.section ".data.page_aligned","wa"
+__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
.align PAGE_SIZE_asm
ENTRY(swapper_pg_dir)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index fa54f78..780cd92 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -40,7 +40,7 @@ L4_START_KERNEL = pgd_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
- .section .text.head
+ __HEAD
.code64
.globl startup_64
startup_64:
@@ -418,7 +418,7 @@ ENTRY(phys_base)
ENTRY(idt_table)
.skip IDT_ENTRIES * 16
- .section .bss.page_aligned, "aw", @nobits
+ __PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
.skip PAGE_SIZE
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index 43cec6b..9c3bd4a 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -10,6 +10,16 @@
EXPORT_SYMBOL(mcount);
#endif
+/*
+ * Note, this is a prototype to get at the symbol for
+ * the export, but dont use it from C code, it is used
+ * by assembly code and is not using C calling convention!
+ */
+#ifndef CONFIG_X86_CMPXCHG64
+extern void cmpxchg8b_emu(void);
+EXPORT_SYMBOL(cmpxchg8b_emu);
+#endif
+
/* Networking helper routines. */
EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c
index 270ff83..3a54dcb 100644
--- a/arch/x86/kernel/init_task.c
+++ b/arch/x86/kernel/init_task.c
@@ -20,9 +20,8 @@ static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
* way process stacks are handled. This is done by having a special
* "init_task" linker map entry..
*/
-union thread_union init_thread_union
- __attribute__((__section__(".data.init_task"))) =
- { INIT_THREAD_INFO(init_task) };
+union thread_union init_thread_union __init_task_data =
+ { INIT_THREAD_INFO(init_task) };
/*
* Initial task structure.
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 74656d1..04bbd52 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,10 +63,10 @@ static int show_other_interrupts(struct seq_file *p, int prec)
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
seq_printf(p, " Spurious interrupts\n");
- seq_printf(p, "%*s: ", prec, "CNT");
+ seq_printf(p, "%*s: ", prec, "PMI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
- seq_printf(p, " Performance counter interrupts\n");
+ seq_printf(p, " Performance monitoring interrupts\n");
seq_printf(p, "%*s: ", prec, "PND");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 3008831..40f3077 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -208,7 +208,7 @@ static void __init apic_intr_init(void)
alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
/* Performance monitoring interrupts: */
-# ifdef CONFIG_PERF_COUNTERS
+# ifdef CONFIG_PERF_EVENTS
alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
# endif
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 71f1d99..ec6ef60 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -67,8 +67,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
#ifdef CONFIG_SMP
preempt_disable();
load_LDT(pc);
- if (!cpus_equal(current->mm->cpu_vm_mask,
- cpumask_of_cpu(smp_processor_id())))
+ if (!cpumask_equal(mm_cpumask(current->mm),
+ cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 366baa1..f4c538b 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -317,6 +317,12 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device)
return UCODE_NFOUND;
}
+ if (*(u32 *)firmware->data != UCODE_MAGIC) {
+ printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n",
+ *(u32 *)firmware->data);
+ return UCODE_ERROR;
+ }
+
ret = generic_load_microcode(cpu, firmware->data, firmware->size);
release_firmware(firmware);
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 9371448..378e9a8 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -210,8 +210,8 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
{
ssize_t ret = -EINVAL;
- if ((len >> PAGE_SHIFT) > num_physpages) {
- pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+ if ((len >> PAGE_SHIFT) > totalram_pages) {
+ pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
return ret;
}
@@ -236,7 +236,7 @@ static const struct file_operations microcode_fops = {
static struct miscdevice microcode_dev = {
.minor = MICROCODE_MINOR,
.name = "microcode",
- .devnode = "cpu/microcode",
+ .nodename = "cpu/microcode",
.fops = &microcode_fops,
};
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 7dd9500..6a3cefc 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -241,7 +241,7 @@ static struct notifier_block __refdata msr_class_cpu_notifier = {
.notifier_call = msr_class_cpu_callback,
};
-static char *msr_nodename(struct device *dev)
+static char *msr_devnode(struct device *dev, mode_t *mode)
{
return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
}
@@ -262,7 +262,7 @@ static int __init msr_init(void)
err = PTR_ERR(msr_class);
goto out_chrdev;
}
- msr_class->nodename = msr_nodename;
+ msr_class->devnode = msr_devnode;
for_each_online_cpu(i) {
err = msr_device_create(i);
if (err != 0)
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 64b838e..a6e804d 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
/*
* This variable becomes 1 if iommu=pt is passed on the kernel command line.
- * If this variable is 1, IOMMU implementations do no DMA ranslation for
+ * If this variable is 1, IOMMU implementations do no DMA translation for
* devices and allow every device to access to whole physical memory. This is
* useful if a user want to use an IOMMU only for KVM device assignment to
* guests and not for driver dma translation.
@@ -45,12 +45,10 @@ int iommu_pass_through __read_mostly;
dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);
-/* Dummy device used for NULL arguments (normally ISA). Better would
- be probably a smaller DMA mask, but this is bug-to-bug compatible
- to older i386. */
+/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
- .coherent_dma_mask = DMA_BIT_MASK(32),
+ .coherent_dma_mask = ISA_DMA_BIT_MASK,
.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
};
EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -311,7 +309,7 @@ void pci_iommu_shutdown(void)
amd_iommu_shutdown();
}
/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
+rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 98a827e..a7f1b64 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -16,6 +16,7 @@
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index e8a3501..aaa6b78 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -46,9 +46,8 @@ void __init pci_swiotlb_init(void)
{
/* don't initialize swiotlb if iommu=off (no_iommu=1) */
#ifdef CONFIG_X86_64
- if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) ||
- iommu_pass_through)
- swiotlb = 1;
+ if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN))
+ swiotlb = 1;
#endif
if (swiotlb_force)
swiotlb = 1;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 071166a..5284cd2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,7 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
-#include <trace/power.h>
+#include <trace/events/power.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -25,9 +25,6 @@ EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
-DEFINE_TRACE(power_start);
-DEFINE_TRACE(power_end);
-
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
@@ -299,9 +296,7 @@ static inline int hlt_use_halt(void)
void default_idle(void)
{
if (hlt_use_halt()) {
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1);
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -314,7 +309,6 @@ void default_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
- trace_power_end(&it);
} else {
local_irq_enable();
/* loop is done by the caller */
@@ -372,9 +366,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
+ trace_power_start(POWER_CSTATE, (ax>>4)+1);
if (!need_resched()) {
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -384,15 +376,13 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
if (!need_resched())
__mwait(ax, cx);
}
- trace_power_end(&it);
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
- struct power_trace it;
if (!need_resched()) {
- trace_power_start(&it, POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1);
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -402,7 +392,6 @@ static void mwait_idle(void)
__sti_mwait(0, 0);
else
local_irq_enable();
- trace_power_end(&it);
} else
local_irq_enable();
}
@@ -414,13 +403,11 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, 0);
+ trace_power_start(POWER_CSTATE, 0);
local_irq_enable();
while (!need_resched())
cpu_relax();
- trace_power_end(&it);
+ trace_power_end(0);
}
/*
@@ -568,10 +555,8 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
void __init init_c1e_mask(void)
{
/* If we're using c1e_idle, we need to allocate c1e_mask. */
- if (pm_idle == c1e_idle) {
- alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
- cpumask_clear(c1e_mask);
- }
+ if (pm_idle == c1e_idle)
+ zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
}
static int __init idle_setup(char *str)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ad535b6..eb62cbc 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -664,3 +664,8 @@ long sys_arch_prctl(int code, unsigned long addr)
return do_arch_prctl(current, code, addr);
}
+unsigned long KSTK_ESP(struct task_struct *task)
+{
+ return (test_tsk_thread_flag(task, TIF_IA32)) ?
+ (task_pt_regs(task)->sp) : ((task)->thread.usersp);
+}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 8d7d5c9..7b058a2 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -325,16 +325,6 @@ static int putreg(struct task_struct *child,
return set_flags(child, value);
#ifdef CONFIG_X86_64
- /*
- * Orig_ax is really just a flag with small positive and
- * negative values, so make sure to always sign-extend it
- * from 32 bits so that it works correctly regardless of
- * whether we come from a 32-bit environment or not.
- */
- case offsetof(struct user_regs_struct, orig_ax):
- value = (long) (s32) value;
- break;
-
case offsetof(struct user_regs_struct,fs_base):
if (value >= TASK_SIZE_OF(child))
return -EIO;
@@ -1126,10 +1116,15 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
case offsetof(struct user32, regs.orig_eax):
/*
- * Sign-extend the value so that orig_eax = -1
- * causes (long)orig_ax < 0 tests to fire correctly.
+ * A 32-bit debugger setting orig_eax means to restore
+ * the state of the task restarting a 32-bit syscall.
+ * Make sure we interpret the -ERESTART* codes correctly
+ * in case the task is not actually still sitting at the
+ * exit from a 32-bit syscall with TS_COMPAT still set.
*/
- regs->orig_ax = (long) (s32) value;
+ regs->orig_ax = value;
+ if (syscall_get_nr(child, regs) >= 0)
+ task_thread_info(child)->status |= TS_COMPAT;
break;
case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 27349f9..f930787 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
#include <linux/pm.h>
#include <linux/efi.h>
#include <linux/dmi.h>
+#include <linux/sched.h>
#include <linux/tboot.h>
#include <acpi/reboot.h>
#include <asm/io.h>
@@ -435,6 +436,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
},
},
+ { /* Handle problems with rebooting on Apple Macmini3,1 */
+ .callback = set_pci_reboot,
+ .ident = "Apple Macmini3,1",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"),
+ },
+ },
{ }
};
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a55f660..2a34f9c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -27,6 +27,7 @@
#include <linux/screen_info.h>
#include <linux/ioport.h>
#include <linux/acpi.h>
+#include <linux/sfi.h>
#include <linux/apm_bios.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
@@ -659,6 +660,13 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
},
},
{
+ .callback = dmi_low_memory_corruption,
+ .ident = "Phoenix/MSC BIOS",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
+ },
+ },
+ {
/*
* AMI BIOS with low memory corruption was found on Intel DG45ID board.
* It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
@@ -697,21 +705,6 @@ void __init setup_arch(char **cmdline_p)
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
- strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
- *cmdline_p = command_line;
-
-#ifdef CONFIG_X86_64
- /*
- * Must call this twice: Once just to detect whether hardware doesn't
- * support NX (so that the early EHCI debug console setup can safely
- * call set_fixmap(), and then again after parsing early parameters to
- * honor the respective command line option.
- */
- check_efer();
-#endif
-
- parse_early_param();
-
/* VMI may relocate the fixmap; do this before touching ioremap area */
vmi_init();
@@ -794,6 +787,21 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
+ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
+ *cmdline_p = command_line;
+
+#ifdef CONFIG_X86_64
+ /*
+ * Must call this twice: Once just to detect whether hardware doesn't
+ * support NX (so that the early EHCI debug console setup can safely
+ * call set_fixmap(), and then again after parsing early parameters to
+ * honor the respective command line option.
+ */
+ check_efer();
+#endif
+
+ parse_early_param();
+
#ifdef CONFIG_X86_64
check_efer();
#endif
@@ -985,6 +993,8 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_boot_init();
+ sfi_init();
+
/*
* get boot-time SMP configuration:
*/
diff --git a/arch/x86/kernel/sfi.c b/arch/x86/kernel/sfi.c
new file mode 100644
index 0000000..34e0993
--- /dev/null
+++ b/arch/x86/kernel/sfi.c
@@ -0,0 +1,122 @@
+/*
+ * sfi.c - x86 architecture SFI support.
+ *
+ * Copyright (c) 2009, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#define KMSG_COMPONENT "SFI"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/init.h>
+#include <linux/sfi.h>
+#include <linux/io.h>
+
+#include <asm/io_apic.h>
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+
+#ifdef CONFIG_X86_LOCAL_APIC
+static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
+
+void __init mp_sfi_register_lapic_address(unsigned long address)
+{
+ mp_lapic_addr = address;
+
+ set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
+ if (boot_cpu_physical_apicid == -1U)
+ boot_cpu_physical_apicid = read_apic_id();
+
+ pr_info("Boot CPU = %d\n", boot_cpu_physical_apicid);
+}
+
+/* All CPUs enumerated by SFI must be present and enabled */
+void __cpuinit mp_sfi_register_lapic(u8 id)
+{
+ if (MAX_APICS - id <= 0) {
+ pr_warning("Processor #%d invalid (max %d)\n",
+ id, MAX_APICS);
+ return;
+ }
+
+ pr_info("registering lapic[%d]\n", id);
+
+ generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
+}
+
+static int __init sfi_parse_cpus(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_cpu_table_entry *pentry;
+ int i;
+ int cpu_num;
+
+ sb = (struct sfi_table_simple *)table;
+ cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
+ pentry = (struct sfi_cpu_table_entry *)sb->pentry;
+
+ for (i = 0; i < cpu_num; i++) {
+ mp_sfi_register_lapic(pentry->apic_id);
+ pentry++;
+ }
+
+ smp_found_config = 1;
+ return 0;
+}
+#endif /* CONFIG_X86_LOCAL_APIC */
+
+#ifdef CONFIG_X86_IO_APIC
+static u32 gsi_base;
+
+static int __init sfi_parse_ioapic(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+ struct sfi_apic_table_entry *pentry;
+ int i, num;
+
+ sb = (struct sfi_table_simple *)table;
+ num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
+ pentry = (struct sfi_apic_table_entry *)sb->pentry;
+
+ for (i = 0; i < num; i++) {
+ mp_register_ioapic(i, pentry->phys_addr, gsi_base);
+ gsi_base += io_apic_get_redir_entries(i);
+ pentry++;
+ }
+
+ WARN(pic_mode, KERN_WARNING
+ "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
+ pic_mode = 0;
+ return 0;
+}
+#endif /* CONFIG_X86_IO_APIC */
+
+/*
+ * sfi_platform_init(): register lapics & io-apics
+ */
+int __init sfi_platform_init(void)
+{
+#ifdef CONFIG_X86_LOCAL_APIC
+ mp_sfi_register_lapic_address(sfi_lapic_addr);
+ sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
+#endif
+#ifdef CONFIG_X86_IO_APIC
+ sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
+#endif
+ return 0;
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 09c5e07..565ebc6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1059,12 +1059,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
#endif
current_thread_info()->cpu = 0; /* needed? */
for_each_possible_cpu(i) {
- alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
- alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
- cpumask_clear(per_cpu(cpu_core_map, i));
- cpumask_clear(per_cpu(cpu_sibling_map, i));
- cpumask_clear(cpu_data(i).llc_shared_map);
+ zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
+ zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
}
set_cpu_sibling_map(0);
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d51321d..0157cd2 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -335,4 +335,4 @@ ENTRY(sys_call_table)
.long sys_preadv
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
- .long sys_perf_counter_open
+ .long sys_perf_event_open
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index e293ac5..be25734 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -38,7 +38,8 @@ unsigned long profile_pc(struct pt_regs *regs)
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
- unsigned long *sp = (unsigned long *)regs->sp;
+ unsigned long *sp =
+ (unsigned long *)kernel_stack_pointer(regs);
/*
* Return address is either directly at stack pointer
* or above a saved flags. Eflags has bits 22-31 zero,
@@ -93,7 +94,6 @@ static struct irqaction irq0 = {
void __init setup_default_timer_irq(void)
{
- irq0.mask = cpumask_of_cpu(0);
setup_irq(0, &irq0);
}
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 503c1f2..1740c85 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -23,8 +23,6 @@
static struct bau_control **uv_bau_table_bases __read_mostly;
static int uv_bau_retry_limit __read_mostly;
-/* position of pnode (which is nasid>>1): */
-static int uv_nshift __read_mostly;
/* base pnode in this partition */
static int uv_partition_base_pnode __read_mostly;
@@ -723,7 +721,7 @@ uv_activation_descriptor_init(int node, int pnode)
BUG_ON(!adp);
pa = uv_gpa(adp); /* need the real nasid*/
- n = pa >> uv_nshift;
+ n = uv_gpa_to_pnode(pa);
m = pa & uv_mmask;
uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE,
@@ -778,7 +776,7 @@ uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
* need the pnode of where the memory was really allocated
*/
pa = uv_gpa(pqp);
- pn = pa >> uv_nshift;
+ pn = uv_gpa_to_pnode(pa);
uv_write_global_mmr64(pnode,
UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) |
@@ -843,8 +841,7 @@ static int __init uv_bau_init(void)
GFP_KERNEL, cpu_to_node(cur_cpu));
uv_bau_retry_limit = 1;
- uv_nshift = uv_hub_info->n_val;
- uv_mmask = (1UL << uv_hub_info->n_val) - 1;
+ uv_mmask = (1UL << uv_hub_info->m_val) - 1;
nblades = uv_num_possible_blades();
uv_bau_table_bases = (struct bau_control **)
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index 808031a..cd02212 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -3,8 +3,16 @@
#include <asm/trampoline.h>
#include <asm/e820.h>
+#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
+#define __trampinit
+#define __trampinitdata
+#else
+#define __trampinit __cpuinit
+#define __trampinitdata __cpuinitdata
+#endif
+
/* ready for x86_64 and x86 */
-unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
+unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE);
void __init reserve_trampoline_memory(void)
{
@@ -26,7 +34,7 @@ void __init reserve_trampoline_memory(void)
* bootstrap into the page concerned. The caller
* has made sure it's suitably aligned.
*/
-unsigned long setup_trampoline(void)
+unsigned long __trampinit setup_trampoline(void)
{
memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base);
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index 66d874e..8508237 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -28,16 +28,12 @@
*/
#include <linux/linkage.h>
+#include <linux/init.h>
#include <asm/segment.h>
#include <asm/page_types.h>
/* We can free up trampoline after bootup if cpu hotplug is not supported. */
-#ifndef CONFIG_HOTPLUG_CPU
-.section ".cpuinit.data","aw",@progbits
-#else
-.section .rodata,"a",@progbits
-#endif
-
+__CPUINITRODATA
.code16
ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index cddfb8d..3af2dff 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -25,14 +25,19 @@
*/
#include <linux/linkage.h>
+#include <linux/init.h>
#include <asm/pgtable_types.h>
#include <asm/page_types.h>
#include <asm/msr.h>
#include <asm/segment.h>
#include <asm/processor-flags.h>
+#ifdef CONFIG_ACPI_SLEEP
.section .rodata, "a", @progbits
-
+#else
+/* We can free up the trampoline after bootup if cpu hotplug is not supported. */
+__CPUINITRODATA
+#endif
.code16
ENTRY(trampoline_data)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7dc0de9..7e37dce 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -14,7 +14,6 @@
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -65,7 +64,6 @@
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
-#include <asm/traps.h>
asmlinkage int system_call(void);
@@ -74,11 +72,9 @@ char ignore_fpu_irq;
/*
* The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
+ * F0 0F bug workaround.
*/
-gate_desc idt_table[NR_VECTORS]
- __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
+gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
#endif
DECLARE_BITMAP(used_vectors, NR_VECTORS);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 17409e8..cd982f4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -666,7 +666,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
(val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
(val == CPUFREQ_RESUMECHANGE)) {
- *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+ *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 027b5b4..f379309 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -114,7 +114,7 @@ void __cpuinit check_tsc_sync_source(int cpu)
return;
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
- pr_info("Skipping synchronization checks as TSC is reliable.\n");
+ printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n");
return;
}
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 31e6f6c..d430e4c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -648,7 +648,7 @@ static inline int __init activate_vmi(void)
pv_info.paravirt_enabled = 1;
pv_info.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK;
- pv_info.name = "vmi";
+ pv_info.name = "vmi [deprecated]";
pv_init_ops.patch = vmi_patch;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0ccb57d..3c68fe2 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -45,9 +45,9 @@ PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(7); /* RWE */
#ifdef CONFIG_X86_64
- user PT_LOAD FLAGS(7); /* RWE */
+ user PT_LOAD FLAGS(5); /* R_E */
#ifdef CONFIG_SMP
- percpu PT_LOAD FLAGS(7); /* RWE */
+ percpu PT_LOAD FLAGS(6); /* RW_ */
#endif
init PT_LOAD FLAGS(7); /* RWE */
#endif
@@ -65,17 +65,11 @@ SECTIONS
#endif
/* Text and read-only data */
-
- /* bootstrapping code */
- .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
- _text = .;
- *(.text.head)
- } :text = 0x9090
-
- /* The rest of the text */
.text : AT(ADDR(.text) - LOAD_OFFSET) {
+ _text = .;
+ /* bootstrapping code */
+ HEAD_TEXT
#ifdef CONFIG_X86_32
- /* not really needed, already page aligned */
. = ALIGN(PAGE_SIZE);
*(.text.page_aligned)
#endif
@@ -94,13 +88,7 @@ SECTIONS
NOTES :text :note
- /* Exception table */
- . = ALIGN(16);
- __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
- __start___ex_table = .;
- *(__ex_table)
- __stop___ex_table = .;
- } :text = 0x9090
+ EXCEPTION_TABLE(16) :text = 0x9090
RO_DATA(PAGE_SIZE)
@@ -118,7 +106,6 @@ SECTIONS
#endif
PAGE_ALIGNED_DATA(PAGE_SIZE)
- *(.data.idt)
CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES)
@@ -135,24 +122,21 @@ SECTIONS
#ifdef CONFIG_X86_64
#define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data) + SIZEOF(.data) + \
- PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data) + SIZEOF(.data) + \
- PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
-#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
+#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
+#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+ . = ALIGN(4096);
+ __vsyscall_0 = .;
+
. = VSYSCALL_ADDR;
- .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
+ .vsyscall_0 : AT(VLOAD(.vsyscall_0)) {
*(.vsyscall_0)
} :user
- __vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
. = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
.vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
*(.vsyscall_fn)
@@ -192,11 +176,9 @@ SECTIONS
*(.vsyscall_3)
}
- . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
+ . = __vsyscall_0 + PAGE_SIZE;
#undef VSYSCALL_ADDR
-#undef VSYSCALL_PHYS_ADDR
-#undef VSYSCALL_VIRT_ADDR
#undef VLOAD_OFFSET
#undef VLOAD
#undef VVIRT_OFFSET
@@ -219,36 +201,12 @@ SECTIONS
PERCPU_VADDR(0, :percpu)
#endif
- .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
- _sinittext = .;
- INIT_TEXT
- _einittext = .;
- }
+ INIT_TEXT_SECTION(PAGE_SIZE)
#ifdef CONFIG_X86_64
:init
#endif
- .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
- INIT_DATA
- }
-
- . = ALIGN(16);
- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
- __setup_start = .;
- *(.init.setup)
- __setup_end = .;
- }
- .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
- __initcall_start = .;
- INITCALLS
- __initcall_end = .;
- }
-
- .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
- __con_initcall_start = .;
- *(.con_initcall.init)
- __con_initcall_end = .;
- }
+ INIT_DATA_SECTION(16)
.x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
__x86_cpu_dev_start = .;
@@ -256,8 +214,6 @@ SECTIONS
__x86_cpu_dev_end = .;
}
- SECURITY_INIT
-
. = ALIGN(8);
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
__parainstructions = .;
@@ -288,15 +244,6 @@ SECTIONS
EXIT_DATA
}
-#ifdef CONFIG_BLK_DEV_INITRD
- . = ALIGN(PAGE_SIZE);
- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
- __initramfs_start = .;
- *(.init.ramfs)
- __initramfs_end = .;
- }
-#endif
-
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
PERCPU(PAGE_SIZE)
#endif
@@ -358,6 +305,9 @@ SECTIONS
#ifdef CONFIG_X86_32
+/*
+ * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:
+ */
. = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
#else
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index cf53a78..8cb4974 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -228,19 +228,11 @@ static long __vsyscall(3) venosys_1(void)
}
#ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = vsyscall_sysctl_change },
+ .proc_handler = proc_dointvec },
{}
};
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 82ad523..144e7f6 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -116,7 +116,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
* itself with the initial count and continues counting
* from there.
*/
- remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
+ remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
elapsed = mod_64(elapsed, ps->pit_timer.period);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1ae5ceb..23c2176 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -521,7 +521,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
if (apic_get_reg(apic, APIC_TMICT) == 0)
return 0;
- remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer);
+ remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
if (ktime_to_ns(remaining) < 0)
remaining = ktime_set(0, 0);
@@ -664,7 +664,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now = apic->lapic_timer.timer.base->get_time();
- apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) *
+ apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->divide_count;
atomic_set(&apic->lapic_timer.pending, 0);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index eca41ae..818b92a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -156,6 +156,8 @@ module_param(oos_shadow, bool, 0644);
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
+#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
+
#define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
struct kvm_rmap_desc {
@@ -634,9 +636,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
if (*spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
if (is_writeble_pte(*spte))
- kvm_release_pfn_dirty(pfn);
- else
- kvm_release_pfn_clean(pfn);
+ kvm_set_pfn_dirty(pfn);
rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -748,7 +748,8 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
return write_protected;
}
-static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
+static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long data)
{
u64 *spte;
int need_tlb_flush = 0;
@@ -763,8 +764,47 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
return need_tlb_flush;
}
+static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long data)
+{
+ int need_flush = 0;
+ u64 *spte, new_spte;
+ pte_t *ptep = (pte_t *)data;
+ pfn_t new_pfn;
+
+ WARN_ON(pte_huge(*ptep));
+ new_pfn = pte_pfn(*ptep);
+ spte = rmap_next(kvm, rmapp, NULL);
+ while (spte) {
+ BUG_ON(!is_shadow_present_pte(*spte));
+ rmap_printk("kvm_set_pte_rmapp: spte %p %llx\n", spte, *spte);
+ need_flush = 1;
+ if (pte_write(*ptep)) {
+ rmap_remove(kvm, spte);
+ __set_spte(spte, shadow_trap_nonpresent_pte);
+ spte = rmap_next(kvm, rmapp, NULL);
+ } else {
+ new_spte = *spte &~ (PT64_BASE_ADDR_MASK);
+ new_spte |= (u64)new_pfn << PAGE_SHIFT;
+
+ new_spte &= ~PT_WRITABLE_MASK;
+ new_spte &= ~SPTE_HOST_WRITEABLE;
+ if (is_writeble_pte(*spte))
+ kvm_set_pfn_dirty(spte_to_pfn(*spte));
+ __set_spte(spte, new_spte);
+ spte = rmap_next(kvm, rmapp, spte);
+ }
+ }
+ if (need_flush)
+ kvm_flush_remote_tlbs(kvm);
+
+ return 0;
+}
+
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
- int (*handler)(struct kvm *kvm, unsigned long *rmapp))
+ unsigned long data,
+ int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long data))
{
int i, j;
int retval = 0;
@@ -786,13 +826,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
if (hva >= start && hva < end) {
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
- retval |= handler(kvm, &memslot->rmap[gfn_offset]);
+ retval |= handler(kvm, &memslot->rmap[gfn_offset],
+ data);
for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
int idx = gfn_offset;
idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
retval |= handler(kvm,
- &memslot->lpage_info[j][idx].rmap_pde);
+ &memslot->lpage_info[j][idx].rmap_pde,
+ data);
}
}
}
@@ -802,10 +844,16 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
- return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+ return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
}
-static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+ kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
+}
+
+static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
+ unsigned long data)
{
u64 *spte;
int young = 0;
@@ -841,13 +889,13 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
gfn = unalias_gfn(vcpu->kvm, gfn);
rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
- kvm_unmap_rmapp(vcpu->kvm, rmapp);
+ kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
kvm_flush_remote_tlbs(vcpu->kvm);
}
int kvm_age_hva(struct kvm *kvm, unsigned long hva)
{
- return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
+ return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
}
#ifdef MMU_DEBUG
@@ -1756,7 +1804,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int user_fault,
int write_fault, int dirty, int level,
gfn_t gfn, pfn_t pfn, bool speculative,
- bool can_unsync)
+ bool can_unsync, bool reset_host_protection)
{
u64 spte;
int ret = 0;
@@ -1783,6 +1831,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
+ if (reset_host_protection)
+ spte |= SPTE_HOST_WRITEABLE;
+
spte |= (u64)pfn << PAGE_SHIFT;
if ((pte_access & ACC_WRITE_MASK)
@@ -1828,7 +1879,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty,
int *ptwrite, int level, gfn_t gfn,
- pfn_t pfn, bool speculative)
+ pfn_t pfn, bool speculative,
+ bool reset_host_protection)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*sptep);
@@ -1860,7 +1912,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
}
if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
- dirty, level, gfn, pfn, speculative, true)) {
+ dirty, level, gfn, pfn, speculative, true,
+ reset_host_protection)) {
if (write_fault)
*ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu);
@@ -1877,8 +1930,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
page_header_update_slot(vcpu->kvm, sptep, gfn);
if (!was_rmapped) {
rmap_count = rmap_add(vcpu, sptep, gfn);
- if (!is_rmap_spte(*sptep))
- kvm_release_pfn_clean(pfn);
+ kvm_release_pfn_clean(pfn);
if (rmap_count > RMAP_RECYCLE_THRESHOLD)
rmap_recycle(vcpu, sptep, gfn);
} else {
@@ -1909,7 +1961,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
if (iterator.level == level) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
0, write, 1, &pt_write,
- level, gfn, pfn, false);
+ level, gfn, pfn, false, true);
++vcpu->stat.pf_fixed;
break;
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index d2fec9c..72558f8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -273,9 +273,13 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq))
return;
kvm_get_pfn(pfn);
+ /*
+ * we call mmu_set_spte() with reset_host_protection = true beacuse that
+ * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+ */
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, PT_PAGE_TABLE_LEVEL,
- gpte_to_gfn(gpte), pfn, true);
+ gpte_to_gfn(gpte), pfn, true, true);
}
/*
@@ -308,7 +312,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, level,
- gw->gfn, pfn, false);
+ gw->gfn, pfn, false, true);
break;
}
@@ -558,6 +562,7 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
int i, offset, nr_present;
+ bool reset_host_protection;
offset = nr_present = 0;
@@ -595,9 +600,16 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+ if (!(sp->spt[i] & SPTE_HOST_WRITEABLE)) {
+ pte_access &= ~ACC_WRITE_MASK;
+ reset_host_protection = 0;
+ } else {
+ reset_host_protection = 1;
+ }
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn,
- spte_to_pfn(sp->spt[i]), true, false);
+ spte_to_pfn(sp->spt[i]), true, false,
+ reset_host_protection);
}
return !nr_present;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 944cc9c..c17404a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -767,6 +767,8 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
rdtscll(tsc_this);
delta = vcpu->arch.host_tsc - tsc_this;
svm->vmcb->control.tsc_offset += delta;
+ if (is_nested(svm))
+ svm->nested.hsave->control.tsc_offset += delta;
vcpu->cpu = cpu;
kvm_migrate_timers(vcpu);
svm->asid_generation = 0;
@@ -2057,10 +2059,14 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
switch (ecx) {
case MSR_IA32_TSC: {
- u64 tsc;
+ u64 tsc_offset;
+
+ if (is_nested(svm))
+ tsc_offset = svm->nested.hsave->control.tsc_offset;
+ else
+ tsc_offset = svm->vmcb->control.tsc_offset;
- rdtscll(tsc);
- *data = svm->vmcb->control.tsc_offset + tsc;
+ *data = tsc_offset + native_read_tsc();
break;
}
case MSR_K6_STAR:
@@ -2146,10 +2152,17 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
switch (ecx) {
case MSR_IA32_TSC: {
- u64 tsc;
+ u64 tsc_offset = data - native_read_tsc();
+ u64 g_tsc_offset = 0;
+
+ if (is_nested(svm)) {
+ g_tsc_offset = svm->vmcb->control.tsc_offset -
+ svm->nested.hsave->control.tsc_offset;
+ svm->nested.hsave->control.tsc_offset = tsc_offset;
+ }
+
+ svm->vmcb->control.tsc_offset = tsc_offset + g_tsc_offset;
- rdtscll(tsc);
- svm->vmcb->control.tsc_offset = data - tsc;
break;
}
case MSR_K6_STAR:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f381201..ed53b42 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -709,7 +709,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (vcpu->cpu != cpu) {
vcpu_clear(vmx);
kvm_migrate_timers(vcpu);
- vpid_sync_vcpu_all(vmx);
+ set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
local_irq_disable();
list_add(&vmx->local_vcpus_link,
&per_cpu(vcpus_on_cpu, cpu));
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index be451ee44..ae07d26 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1591,6 +1591,8 @@ static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
if (cpuid->nent < 1)
goto out;
+ if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
+ cpuid->nent = KVM_MAX_CPUID_ENTRIES;
r = -ENOMEM;
cpuid_entries = vmalloc(sizeof(struct kvm_cpuid_entry2) * cpuid->nent);
if (!cpuid_entries)
@@ -1690,7 +1692,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
unsigned bank_num = mcg_cap & 0xff, bank;
r = -EINVAL;
- if (!bank_num)
+ if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
goto out;
if (mcg_cap & ~(KVM_MCE_CAP_SUPPORTED | 0xff | 0xff0000))
goto out;
@@ -4049,7 +4051,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
}
-static u32 get_tss_base_addr(struct kvm_vcpu *vcpu,
+static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
struct desc_struct *seg_desc)
{
u32 base_addr = get_desc_base(seg_desc);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 4cb7d5d..7e59dc1 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1135,11 +1135,6 @@ static struct notifier_block paniced = {
/* Setting up memory is fairly easy. */
static __init char *lguest_memory_setup(void)
{
- /* We do this here and not earlier because lockcheck used to barf if we
- * did it before start_kernel(). I think we fixed that, so it'd be
- * nice to move it back to lguest_init. Patch welcome... */
- atomic_notifier_chain_register(&panic_notifier_list, &paniced);
-
/*
*The Linux bootloader header contains an "e820" memory map: the
* Launcher populated the first entry with our memory limit.
@@ -1364,10 +1359,13 @@ __init void lguest_init(void)
/*
* If we don't initialize the lock dependency checker now, it crashes
- * paravirt_disable_iospace.
+ * atomic_notifier_chain_register, then paravirt_disable_iospace.
*/
lockdep_init();
+ /* Hook in our special panic hypercall code. */
+ atomic_notifier_chain_register(&panic_notifier_list, &paniced);
+
/*
* The IDE code spends about 3 seconds probing for disks: if we reserve
* all the I/O ports up front it can't get them and so doesn't probe.
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 9e60920..85f5db9 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -16,7 +16,9 @@ ifeq ($(CONFIG_X86_32),y)
lib-y += checksum_32.o
lib-y += strstr_32.o
lib-y += semaphore_32.o string_32.o
-
+ifneq ($(CONFIG_X86_CMPXCHG64),y)
+ lib-y += cmpxchg8b_emu.o
+endif
lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
else
obj-y += io_64.o iomap_copy_64.o
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
new file mode 100644
index 0000000..828cb71
--- /dev/null
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -0,0 +1,57 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ *
+ */
+
+#include <linux/linkage.h>
+#include <asm/alternative-asm.h>
+#include <asm/frame.h>
+#include <asm/dwarf2.h>
+
+
+.text
+
+/*
+ * Inputs:
+ * %esi : memory location to compare
+ * %eax : low 32 bits of old value
+ * %edx : high 32 bits of old value
+ * %ebx : low 32 bits of new value
+ * %ecx : high 32 bits of new value
+ */
+ENTRY(cmpxchg8b_emu)
+CFI_STARTPROC
+
+#
+# Emulate 'cmpxchg8b (%esi)' on UP except we don't
+# set the whole ZF thing (caller will just compare
+# eax:edx with the expected value)
+#
+cmpxchg8b_emu:
+ pushfl
+ cli
+
+ cmpl (%esi), %eax
+ jne not_same
+ cmpl 4(%esi), %edx
+ jne half_same
+
+ movl %ebx, (%esi)
+ movl %ecx, 4(%esi)
+
+ popfl
+ ret
+
+ not_same:
+ movl (%esi), %eax
+ half_same:
+ movl 4(%esi), %edx
+
+ popfl
+ ret
+
+CFI_ENDPROC
+ENDPROC(cmpxchg8b_emu)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 9b5a9f5..06630d2 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,9 +1,10 @@
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o gup.o
+ pat.o pgtable.o physaddr.o gup.o setup_nx.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_physaddr.o := $(nostackp)
+CFLAGS_setup_nx.o := $(nostackp)
obj-$(CONFIG_SMP) += tlb.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 775a020..f4cee90 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -10,7 +10,7 @@
#include <linux/bootmem.h> /* max_low_pfn */
#include <linux/kprobes.h> /* __kprobes, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
-#include <linux/perf_counter.h> /* perf_swcounter_event */
+#include <linux/perf_event.h> /* perf_sw_event */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void __user *)address;
+ info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
force_sig_info(si_signo, &info, tsk);
}
@@ -790,10 +791,12 @@ out_of_memory(struct pt_regs *regs, unsigned long error_code,
}
static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+ unsigned int fault)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ int code = BUS_ADRERR;
up_read(&mm->mmap_sem);
@@ -809,7 +812,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+ if (fault & VM_FAULT_HWPOISON) {
+ printk(KERN_ERR
+ "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+ tsk->comm, tsk->pid, address);
+ code = BUS_MCEERR_AR;
+ }
+#endif
+ force_sig_info_fault(SIGBUS, code, address, tsk);
}
static noinline void
@@ -819,8 +830,8 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
} else {
- if (fault & VM_FAULT_SIGBUS)
- do_sigbus(regs, error_code, address);
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+ do_sigbus(regs, error_code, address, fault);
else
BUG();
}
@@ -1017,7 +1028,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/*
* If we're in an interrupt, have no user context or are running
@@ -1114,11 +1125,11 @@ good_area:
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
} else {
tsk->min_flt++;
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 0607119..73ffd55 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -28,69 +28,6 @@ int direct_gbpages
#endif
;
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on Enable
- * off Disable
- */
-static int __init noexec_setup(char *str)
-{
- if (!str)
- return -EINVAL;
- if (!strncmp(str, "on", 2)) {
- __supported_pte_mask |= _PAGE_NX;
- disable_nx = 0;
- } else if (!strncmp(str, "off", 3)) {
- disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- }
- return 0;
-}
-early_param("noexec", noexec_setup);
-#endif
-
-#ifdef CONFIG_X86_PAE
-static void __init set_nx(void)
-{
- unsigned int v[4], l, h;
-
- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
- if ((v[3] & (1 << 20)) && !disable_nx) {
- rdmsr(MSR_EFER, l, h);
- l |= EFER_NX;
- wrmsr(MSR_EFER, l, h);
- nx_enabled = 1;
- __supported_pte_mask |= _PAGE_NX;
- }
- }
-}
-#else
-static inline void set_nx(void)
-{
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || disable_nx)
- __supported_pte_mask &= ~_PAGE_NX;
-}
-#endif
-
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 3cd7711..30938c1 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -84,7 +84,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
#ifdef CONFIG_X86_PAE
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
if (after_bootmem)
- pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
else
pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -116,7 +116,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
#endif
if (!page_table)
page_table =
- (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
} else
page_table = (pte_t *)alloc_low_page();
@@ -857,8 +857,6 @@ static void __init test_wp_bit(void)
}
}
-static struct kcore_list kcore_mem, kcore_vmalloc;
-
void __init mem_init(void)
{
int codesize, reservedpages, datasize, initsize;
@@ -886,13 +884,9 @@ void __init mem_init(void)
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
- VMALLOC_END-VMALLOC_START);
-
printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ nr_free_pages() << (PAGE_SHIFT-10),
num_physpages << (PAGE_SHIFT-10),
codesize >> 10,
reservedpages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ea56b8c..5a4398a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -647,8 +647,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif /* CONFIG_MEMORY_HOTPLUG */
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
- kcore_modules, kcore_vsyscall;
+static struct kcore_list kcore_vsyscall;
void __init mem_init(void)
{
@@ -677,17 +676,12 @@ void __init mem_init(void)
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
- VMALLOC_END-VMALLOC_START);
- kclist_add(&kcore_kernel, &_stext, _end - _stext);
- kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
- VSYSCALL_END - VSYSCALL_START);
+ VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
"%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ nr_free_pages() << (PAGE_SHIFT-10),
max_pfn << (PAGE_SHIFT-10),
codesize >> 10,
absent_pages << (PAGE_SHIFT-10),
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 334e63c..2feb9bd 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -170,8 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
(unsigned long long)phys_addr,
(unsigned long long)(phys_addr + size),
prot_val, new_prot_val);
- free_memtype(phys_addr, phys_addr + size);
- return NULL;
+ goto err_free_memtype;
}
prot_val = new_prot_val;
}
@@ -197,26 +196,25 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr,
*/
area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (!area)
- return NULL;
+ goto err_free_memtype;
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
- if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
- free_memtype(phys_addr, phys_addr + size);
- free_vm_area(area);
- return NULL;
- }
+ if (kernel_map_sync_memtype(phys_addr, size, prot_val))
+ goto err_free_area;
- if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot)) {
- free_memtype(phys_addr, phys_addr + size);
- free_vm_area(area);
- return NULL;
- }
+ if (ioremap_page_range(vaddr, vaddr + size, phys_addr, prot))
+ goto err_free_area;
ret_addr = (void __iomem *) (vaddr + offset);
mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
return ret_addr;
+err_free_area:
+ free_vm_area(area);
+err_free_memtype:
+ free_memtype(phys_addr, phys_addr + size);
+ return NULL;
}
/**
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c
index 528bf95..8cc18334 100644
--- a/arch/x86/mm/kmemcheck/kmemcheck.c
+++ b/arch/x86/mm/kmemcheck/kmemcheck.c
@@ -225,9 +225,6 @@ void kmemcheck_hide(struct pt_regs *regs)
BUG_ON(!irqs_disabled());
- if (data->balance == 0)
- return;
-
if (unlikely(data->balance != 1)) {
kmemcheck_show_all();
kmemcheck_error_save_bug(regs);
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c
index e773b6b..3f66b82 100644
--- a/arch/x86/mm/kmemcheck/shadow.c
+++ b/arch/x86/mm/kmemcheck/shadow.c
@@ -1,7 +1,6 @@
#include <linux/kmemcheck.h>
#include <linux/module.h>
#include <linux/mm.h>
-#include <linux/module.h>
#include <asm/page.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 24952fd..dd38bfb 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -144,6 +144,7 @@ void clflush_cache_range(void *vaddr, unsigned int size)
mb();
}
+EXPORT_SYMBOL_GPL(clflush_cache_range);
static void __cpa_flush_all(void *arg)
{
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 7257cf3..e78cd0e 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -81,6 +81,7 @@ enum {
void pat_init(void)
{
u64 pat;
+ bool boot_cpu = !boot_pat_state;
if (!pat_enabled)
return;
@@ -122,8 +123,10 @@ void pat_init(void)
rdmsrl(MSR_IA32_CR_PAT, boot_pat_state);
wrmsrl(MSR_IA32_CR_PAT, pat);
- printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
- smp_processor_id(), boot_pat_state, pat);
+
+ if (boot_cpu)
+ printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
+ smp_processor_id(), boot_pat_state, pat);
}
#undef PAT
diff --git a/arch/x86/mm/setup_nx.c b/arch/x86/mm/setup_nx.c
new file mode 100644
index 0000000..513d8ed
--- /dev/null
+++ b/arch/x86/mm/setup_nx.c
@@ -0,0 +1,69 @@
+#include <linux/spinlock.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+
+#include <asm/pgtable.h>
+
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on Enable
+ * off Disable
+ */
+static int __init noexec_setup(char *str)
+{
+ if (!str)
+ return -EINVAL;
+ if (!strncmp(str, "on", 2)) {
+ __supported_pte_mask |= _PAGE_NX;
+ disable_nx = 0;
+ } else if (!strncmp(str, "off", 3)) {
+ disable_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
+ }
+ return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+void __init set_nx(void)
+{
+ unsigned int v[4], l, h;
+
+ if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+ cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+ if ((v[3] & (1 << 20)) && !disable_nx) {
+ rdmsr(MSR_EFER, l, h);
+ l |= EFER_NX;
+ wrmsr(MSR_EFER, l, h);
+ nx_enabled = 1;
+ __supported_pte_mask |= _PAGE_NX;
+ }
+ }
+}
+#else
+void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+ unsigned long efer;
+
+ rdmsrl(MSR_EFER, efer);
+ if (!(efer & EFER_NX) || disable_nx)
+ __supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index c814e14..36fe08e 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -59,7 +59,8 @@ void leave_mm(int cpu)
{
if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
BUG();
- cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
+ cpumask_clear_cpu(cpu,
+ mm_cpumask(percpu_read(cpu_tlbstate.active_mm)));
load_cr3(swapper_pg_dir);
}
EXPORT_SYMBOL_GPL(leave_mm);
@@ -234,8 +235,8 @@ void flush_tlb_current_task(void)
preempt_disable();
local_flush_tlb();
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
preempt_enable();
}
@@ -249,8 +250,8 @@ void flush_tlb_mm(struct mm_struct *mm)
else
leave_mm(smp_processor_id());
}
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, TLB_FLUSH_ALL);
preempt_enable();
}
@@ -268,8 +269,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
leave_mm(smp_processor_id());
}
- if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(&mm->cpu_vm_mask, mm, va);
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+ flush_tlb_others(mm_cpumask(mm), mm, va);
preempt_enable();
}
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 4899215..8eb0587 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -234,11 +234,11 @@ static void arch_perfmon_setup_counters(void)
if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 &&
current_cpu_data.x86_model == 15) {
eax.split.version_id = 2;
- eax.split.num_counters = 2;
+ eax.split.num_events = 2;
eax.split.bit_width = 40;
}
- num_counters = eax.split.num_counters;
+ num_counters = eax.split.num_events;
op_arch_perfmon_spec.num_counters = num_counters;
op_arch_perfmon_spec.num_controls = num_counters;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index b837761..7b8e75d 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -13,7 +13,7 @@
#define OP_X86_MODEL_H
#include <asm/types.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
struct op_msr {
unsigned long addr;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 5db96d43..1331fcf 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -646,7 +646,7 @@ int get_mp_bus_to_node(int busnum)
#else /* CONFIG_X86_32 */
-static unsigned char mp_bus_to_node[BUS_NR] = {
+static int mp_bus_to_node[BUS_NR] = {
[0 ... BUS_NR - 1] = -1
};
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 52e62e5..b22d13b 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -266,7 +266,7 @@ void pcibios_set_master(struct pci_dev *dev)
pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat);
}
-static struct vm_operations_struct pci_mmap_ops = {
+static const struct vm_operations_struct pci_mmap_ops = {
.access = generic_access_phys,
};
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 712443e..602c172d 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -13,10 +13,14 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/acpi.h>
+#include <linux/sfi_acpi.h>
#include <linux/bitmap.h>
#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>
+#include <asm/acpi.h>
+
+#define PREFIX "PCI: "
/* aperture is up to 256MB but BIOS may reserve less */
#define MMCONFIG_APER_MIN (2 * 1024*1024)
@@ -491,7 +495,7 @@ static void __init pci_mmcfg_reject_broken(int early)
(unsigned int)cfg->start_bus_number,
(unsigned int)cfg->end_bus_number);
- if (!early)
+ if (!early && !acpi_disabled)
valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0);
if (valid)
@@ -606,7 +610,7 @@ static void __init __pci_mmcfg_init(int early)
}
if (!known_bridge)
- acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
+ acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
pci_mmcfg_reject_broken(early);
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index 8b2d561..f10a7e9 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -11,9 +11,9 @@
#include <linux/pci.h>
#include <linux/init.h>
-#include <linux/acpi.h>
#include <asm/e820.h>
#include <asm/pci_x86.h>
+#include <acpi/acpi.h>
/* Assume systems with more busses have correct MCFG */
#define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 417c9f5..8aa85f1 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -243,10 +243,6 @@ static void __restore_processor_state(struct saved_context *ctxt)
do_fpu_end();
mtrr_bp_restore();
-
-#ifdef CONFIG_X86_OLD_MCE
- mcheck_init(&boot_cpu_data);
-#endif
}
/* Needed by apm.c */
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 88112b49..6b4ffed 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -122,7 +122,7 @@ quiet_cmd_vdso = VDSO $@
$(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^)
-VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
GCOV_PROFILE := n
#
diff --git a/arch/x86/xen/debugfs.c b/arch/x86/xen/debugfs.c
index b53225d..e133ce2 100644
--- a/arch/x86/xen/debugfs.c
+++ b/arch/x86/xen/debugfs.c
@@ -100,7 +100,7 @@ static int xen_array_release(struct inode *inode, struct file *file)
return 0;
}
-static struct file_operations u32_array_fops = {
+static const struct file_operations u32_array_fops = {
.owner = THIS_MODULE,
.open = u32_array_open,
.release= xen_array_release,
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 544eb74..dfbf70e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -178,6 +178,7 @@ static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
+ unsigned maskebx = ~0;
unsigned maskecx = ~0;
unsigned maskedx = ~0;
@@ -185,9 +186,16 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
*/
- if (*ax == 1) {
+ switch (*ax) {
+ case 1:
maskecx = cpuid_leaf1_ecx_mask;
maskedx = cpuid_leaf1_edx_mask;
+ break;
+
+ case 0xb:
+ /* Suppress extended topology stuff */
+ maskebx = 0;
+ break;
}
asm(XEN_EMULATE_PREFIX "cpuid"
@@ -197,6 +205,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
"=d" (*dx)
: "0" (*ax), "2" (*cx));
+ *bx &= maskebx;
*cx &= maskecx;
*dx &= maskedx;
}
@@ -1075,6 +1084,8 @@ asmlinkage void __init xen_start_kernel(void)
* Set up some pagetable state before starting to set any ptes.
*/
+ xen_init_mmu_ops();
+
/* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
if (!xen_initial_domain())
@@ -1082,6 +1093,11 @@ asmlinkage void __init xen_start_kernel(void)
__supported_pte_mask |= _PAGE_IOMAP;
+#ifdef CONFIG_X86_64
+ /* Work out if we support NX */
+ check_efer();
+#endif
+
xen_setup_features();
/* Get mfn list */
@@ -1094,7 +1110,6 @@ asmlinkage void __init xen_start_kernel(void)
*/
xen_setup_stackprotector();
- xen_init_mmu_ops();
xen_init_irq_ops();
xen_init_cpuid_mask();
@@ -1123,11 +1138,6 @@ asmlinkage void __init xen_start_kernel(void)
pgd = (pgd_t *)xen_start_info->pt_base;
-#ifdef CONFIG_X86_64
- /* Work out if we support NX */
- check_efer();
-#endif
-
/* Don't do the full vcpu_info placement stuff until we have a
possible map and a non-dummy shared_info. */
per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 093dd59..3bf7b1d 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1165,14 +1165,14 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
/* Get the "official" set of cpus referring to our pagetable. */
if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask)
+ if (!cpumask_test_cpu(cpu, mm_cpumask(mm))
&& per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
continue;
smp_call_function_single(cpu, drop_other_mm_ref, mm, 1);
}
return;
}
- cpumask_copy(mask, &mm->cpu_vm_mask);
+ cpumask_copy(mask, mm_cpumask(mm));
/* It's possible that a vcpu may have a stale reference to our
cr3, because its in lazy mode, and it hasn't yet flushed
OpenPOWER on IntegriCloud