diff options
Diffstat (limited to 'arch/x86')
90 files changed, 1344 insertions, 1055 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 731318e..bc01e3e 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -187,8 +187,8 @@ config HAVE_MMIOTRACE_SUPPORT def_bool y config X86_DECODER_SELFTEST - bool "x86 instruction decoder selftest" - depends on DEBUG_KERNEL + bool "x86 instruction decoder selftest" + depends on DEBUG_KERNEL && KPROBES ---help--- Perform x86 instruction decoder selftests at build time. This option is useful for checking the sanity of x86 instruction diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 4eefdca..53147ad 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -696,7 +696,7 @@ ia32_sys_call_table: .quad quiet_ni_syscall /* streams2 */ .quad stub32_vfork /* 190 */ .quad compat_sys_getrlimit - .quad sys32_mmap2 + .quad sys_mmap_pgoff .quad sys32_truncate64 .quad sys32_ftruncate64 .quad sys32_stat64 /* 195 */ diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index df82c0e..422572c 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -155,9 +155,6 @@ struct mmap_arg_struct { asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) { struct mmap_arg_struct a; - struct file *file = NULL; - unsigned long retval; - struct mm_struct *mm ; if (copy_from_user(&a, arg, sizeof(a))) return -EFAULT; @@ -165,22 +162,8 @@ asmlinkage long sys32_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) return -EINVAL; - if (!(a.flags & MAP_ANONYMOUS)) { - file = fget(a.fd); - if (!file) - return -EBADF; - } - - mm = current->mm; - down_write(&mm->mmap_sem); - retval = do_mmap_pgoff(file, a.addr, a.len, a.prot, a.flags, + return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset>>PAGE_SHIFT); - if (file) - fput(file); - - up_write(&mm->mmap_sem); - - return retval; } asmlinkage long sys32_mprotect(unsigned long start, size_t len, @@ -483,30 +466,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd, return ret; } -asmlinkage long sys32_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - struct mm_struct *mm = current->mm; - unsigned long error; - struct file *file = NULL; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - return -EBADF; - } - - down_write(&mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&mm->mmap_sem); - - if (file) - fput(file); - return error; -} - asmlinkage long sys32_olduname(struct oldold_utsname __user *name) { char *arch = "x86_64"; diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index 84786fb..4d817f9 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -28,7 +28,9 @@ extern void amd_iommu_flush_all_domains(void); extern void amd_iommu_flush_all_devices(void); extern void amd_iommu_apply_erratum_63(u16 devid); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); - +extern int amd_iommu_init_devices(void); +extern void amd_iommu_uninit_devices(void); +extern void amd_iommu_init_notifier(void); #ifndef CONFIG_AMD_IOMMU_STATS static inline void amd_iommu_stats_init(void) { } diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 6a635bd..4611f08 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -113,7 +113,7 @@ */ #define LOCAL_PENDING_VECTOR 0xec -#define UV_BAU_MESSAGE 0xec +#define UV_BAU_MESSAGE 0xea /* * Self IPI vector for machine checks diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 5bef931..2d228fc 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -244,6 +244,9 @@ do { \ #define write_rdtscp_aux(val) wrmsr(0xc0000103, (val), 0) +struct msr *msrs_alloc(void); +void msrs_free(struct msr *msrs); + #ifdef CONFIG_SMP int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index b399988..b4bf9a9 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -118,11 +118,27 @@ extern int __init pcibios_init(void); /* pci-mmconfig.c */ +/* "PCI MMCONFIG %04x [bus %02x-%02x]" */ +#define PCI_MMCFG_RESOURCE_NAME_LEN (22 + 4 + 2 + 2) + +struct pci_mmcfg_region { + struct list_head list; + struct resource res; + u64 address; + char __iomem *virt; + u16 segment; + u8 start_bus; + u8 end_bus; + char name[PCI_MMCFG_RESOURCE_NAME_LEN]; +}; + extern int __init pci_mmcfg_arch_init(void); extern void __init pci_mmcfg_arch_free(void); +extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus); + +extern struct list_head pci_mmcfg_list; -extern struct acpi_mcfg_allocation *pci_mmcfg_config; -extern int pci_mmcfg_config_num; +#define PCI_MMCFG_BUS_OFFSET(bus) ((bus) << 20) /* * AMD Fam10h CPUs are buggy, and cannot access MMIO config space diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index b65a36d..0c44196 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -74,31 +74,31 @@ extern void __bad_percpu_size(void); #define percpu_to_op(op, var, val) \ do { \ - typedef typeof(var) T__; \ + typedef typeof(var) pto_T__; \ if (0) { \ - T__ tmp__; \ - tmp__ = (val); \ + pto_T__ pto_tmp__; \ + pto_tmp__ = (val); \ } \ switch (sizeof(var)) { \ case 1: \ asm(op "b %1,"__percpu_arg(0) \ : "+m" (var) \ - : "qi" ((T__)(val))); \ + : "qi" ((pto_T__)(val))); \ break; \ case 2: \ asm(op "w %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)(val))); \ + : "ri" ((pto_T__)(val))); \ break; \ case 4: \ asm(op "l %1,"__percpu_arg(0) \ : "+m" (var) \ - : "ri" ((T__)(val))); \ + : "ri" ((pto_T__)(val))); \ break; \ case 8: \ asm(op "q %1,"__percpu_arg(0) \ : "+m" (var) \ - : "re" ((T__)(val))); \ + : "re" ((pto_T__)(val))); \ break; \ default: __bad_percpu_size(); \ } \ @@ -106,31 +106,31 @@ do { \ #define percpu_from_op(op, var, constraint) \ ({ \ - typeof(var) ret__; \ + typeof(var) pfo_ret__; \ switch (sizeof(var)) { \ case 1: \ asm(op "b "__percpu_arg(1)",%0" \ - : "=q" (ret__) \ + : "=q" (pfo_ret__) \ : constraint); \ break; \ case 2: \ asm(op "w "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ case 4: \ asm(op "l "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ case 8: \ asm(op "q "__percpu_arg(1)",%0" \ - : "=r" (ret__) \ + : "=r" (pfo_ret__) \ : constraint); \ break; \ default: __bad_percpu_size(); \ } \ - ret__; \ + pfo_ret__; \ }) /* @@ -153,6 +153,84 @@ do { \ #define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) #define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) +#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) + +#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define __this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define __this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +#define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_xor_1(pcp, val) percpu_to_op("xor", (pcp), val) +#define irqsafe_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val) +#define irqsafe_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val) + +/* + * Per cpu atomic 64 bit operations are only available under 64 bit. + * 32 bit must fall back to generic operations. + */ +#ifdef CONFIG_X86_64 +#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) +#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) +#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val) +#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) +#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) +#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val) + +#endif + /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h index 9af9dec..4a5a089e 100644 --- a/arch/x86/include/asm/sys_ia32.h +++ b/arch/x86/include/asm/sys_ia32.h @@ -57,9 +57,6 @@ asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); asmlinkage long sys32_personality(unsigned long); asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); -asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); - struct oldold_utsname; struct old_utsname; asmlinkage long sys32_olduname(struct oldold_utsname __user *); diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 372b76e..1bb6e39 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -55,8 +55,6 @@ struct sel_arg_struct; struct oldold_utsname; struct old_utsname; -asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); asmlinkage int old_mmap(struct mmap_arg_struct __user *); asmlinkage int old_select(struct sel_arg_struct __user *); asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 022a843..ecb544e 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -23,6 +23,7 @@ struct task_struct *__switch_to(struct task_struct *prev, struct tss_struct; void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss); +extern void show_regs_common(void); #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 90f06c2..cb507bb 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -16,7 +16,6 @@ extern unsigned long initial_code; extern unsigned long initial_gs; #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) -#define TRAMPOLINE_BASE 0x6000 extern unsigned long setup_trampoline(void); extern void __init reserve_trampoline_memory(void); diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index d5b7e90..396ff4c 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -37,31 +37,4 @@ extern struct shared_info *HYPERVISOR_shared_info; extern struct start_info *xen_start_info; -enum xen_domain_type { - XEN_NATIVE, /* running on bare hardware */ - XEN_PV_DOMAIN, /* running in a PV domain */ - XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ -}; - -#ifdef CONFIG_XEN -extern enum xen_domain_type xen_domain_type; -#else -#define xen_domain_type XEN_NATIVE -#endif - -#define xen_domain() (xen_domain_type != XEN_NATIVE) -#define xen_pv_domain() (xen_domain() && \ - xen_domain_type == XEN_PV_DOMAIN) -#define xen_hvm_domain() (xen_domain() && \ - xen_domain_type == XEN_HVM_DOMAIN) - -#ifdef CONFIG_XEN_DOM0 -#include <xen/interface/xen.h> - -#define xen_initial_domain() (xen_pv_domain() && \ - xen_start_info->flags & SIF_INITDOMAIN) -#else /* !CONFIG_XEN_DOM0 */ -#define xen_initial_domain() (0) -#endif /* CONFIG_XEN_DOM0 */ - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 1c0fb4d..b990b5c 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -166,6 +166,43 @@ static void iommu_uninit_device(struct device *dev) { kfree(dev->archdata.iommu); } + +void __init amd_iommu_uninit_devices(void) +{ + struct pci_dev *pdev = NULL; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + iommu_uninit_device(&pdev->dev); + } +} + +int __init amd_iommu_init_devices(void) +{ + struct pci_dev *pdev = NULL; + int ret = 0; + + for_each_pci_dev(pdev) { + + if (!check_device(&pdev->dev)) + continue; + + ret = iommu_init_device(&pdev->dev); + if (ret) + goto out_free; + } + + return 0; + +out_free: + + amd_iommu_uninit_devices(); + + return ret; +} #ifdef CONFIG_AMD_IOMMU_STATS /* @@ -1587,6 +1624,11 @@ static struct notifier_block device_nb = { .notifier_call = device_change_notifier, }; +void amd_iommu_init_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &device_nb); +} + /***************************************************************************** * * The next functions belong to the dma_ops mapping/unmapping code. @@ -2145,8 +2187,6 @@ static void prealloc_protection_domains(void) if (!check_device(&dev->dev)) continue; - iommu_init_device(&dev->dev); - /* Is there already any domain for it? */ if (domain_for_device(&dev->dev)) continue; @@ -2215,8 +2255,6 @@ int __init amd_iommu_init_dma_ops(void) register_iommu(&amd_iommu_ops); - bus_register_notifier(&pci_bus_type, &device_nb); - amd_iommu_stats_init(); return 0; diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 7ffc399..1dca9c3 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1274,6 +1274,10 @@ static int __init amd_iommu_init(void) if (ret) goto free; + ret = amd_iommu_init_devices(); + if (ret) + goto free; + if (iommu_pass_through) ret = amd_iommu_init_passthrough(); else @@ -1281,6 +1285,8 @@ static int __init amd_iommu_init(void) if (ret) goto free; + amd_iommu_init_notifier(); + enable_iommus(); if (iommu_pass_through) @@ -1296,6 +1302,9 @@ out: return ret; free: + + amd_iommu_uninit_devices(); + free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, get_order(MAX_DOMAIN_ID/8)); @@ -1336,6 +1345,9 @@ void __init amd_iommu_detect(void) iommu_detected = 1; amd_iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; + + /* Make sure ACS will be enabled */ + pci_request_acs(); } } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index e0dfb68..3704997 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -280,7 +280,8 @@ void __init early_gart_iommu_check(void) * or BIOS forget to put that in reserved. * try to update e820 to make that region as reserved. */ - int i, fix, slot; + u32 agp_aper_base = 0, agp_aper_order = 0; + int i, fix, slot, valid_agp = 0; u32 ctl; u32 aper_size = 0, aper_order = 0, last_aper_order = 0; u64 aper_base = 0, last_aper_base = 0; @@ -290,6 +291,8 @@ void __init early_gart_iommu_check(void) return; /* This is mostly duplicate of iommu_hole_init */ + agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); + fix = 0; for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; @@ -342,10 +345,10 @@ void __init early_gart_iommu_check(void) } } - if (!fix) + if (valid_agp) return; - /* different nodes have different setting, disable them all at first*/ + /* disable them all at first */ for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { int bus; int dev_base, dev_limit; @@ -458,8 +461,6 @@ out: if (aper_alloc) { /* Got the aperture from the AGP bridge */ - } else if (!valid_agp) { - /* Do nothing */ } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || force_iommu || valid_agp || diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index efb2b9c..aa57c07 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1341,7 +1341,7 @@ void enable_x2apic(void) rdmsr(MSR_IA32_APICBASE, msr, msr2); if (!(msr & X2APIC_ENABLE)) { - pr_info("Enabling x2apic\n"); + printk_once(KERN_INFO "Enabling x2apic\n"); wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); } } diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index d9acc3b..e31b9ff 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -127,7 +127,7 @@ static u32 noop_apic_read(u32 reg) static void noop_apic_write(u32 reg, u32 v) { - WARN_ON_ONCE((cpu_has_apic || !disable_apic)); + WARN_ON_ONCE(cpu_has_apic && !disable_apic); } struct apic apic_noop = { diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index e85f8fb..dd2b5f2 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c @@ -27,6 +27,9 @@ * * http://www.unisys.com */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/notifier.h> #include <linux/spinlock.h> #include <linux/cpumask.h> @@ -223,9 +226,9 @@ static int parse_unisys_oem(char *oemptr) mip_addr = val; mip = (struct mip_reg *)val; mip_reg = __va(mip); - pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", + pr_debug("host_reg = 0x%lx\n", (unsigned long)host_reg); - pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", + pr_debug("mip_reg = 0x%lx\n", (unsigned long)mip_reg); success++; break; @@ -401,7 +404,7 @@ static void es7000_enable_apic_mode(void) if (!es7000_plat) return; - printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); + pr_info("Enabling APIC mode.\n"); memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); es7000_mip_reg.off_0x00 = MIP_SW_APIC; es7000_mip_reg.off_0x38 = MIP_VALID; @@ -514,8 +517,7 @@ static void es7000_setup_apic_routing(void) { int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); - printk(KERN_INFO - "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", + pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", (apic_version[apic] == 0x14) ? "Physical Cluster" : "Logical Cluster", nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 6389432..0159a69 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -361,7 +361,7 @@ void stop_apic_nmi_watchdog(void *unused) */ static DEFINE_PER_CPU(unsigned, last_irq_sum); -static DEFINE_PER_CPU(local_t, alert_counter); +static DEFINE_PER_CPU(long, alert_counter); static DEFINE_PER_CPU(int, nmi_touch); void touch_nmi_watchdog(void) @@ -438,8 +438,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) * Ayiee, looks like this CPU is stuck ... * wait a few IRQs (5 seconds) before doing the oops ... */ - local_inc(&__get_cpu_var(alert_counter)); - if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) + __this_cpu_inc(per_cpu_var(alert_counter)); + if (__this_cpu_read(per_cpu_var(alert_counter)) == 5 * nmi_hz) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ @@ -447,7 +447,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) regs, panic_on_timeout); } else { __get_cpu_var(last_irq_sum) = sum; - local_set(&__get_cpu_var(alert_counter), 0); + __this_cpu_write(per_cpu_var(alert_counter), 0); } /* see if the nmi watchdog went off */ diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c965e52..468489b 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -74,6 +74,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; + static bool printed; if (c->cpuid_level < 0xb) return; @@ -127,12 +128,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) c->x86_max_cores = (core_level_siblings / smp_num_siblings); - - printk(KERN_INFO "CPU: Physical Processor ID: %d\n", - c->phys_proc_id); - if (c->x86_max_cores > 1) - printk(KERN_INFO "CPU: Processor Core ID: %d\n", - c->cpu_core_id); + if (!printed) { + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + c->phys_proc_id); + if (c->x86_max_cores > 1) + printk(KERN_INFO "CPU: Processor Core ID: %d\n", + c->cpu_core_id); + printed = 1; + } return; #endif } diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7128b37..8dc3ea1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -375,8 +375,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) node = nearby_node(apicid); } numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c1afa99..4868e4a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -427,6 +427,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_HT u32 eax, ebx, ecx, edx; int index_msb, core_bits; + static bool printed; if (!cpu_has(c, X86_FEATURE_HT)) return; @@ -442,7 +443,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { - printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); goto out; } @@ -469,11 +470,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) ((1 << core_bits) - 1); out: - if ((c->x86_max_cores * smp_num_siblings) > 1) { + if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { printk(KERN_INFO "CPU: Physical Processor ID: %d\n", c->phys_proc_id); printk(KERN_INFO "CPU: Processor Core ID: %d\n", c->cpu_core_id); + printed = 1; } #endif } @@ -1093,7 +1095,7 @@ static void clear_all_debug_regs(void) void __cpuinit cpu_init(void) { - struct orig_ist *orig_ist; + struct orig_ist *oist; struct task_struct *me; struct tss_struct *t; unsigned long v; @@ -1102,7 +1104,7 @@ void __cpuinit cpu_init(void) cpu = stack_smp_processor_id(); t = &per_cpu(init_tss, cpu); - orig_ist = &per_cpu(orig_ist, cpu); + oist = &per_cpu(orig_ist, cpu); #ifdef CONFIG_NUMA if (cpu != 0 && percpu_read(node_number) == 0 && @@ -1115,7 +1117,7 @@ void __cpuinit cpu_init(void) if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) panic("CPU#%d already initialized!\n", cpu); - printk(KERN_INFO "Initializing CPU#%d\n", cpu); + pr_debug("Initializing CPU#%d\n", cpu); clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); @@ -1143,12 +1145,12 @@ void __cpuinit cpu_init(void) /* * set up and load the per-CPU TSS */ - if (!orig_ist->ist[0]) { + if (!oist->ist[0]) { char *estacks = per_cpu(exception_stacks, cpu); for (v = 0; v < N_EXCEPTION_STACKS; v++) { estacks += exception_stack_sizes[v]; - orig_ist->ist[v] = t->x86_tss.ist[v] = + oist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; } } diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index dca325c..b368cd8 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c @@ -30,9 +30,9 @@ #include <asm/apic.h> #include <asm/desc.h> -static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr); -static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr); -static DEFINE_PER_CPU(int, cpu_priv_count); +static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpud_arr); +static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], cpud_priv_arr); +static DEFINE_PER_CPU(int, cpud_priv_count); static DEFINE_MUTEX(cpu_debug_lock); @@ -531,7 +531,7 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, /* Already intialized */ if (file == CPU_INDEX_BIT) - if (per_cpu(cpu_arr[type].init, cpu)) + if (per_cpu(cpud_arr[type].init, cpu)) return 0; priv = kzalloc(sizeof(*priv), GFP_KERNEL); @@ -543,8 +543,8 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, priv->reg = reg; priv->file = file; mutex_lock(&cpu_debug_lock); - per_cpu(priv_arr[type], cpu) = priv; - per_cpu(cpu_priv_count, cpu)++; + per_cpu(cpud_priv_arr[type], cpu) = priv; + per_cpu(cpud_priv_count, cpu)++; mutex_unlock(&cpu_debug_lock); if (file) @@ -552,10 +552,10 @@ static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, dentry, (void *)priv, &cpu_fops); else { debugfs_create_file(cpu_base[type].name, S_IRUGO, - per_cpu(cpu_arr[type].dentry, cpu), + per_cpu(cpud_arr[type].dentry, cpu), (void *)priv, &cpu_fops); mutex_lock(&cpu_debug_lock); - per_cpu(cpu_arr[type].init, cpu) = 1; + per_cpu(cpud_arr[type].init, cpu) = 1; mutex_unlock(&cpu_debug_lock); } @@ -615,7 +615,7 @@ static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) if (!is_typeflag_valid(cpu, cpu_base[type].flag)) continue; cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); - per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry; + per_cpu(cpud_arr[type].dentry, cpu) = cpu_dentry; if (type < CPU_TSS_BIT) err = cpu_init_msr(cpu, type, cpu_dentry); @@ -647,11 +647,11 @@ static int cpu_init_cpu(void) err = cpu_init_allreg(cpu, cpu_dentry); pr_info("cpu%d(%d) debug files %d\n", - cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu)); - if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) { + cpu, nr_cpu_ids, per_cpu(cpud_priv_count, cpu)); + if (per_cpu(cpud_priv_count, cpu) > MAX_CPU_FILES) { pr_err("Register files count %d exceeds limit %d\n", - per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES); - per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES; + per_cpu(cpud_priv_count, cpu), MAX_CPU_FILES); + per_cpu(cpud_priv_count, cpu) = MAX_CPU_FILES; err = -ENFILE; } if (err) @@ -676,8 +676,8 @@ static void __exit cpu_debug_exit(void) debugfs_remove_recursive(cpu_debugfs_dir); for (cpu = 0; cpu < nr_cpu_ids; cpu++) - for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++) - kfree(per_cpu(priv_arr[i], cpu)); + for (i = 0; i < per_cpu(cpud_priv_count, cpu); i++) + kfree(per_cpu(cpud_priv_arr[i], cpu)); } module_init(cpu_debug_init); diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8b581d3..f28decf 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -68,9 +68,9 @@ struct acpi_cpufreq_data { unsigned int cpu_feature; }; -static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); +static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); -static DEFINE_PER_CPU(struct aperfmperf, old_perf); +static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance *acpi_perf_data; @@ -214,14 +214,14 @@ static u32 get_cur_val(const struct cpumask *mask) if (unlikely(cpumask_empty(mask))) return 0; - switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) { + switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data; + perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -268,8 +268,8 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) return 0; - ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); - per_cpu(old_perf, cpu) = perf; + ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); + per_cpu(acfreq_old_perf, cpu) = perf; retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; @@ -278,7 +278,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); unsigned int freq; unsigned int cached_freq; @@ -322,7 +322,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, unsigned int relation) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); struct acpi_processor_performance *perf; struct cpufreq_freqs freqs; struct drv_cmd cmd; @@ -416,7 +416,7 @@ out: static int acpi_cpufreq_verify(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); dprintk("acpi_cpufreq_verify\n"); @@ -574,7 +574,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) return -ENOMEM; data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); - per_cpu(drv_data, cpu) = data; + per_cpu(acfreq_data, cpu) = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; @@ -725,20 +725,20 @@ err_unreg: acpi_processor_unregister_performance(perf, cpu); err_free: kfree(data); - per_cpu(drv_data, cpu) = NULL; + per_cpu(acfreq_data, cpu) = NULL; return result; } static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); dprintk("acpi_cpufreq_cpu_exit\n"); if (data) { cpufreq_frequency_table_put_attr(policy->cpu); - per_cpu(drv_data, policy->cpu) = NULL; + per_cpu(acfreq_data, policy->cpu) = NULL; acpi_processor_unregister_performance(data->acpi_data, policy->cpu); kfree(data); @@ -749,7 +749,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); + struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); dprintk("acpi_cpufreq_resume\n"); @@ -764,14 +764,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = { }; static struct cpufreq_driver acpi_cpufreq_driver = { - .verify = acpi_cpufreq_verify, - .target = acpi_cpufreq_target, - .init = acpi_cpufreq_cpu_init, - .exit = acpi_cpufreq_cpu_exit, - .resume = acpi_cpufreq_resume, - .name = "acpi-cpufreq", - .owner = THIS_MODULE, - .attr = acpi_cpufreq_attr, + .verify = acpi_cpufreq_verify, + .target = acpi_cpufreq_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .resume = acpi_cpufreq_resume, + .name = "acpi-cpufreq", + .owner = THIS_MODULE, + .attr = acpi_cpufreq_attr, }; static int __init acpi_cpufreq_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index f10dea4..cb01dac 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c @@ -164,7 +164,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) } /* cpuinfo and default policy values */ - policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cpuinfo.transition_latency = 200000; policy->cur = busfreq * max_multiplier; result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index d47c775..9a97116 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c @@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = { }; static struct cpufreq_driver powernow_driver = { - .verify = powernow_verify, - .target = powernow_target, - .get = powernow_get, - .init = powernow_cpu_init, - .exit = powernow_cpu_exit, - .name = "powernow-k7", - .owner = THIS_MODULE, - .attr = powernow_table_attr, + .verify = powernow_verify, + .target = powernow_target, + .get = powernow_get, +#ifdef CONFIG_X86_POWERNOW_K7_ACPI + .bios_limit = acpi_processor_get_bios_limit, +#endif + .init = powernow_cpu_init, + .exit = powernow_cpu_exit, + .name = "powernow-k7", + .owner = THIS_MODULE, + .attr = powernow_table_attr, }; static int __init powernow_init(void) diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 3f12dab..a9df944 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1118,7 +1118,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) { - cpumask_t oldmask; + cpumask_var_t oldmask; struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); u32 checkfid; u32 checkvid; @@ -1131,9 +1131,13 @@ static int powernowk8_target(struct cpufreq_policy *pol, checkfid = data->currfid; checkvid = data->currvid; - /* only run on specific CPU from here on */ - oldmask = current->cpus_allowed; - set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); + /* only run on specific CPU from here on. */ + /* This is poor form: use a workqueue or smp_call_function_single */ + if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_copy(oldmask, tsk_cpumask(current)); + set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); @@ -1193,7 +1197,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, ret = 0; err_out: - set_cpus_allowed_ptr(current, &oldmask); + set_cpus_allowed_ptr(current, oldmask); + free_cpumask_var(oldmask); return ret; } @@ -1393,14 +1398,15 @@ static struct freq_attr *powernow_k8_attr[] = { }; static struct cpufreq_driver cpufreq_amd64_driver = { - .verify = powernowk8_verify, - .target = powernowk8_target, - .init = powernowk8_cpu_init, - .exit = __devexit_p(powernowk8_cpu_exit), - .get = powernowk8_get, - .name = "powernow-k8", - .owner = THIS_MODULE, - .attr = powernow_k8_attr, + .verify = powernowk8_verify, + .target = powernowk8_target, + .bios_limit = acpi_processor_get_bios_limit, + .init = powernowk8_cpu_init, + .exit = __devexit_p(powernowk8_cpu_exit), + .get = powernowk8_get, + .name = "powernow-k8", + .owner = THIS_MODULE, + .attr = powernow_k8_attr, }; /* driver entry point for init */ diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 3ae5a7a..2ce8e0b 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c @@ -39,7 +39,7 @@ static struct pci_dev *speedstep_chipset_dev; /* speedstep_processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; static u32 pmbase; diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index f4c290b..ad0083a 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c @@ -34,7 +34,7 @@ static int relaxed_check; * GET PROCESSOR CORE SPEED IN KHZ * *********************************************************************/ -static unsigned int pentium3_get_frequency(unsigned int processor) +static unsigned int pentium3_get_frequency(enum speedstep_processor processor) { /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { @@ -227,7 +227,7 @@ static unsigned int pentium4_get_frequency(void) /* Warning: may get called from smp_call_function_single. */ -unsigned int speedstep_get_frequency(unsigned int processor) +unsigned int speedstep_get_frequency(enum speedstep_processor processor) { switch (processor) { case SPEEDSTEP_CPU_PCORE: @@ -380,7 +380,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); * DETECT SPEEDSTEP SPEEDS * *********************************************************************/ -unsigned int speedstep_get_freqs(unsigned int processor, +unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index 2b6c04e..70d9cea 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h @@ -11,18 +11,18 @@ /* processors */ - -#define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ -#define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ - +enum speedstep_processor { + SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ + SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ + SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected * in speedstep_detect_processor(). However, their speed can be detected using * the speedstep_get_frequency() call. */ -#define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ -#define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ -#define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ + SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ + SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ + SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ +}; /* speedstep states -- only two of them */ @@ -31,10 +31,10 @@ /* detect a speedstep-capable processor */ -extern unsigned int speedstep_detect_processor (void); +extern enum speedstep_processor speedstep_detect_processor(void); /* detect the current speed (in khz) of the processor */ -extern unsigned int speedstep_get_frequency(unsigned int processor); +extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); /* detect the low and high speeds of the processor. The callback @@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor); * SPEEDSTEP_LOW; the second argument is zero so that no * cpufreq_notify_transition calls are initiated. */ -extern unsigned int speedstep_get_freqs(unsigned int processor, +extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, unsigned int *low_speed, unsigned int *high_speed, unsigned int *transition_latency, diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index befea08..04d73c1 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c @@ -35,7 +35,7 @@ static int smi_cmd; static unsigned int smi_sig; /* info about the processor */ -static unsigned int speedstep_processor; +static enum speedstep_processor speedstep_processor; /* * There are only two frequency states for each processor. Values diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c900b73..9c31e8b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -270,8 +270,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) node = cpu_to_node(cpu); } numa_set_node(cpu, node); - - printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); #endif } diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 6c40f6b..fc6c8ef 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -499,26 +499,27 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) #ifdef CONFIG_SYSFS /* pointer to _cpuid4_info array (for each cache leaf) */ -static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); -#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); +#define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; - int index_msb, i; + int index_msb, i, sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { - struct cpuinfo_x86 *d; - for_each_online_cpu(i) { - if (!per_cpu(cpuid4_info, i)) + for_each_cpu(i, c->llc_shared_map) { + if (!per_cpu(ici_cpuid4_info, i)) continue; - d = &cpu_data(i); this_leaf = CPUID4_INFO_IDX(i, index); - cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), - d->llc_shared_map); + for_each_cpu(sibling, c->llc_shared_map) { + if (!cpu_online(sibling)) + continue; + set_bit(sibling, this_leaf->shared_cpu_map); + } } return; } @@ -535,7 +536,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) c->apicid >> index_msb) { cpumask_set_cpu(i, to_cpumask(this_leaf->shared_cpu_map)); - if (i != cpu && per_cpu(cpuid4_info, i)) { + if (i != cpu && per_cpu(ici_cpuid4_info, i)) { sibling_leaf = CPUID4_INFO_IDX(i, index); cpumask_set_cpu(cpu, to_cpumask( @@ -574,8 +575,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) for (i = 0; i < num_cache_leaves; i++) cache_remove_shared_cpu_map(cpu, i); - kfree(per_cpu(cpuid4_info, cpu)); - per_cpu(cpuid4_info, cpu) = NULL; + kfree(per_cpu(ici_cpuid4_info, cpu)); + per_cpu(ici_cpuid4_info, cpu) = NULL; } static int @@ -614,15 +615,15 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) if (num_cache_leaves == 0) return -ENOENT; - per_cpu(cpuid4_info, cpu) = kzalloc( + per_cpu(ici_cpuid4_info, cpu) = kzalloc( sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); - if (per_cpu(cpuid4_info, cpu) == NULL) + if (per_cpu(ici_cpuid4_info, cpu) == NULL) return -ENOMEM; smp_call_function_single(cpu, get_cpu_leaves, &retval, true); if (retval) { - kfree(per_cpu(cpuid4_info, cpu)); - per_cpu(cpuid4_info, cpu) = NULL; + kfree(per_cpu(ici_cpuid4_info, cpu)); + per_cpu(ici_cpuid4_info, cpu) = NULL; } return retval; @@ -634,7 +635,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ /* pointer to kobject for cpuX/cache */ -static DEFINE_PER_CPU(struct kobject *, cache_kobject); +static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); struct _index_kobject { struct kobject kobj; @@ -643,8 +644,8 @@ struct _index_kobject { }; /* pointer to array of kobjects for cpuX/cache/indexY */ -static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); -#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) +static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject); +#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) #define show_one_plus(file_name, object, val) \ static ssize_t show_##file_name \ @@ -863,10 +864,10 @@ static struct kobj_type ktype_percpu_entry = { static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) { - kfree(per_cpu(cache_kobject, cpu)); - kfree(per_cpu(index_kobject, cpu)); - per_cpu(cache_kobject, cpu) = NULL; - per_cpu(index_kobject, cpu) = NULL; + kfree(per_cpu(ici_cache_kobject, cpu)); + kfree(per_cpu(ici_index_kobject, cpu)); + per_cpu(ici_cache_kobject, cpu) = NULL; + per_cpu(ici_index_kobject, cpu) = NULL; free_cache_attributes(cpu); } @@ -882,14 +883,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) return err; /* Allocate all required memory */ - per_cpu(cache_kobject, cpu) = + per_cpu(ici_cache_kobject, cpu) = kzalloc(sizeof(struct kobject), GFP_KERNEL); - if (unlikely(per_cpu(cache_kobject, cpu) == NULL)) + if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL)) goto err_out; - per_cpu(index_kobject, cpu) = kzalloc( + per_cpu(ici_index_kobject, cpu) = kzalloc( sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); - if (unlikely(per_cpu(index_kobject, cpu) == NULL)) + if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL)) goto err_out; return 0; @@ -913,7 +914,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) if (unlikely(retval < 0)) return retval; - retval = kobject_init_and_add(per_cpu(cache_kobject, cpu), + retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), &ktype_percpu_entry, &sys_dev->kobj, "%s", "cache"); if (retval < 0) { @@ -927,12 +928,12 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) this_object->index = i; retval = kobject_init_and_add(&(this_object->kobj), &ktype_cache, - per_cpu(cache_kobject, cpu), + per_cpu(ici_cache_kobject, cpu), "index%1lu", i); if (unlikely(retval)) { for (j = 0; j < i; j++) kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); - kobject_put(per_cpu(cache_kobject, cpu)); + kobject_put(per_cpu(ici_cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); return retval; } @@ -940,7 +941,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) } cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); - kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); + kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD); return 0; } @@ -949,7 +950,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) unsigned int cpu = sys_dev->id; unsigned long i; - if (per_cpu(cpuid4_info, cpu) == NULL) + if (per_cpu(ici_cpuid4_info, cpu) == NULL) return; if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) return; @@ -957,7 +958,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) for (i = 0; i < num_cache_leaves; i++) kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); - kobject_put(per_cpu(cache_kobject, cpu)); + kobject_put(per_cpu(ici_cache_kobject, cpu)); cpuid4_cache_sysfs_exit(cpu); } diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d7ebf25..a8aacd4 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1388,13 +1388,14 @@ static void __mcheck_cpu_init_timer(void) struct timer_list *t = &__get_cpu_var(mce_timer); int *n = &__get_cpu_var(mce_next_interval); + setup_timer(t, mce_start_timer, smp_processor_id()); + if (mce_ignore_ce) return; *n = check_interval * HZ; if (!*n) return; - setup_timer(t, mce_start_timer, smp_processor_id()); t->expires = round_jiffies(jiffies + *n); add_timer_on(t, smp_processor_id()); } @@ -1928,7 +1929,7 @@ error2: sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); error: while (--i >= 0) - sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); + sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); sysdev_unregister(&per_cpu(mce_dev, cpu)); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 4fef985..81c499e 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -256,6 +256,16 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) ack_APIC_irq(); } +/* Thermal monitoring depends on APIC, ACPI and clock modulation */ +static int intel_thermal_supported(struct cpuinfo_x86 *c) +{ + if (!cpu_has_apic) + return 0; + if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) + return 0; + return 1; +} + void __init mcheck_intel_therm_init(void) { /* @@ -263,8 +273,7 @@ void __init mcheck_intel_therm_init(void) * LVT value on BSP and use that value to restore APs' thermal LVT * entry BIOS programmed later */ - if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && - cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) + if (intel_thermal_supported(&boot_cpu_data)) lvtthmr_init = apic_read(APIC_LVTTHMR); } @@ -274,8 +283,7 @@ void intel_init_thermal(struct cpuinfo_x86 *c) int tm2 = 0; u32 l, h; - /* Thermal monitoring depends on ACPI and clock modulation*/ - if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) + if (!intel_thermal_supported(c)) return; /* @@ -339,8 +347,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) l = apic_read(APIC_LVTTHMR); apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); - printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", - cpu, tm2 ? "TM2" : "TM1"); + printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", + tm2 ? "TM2" : "TM1"); /* enable thermal throttle processing */ atomic_set(&therm_throt_en, 1); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ab1a8a8..45506d5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1632,6 +1632,7 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) data.period = event->hw.last_period; data.addr = 0; + data.raw = NULL; regs.ip = 0; /* @@ -1749,6 +1750,7 @@ static int p6_pmu_handle_irq(struct pt_regs *regs) u64 val; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1794,6 +1796,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) u64 ack, status; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -1857,6 +1860,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) u64 val; data.addr = 0; + data.raw = NULL; cpuc = &__get_cpu_var(cpu_hw_events); @@ -2062,12 +2066,6 @@ static __init int p6_pmu_init(void) x86_pmu = p6_pmu; - if (!cpu_has_apic) { - pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); - pr_info("no hardware sampling interrupt available.\n"); - x86_pmu.apic = 0; - } - return 0; } @@ -2159,6 +2157,16 @@ static __init int amd_pmu_init(void) return 0; } +static void __init pmu_check_apic(void) +{ + if (cpu_has_apic) + return; + + x86_pmu.apic = 0; + pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); + pr_info("no hardware sampling interrupt available.\n"); +} + void __init init_hw_perf_events(void) { int err; @@ -2180,6 +2188,8 @@ void __init init_hw_perf_events(void) return; } + pmu_check_apic(); + pr_cont("%s PMU driver.\n", x86_pmu.name); if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { @@ -2287,7 +2297,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_nmi_frame); +static DEFINE_PER_CPU(int, in_ignored_frame); static void @@ -2303,8 +2313,9 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - per_cpu(in_nmi_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name); + per_cpu(in_ignored_frame, smp_processor_id()) = + x86_is_stack_id(NMI_STACK, name) || + x86_is_stack_id(DEBUG_STACK, name); return 0; } @@ -2313,7 +2324,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (per_cpu(in_nmi_frame, smp_processor_id())) + if (per_cpu(in_ignored_frame, smp_processor_id())) return; if (reliable) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ef42a03..1c47390 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -265,13 +265,13 @@ struct ds_context { int cpu; }; -static DEFINE_PER_CPU(struct ds_context *, cpu_context); +static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); static struct ds_context *ds_get_context(struct task_struct *task, int cpu) { struct ds_context **p_context = - (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); + (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); struct ds_context *context = NULL; struct ds_context *new_context = NULL; diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 8e74093..b13af53 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -103,6 +103,35 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, return NULL; } +static inline int +in_irq_stack(unsigned long *stack, unsigned long *irq_stack, + unsigned long *irq_stack_end) +{ + return (stack >= irq_stack && stack < irq_stack_end); +} + +/* + * We are returning from the irq stack and go to the previous one. + * If the previous stack is also in the irq stack, then bp in the first + * frame of the irq stack points to the previous, interrupted one. + * Otherwise we have another level of indirection: We first save + * the bp of the previous stack, then we switch the stack to the irq one + * and save a new bp that links to the previous one. + * (See save_args()) + */ +static inline unsigned long +fixup_bp_irq_link(unsigned long bp, unsigned long *stack, + unsigned long *irq_stack, unsigned long *irq_stack_end) +{ +#ifdef CONFIG_FRAME_POINTER + struct stack_frame *frame = (struct stack_frame *)bp; + + if (!in_irq_stack(stack, irq_stack, irq_stack_end)) + return (unsigned long)frame->next_frame; +#endif + return bp; +} + /* * x86-64 can have up to three kernel stacks: * process stack @@ -175,7 +204,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, irq_stack = irq_stack_end - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); - if (stack >= irq_stack && stack < irq_stack_end) { + if (in_irq_stack(stack, irq_stack, irq_stack_end)) { if (ops->stack(data, "IRQ") < 0) break; bp = print_context_stack(tinfo, stack, bp, @@ -186,6 +215,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, * pointer (index -1 to end) in the IRQ stack: */ stack = (unsigned long *) (irq_stack_end[-1]); + bp = fixup_bp_irq_link(bp, stack, irq_stack, + irq_stack_end); irq_stack_end = NULL; ops->stack(data, "EOI"); continue; diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d17d482..f50447d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -732,7 +732,16 @@ struct early_res { char overlap_ok; }; static struct early_res early_res[MAX_EARLY_RES] __initdata = { - { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ + { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ +#ifdef CONFIG_X86_32 + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + { PAGE_SIZE, PAGE_SIZE, "EX TRAMPOLINE", 1 }, +#endif + {} }; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 63bca79..673f693 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1076,10 +1076,10 @@ ENTRY(\sym) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - PER_CPU(init_tss, %rbp) - subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + PER_CPU(init_tss, %r12) + subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) call \do_sym - addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC END(\sym) diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 4f8e250..5051b94 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -29,8 +29,6 @@ static void __init i386_default_early_setup(void) void __init i386_start_kernel(void) { - reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0b06cd7..b5a9896 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -98,8 +98,6 @@ void __init x86_64_start_reservations(char *real_mode_data) { copy_bootdata(__va(real_mode_data)); - reserve_trampoline_memory(); - reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index d42f65a..05d5fec 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -362,8 +362,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, return ret; } - if (bp->callback) - ret = arch_store_info(bp); + ret = arch_store_info(bp); if (ret < 0) return ret; @@ -519,7 +518,7 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) break; } - (bp->callback)(bp, args->regs); + perf_bp_event(bp, args->regs); rcu_read_unlock(); } diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 20a5b36..dd74fe72 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -86,9 +86,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs[GDB_DS] = regs->ds; gdb_regs[GDB_ES] = regs->es; gdb_regs[GDB_CS] = regs->cs; - gdb_regs[GDB_SS] = __KERNEL_DS; gdb_regs[GDB_FS] = 0xFFFF; gdb_regs[GDB_GS] = 0xFFFF; + if (user_mode_vm(regs)) { + gdb_regs[GDB_SS] = regs->ss; + gdb_regs[GDB_SP] = regs->sp; + } else { + gdb_regs[GDB_SS] = __KERNEL_DS; + gdb_regs[GDB_SP] = kernel_stack_pointer(regs); + } #else gdb_regs[GDB_R8] = regs->r8; gdb_regs[GDB_R9] = regs->r9; @@ -101,8 +107,8 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) gdb_regs32[GDB_PS] = regs->flags; gdb_regs32[GDB_CS] = regs->cs; gdb_regs32[GDB_SS] = regs->ss; -#endif gdb_regs[GDB_SP] = kernel_stack_pointer(regs); +#endif } /** @@ -220,8 +226,7 @@ static void kgdb_correct_hw_break(void) dr7 |= ((breakinfo[breakno].len << 2) | breakinfo[breakno].type) << ((breakno << 2) + 16); - if (breakno >= 0 && breakno <= 3) - set_debugreg(breakinfo[breakno].addr, breakno); + set_debugreg(breakinfo[breakno].addr, breakno); } else { if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { @@ -395,7 +400,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, /* set the trace bit if we're stepping */ if (remcomInBuffer[0] == 's') { linux_regs->flags |= X86_EFLAGS_TF; - kgdb_single_step = 1; atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); } diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 63123d9..37542b67 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -13,6 +13,9 @@ * Licensed under the terms of the GNU General Public * License version 2. See file COPYING for details. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/firmware.h> #include <linux/pci_ids.h> #include <linux/uaccess.h> @@ -81,7 +84,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) memset(csig, 0, sizeof(*csig)); rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); - pr_info("microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); + pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); return 0; } @@ -111,8 +114,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev) /* ucode might be chipset specific -- currently we don't support this */ if (mc_header->nb_dev_id || mc_header->sb_dev_id) { - pr_err(KERN_ERR "microcode: CPU%d: loading of chipset " - "specific code not yet supported\n", cpu); + pr_err("CPU%d: loading of chipset specific code not yet supported\n", + cpu); return 0; } @@ -141,12 +144,12 @@ static int apply_microcode_amd(int cpu) /* check current patch id and patch's id for match */ if (rev != mc_amd->hdr.patch_id) { - pr_err("microcode: CPU%d: update failed " - "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); + pr_err("CPU%d: update failed (for patch_level=0x%x)\n", + cpu, mc_amd->hdr.patch_id); return -1; } - pr_info("microcode: CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); + pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); uci->cpu_sig.rev = rev; return 0; @@ -169,15 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) return NULL; if (section_hdr[0] != UCODE_UCODE_TYPE) { - pr_err("microcode: error: invalid type field in " - "container file section header\n"); + pr_err("error: invalid type field in container file section header\n"); return NULL; } total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); if (total_size > size || total_size > UCODE_MAX_SIZE) { - pr_err("microcode: error: size mismatch\n"); + pr_err("error: size mismatch\n"); return NULL; } @@ -206,14 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf) size = buf_pos[2]; if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { - pr_err("microcode: error: invalid type field in " - "container file section header\n"); + pr_err("error: invalid type field in container file section header\n"); return 0; } equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); if (!equiv_cpu_table) { - pr_err("microcode: failed to allocate equivalent CPU table\n"); + pr_err("failed to allocate equivalent CPU table\n"); return 0; } @@ -246,7 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) offset = install_equiv_cpu_table(ucode_ptr); if (!offset) { - pr_err("microcode: failed to create equivalent cpu table\n"); + pr_err("failed to create equivalent cpu table\n"); return UCODE_ERROR; } @@ -277,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) if (!leftover) { vfree(uci->mc); uci->mc = new_mc; - pr_debug("microcode: CPU%d found a matching microcode " - "update with version 0x%x (current=0x%x)\n", + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); } else { vfree(new_mc); @@ -300,7 +300,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) return UCODE_NFOUND; if (*(u32 *)firmware->data != UCODE_MAGIC) { - pr_err("microcode: invalid UCODE_MAGIC (0x%08x)\n", + pr_err("invalid UCODE_MAGIC (0x%08x)\n", *(u32 *)firmware->data); return UCODE_ERROR; } @@ -313,8 +313,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { - pr_info("microcode: AMD microcode update via " - "/dev/cpu/microcode not supported\n"); + pr_info("AMD microcode update via /dev/cpu/microcode not supported\n"); return UCODE_ERROR; } @@ -334,14 +333,13 @@ void init_microcode_amd(struct device *device) WARN_ON(c->x86_vendor != X86_VENDOR_AMD); if (c->x86 < 0x10) { - pr_warning("microcode: AMD CPU family 0x%x not supported\n", - c->x86); + pr_warning("AMD CPU family 0x%x not supported\n", c->x86); return; } supported_cpu = 1; if (request_firmware(&firmware, fw_name, device)) - pr_err("microcode: failed to load file %s\n", fw_name); + pr_err("failed to load file %s\n", fw_name); } void fini_microcode_amd(void) diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index e68aae3..844c02c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -70,6 +70,9 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/platform_device.h> #include <linux/miscdevice.h> #include <linux/capability.h> @@ -209,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, ssize_t ret = -EINVAL; if ((len >> PAGE_SHIFT) > totalram_pages) { - pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); + pr_err("too much data (max %ld pages)\n", totalram_pages); return ret; } @@ -244,7 +247,7 @@ static int __init microcode_dev_init(void) error = misc_register(µcode_dev); if (error) { - pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); + pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR); return error; } @@ -359,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu) if (!uci->mc) return UCODE_NFOUND; - pr_debug("microcode: CPU%d updated upon resume\n", cpu); + pr_debug("CPU%d updated upon resume\n", cpu); apply_microcode_on_target(cpu); return UCODE_OK; @@ -379,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu) ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); if (ustate == UCODE_OK) { - pr_debug("microcode: CPU%d updated upon init\n", cpu); + pr_debug("CPU%d updated upon init\n", cpu); apply_microcode_on_target(cpu); } @@ -406,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; - pr_debug("microcode: CPU%d added\n", cpu); + pr_debug("CPU%d added\n", cpu); err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); if (err) @@ -425,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) if (!cpu_online(cpu)) return 0; - pr_debug("microcode: CPU%d removed\n", cpu); + pr_debug("CPU%d removed\n", cpu); microcode_fini_cpu(cpu); sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); return 0; @@ -473,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) microcode_update_cpu(cpu); case CPU_DOWN_FAILED: case CPU_DOWN_FAILED_FROZEN: - pr_debug("microcode: CPU%d added\n", cpu); + pr_debug("CPU%d added\n", cpu); if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) - pr_err("microcode: Failed to create group for CPU%d\n", cpu); + pr_err("Failed to create group for CPU%d\n", cpu); break; case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: /* Suspend is in progress, only remove the interface */ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); - pr_debug("microcode: CPU%d removed\n", cpu); + pr_debug("CPU%d removed\n", cpu); break; case CPU_DEAD: case CPU_UP_CANCELED_FROZEN: @@ -507,7 +510,7 @@ static int __init microcode_init(void) microcode_ops = init_amd_microcode(); if (!microcode_ops) { - pr_err("microcode: no support for this CPU vendor\n"); + pr_err("no support for this CPU vendor\n"); return -ENODEV; } @@ -541,8 +544,7 @@ static int __init microcode_init(void) register_hotcpu_notifier(&mc_cpu_notifier); pr_info("Microcode Update Driver: v" MICROCODE_VERSION - " <tigran@aivazian.fsnet.co.uk>," - " Peter Oruba\n"); + " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); return 0; } diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0d334dd..ebd193e 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c @@ -70,6 +70,9 @@ * Fix sigmatch() macro to handle old CPUs with pf == 0. * Thanks to Stuart Swales for pointing out this bug. */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/firmware.h> #include <linux/uaccess.h> #include <linux/kernel.h> @@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || cpu_has(c, X86_FEATURE_IA64)) { - printk(KERN_ERR "microcode: CPU%d not a capable Intel " - "processor\n", cpu_num); + pr_err("CPU%d not a capable Intel processor\n", cpu_num); return -1; } @@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) /* get the current revision from MSR 0x8B */ rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); - printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", - cpu_num, csig->sig, csig->pf, csig->rev); + pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", + cpu_num, csig->sig, csig->pf, csig->rev); return 0; } @@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc) data_size = get_datasize(mc_header); if (data_size + MC_HEADER_SIZE > total_size) { - printk(KERN_ERR "microcode: error! " - "Bad data size in microcode data file\n"); + pr_err("error! Bad data size in microcode data file\n"); return -EINVAL; } if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { - printk(KERN_ERR "microcode: error! " - "Unknown microcode update format\n"); + pr_err("error! Unknown microcode update format\n"); return -EINVAL; } ext_table_size = total_size - (MC_HEADER_SIZE + data_size); if (ext_table_size) { if ((ext_table_size < EXT_HEADER_SIZE) || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - printk(KERN_ERR "microcode: error! " - "Small exttable size in microcode data file\n"); + pr_err("error! Small exttable size in microcode data file\n"); return -EINVAL; } ext_header = mc + MC_HEADER_SIZE + data_size; if (ext_table_size != exttable_size(ext_header)) { - printk(KERN_ERR "microcode: error! " - "Bad exttable size in microcode data file\n"); + pr_err("error! Bad exttable size in microcode data file\n"); return -EFAULT; } ext_sigcount = ext_header->count; @@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc) while (i--) ext_table_sum += ext_tablep[i]; if (ext_table_sum) { - printk(KERN_WARNING "microcode: aborting, " - "bad extended signature table checksum\n"); + pr_warning("aborting, bad extended signature table checksum\n"); return -EINVAL; } } @@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc) while (i--) orig_sum += ((int *)mc)[i]; if (orig_sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); + pr_err("aborting, bad checksum\n"); return -EINVAL; } if (!ext_table_size) @@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc) - (mc_header->sig + mc_header->pf + mc_header->cksum) + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); if (sum) { - printk(KERN_ERR "microcode: aborting, bad checksum\n"); + pr_err("aborting, bad checksum\n"); return -EINVAL; } } @@ -327,13 +324,11 @@ static int apply_microcode(int cpu) rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); if (val[1] != mc_intel->hdr.rev) { - printk(KERN_ERR "microcode: CPU%d update " - "to revision 0x%x failed\n", - cpu_num, mc_intel->hdr.rev); + pr_err("CPU%d update to revision 0x%x failed\n", + cpu_num, mc_intel->hdr.rev); return -1; } - printk(KERN_INFO "microcode: CPU%d updated to revision " - "0x%x, date = %04x-%02x-%02x \n", + pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x \n", cpu_num, val[1], mc_intel->hdr.date & 0xffff, mc_intel->hdr.date >> 24, @@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, mc_size = get_totalsize(&mc_header); if (!mc_size || mc_size > leftover) { - printk(KERN_ERR "microcode: error!" - "Bad data in microcode data file\n"); + pr_err("error! Bad data in microcode data file\n"); break; } @@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, vfree(uci->mc); uci->mc = (struct microcode_intel *)new_mc; - pr_debug("microcode: CPU%d found a matching microcode update with" - " version 0x%x (current=0x%x)\n", - cpu, new_rev, uci->cpu_sig.rev); + pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", + cpu, new_rev, uci->cpu_sig.rev); out: return state; } @@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) c->x86, c->x86_model, c->x86_mask); if (request_firmware(&firmware, name, device)) { - pr_debug("microcode: data file %s load failed\n", name); + pr_debug("data file %s load failed\n", name); return UCODE_NFOUND; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 35a57c9..40b54ce 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -945,9 +945,6 @@ void __init early_reserve_e820_mpc_new(void) { if (enable_update_mptable && alloc_mptable) { u64 startt = 0; -#ifdef CONFIG_X86_TRAMPOLINE - startt = TRAMPOLINE_BASE; -#endif mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); } } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index afcc58b..fcc2f2b 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -120,11 +120,14 @@ static void __init dma32_free_bootmem(void) void __init pci_iommu_alloc(void) { + int use_swiotlb; + + use_swiotlb = pci_swiotlb_init(); #ifdef CONFIG_X86_64 /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); #endif - if (pci_swiotlb_init()) + if (use_swiotlb) return; gart_iommu_hole_init(); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e6a0d40..56c0e73 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -710,7 +710,8 @@ static void gart_iommu_shutdown(void) struct pci_dev *dev; int i; - if (no_agp) + /* don't shutdown it if there is AGP installed */ + if (!no_agp) return; for (i = 0; i < num_k8_northbridges; i++) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5e2ba63..7a7bd4e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,8 @@ #include <linux/clockchips.h> #include <linux/random.h> #include <linux/user-return-notifier.h> +#include <linux/dmi.h> +#include <linux/utsname.h> #include <trace/events/power.h> #include <linux/hw_breakpoint.h> #include <asm/system.h> @@ -90,6 +92,25 @@ void exit_thread(void) } } +void show_regs_common(void) +{ + const char *board, *product; + + board = dmi_get_system_info(DMI_BOARD_NAME); + if (!board) + board = ""; + product = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!product) + product = ""; + + printk("\n"); + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version, board, product); +} + void flush_thread(void) { struct task_struct *tsk = current; diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 075580b..120b887 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -23,7 +23,6 @@ #include <linux/vmalloc.h> #include <linux/user.h> #include <linux/interrupt.h> -#include <linux/utsname.h> #include <linux/delay.h> #include <linux/reboot.h> #include <linux/init.h> @@ -35,7 +34,6 @@ #include <linux/tick.h> #include <linux/percpu.h> #include <linux/prctl.h> -#include <linux/dmi.h> #include <linux/ftrace.h> #include <linux/uaccess.h> #include <linux/io.h> @@ -128,7 +126,6 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned long sp; unsigned short ss, gs; - const char *board; if (user_mode_vm(regs)) { sp = regs->sp; @@ -140,16 +137,7 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - printk("\n"); - - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!board) - board = ""; - printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", - task_pid_nr(current), current->comm, - print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board); + show_regs_common(); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c95c8f4..e5ab0cd 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -26,7 +26,6 @@ #include <linux/slab.h> #include <linux/user.h> #include <linux/interrupt.h> -#include <linux/utsname.h> #include <linux/delay.h> #include <linux/module.h> #include <linux/ptrace.h> @@ -38,7 +37,6 @@ #include <linux/uaccess.h> #include <linux/io.h> #include <linux/ftrace.h> -#include <linux/dmi.h> #include <asm/pgtable.h> #include <asm/system.h> @@ -163,18 +161,8 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, cs, es; - const char *board; - - printk("\n"); - print_modules(); - board = dmi_get_system_info(DMI_PRODUCT_NAME); - if (!board) - board = ""; - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", - current->pid, current->comm, print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board); + + show_regs_common(); printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 04d182a..7079dda 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -555,7 +555,9 @@ static int genregs_set(struct task_struct *target, return ret; } -static void ptrace_triggered(struct perf_event *bp, void *data) +static void ptrace_triggered(struct perf_event *bp, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { int i; struct thread_struct *thread = &(current->thread); @@ -593,13 +595,13 @@ static unsigned long ptrace_get_dr7(struct perf_event *bp[]) return dr7; } -static struct perf_event * +static int ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, struct task_struct *tsk, int disabled) { int err; int gen_len, gen_type; - DEFINE_BREAKPOINT_ATTR(attr); + struct perf_event_attr attr; /* * We shoud have at least an inactive breakpoint at this @@ -607,18 +609,18 @@ ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, * written the address register first */ if (!bp) - return ERR_PTR(-EINVAL); + return -EINVAL; err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); if (err) - return ERR_PTR(err); + return err; attr = bp->attr; attr.bp_len = gen_len; attr.bp_type = gen_type; attr.disabled = disabled; - return modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); + return modify_user_hw_breakpoint(bp, &attr); } /* @@ -656,28 +658,17 @@ restore: if (!second_pass) continue; - thread->ptrace_bps[i] = NULL; - bp = ptrace_modify_breakpoint(bp, len, type, + rc = ptrace_modify_breakpoint(bp, len, type, tsk, 1); - if (IS_ERR(bp)) { - rc = PTR_ERR(bp); - thread->ptrace_bps[i] = NULL; + if (rc) break; - } - thread->ptrace_bps[i] = bp; } continue; } - bp = ptrace_modify_breakpoint(bp, len, type, tsk, 0); - - /* Incorrect bp, or we have a bug in bp API */ - if (IS_ERR(bp)) { - rc = PTR_ERR(bp); - thread->ptrace_bps[i] = NULL; + rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); + if (rc) break; - } - thread->ptrace_bps[i] = bp; } /* * Make a second pass to free the remaining unused breakpoints @@ -721,9 +712,10 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, { struct perf_event *bp; struct thread_struct *t = &tsk->thread; - DEFINE_BREAKPOINT_ATTR(attr); + struct perf_event_attr attr; if (!t->ptrace_bps[nr]) { + hw_breakpoint_init(&attr); /* * Put stub len and type to register (reserve) an inactive but * correct bp @@ -734,26 +726,32 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, attr.disabled = 1; bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); + + /* + * CHECKME: the previous code returned -EIO if the addr wasn't + * a valid task virtual addr. The new one will return -EINVAL in + * this case. + * -EINVAL may be what we want for in-kernel breakpoints users, + * but -EIO looks better for ptrace, since we refuse a register + * writing for the user. And anyway this is the previous + * behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; } else { + int err; + bp = t->ptrace_bps[nr]; - t->ptrace_bps[nr] = NULL; attr = bp->attr; attr.bp_addr = addr; - bp = modify_user_hw_breakpoint(bp, &attr, bp->callback, tsk); + err = modify_user_hw_breakpoint(bp, &attr); + if (err) + return err; } - /* - * CHECKME: the previous code returned -EIO if the addr wasn't a - * valid task virtual addr. The new one will return -EINVAL in this - * case. - * -EINVAL may be what we want for in-kernel breakpoints users, but - * -EIO looks better for ptrace, since we refuse a register writing - * for the user. And anyway this is the previous behaviour. - */ - if (IS_ERR(bp)) - return PTR_ERR(bp); - t->ptrace_bps[nr] = bp; return 0; } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 2b97fc5..1545bc0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -259,6 +259,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), }, }, + { /* Handle problems with rebooting on ASUS P4S800 */ + .callback = set_bios_reboot, + .ident = "ASUS P4S800", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), + DMI_MATCH(DMI_BOARD_NAME, "P4S800"), + }, + }, { } }; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 946a311..f7b8b98 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -73,6 +73,7 @@ #include <asm/mtrr.h> #include <asm/apic.h> +#include <asm/trampoline.h> #include <asm/e820.h> #include <asm/mpspec.h> #include <asm/setup.h> @@ -875,6 +876,13 @@ void __init setup_arch(char **cmdline_p) reserve_brk(); + /* + * Find and reserve possible boot-time SMP configuration: + */ + find_smp_config(); + + reserve_trampoline_memory(); + #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. @@ -921,11 +929,6 @@ void __init setup_arch(char **cmdline_p) early_acpi_boot_init(); - /* - * Find and reserve possible boot-time SMP configuration: - */ - find_smp_config(); - #ifdef CONFIG_ACPI_NUMA /* * Parse SRAT to discover nodes. diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d559af9..35abcb8 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -1,3 +1,5 @@ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -20,9 +22,9 @@ #include <asm/stackprotector.h> #ifdef CONFIG_DEBUG_PER_CPU_MAPS -# define DBG(x...) printk(KERN_DEBUG x) +# define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__) #else -# define DBG(x...) +# define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0) #endif DEFINE_PER_CPU(int, cpu_number); @@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, } else { ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), size, align, goal); - pr_debug("per cpu data for cpu%d %lu bytes on node%d at " - "%016lx\n", cpu, size, node, __pa(ptr)); + pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", + cpu, size, node, __pa(ptr)); } return ptr; #else @@ -198,8 +200,7 @@ void __init setup_per_cpu_areas(void) pcpu_cpu_distance, pcpu_fc_alloc, pcpu_fc_free); if (rc < 0) - pr_warning("PERCPU: %s allocator failed (%d), " - "falling back to page size\n", + pr_warning("%s allocator failed (%d), falling back to page size\n", pcpu_fc_names[pcpu_chosen_fc], rc); } if (rc < 0) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 324f2a4..678d0b8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -671,6 +671,26 @@ static void __cpuinit do_fork_idle(struct work_struct *work) complete(&c_idle->done); } +/* reduce the number of lines printed when booting a large cpu count system */ +static void __cpuinit announce_cpu(int cpu, int apicid) +{ + static int current_node = -1; + int node = cpu_to_node(cpu); + + if (system_state == SYSTEM_BOOTING) { + if (node != current_node) { + if (current_node > (-1)) + pr_cont(" Ok.\n"); + current_node = node; + pr_info("Booting Node %3d, Processors ", node); + } + pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); + return; + } else + pr_info("Booting Node %d Processor %d APIC 0x%x\n", + node, cpu, apicid); +} + /* * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad * (ie clustered apic addressing mode), this is a LOGICAL apic ID. @@ -687,7 +707,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), }; - INIT_WORK(&c_idle.work, do_fork_idle); + INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); alternatives_smp_switch(1); @@ -713,6 +733,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) if (IS_ERR(c_idle.idle)) { printk("failed fork for CPU %d\n", cpu); + destroy_work_on_stack(&c_idle.work); return PTR_ERR(c_idle.idle); } @@ -736,9 +757,8 @@ do_rest: /* start_ip had better be page-aligned! */ start_ip = setup_trampoline(); - /* So we see what's up */ - printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", - cpu, apicid, start_ip); + /* So we see what's up */ + announce_cpu(cpu, apicid); /* * This grunge runs the startup process for @@ -787,21 +807,17 @@ do_rest: udelay(100); } - if (cpumask_test_cpu(cpu, cpu_callin_mask)) { - /* number CPUs logically, starting from 1 (BSP is 0) */ - pr_debug("OK.\n"); - printk(KERN_INFO "CPU%d: ", cpu); - print_cpu_info(&cpu_data(cpu)); - pr_debug("CPU has booted.\n"); - } else { + if (cpumask_test_cpu(cpu, cpu_callin_mask)) + pr_debug("CPU%d: has booted.\n", cpu); + else { boot_error = 1; if (*((volatile unsigned char *)trampoline_base) == 0xA5) /* trampoline started but...? */ - printk(KERN_ERR "Stuck ??\n"); + pr_err("CPU%d: Stuck ??\n", cpu); else /* trampoline code not run */ - printk(KERN_ERR "Not responding.\n"); + pr_err("CPU%d: Not responding.\n", cpu); if (apic->inquire_remote_apic) apic->inquire_remote_apic(apicid); } @@ -831,6 +847,7 @@ do_rest: smpboot_restore_warm_reset_vector(); } + destroy_work_on_stack(&c_idle.work); return boot_error; } @@ -1291,14 +1308,16 @@ void native_cpu_die(unsigned int cpu) for (i = 0; i < 10; i++) { /* They ack this in play_dead by setting CPU_DEAD */ if (per_cpu(cpu_state, cpu) == CPU_DEAD) { - printk(KERN_INFO "CPU %d is now offline\n", cpu); + if (system_state == SYSTEM_RUNNING) + pr_info("CPU %u is now offline\n", cpu); + if (1 == num_online_cpus()) alternatives_smp_switch(0); return; } msleep(100); } - printk(KERN_ERR "CPU %u didn't die...\n", cpu); + pr_err("CPU %u didn't die...\n", cpu); } void play_dead_common(void) diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 1884a8d..dee1ff7 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -24,31 +24,6 @@ #include <asm/syscalls.h> -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - int error = -EBADF; - struct file *file = NULL; - struct mm_struct *mm = current->mm; - - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - - down_write(&mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&mm->mmap_sem); - - if (file) - fput(file); -out: - return error; -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/i386 didn't use to be able to handle more than @@ -77,7 +52,7 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - err = sys_mmap2(a.addr, a.len, a.prot, a.flags, + err = sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); out: return err; diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 45e00eb..8aa2057 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, unsigned long, fd, unsigned long, off) { long error; - struct file *file; - error = -EINVAL; if (off & ~PAGE_MASK) goto out; - error = -EBADF; - file = NULL; - flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - if (!(flags & MAP_ANONYMOUS)) { - file = fget(fd); - if (!file) - goto out; - } - down_write(¤t->mm->mmap_sem); - error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT); - up_write(¤t->mm->mmap_sem); - - if (file) - fput(file); + error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); out: return error; } diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 70c2125..15228b5 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -191,7 +191,7 @@ ENTRY(sys_call_table) .long sys_ni_syscall /* reserved for streams2 */ .long ptregs_vfork /* 190 */ .long sys_getrlimit - .long sys_mmap2 + .long sys_mmap_pgoff .long sys_truncate64 .long sys_ftruncate64 .long sys_stat64 /* 195 */ diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index cd02212..c652ef6 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -12,21 +12,19 @@ #endif /* ready for x86_64 and x86 */ -unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE); +unsigned char *__trampinitdata trampoline_base; void __init reserve_trampoline_memory(void) { -#ifdef CONFIG_X86_32 - /* - * But first pinch a few for the stack/trampoline stuff - * FIXME: Don't need the extra page at 4K, but need to fix - * trampoline before removing it. (see the GDT stuff) - */ - reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); -#endif + unsigned long mem; + /* Has to be in very low memory so we can execute real-mode AP code. */ - reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE, - "TRAMPOLINE"); + mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); + if (mem == -1L) + panic("Cannot allocate trampoline\n"); + + trampoline_base = __va(mem); + reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); } /* diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index fab7440..296aba4 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -29,6 +29,8 @@ * Based on QEMU and Xen. */ +#define pr_fmt(fmt) "pit: " fmt + #include <linux/kvm_host.h> #include "irq.h" @@ -262,7 +264,7 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) static void destroy_pit_timer(struct kvm_timer *pt) { - pr_debug("pit: execute del timer!\n"); + pr_debug("execute del timer!\n"); hrtimer_cancel(&pt->timer); } @@ -284,7 +286,7 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); - pr_debug("pit: create pit timer, interval is %llu nsec\n", interval); + pr_debug("create pit timer, interval is %llu nsec\n", interval); /* TODO The new value only affected after the retriggered */ hrtimer_cancel(&pt->timer); @@ -309,7 +311,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) WARN_ON(!mutex_is_locked(&ps->lock)); - pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); + pr_debug("load_count val is %d, channel is %d\n", val, channel); /* * The largest possible initial count is 0; this is equivalent @@ -395,8 +397,8 @@ static int pit_ioport_write(struct kvm_io_device *this, mutex_lock(&pit_state->lock); if (val != 0) - pr_debug("pit: write addr is 0x%x, len is %d, val is 0x%x\n", - (unsigned int)addr, len, val); + pr_debug("write addr is 0x%x, len is %d, val is 0x%x\n", + (unsigned int)addr, len, val); if (addr == 3) { channel = val >> 6; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3de0b37..1d9b338 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -316,7 +316,7 @@ static void svm_hardware_disable(void *garbage) static int svm_hardware_enable(void *garbage) { - struct svm_cpu_data *svm_data; + struct svm_cpu_data *sd; uint64_t efer; struct descriptor_table gdt_descr; struct desc_struct *gdt; @@ -331,63 +331,61 @@ static int svm_hardware_enable(void *garbage) me); return -EINVAL; } - svm_data = per_cpu(svm_data, me); + sd = per_cpu(svm_data, me); - if (!svm_data) { + if (!sd) { printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n", me); return -EINVAL; } - svm_data->asid_generation = 1; - svm_data->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; - svm_data->next_asid = svm_data->max_asid + 1; + sd->asid_generation = 1; + sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1; + sd->next_asid = sd->max_asid + 1; kvm_get_gdt(&gdt_descr); gdt = (struct desc_struct *)gdt_descr.base; - svm_data->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); + sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS); wrmsrl(MSR_EFER, efer | EFER_SVME); - wrmsrl(MSR_VM_HSAVE_PA, - page_to_pfn(svm_data->save_area) << PAGE_SHIFT); + wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT); return 0; } static void svm_cpu_uninit(int cpu) { - struct svm_cpu_data *svm_data - = per_cpu(svm_data, raw_smp_processor_id()); + struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id()); - if (!svm_data) + if (!sd) return; per_cpu(svm_data, raw_smp_processor_id()) = NULL; - __free_page(svm_data->save_area); - kfree(svm_data); + __free_page(sd->save_area); + kfree(sd); } static int svm_cpu_init(int cpu) { - struct svm_cpu_data *svm_data; + struct svm_cpu_data *sd; int r; - svm_data = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); - if (!svm_data) + sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL); + if (!sd) return -ENOMEM; - svm_data->cpu = cpu; - svm_data->save_area = alloc_page(GFP_KERNEL); + sd->cpu = cpu; + sd->save_area = alloc_page(GFP_KERNEL); r = -ENOMEM; - if (!svm_data->save_area) + if (!sd->save_area) goto err_1; - per_cpu(svm_data, cpu) = svm_data; + per_cpu(svm_data, cpu) = sd; return 0; err_1: - kfree(svm_data); + kfree(sd); return r; } @@ -1092,16 +1090,16 @@ static void save_host_msrs(struct kvm_vcpu *vcpu) #endif } -static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) +static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) { - if (svm_data->next_asid > svm_data->max_asid) { - ++svm_data->asid_generation; - svm_data->next_asid = 1; + if (sd->next_asid > sd->max_asid) { + ++sd->asid_generation; + sd->next_asid = 1; svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; } - svm->asid_generation = svm_data->asid_generation; - svm->vmcb->control.asid = svm_data->next_asid++; + svm->asid_generation = sd->asid_generation; + svm->vmcb->control.asid = sd->next_asid++; } static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) @@ -2429,8 +2427,8 @@ static void reload_tss(struct kvm_vcpu *vcpu) { int cpu = raw_smp_processor_id(); - struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); - svm_data->tss_desc->type = 9; /* available 32/64-bit TSS */ + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); + sd->tss_desc->type = 9; /* available 32/64-bit TSS */ load_TR_desc(); } @@ -2438,12 +2436,12 @@ static void pre_svm_run(struct vcpu_svm *svm) { int cpu = raw_smp_processor_id(); - struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); + struct svm_cpu_data *sd = per_cpu(svm_data, cpu); svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; /* FIXME: handle wraparound of asid_generation */ - if (svm->asid_generation != svm_data->asid_generation) - new_asid(svm, svm_data); + if (svm->asid_generation != sd->asid_generation) + new_asid(svm, sd); } static void svm_inject_nmi(struct kvm_vcpu *vcpu) diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index a2d6472..45b20e4 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -5,7 +5,7 @@ inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt quiet_cmd_inat_tables = GEN $@ - cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ + cmd_inat_tables = $(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ $(obj)/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) $(call cmd,inat_tables) @@ -20,7 +20,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-y += insn.o inat.o +lib-$(CONFIG_KPROBES) += insn.o inat.o obj-y += msr-reg.o msr-reg-export.o diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 41628b1..8728341 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c @@ -7,7 +7,6 @@ struct msr_info { u32 msr_no; struct msr reg; struct msr *msrs; - int off; int err; }; @@ -18,7 +17,7 @@ static void __rdmsr_on_cpu(void *info) int this_cpu = raw_smp_processor_id(); if (rv->msrs) - reg = &rv->msrs[this_cpu - rv->off]; + reg = per_cpu_ptr(rv->msrs, this_cpu); else reg = &rv->reg; @@ -32,7 +31,7 @@ static void __wrmsr_on_cpu(void *info) int this_cpu = raw_smp_processor_id(); if (rv->msrs) - reg = &rv->msrs[this_cpu - rv->off]; + reg = per_cpu_ptr(rv->msrs, this_cpu); else reg = &rv->reg; @@ -80,7 +79,6 @@ static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no, memset(&rv, 0, sizeof(rv)); - rv.off = cpumask_first(mask); rv.msrs = msrs; rv.msr_no = msr_no; @@ -120,6 +118,26 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs) } EXPORT_SYMBOL(wrmsr_on_cpus); +struct msr *msrs_alloc(void) +{ + struct msr *msrs = NULL; + + msrs = alloc_percpu(struct msr); + if (!msrs) { + pr_warning("%s: error allocating msrs\n", __func__); + return NULL; + } + + return msrs; +} +EXPORT_SYMBOL(msrs_alloc); + +void msrs_free(struct msr *msrs) +{ + free_percpu(msrs); +} +EXPORT_SYMBOL(msrs_free); + /* These "safe" variants are slower and should be used when the target MSR may not actually exist. */ static void __rdmsr_safe_on_cpu(void *info) diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 07bcc30..c0f6198 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -5,6 +5,8 @@ * 2008 Pekka Paalanen <pq@iki.fi> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/list.h> #include <linux/rculist.h> #include <linux/spinlock.h> @@ -136,7 +138,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) pte_t *pte = lookup_address(f->page, &level); if (!pte) { - pr_err("kmmio: no pte for page 0x%08lx\n", f->page); + pr_err("no pte for page 0x%08lx\n", f->page); return -1; } @@ -148,7 +150,7 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) clear_pte_presence(pte, clear, &f->old_presence); break; default: - pr_err("kmmio: unexpected page level 0x%x.\n", level); + pr_err("unexpected page level 0x%x.\n", level); return -1; } @@ -170,13 +172,14 @@ static int clear_page_presence(struct kmmio_fault_page *f, bool clear) static int arm_kmmio_fault_page(struct kmmio_fault_page *f) { int ret; - WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); + WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", - f->page, f->count, !!f->old_presence); + pr_warning("double-arm: page 0x%08lx, ref %d, old %d\n", + f->page, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); - WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); + WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming 0x%08lx failed.\n"), + f->page); f->armed = true; return ret; } @@ -240,24 +243,21 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr) * condition needs handling by do_page_fault(), the * page really not being present is the most common. */ - pr_debug("kmmio: secondary hit for 0x%08lx CPU %d.\n", - addr, smp_processor_id()); + pr_debug("secondary hit for 0x%08lx CPU %d.\n", + addr, smp_processor_id()); if (!faultpage->old_presence) - pr_info("kmmio: unexpected secondary hit for " - "address 0x%08lx on CPU %d.\n", addr, - smp_processor_id()); + pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n", + addr, smp_processor_id()); } else { /* * Prevent overwriting already in-flight context. * This should not happen, let's hope disarming at * least prevents a panic. */ - pr_emerg("kmmio: recursive probe hit on CPU %d, " - "for address 0x%08lx. Ignoring.\n", - smp_processor_id(), addr); - pr_emerg("kmmio: previous hit was at 0x%08lx.\n", - ctx->addr); + pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n", + smp_processor_id(), addr); + pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr); disarm_kmmio_fault_page(faultpage); } goto no_kmmio_ctx; @@ -316,8 +316,8 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * something external causing them (f.e. using a debugger while * mmio tracing enabled), or erroneous behaviour */ - pr_warning("kmmio: unexpected debug trap on CPU %d.\n", - smp_processor_id()); + pr_warning("unexpected debug trap on CPU %d.\n", + smp_processor_id()); goto out; } @@ -425,7 +425,7 @@ int register_kmmio_probe(struct kmmio_probe *p) list_add_rcu(&p->list, &kmmio_probes); while (size < size_lim) { if (add_kmmio_fault_page(p->addr + size)) - pr_err("kmmio: Unable to set page fault.\n"); + pr_err("Unable to set page fault.\n"); size += PAGE_SIZE; } out: @@ -490,7 +490,7 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) * 2. remove_kmmio_fault_pages() * Remove the pages from kmmio_page_table. * 3. rcu_free_kmmio_fault_pages() - * Actally free the kmmio_fault_page structs as with RCU. + * Actually free the kmmio_fault_page structs as with RCU. */ void unregister_kmmio_probe(struct kmmio_probe *p) { @@ -511,7 +511,7 @@ void unregister_kmmio_probe(struct kmmio_probe *p) drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); if (!drelease) { - pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); + pr_crit("leaking kmmio_fault_page objects.\n"); return; } drelease->release_list = release_list; diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index 132772a..34a3291 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -19,6 +19,9 @@ * * Derived from the read-mod example from relay-examples by Tom Zanussi. */ + +#define pr_fmt(fmt) "mmiotrace: " fmt + #define DEBUG 1 #include <linux/module.h> @@ -36,8 +39,6 @@ #include "pf_in.h" -#define NAME "mmiotrace: " - struct trap_reason { unsigned long addr; unsigned long ip; @@ -96,17 +97,18 @@ static void print_pte(unsigned long address) pte_t *pte = lookup_address(address, &level); if (!pte) { - pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", - __func__, address); + pr_err("Error in %s: no pte for page 0x%08lx\n", + __func__, address); return; } if (level == PG_LEVEL_2M) { - pr_emerg(NAME "4MB pages are not currently supported: " - "0x%08lx\n", address); + pr_emerg("4MB pages are not currently supported: 0x%08lx\n", + address); BUG(); } - pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, + pr_info("pte for 0x%lx: 0x%llx 0x%llx\n", + address, (unsigned long long)pte_val(*pte), (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); } @@ -118,22 +120,21 @@ static void print_pte(unsigned long address) static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) { const struct trap_reason *my_reason = &get_cpu_var(pf_reason); - pr_emerg(NAME "unexpected fault for address: 0x%08lx, " - "last fault for address: 0x%08lx\n", - addr, my_reason->addr); + pr_emerg("unexpected fault for address: 0x%08lx, last fault for address: 0x%08lx\n", + addr, my_reason->addr); print_pte(addr); print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); #ifdef __i386__ pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->ax, regs->bx, regs->cx, regs->dx); + regs->ax, regs->bx, regs->cx, regs->dx); pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->si, regs->di, regs->bp, regs->sp); + regs->si, regs->di, regs->bp, regs->sp); #else pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", - regs->ax, regs->cx, regs->dx); + regs->ax, regs->cx, regs->dx); pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", - regs->si, regs->di, regs->bp, regs->sp); + regs->si, regs->di, regs->bp, regs->sp); #endif put_cpu_var(pf_reason); BUG(); @@ -213,7 +214,7 @@ static void post(struct kmmio_probe *p, unsigned long condition, /* this should always return the active_trace count to 0 */ my_reason->active_traces--; if (my_reason->active_traces) { - pr_emerg(NAME "unexpected post handler"); + pr_emerg("unexpected post handler"); BUG(); } @@ -244,7 +245,7 @@ static void ioremap_trace_core(resource_size_t offset, unsigned long size, }; if (!trace) { - pr_err(NAME "kmalloc failed in ioremap\n"); + pr_err("kmalloc failed in ioremap\n"); return; } @@ -282,8 +283,8 @@ void mmiotrace_ioremap(resource_size_t offset, unsigned long size, if (!is_enabled()) /* recheck and proper locking in *_core() */ return; - pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", - (unsigned long long)offset, size, addr); + pr_debug("ioremap_*(0x%llx, 0x%lx) = %p\n", + (unsigned long long)offset, size, addr); if ((filter_offset) && (offset != filter_offset)) return; ioremap_trace_core(offset, size, addr); @@ -301,7 +302,7 @@ static void iounmap_trace_core(volatile void __iomem *addr) struct remap_trace *tmp; struct remap_trace *found_trace = NULL; - pr_debug(NAME "Unmapping %p.\n", addr); + pr_debug("Unmapping %p.\n", addr); spin_lock_irq(&trace_lock); if (!is_enabled()) @@ -363,9 +364,8 @@ static void clear_trace_list(void) * Caller also ensures is_enabled() cannot change. */ list_for_each_entry(trace, &trace_list, list) { - pr_notice(NAME "purging non-iounmapped " - "trace @0x%08lx, size 0x%lx.\n", - trace->probe.addr, trace->probe.len); + pr_notice("purging non-iounmapped trace @0x%08lx, size 0x%lx.\n", + trace->probe.addr, trace->probe.len); if (!nommiotrace) unregister_kmmio_probe(&trace->probe); } @@ -387,7 +387,7 @@ static void enter_uniprocessor(void) if (downed_cpus == NULL && !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { - pr_notice(NAME "Failed to allocate mask\n"); + pr_notice("Failed to allocate mask\n"); goto out; } @@ -395,20 +395,19 @@ static void enter_uniprocessor(void) cpumask_copy(downed_cpus, cpu_online_mask); cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); if (num_online_cpus() > 1) - pr_notice(NAME "Disabling non-boot CPUs...\n"); + pr_notice("Disabling non-boot CPUs...\n"); put_online_cpus(); for_each_cpu(cpu, downed_cpus) { err = cpu_down(cpu); if (!err) - pr_info(NAME "CPU%d is down.\n", cpu); + pr_info("CPU%d is down.\n", cpu); else - pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); + pr_err("Error taking CPU%d down: %d\n", cpu, err); } out: if (num_online_cpus() > 1) - pr_warning(NAME "multiple CPUs still online, " - "may miss events.\n"); + pr_warning("multiple CPUs still online, may miss events.\n"); } /* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, @@ -420,13 +419,13 @@ static void __ref leave_uniprocessor(void) if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) return; - pr_notice(NAME "Re-enabling CPUs...\n"); + pr_notice("Re-enabling CPUs...\n"); for_each_cpu(cpu, downed_cpus) { err = cpu_up(cpu); if (!err) - pr_info(NAME "enabled CPU%d.\n", cpu); + pr_info("enabled CPU%d.\n", cpu); else - pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); + pr_err("cannot re-enable CPU%d: %d\n", cpu, err); } } @@ -434,8 +433,8 @@ static void __ref leave_uniprocessor(void) static void enter_uniprocessor(void) { if (num_online_cpus() > 1) - pr_warning(NAME "multiple CPUs are online, may miss events. " - "Suggest booting with maxcpus=1 kernel argument.\n"); + pr_warning("multiple CPUs are online, may miss events. " + "Suggest booting with maxcpus=1 kernel argument.\n"); } static void leave_uniprocessor(void) @@ -450,13 +449,13 @@ void enable_mmiotrace(void) goto out; if (nommiotrace) - pr_info(NAME "MMIO tracing disabled.\n"); + pr_info("MMIO tracing disabled.\n"); kmmio_init(); enter_uniprocessor(); spin_lock_irq(&trace_lock); atomic_inc(&mmiotrace_enabled); spin_unlock_irq(&trace_lock); - pr_info(NAME "enabled.\n"); + pr_info("enabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } @@ -475,7 +474,7 @@ void disable_mmiotrace(void) clear_trace_list(); /* guarantees: no more kmmio callbacks */ leave_uniprocessor(); kmmio_cleanup(); - pr_info(NAME "disabled.\n"); + pr_info("disabled.\n"); out: mutex_unlock(&mmiotrace_mutex); } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 66b55d6..ae9648e 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -704,9 +704,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (!range_is_allowed(pfn, size)) return 0; - if (file->f_flags & O_SYNC) { + if (file->f_flags & O_DSYNC) flags = _PAGE_CACHE_UC_MINUS; - } #ifdef CONFIG_X86_32 /* diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index d49202e..564b008 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile @@ -15,3 +15,8 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-y += common.o early.o obj-y += amd_bus.o +obj-$(CONFIG_X86_64) += bus_numa.o intel_bus.o + +ifeq ($(CONFIG_PCI_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1014eb4..959e548 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -7,6 +7,7 @@ #include <asm/pci_x86.h> struct pci_root_info { + struct acpi_device *bridge; char *name; unsigned int res_num; struct resource *res; @@ -58,6 +59,30 @@ bus_has_transparent_bridge(struct pci_bus *bus) return false; } +static void +align_resource(struct acpi_device *bridge, struct resource *res) +{ + int align = (res->flags & IORESOURCE_MEM) ? 16 : 4; + + /* + * Host bridge windows are not BARs, but the decoders on the PCI side + * that claim this address space have starting alignment and length + * constraints, so fix any obvious BIOS goofs. + */ + if (!IS_ALIGNED(res->start, align)) { + dev_printk(KERN_DEBUG, &bridge->dev, + "host bridge window %pR invalid; " + "aligning start to %d-byte boundary\n", res, align); + res->start &= ~(align - 1); + } + if (!IS_ALIGNED(res->end + 1, align)) { + dev_printk(KERN_DEBUG, &bridge->dev, + "host bridge window %pR invalid; " + "aligning end to %d-byte boundary\n", res, align); + res->end = ALIGN(res->end, align) - 1; + } +} + static acpi_status setup_resource(struct acpi_resource *acpi_res, void *data) { @@ -91,11 +116,12 @@ setup_resource(struct acpi_resource *acpi_res, void *data) start = addr.minimum + addr.translation_offset; end = start + addr.address_length - 1; if (info->res_num >= max_root_bus_resources) { - printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " - "from %s for %s due to _CRS returning more than " - "%d resource descriptors\n", (unsigned long) start, - (unsigned long) end, root->name, info->name, - max_root_bus_resources); + if (pci_probe & PCI_USE__CRS) + printk(KERN_WARNING "PCI: Failed to allocate " + "0x%lx-0x%lx from %s for %s due to _CRS " + "returning more than %d resource descriptors\n", + (unsigned long) start, (unsigned long) end, + root->name, info->name, max_root_bus_resources); return AE_OK; } @@ -105,14 +131,28 @@ setup_resource(struct acpi_resource *acpi_res, void *data) res->start = start; res->end = end; res->child = NULL; + align_resource(info->bridge, res); + + if (!(pci_probe & PCI_USE__CRS)) { + dev_printk(KERN_DEBUG, &info->bridge->dev, + "host bridge window %pR (ignored)\n", res); + return AE_OK; + } if (insert_resource(root, res)) { - printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " - "from %s for %s\n", (unsigned long) res->start, - (unsigned long) res->end, root->name, info->name); + dev_err(&info->bridge->dev, + "can't allocate host bridge window %pR\n", res); } else { info->bus->resource[info->res_num] = res; info->res_num++; + if (addr.translation_offset) + dev_info(&info->bridge->dev, "host bridge window %pR " + "(PCI address [%#llx-%#llx])\n", + res, res->start - addr.translation_offset, + res->end - addr.translation_offset); + else + dev_info(&info->bridge->dev, + "host bridge window %pR\n", res); } return AE_OK; } @@ -124,6 +164,12 @@ get_current_resources(struct acpi_device *device, int busnum, struct pci_root_info info; size_t size; + if (!(pci_probe & PCI_USE__CRS)) + dev_info(&device->dev, + "ignoring host bridge windows from ACPI; " + "boot with \"pci=use_crs\" to use them\n"); + + info.bridge = device; info.bus = bus; info.res_num = 0; acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_resource, @@ -163,8 +209,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do #endif if (domain && !pci_domains_supported) { - printk(KERN_WARNING "PCI: Multiple domains not supported " - "(dom %d, bus %d)\n", domain, busnum); + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (multiple domains not supported)\n", + domain, busnum); return NULL; } @@ -188,7 +235,8 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do */ sd = kzalloc(sizeof(*sd), GFP_KERNEL); if (!sd) { - printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum); + printk(KERN_WARNING "pci_bus %04x:%02x: " + "ignored (out of memory)\n", domain, busnum); return NULL; } @@ -209,9 +257,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do } else { bus = pci_create_bus(NULL, busnum, &pci_root_ops, sd); if (bus) { - if (pci_probe & PCI_USE__CRS) - get_current_resources(device, busnum, domain, - bus); + get_current_resources(device, busnum, domain, bus); bus->subordinate = pci_scan_child_bus(bus); } } diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 572ee97..95ecbd4 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -6,10 +6,10 @@ #ifdef CONFIG_X86_64 #include <asm/pci-direct.h> -#include <asm/mpspec.h> -#include <linux/cpumask.h> #endif +#include "bus_numa.h" + /* * This discovers the pcibus <-> node mapping on AMD K8. * also get peer root bus resource for io,mmio @@ -17,67 +17,6 @@ #ifdef CONFIG_X86_64 -/* - * sub bus (transparent) will use entres from 3 to store extra from root, - * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? - */ -#define RES_NUM 16 -struct pci_root_info { - char name[12]; - unsigned int res_num; - struct resource res[RES_NUM]; - int bus_min; - int bus_max; - int node; - int link; -}; - -/* 4 at this time, it may become to 32 */ -#define PCI_ROOT_NR 4 -static int pci_root_num; -static struct pci_root_info pci_root_info[PCI_ROOT_NR]; - -void x86_pci_root_bus_res_quirks(struct pci_bus *b) -{ - int i; - int j; - struct pci_root_info *info; - - /* don't go for it if _CRS is used already */ - if (b->resource[0] != &ioport_resource || - b->resource[1] != &iomem_resource) - return; - - /* if only one root bus, don't need to anything */ - if (pci_root_num < 2) - return; - - for (i = 0; i < pci_root_num; i++) { - if (pci_root_info[i].bus_min == b->number) - break; - } - - if (i == pci_root_num) - return; - - printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", - b->number); - - info = &pci_root_info[i]; - for (j = 0; j < info->res_num; j++) { - struct resource *res; - struct resource *root; - - res = &info->res[j]; - b->resource[j] = res; - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; - else - root = &iomem_resource; - insert_resource(root, res); - } -} - #define RANGE_NUM 16 struct res_range { @@ -130,52 +69,6 @@ static void __init update_range(struct res_range *range, size_t start, } } -static void __init update_res(struct pci_root_info *info, size_t start, - size_t end, unsigned long flags, int merge) -{ - int i; - struct resource *res; - - if (!merge) - goto addit; - - /* try to merge it with old one */ - for (i = 0; i < info->res_num; i++) { - size_t final_start, final_end; - size_t common_start, common_end; - - res = &info->res[i]; - if (res->flags != flags) - continue; - - common_start = max((size_t)res->start, start); - common_end = min((size_t)res->end, end); - if (common_start > common_end + 1) - continue; - - final_start = min((size_t)res->start, start); - final_end = max((size_t)res->end, end); - - res->start = final_start; - res->end = final_end; - return; - } - -addit: - - /* need to add that */ - if (info->res_num >= RES_NUM) - return; - - res = &info->res[info->res_num]; - res->name = info->name; - res->flags = flags; - res->start = start; - res->end = end; - res->child = NULL; - info->res_num++; -} - struct pci_hostbridge_probe { u32 bus; u32 slot; @@ -230,7 +123,6 @@ static int __init early_fill_mp_bus_info(void) int j; unsigned bus; unsigned slot; - int found; int node; int link; int def_node; @@ -247,7 +139,7 @@ static int __init early_fill_mp_bus_info(void) if (!early_pci_allowed()) return -1; - found = 0; + found_all_numa_early = 0; for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { u32 id; u16 device; @@ -261,12 +153,12 @@ static int __init early_fill_mp_bus_info(void) device = (id>>16) & 0xffff; if (pci_probes[i].vendor == vendor && pci_probes[i].device == device) { - found = 1; + found_all_numa_early = 1; break; } } - if (!found) + if (!found_all_numa_early) return 0; pci_root_num = 0; @@ -488,7 +380,7 @@ static int __init early_fill_mp_bus_info(void) info = &pci_root_info[i]; res_num = info->res_num; busnum = info->bus_min; - printk(KERN_DEBUG "bus: [%02x,%02x] on node %x link %x\n", + printk(KERN_DEBUG "bus: [%02x, %02x] on node %x link %x\n", info->bus_min, info->bus_max, info->node, info->link); for (j = 0; j < res_num; j++) { res = &info->res[j]; diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c new file mode 100644 index 0000000..145df00 --- /dev/null +++ b/arch/x86/pci/bus_numa.c @@ -0,0 +1,101 @@ +#include <linux/init.h> +#include <linux/pci.h> + +#include "bus_numa.h" + +int pci_root_num; +struct pci_root_info pci_root_info[PCI_ROOT_NR]; +int found_all_numa_early; + +void x86_pci_root_bus_res_quirks(struct pci_bus *b) +{ + int i; + int j; + struct pci_root_info *info; + + /* don't go for it if _CRS is used already */ + if (b->resource[0] != &ioport_resource || + b->resource[1] != &iomem_resource) + return; + + if (!pci_root_num) + return; + + /* for amd, if only one root bus, don't need to do anything */ + if (pci_root_num < 2 && found_all_numa_early) + return; + + for (i = 0; i < pci_root_num; i++) { + if (pci_root_info[i].bus_min == b->number) + break; + } + + if (i == pci_root_num) + return; + + printk(KERN_DEBUG "PCI: peer root bus %02x res updated from pci conf\n", + b->number); + + info = &pci_root_info[i]; + for (j = 0; j < info->res_num; j++) { + struct resource *res; + struct resource *root; + + res = &info->res[j]; + b->resource[j] = res; + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + insert_resource(root, res); + } +} + +void __init update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge) +{ + int i; + struct resource *res; + + if (start > end) + return; + + if (!merge) + goto addit; + + /* try to merge it with old one */ + for (i = 0; i < info->res_num; i++) { + size_t final_start, final_end; + size_t common_start, common_end; + + res = &info->res[i]; + if (res->flags != flags) + continue; + + common_start = max((size_t)res->start, start); + common_end = min((size_t)res->end, end); + if (common_start > common_end + 1) + continue; + + final_start = min((size_t)res->start, start); + final_end = max((size_t)res->end, end); + + res->start = final_start; + res->end = final_end; + return; + } + +addit: + + /* need to add that */ + if (info->res_num >= RES_NUM) + return; + + res = &info->res[info->res_num]; + res->name = info->name; + res->flags = flags; + res->start = start; + res->end = end; + res->child = NULL; + info->res_num++; +} diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h new file mode 100644 index 0000000..adbc23fe --- /dev/null +++ b/arch/x86/pci/bus_numa.h @@ -0,0 +1,27 @@ +#ifdef CONFIG_X86_64 + +/* + * sub bus (transparent) will use entres from 3 to store extra from + * root, so need to make sure we have enough slot there, Should we + * increase PCI_BUS_NUM_RESOURCES? + */ +#define RES_NUM 16 +struct pci_root_info { + char name[12]; + unsigned int res_num; + struct resource res[RES_NUM]; + int bus_min; + int bus_max; + int node; + int link; +}; + +/* 4 at this time, it may become to 32 */ +#define PCI_ROOT_NR 4 +extern int pci_root_num; +extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; +extern int found_all_numa_early; + +extern void update_res(struct pci_root_info *info, size_t start, + size_t end, unsigned long flags, int merge); +#endif diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 1331fcf..d2552c6 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -410,8 +410,6 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) return bus; } -extern u8 pci_cache_line_size; - int __init pcibios_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -422,15 +420,19 @@ int __init pcibios_init(void) } /* - * Assume PCI cacheline size of 32 bytes for all x86s except K7/K8 - * and P4. It's also good for 386/486s (which actually have 16) + * Set PCI cacheline size to that of the CPU if the CPU has reported it. + * (For older CPUs that don't support cpuid, we se it to 32 bytes + * It's also good for 386/486s (which actually have 16) * as quite a few PCI devices do not support smaller values. */ - pci_cache_line_size = 32 >> 2; - if (c->x86 >= 6 && c->x86_vendor == X86_VENDOR_AMD) - pci_cache_line_size = 64 >> 2; /* K7 & K8 */ - else if (c->x86 > 6 && c->x86_vendor == X86_VENDOR_INTEL) - pci_cache_line_size = 128 >> 2; /* P4 */ + if (c->x86_clflush_size > 0) { + pci_dfl_cache_line_size = c->x86_clflush_size >> 2; + printk(KERN_DEBUG "PCI: pci_cache_line_size set to %d bytes\n", + pci_dfl_cache_line_size << 2); + } else { + pci_dfl_cache_line_size = 32 >> 2; + printk(KERN_DEBUG "PCI: Unknown cacheline size. Setting to 32 bytes\n"); + } pcibios_resource_survey(); diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index aaf26ae..d1067d5 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c @@ -12,8 +12,6 @@ u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) u32 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inl(0xcfc); - if (v != 0xffffffff) - pr_debug("%x reading 4 from %x: %x\n", slot, offset, v); return v; } @@ -22,7 +20,6 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) u8 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inb(0xcfc + (offset&3)); - pr_debug("%x reading 1 from %x: %x\n", slot, offset, v); return v; } @@ -31,28 +28,24 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) u16 v; outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); v = inw(0xcfc + (offset&2)); - pr_debug("%x reading 2 from %x: %x\n", slot, offset, v); return v; } void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, u32 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outl(val, 0xcfc); } void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outb(val, 0xcfc + (offset&3)); } void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) { - pr_debug("%x writing to %x: %x\n", slot, offset, val); outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); outw(val, 0xcfc + (offset&2)); } diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index b22d13b..5dc9e8c 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -129,7 +129,9 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) continue; if (!r->start || pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, + "can't reserve window %pR\n", + r); /* * Something is wrong with the region. * Invalidate the resource to prevent @@ -144,16 +146,29 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) } } +struct pci_check_idx_range { + int start; + int end; +}; + static void __init pcibios_allocate_resources(int pass) { struct pci_dev *dev = NULL; - int idx, disabled; + int idx, disabled, i; u16 command; struct resource *r; + struct pci_check_idx_range idx_range[] = { + { PCI_STD_RESOURCES, PCI_STD_RESOURCE_END }, +#ifdef CONFIG_PCI_IOV + { PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END }, +#endif + }; + for_each_pci_dev(dev) { pci_read_config_word(dev, PCI_COMMAND, &command); - for (idx = 0; idx < PCI_ROM_RESOURCE; idx++) { + for (i = 0; i < ARRAY_SIZE(idx_range); i++) + for (idx = idx_range[i].start; idx <= idx_range[i].end; idx++) { r = &dev->resource[idx]; if (r->parent) /* Already allocated */ continue; @@ -164,12 +179,12 @@ static void __init pcibios_allocate_resources(int pass) else disabled = !(command & PCI_COMMAND_MEMORY); if (pass == disabled) { - dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n", - (unsigned long long) r->start, - (unsigned long long) r->end, - r->flags, disabled, pass); + dev_dbg(&dev->dev, + "BAR %d: reserving %pr (d=%d, p=%d)\n", + idx, r, disabled, pass); if (pci_claim_resource(dev, idx) < 0) { - dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); + dev_info(&dev->dev, + "can't reserve %pR\n", r); /* We'll assign a new address later */ r->end -= r->start; r->start = 0; @@ -182,7 +197,7 @@ static void __init pcibios_allocate_resources(int pass) /* Turn the ROM off, leave the resource region, * but keep it unregistered. */ u32 reg; - dev_dbg(&dev->dev, "disabling ROM\n"); + dev_dbg(&dev->dev, "disabling ROM %pR\n", r); r->flags &= ~IORESOURCE_ROM_ENABLE; pci_read_config_dword(dev, dev->rom_base_reg, ®); @@ -282,6 +297,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return -EINVAL; prot = pgprot_val(vma->vm_page_prot); + + /* + * Return error if pat is not enabled and write_combine is requested. + * Caller can followup with UC MINUS request and add a WC mtrr if there + * is a free mtrr slot. + */ + if (!pat_enabled && write_combine) + return -EINVAL; + if (pat_enabled && write_combine) prot |= _PAGE_CACHE_WC; else if (pat_enabled || boot_cpu_data.x86 > 3) diff --git a/arch/x86/pci/intel_bus.c b/arch/x86/pci/intel_bus.c new file mode 100644 index 0000000..b7a55dc --- /dev/null +++ b/arch/x86/pci/intel_bus.c @@ -0,0 +1,90 @@ +/* + * to read io range from IOH pci conf, need to do it after mmconfig is there + */ + +#include <linux/delay.h> +#include <linux/dmi.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <asm/pci_x86.h> + +#include "bus_numa.h" + +static inline void print_ioh_resources(struct pci_root_info *info) +{ + int res_num; + int busnum; + int i; + + printk(KERN_DEBUG "IOH bus: [%02x, %02x]\n", + info->bus_min, info->bus_max); + res_num = info->res_num; + busnum = info->bus_min; + for (i = 0; i < res_num; i++) { + struct resource *res; + + res = &info->res[i]; + printk(KERN_DEBUG "IOH bus: %02x index %x %s: [%llx, %llx]\n", + busnum, i, + (res->flags & IORESOURCE_IO) ? "io port" : + "mmio", + res->start, res->end); + } +} + +#define IOH_LIO 0x108 +#define IOH_LMMIOL 0x10c +#define IOH_LMMIOH 0x110 +#define IOH_LMMIOH_BASEU 0x114 +#define IOH_LMMIOH_LIMITU 0x118 +#define IOH_LCFGBUS 0x11c + +static void __devinit pci_root_bus_res(struct pci_dev *dev) +{ + u16 word; + u32 dword; + struct pci_root_info *info; + u16 io_base, io_end; + u32 mmiol_base, mmiol_end; + u64 mmioh_base, mmioh_end; + int bus_base, bus_end; + + if (pci_root_num >= PCI_ROOT_NR) { + printk(KERN_DEBUG "intel_bus.c: PCI_ROOT_NR is too small\n"); + return; + } + + info = &pci_root_info[pci_root_num]; + pci_root_num++; + + pci_read_config_word(dev, IOH_LCFGBUS, &word); + bus_base = (word & 0xff); + bus_end = (word & 0xff00) >> 8; + sprintf(info->name, "PCI Bus #%02x", bus_base); + info->bus_min = bus_base; + info->bus_max = bus_end; + + pci_read_config_word(dev, IOH_LIO, &word); + io_base = (word & 0xf0) << (12 - 4); + io_end = (word & 0xf000) | 0xfff; + update_res(info, io_base, io_end, IORESOURCE_IO, 0); + + pci_read_config_dword(dev, IOH_LMMIOL, &dword); + mmiol_base = (dword & 0xff00) << (24 - 8); + mmiol_end = (dword & 0xff000000) | 0xffffff; + update_res(info, mmiol_base, mmiol_end, IORESOURCE_MEM, 0); + + pci_read_config_dword(dev, IOH_LMMIOH, &dword); + mmioh_base = ((u64)(dword & 0xfc00)) << (26 - 10); + mmioh_end = ((u64)(dword & 0xfc000000) | 0x3ffffff); + pci_read_config_dword(dev, IOH_LMMIOH_BASEU, &dword); + mmioh_base |= ((u64)(dword & 0x7ffff)) << 32; + pci_read_config_dword(dev, IOH_LMMIOH_LIMITU, &dword); + mmioh_end |= ((u64)(dword & 0x7ffff)) << 32; + update_res(info, mmioh_base, mmioh_end, IORESOURCE_MEM, 0); + + print_ioh_resources(info); +} + +/* intel IOH */ +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, pci_root_bus_res); diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 602c172d..b19d1e5 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c @@ -15,48 +15,98 @@ #include <linux/acpi.h> #include <linux/sfi_acpi.h> #include <linux/bitmap.h> -#include <linux/sort.h> +#include <linux/dmi.h> #include <asm/e820.h> #include <asm/pci_x86.h> #include <asm/acpi.h> #define PREFIX "PCI: " -/* aperture is up to 256MB but BIOS may reserve less */ -#define MMCONFIG_APER_MIN (2 * 1024*1024) -#define MMCONFIG_APER_MAX (256 * 1024*1024) - /* Indicate if the mmcfg resources have been placed into the resource table. */ static int __initdata pci_mmcfg_resources_inserted; -static __init int extend_mmcfg(int num) +LIST_HEAD(pci_mmcfg_list); + +static __init void pci_mmconfig_remove(struct pci_mmcfg_region *cfg) { - struct acpi_mcfg_allocation *new; - int new_num = pci_mmcfg_config_num + num; + if (cfg->res.parent) + release_resource(&cfg->res); + list_del(&cfg->list); + kfree(cfg); +} - new = kzalloc(sizeof(pci_mmcfg_config[0]) * new_num, GFP_KERNEL); - if (!new) - return -1; +static __init void free_all_mmcfg(void) +{ + struct pci_mmcfg_region *cfg, *tmp; - if (pci_mmcfg_config) { - memcpy(new, pci_mmcfg_config, - sizeof(pci_mmcfg_config[0]) * new_num); - kfree(pci_mmcfg_config); + pci_mmcfg_arch_free(); + list_for_each_entry_safe(cfg, tmp, &pci_mmcfg_list, list) + pci_mmconfig_remove(cfg); +} + +static __init void list_add_sorted(struct pci_mmcfg_region *new) +{ + struct pci_mmcfg_region *cfg; + + /* keep list sorted by segment and starting bus number */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->segment > new->segment || + (cfg->segment == new->segment && + cfg->start_bus >= new->start_bus)) { + list_add_tail(&new->list, &cfg->list); + return; + } } - pci_mmcfg_config = new; + list_add_tail(&new->list, &pci_mmcfg_list); +} - return 0; +static __init struct pci_mmcfg_region *pci_mmconfig_add(int segment, int start, + int end, u64 addr) +{ + struct pci_mmcfg_region *new; + int num_buses; + struct resource *res; + + if (addr == 0) + return NULL; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + new->address = addr; + new->segment = segment; + new->start_bus = start; + new->end_bus = end; + + list_add_sorted(new); + + num_buses = end - start + 1; + res = &new->res; + res->start = addr + PCI_MMCFG_BUS_OFFSET(start); + res->end = addr + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + snprintf(new->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", segment, start, end); + res->name = new->name; + + printk(KERN_INFO PREFIX "MMCONFIG for domain %04x [bus %02x-%02x] at " + "%pR (base %#lx)\n", segment, start, end, &new->res, + (unsigned long) addr); + + return new; } -static __init void fill_one_mmcfg(u64 addr, int segment, int start, int end) +struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus) { - int i = pci_mmcfg_config_num; + struct pci_mmcfg_region *cfg; - pci_mmcfg_config_num++; - pci_mmcfg_config[i].address = addr; - pci_mmcfg_config[i].pci_segment = segment; - pci_mmcfg_config[i].start_bus_number = start; - pci_mmcfg_config[i].end_bus_number = end; + list_for_each_entry(cfg, &pci_mmcfg_list, list) + if (cfg->segment == segment && + cfg->start_bus <= bus && bus <= cfg->end_bus) + return cfg; + + return NULL; } static const char __init *pci_mmcfg_e7520(void) @@ -68,11 +118,9 @@ static const char __init *pci_mmcfg_e7520(void) if (win == 0x0000 || win == 0xf000) return NULL; - if (extend_mmcfg(1) == -1) + if (pci_mmconfig_add(0, 0, 255, win << 16) == NULL) return NULL; - fill_one_mmcfg(win << 16, 0, 0, 255); - return "Intel Corporation E7520 Memory Controller Hub"; } @@ -114,11 +162,9 @@ static const char __init *pci_mmcfg_intel_945(void) if ((pciexbar & mask) >= 0xf0000000U) return NULL; - if (extend_mmcfg(1) == -1) + if (pci_mmconfig_add(0, 0, (len >> 20) - 1, pciexbar & mask) == NULL) return NULL; - fill_one_mmcfg(pciexbar & mask, 0, 0, (len >> 20) - 1); - return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; } @@ -127,7 +173,7 @@ static const char __init *pci_mmcfg_amd_fam10h(void) u32 low, high, address; u64 base, msr; int i; - unsigned segnbits = 0, busnbits; + unsigned segnbits = 0, busnbits, end_bus; if (!(pci_probe & PCI_CHECK_ENABLE_AMD_MMCONF)) return NULL; @@ -161,11 +207,13 @@ static const char __init *pci_mmcfg_amd_fam10h(void) busnbits = 8; } - if (extend_mmcfg(1 << segnbits) == -1) - return NULL; - + end_bus = (1 << busnbits) - 1; for (i = 0; i < (1 << segnbits); i++) - fill_one_mmcfg(base + (1<<28) * i, i, 0, (1 << busnbits) - 1); + if (pci_mmconfig_add(i, 0, end_bus, + base + (1<<28) * i) == NULL) { + free_all_mmcfg(); + return NULL; + } return "AMD Family 10h NB"; } @@ -190,7 +238,7 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) /* * do check if amd fam10h already took over */ - if (!acpi_disabled || pci_mmcfg_config_num || mcp55_checked) + if (!acpi_disabled || !list_empty(&pci_mmcfg_list) || mcp55_checked) return NULL; mcp55_checked = true; @@ -213,16 +261,14 @@ static const char __init *pci_mmcfg_nvidia_mcp55(void) if (!(extcfg & extcfg_enable_mask)) continue; - if (extend_mmcfg(1) == -1) - continue; - size_index = (extcfg & extcfg_size_mask) >> extcfg_size_shift; base = extcfg & extcfg_base_mask[size_index]; /* base could > 4G */ base <<= extcfg_base_lshift; start = (extcfg & extcfg_start_mask) >> extcfg_start_shift; end = start + extcfg_sizebus[size_index] - 1; - fill_one_mmcfg(base, 0, start, end); + if (pci_mmconfig_add(0, start, end, base) == NULL) + continue; mcp55_mmconf_found++; } @@ -253,45 +299,27 @@ static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { 0x0369, pci_mmcfg_nvidia_mcp55 }, }; -static int __init cmp_mmcfg(const void *x1, const void *x2) -{ - const typeof(pci_mmcfg_config[0]) *m1 = x1; - const typeof(pci_mmcfg_config[0]) *m2 = x2; - int start1, start2; - - start1 = m1->start_bus_number; - start2 = m2->start_bus_number; - - return start1 - start2; -} - static void __init pci_mmcfg_check_end_bus_number(void) { - int i; - typeof(pci_mmcfg_config[0]) *cfg, *cfgx; - - /* sort them at first */ - sort(pci_mmcfg_config, pci_mmcfg_config_num, - sizeof(pci_mmcfg_config[0]), cmp_mmcfg, NULL); + struct pci_mmcfg_region *cfg, *cfgx; /* last one*/ - if (pci_mmcfg_config_num > 0) { - i = pci_mmcfg_config_num - 1; - cfg = &pci_mmcfg_config[i]; - if (cfg->end_bus_number < cfg->start_bus_number) - cfg->end_bus_number = 255; - } + cfg = list_entry(pci_mmcfg_list.prev, typeof(*cfg), list); + if (cfg) + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; - /* don't overlap please */ - for (i = 0; i < pci_mmcfg_config_num - 1; i++) { - cfg = &pci_mmcfg_config[i]; - cfgx = &pci_mmcfg_config[i+1]; + if (list_is_singular(&pci_mmcfg_list)) + return; - if (cfg->end_bus_number < cfg->start_bus_number) - cfg->end_bus_number = 255; + /* don't overlap please */ + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->end_bus < cfg->start_bus) + cfg->end_bus = 255; - if (cfg->end_bus_number >= cfgx->start_bus_number) - cfg->end_bus_number = cfgx->start_bus_number - 1; + cfgx = list_entry(cfg->list.next, typeof(*cfg), list); + if (cfg != cfgx && cfg->end_bus >= cfgx->start_bus) + cfg->end_bus = cfgx->start_bus - 1; } } @@ -306,8 +334,7 @@ static int __init pci_mmcfg_check_hostbridge(void) if (!raw_pci_ops) return 0; - pci_mmcfg_config_num = 0; - pci_mmcfg_config = NULL; + free_all_mmcfg(); for (i = 0; i < ARRAY_SIZE(pci_mmcfg_probes); i++) { bus = pci_mmcfg_probes[i].bus; @@ -322,45 +349,22 @@ static int __init pci_mmcfg_check_hostbridge(void) name = pci_mmcfg_probes[i].probe(); if (name) - printk(KERN_INFO "PCI: Found %s with MMCONFIG support.\n", + printk(KERN_INFO PREFIX "%s with MMCONFIG support\n", name); } /* some end_bus_number is crazy, fix it */ pci_mmcfg_check_end_bus_number(); - return pci_mmcfg_config_num != 0; + return !list_empty(&pci_mmcfg_list); } static void __init pci_mmcfg_insert_resources(void) { -#define PCI_MMCFG_RESOURCE_NAME_LEN 24 - int i; - struct resource *res; - char *names; - unsigned num_buses; - - res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), - pci_mmcfg_config_num, GFP_KERNEL); - if (!res) { - printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); - return; - } + struct pci_mmcfg_region *cfg; - names = (void *)&res[pci_mmcfg_config_num]; - for (i = 0; i < pci_mmcfg_config_num; i++, res++) { - struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; - num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; - res->name = names; - snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, - "PCI MMCONFIG %u [%02x-%02x]", cfg->pci_segment, - cfg->start_bus_number, cfg->end_bus_number); - res->start = cfg->address + (cfg->start_bus_number << 20); - res->end = res->start + (num_buses << 20) - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - insert_resource(&iomem_resource, res); - names += PCI_MMCFG_RESOURCE_NAME_LEN; - } + list_for_each_entry(cfg, &pci_mmcfg_list, list) + insert_resource(&iomem_resource, &cfg->res); /* Mark that the resources have been inserted. */ pci_mmcfg_resources_inserted = 1; @@ -437,11 +441,12 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); static int __init is_mmconf_reserved(check_reserved_t is_reserved, - u64 addr, u64 size, int i, - typeof(pci_mmcfg_config[0]) *cfg, int with_e820) + struct pci_mmcfg_region *cfg, int with_e820) { + u64 addr = cfg->res.start; + u64 size = resource_size(&cfg->res); u64 old_size = size; - int valid = 0; + int valid = 0, num_buses; while (!is_reserved(addr, addr + size, E820_RESERVED)) { size >>= 1; @@ -450,19 +455,25 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, } if (size >= (16UL<<20) || size == old_size) { - printk(KERN_NOTICE - "PCI: MCFG area at %Lx reserved in %s\n", - addr, with_e820?"E820":"ACPI motherboard resources"); + printk(KERN_INFO PREFIX "MMCONFIG at %pR reserved in %s\n", + &cfg->res, + with_e820 ? "E820" : "ACPI motherboard resources"); valid = 1; if (old_size != size) { - /* update end_bus_number */ - cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1); - printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " - "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->pci_segment, - (unsigned int)cfg->start_bus_number, - (unsigned int)cfg->end_bus_number); + /* update end_bus */ + cfg->end_bus = cfg->start_bus + ((size>>20) - 1); + num_buses = cfg->end_bus - cfg->start_bus + 1; + cfg->res.end = cfg->res.start + + PCI_MMCFG_BUS_OFFSET(num_buses) - 1; + snprintf(cfg->name, PCI_MMCFG_RESOURCE_NAME_LEN, + "PCI MMCONFIG %04x [bus %02x-%02x]", + cfg->segment, cfg->start_bus, cfg->end_bus); + printk(KERN_INFO PREFIX + "MMCONFIG for %04x [bus%02x-%02x] " + "at %pR (base %#lx) (size reduced!)\n", + cfg->segment, cfg->start_bus, cfg->end_bus, + &cfg->res, (unsigned long) cfg->address); } } @@ -471,45 +482,26 @@ static int __init is_mmconf_reserved(check_reserved_t is_reserved, static void __init pci_mmcfg_reject_broken(int early) { - typeof(pci_mmcfg_config[0]) *cfg; - int i; + struct pci_mmcfg_region *cfg; - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) - return; - - for (i = 0; i < pci_mmcfg_config_num; i++) { + list_for_each_entry(cfg, &pci_mmcfg_list, list) { int valid = 0; - u64 addr, size; - - cfg = &pci_mmcfg_config[i]; - addr = cfg->start_bus_number; - addr <<= 20; - addr += cfg->address; - size = cfg->end_bus_number + 1 - cfg->start_bus_number; - size <<= 20; - printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " - "segment %hu buses %u - %u\n", - i, (unsigned long)cfg->address, cfg->pci_segment, - (unsigned int)cfg->start_bus_number, - (unsigned int)cfg->end_bus_number); if (!early && !acpi_disabled) - valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); + valid = is_mmconf_reserved(is_acpi_reserved, cfg, 0); if (valid) continue; if (!early) - printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" - " reserved in ACPI motherboard resources\n", - cfg->address); + printk(KERN_ERR FW_BUG PREFIX + "MMCONFIG at %pR not reserved in " + "ACPI motherboard resources\n", &cfg->res); /* Don't try to do this check unless configuration type 1 is available. how about type 2 ?*/ if (raw_pci_ops) - valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1); + valid = is_mmconf_reserved(e820_all_mapped, cfg, 1); if (!valid) goto reject; @@ -518,34 +510,41 @@ static void __init pci_mmcfg_reject_broken(int early) return; reject: - printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); - pci_mmcfg_arch_free(); - kfree(pci_mmcfg_config); - pci_mmcfg_config = NULL; - pci_mmcfg_config_num = 0; + printk(KERN_INFO PREFIX "not using MMCONFIG\n"); + free_all_mmcfg(); } static int __initdata known_bridge; -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; +static int __init acpi_mcfg_check_entry(struct acpi_table_mcfg *mcfg, + struct acpi_mcfg_allocation *cfg) +{ + int year; -/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ -struct acpi_mcfg_allocation *pci_mmcfg_config; -int pci_mmcfg_config_num; + if (cfg->address < 0xFFFFFFFF) + return 0; -static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) -{ if (!strcmp(mcfg->header.oem_id, "SGI")) - acpi_mcfg_64bit_base_addr = TRUE; + return 0; - return 0; + if (mcfg->header.revision >= 1) { + if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && + year >= 2010) + return 0; + } + + printk(KERN_ERR PREFIX "MCFG region for %04x [bus %02x-%02x] at %#llx " + "is above 4GB, ignored\n", cfg->pci_segment, + cfg->start_bus_number, cfg->end_bus_number, cfg->address); + return -EINVAL; } static int __init pci_parse_mcfg(struct acpi_table_header *header) { struct acpi_table_mcfg *mcfg; + struct acpi_mcfg_allocation *cfg_table, *cfg; unsigned long i; - int config_size; + int entries; if (!header) return -EINVAL; @@ -553,38 +552,33 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header) mcfg = (struct acpi_table_mcfg *)header; /* how many config structures do we have */ - pci_mmcfg_config_num = 0; + free_all_mmcfg(); + entries = 0; i = header->length - sizeof(struct acpi_table_mcfg); while (i >= sizeof(struct acpi_mcfg_allocation)) { - ++pci_mmcfg_config_num; + entries++; i -= sizeof(struct acpi_mcfg_allocation); }; - if (pci_mmcfg_config_num == 0) { + if (entries == 0) { printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); return -ENODEV; } - config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); - pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); - if (!pci_mmcfg_config) { - printk(KERN_WARNING PREFIX - "No memory for MCFG config tables\n"); - return -ENOMEM; - } - - memcpy(pci_mmcfg_config, &mcfg[1], config_size); - - acpi_mcfg_oem_check(mcfg); - - for (i = 0; i < pci_mmcfg_config_num; ++i) { - if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && - !acpi_mcfg_64bit_base_addr) { - printk(KERN_ERR PREFIX - "MMCONFIG not in low 4GB of memory\n"); - kfree(pci_mmcfg_config); - pci_mmcfg_config_num = 0; + cfg_table = (struct acpi_mcfg_allocation *) &mcfg[1]; + for (i = 0; i < entries; i++) { + cfg = &cfg_table[i]; + if (acpi_mcfg_check_entry(mcfg, cfg)) { + free_all_mmcfg(); return -ENODEV; } + + if (pci_mmconfig_add(cfg->pci_segment, cfg->start_bus_number, + cfg->end_bus_number, cfg->address) == NULL) { + printk(KERN_WARNING PREFIX + "no memory for MCFG entries\n"); + free_all_mmcfg(); + return -ENOMEM; + } } return 0; @@ -614,9 +608,7 @@ static void __init __pci_mmcfg_init(int early) pci_mmcfg_reject_broken(early); - if ((pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) + if (list_empty(&pci_mmcfg_list)) return; if (pci_mmcfg_arch_init()) @@ -648,9 +640,7 @@ static int __init pci_mmcfg_late_insert_resources(void) */ if ((pci_mmcfg_resources_inserted == 1) || (pci_probe & PCI_PROBE_MMCONF) == 0 || - (pci_mmcfg_config_num == 0) || - (pci_mmcfg_config == NULL) || - (pci_mmcfg_config[0].address == 0)) + list_empty(&pci_mmcfg_list)) return 1; /* diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f10a7e9..90d5fd47 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c @@ -27,18 +27,10 @@ static int mmcfg_last_accessed_cpu; */ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) { - struct acpi_mcfg_allocation *cfg; - int cfg_num; - - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = &pci_mmcfg_config[cfg_num]; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) - return cfg->address; - } + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); - /* Fall back to type 0 */ + if (cfg) + return cfg->address; return 0; } @@ -47,7 +39,7 @@ static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) */ static void pci_exp_set_dev_base(unsigned int base, int bus, int devfn) { - u32 dev_base = base | (bus << 20) | (devfn << 12); + u32 dev_base = base | PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12); int cpu = smp_processor_id(); if (dev_base != mmcfg_last_accessed_device || cpu != mmcfg_last_accessed_cpu) { diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index 94349f8..e783841 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c @@ -12,38 +12,15 @@ #include <asm/e820.h> #include <asm/pci_x86.h> -/* Static virtual mapping of the MMCONFIG aperture */ -struct mmcfg_virt { - struct acpi_mcfg_allocation *cfg; - char __iomem *virt; -}; -static struct mmcfg_virt *pci_mmcfg_virt; - -static char __iomem *get_virt(unsigned int seg, unsigned bus) -{ - struct acpi_mcfg_allocation *cfg; - int cfg_num; - - for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { - cfg = pci_mmcfg_virt[cfg_num].cfg; - if (cfg->pci_segment == seg && - (cfg->start_bus_number <= bus) && - (cfg->end_bus_number >= bus)) - return pci_mmcfg_virt[cfg_num].virt; - } - - /* Fall back to type 0 */ - return NULL; -} +#define PREFIX "PCI: " static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { - char __iomem *addr; + struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus); - addr = get_virt(seg, bus); - if (!addr) - return NULL; - return addr + ((bus << 20) | (devfn << 12)); + if (cfg && cfg->virt) + return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12)); + return NULL; } static int pci_mmcfg_read(unsigned int seg, unsigned int bus, @@ -109,42 +86,30 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; -static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) +static void __iomem * __init mcfg_ioremap(struct pci_mmcfg_region *cfg) { void __iomem *addr; u64 start, size; + int num_buses; - start = cfg->start_bus_number; - start <<= 20; - start += cfg->address; - size = cfg->end_bus_number + 1 - cfg->start_bus_number; - size <<= 20; + start = cfg->address + PCI_MMCFG_BUS_OFFSET(cfg->start_bus); + num_buses = cfg->end_bus - cfg->start_bus + 1; + size = PCI_MMCFG_BUS_OFFSET(num_buses); addr = ioremap_nocache(start, size); - if (addr) { - printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", - start, start + size - 1); - addr -= cfg->start_bus_number << 20; - } + if (addr) + addr -= PCI_MMCFG_BUS_OFFSET(cfg->start_bus); return addr; } int __init pci_mmcfg_arch_init(void) { - int i; - pci_mmcfg_virt = kzalloc(sizeof(*pci_mmcfg_virt) * - pci_mmcfg_config_num, GFP_KERNEL); - if (pci_mmcfg_virt == NULL) { - printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); - return 0; - } + struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; - pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]); - if (!pci_mmcfg_virt[i].virt) { - printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " - "segment %d\n", - pci_mmcfg_config[i].pci_segment); + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + cfg->virt = mcfg_ioremap(cfg); + if (!cfg->virt) { + printk(KERN_ERR PREFIX "can't map MMCONFIG at %pR\n", + &cfg->res); pci_mmcfg_arch_free(); return 0; } @@ -155,19 +120,12 @@ int __init pci_mmcfg_arch_init(void) void __init pci_mmcfg_arch_free(void) { - int i; - - if (pci_mmcfg_virt == NULL) - return; + struct pci_mmcfg_region *cfg; - for (i = 0; i < pci_mmcfg_config_num; ++i) { - if (pci_mmcfg_virt[i].virt) { - iounmap(pci_mmcfg_virt[i].virt + (pci_mmcfg_virt[i].cfg->start_bus_number << 20)); - pci_mmcfg_virt[i].virt = NULL; - pci_mmcfg_virt[i].cfg = NULL; + list_for_each_entry(cfg, &pci_mmcfg_list, list) { + if (cfg->virt) { + iounmap(cfg->virt + PCI_MMCFG_BUS_OFFSET(cfg->start_bus)); + cfg->virt = NULL; } } - - kfree(pci_mmcfg_virt); - pci_mmcfg_virt = NULL; } diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c index d8214dc..bee8d6a 100644 --- a/arch/x86/tools/test_get_len.c +++ b/arch/x86/tools/test_get_len.c @@ -113,7 +113,7 @@ int main(int argc, char **argv) char line[BUFSIZE], sym[BUFSIZE] = "<unknown>"; unsigned char insn_buf[16]; struct insn insn; - int insns = 0, c; + int insns = 0; int warnings = 0; parse_args(argc, argv); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index c462cea..2b26dd5 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -27,7 +27,9 @@ #include <linux/page-flags.h> #include <linux/highmem.h> #include <linux/console.h> +#include <linux/pci.h> +#include <xen/xen.h> #include <xen/interface/xen.h> #include <xen/interface/version.h> #include <xen/interface/physdev.h> @@ -138,24 +140,23 @@ static void xen_vcpu_setup(int cpu) */ void xen_vcpu_restore(void) { - if (have_vcpu_info_placement) { - int cpu; + int cpu; - for_each_online_cpu(cpu) { - bool other_cpu = (cpu != smp_processor_id()); + for_each_online_cpu(cpu) { + bool other_cpu = (cpu != smp_processor_id()); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) - BUG(); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL)) + BUG(); - xen_vcpu_setup(cpu); + xen_setup_runstate_info(cpu); - if (other_cpu && - HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) - BUG(); - } + if (have_vcpu_info_placement) + xen_vcpu_setup(cpu); - BUG_ON(!have_vcpu_info_placement); + if (other_cpu && + HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL)) + BUG(); } } @@ -1176,10 +1177,16 @@ asmlinkage void __init xen_start_kernel(void) add_preferred_console("xenboot", 0, NULL); add_preferred_console("tty", 0, NULL); add_preferred_console("hvc", 0, NULL); + } else { + /* Make sure ACS will be enabled */ + pci_request_acs(); } + xen_raw_console_write("about to get started...\n"); + xen_setup_runstate_info(0); + /* Start the world */ #ifdef CONFIG_X86_32 i386_start_kernel(); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3bf7b1d..bf4cd6b 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -185,7 +185,7 @@ static inline unsigned p2m_index(unsigned long pfn) } /* Build the parallel p2m_top_mfn structures */ -static void __init xen_build_mfn_list_list(void) +void xen_build_mfn_list_list(void) { unsigned pfn, idx; diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 738da0c..563d205 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -35,10 +35,10 @@ cpumask_var_t xen_cpu_initialized_map; -static DEFINE_PER_CPU(int, resched_irq); -static DEFINE_PER_CPU(int, callfunc_irq); -static DEFINE_PER_CPU(int, callfuncsingle_irq); -static DEFINE_PER_CPU(int, debug_irq) = -1; +static DEFINE_PER_CPU(int, xen_resched_irq); +static DEFINE_PER_CPU(int, xen_callfunc_irq); +static DEFINE_PER_CPU(int, xen_callfuncsingle_irq); +static DEFINE_PER_CPU(int, xen_debug_irq) = -1; static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); @@ -103,7 +103,7 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(resched_irq, cpu) = rc; + per_cpu(xen_resched_irq, cpu) = rc; callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu); rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR, @@ -114,7 +114,7 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(callfunc_irq, cpu) = rc; + per_cpu(xen_callfunc_irq, cpu) = rc; debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu); rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt, @@ -122,7 +122,7 @@ static int xen_smp_intr_init(unsigned int cpu) debug_name, NULL); if (rc < 0) goto fail; - per_cpu(debug_irq, cpu) = rc; + per_cpu(xen_debug_irq, cpu) = rc; callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, @@ -133,19 +133,20 @@ static int xen_smp_intr_init(unsigned int cpu) NULL); if (rc < 0) goto fail; - per_cpu(callfuncsingle_irq, cpu) = rc; + per_cpu(xen_callfuncsingle_irq, cpu) = rc; return 0; fail: - if (per_cpu(resched_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); - if (per_cpu(callfunc_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); - if (per_cpu(debug_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); - if (per_cpu(callfuncsingle_irq, cpu) >= 0) - unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); + if (per_cpu(xen_resched_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); + if (per_cpu(xen_callfunc_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); + if (per_cpu(xen_debug_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); + if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), + NULL); return rc; } @@ -295,6 +296,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) (unsigned long)task_stack_page(idle) - KERNEL_STACK_OFFSET + THREAD_SIZE; #endif + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_init_lock_cpu(cpu); @@ -348,10 +350,10 @@ static void xen_cpu_die(unsigned int cpu) current->state = TASK_UNINTERRUPTIBLE; schedule_timeout(HZ/10); } - unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); - unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); xen_uninit_lock_cpu(cpu); xen_teardown_timer(cpu); diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 95be7b4..987267f 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -1,4 +1,5 @@ #include <linux/types.h> +#include <linux/clockchips.h> #include <xen/interface/xen.h> #include <xen/grant_table.h> @@ -27,6 +28,8 @@ void xen_pre_suspend(void) void xen_post_suspend(int suspend_cancelled) { + xen_build_mfn_list_list(); + xen_setup_shared_info(); if (suspend_cancelled) { @@ -44,7 +47,19 @@ void xen_post_suspend(int suspend_cancelled) } +static void xen_vcpu_notify_restore(void *data) +{ + unsigned long reason = (unsigned long)data; + + /* Boot processor notified via generic timekeeping_resume() */ + if ( smp_processor_id() == 0) + return; + + clockevents_notify(reason, NULL); +} + void xen_arch_resume(void) { - /* nothing */ + smp_call_function(xen_vcpu_notify_restore, + (void *)CLOCK_EVT_NOTIFY_RESUME, 1); } diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 0a5aa44..0d3f07c 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -31,14 +31,14 @@ #define NS_PER_TICK (1000000000LL / HZ) /* runstate info updated by Xen */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); /* snapshots of runstate info */ -static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot); +static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate_snapshot); /* unused ns of stolen and blocked time */ -static DEFINE_PER_CPU(u64, residual_stolen); -static DEFINE_PER_CPU(u64, residual_blocked); +static DEFINE_PER_CPU(u64, xen_residual_stolen); +static DEFINE_PER_CPU(u64, xen_residual_blocked); /* return an consistent snapshot of 64-bit time/counter value */ static u64 get64(const u64 *p) @@ -79,7 +79,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) BUG_ON(preemptible()); - state = &__get_cpu_var(runstate); + state = &__get_cpu_var(xen_runstate); /* * The runstate info is always updated by the hypervisor on @@ -97,14 +97,14 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res) /* return true when a vcpu could run but has no real cpu to run on */ bool xen_vcpu_stolen(int vcpu) { - return per_cpu(runstate, vcpu).state == RUNSTATE_runnable; + return per_cpu(xen_runstate, vcpu).state == RUNSTATE_runnable; } -static void setup_runstate_info(int cpu) +void xen_setup_runstate_info(int cpu) { struct vcpu_register_runstate_memory_area area; - area.addr.v = &per_cpu(runstate, cpu); + area.addr.v = &per_cpu(xen_runstate, cpu); if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area)) @@ -122,7 +122,7 @@ static void do_stolen_accounting(void) WARN_ON(state.state != RUNSTATE_running); - snap = &__get_cpu_var(runstate_snapshot); + snap = &__get_cpu_var(xen_runstate_snapshot); /* work out how much time the VCPU has not been runn*ing* */ blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked]; @@ -133,24 +133,24 @@ static void do_stolen_accounting(void) /* Add the appropriate number of ticks of stolen time, including any left-overs from last time. */ - stolen = runnable + offline + __get_cpu_var(residual_stolen); + stolen = runnable + offline + __get_cpu_var(xen_residual_stolen); if (stolen < 0) stolen = 0; ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); - __get_cpu_var(residual_stolen) = stolen; + __get_cpu_var(xen_residual_stolen) = stolen; account_steal_ticks(ticks); /* Add the appropriate number of ticks of blocked time, including any left-overs from last time. */ - blocked += __get_cpu_var(residual_blocked); + blocked += __get_cpu_var(xen_residual_blocked); if (blocked < 0) blocked = 0; ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); - __get_cpu_var(residual_blocked) = blocked; + __get_cpu_var(xen_residual_blocked) = blocked; account_idle_ticks(ticks); } @@ -434,7 +434,7 @@ void xen_setup_timer(int cpu) name = "<timer kasprintf failed>"; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, name, NULL); evt = &per_cpu(xen_clock_events, cpu); @@ -442,8 +442,6 @@ void xen_setup_timer(int cpu) evt->cpumask = cpumask_of(cpu); evt->irq = irq; - - setup_runstate_info(cpu); } void xen_teardown_timer(int cpu) @@ -494,6 +492,7 @@ __init void xen_time_init(void) setup_force_cpu_cap(X86_FEATURE_TSC); + xen_setup_runstate_info(cpu); xen_setup_timer(cpu); xen_setup_cpu_clockevents(); } diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S index 02f496a..53adefd 100644 --- a/arch/x86/xen/xen-asm_64.S +++ b/arch/x86/xen/xen-asm_64.S @@ -96,7 +96,7 @@ ENTRY(xen_sysret32) pushq $__USER32_CS pushq %rcx - pushq $VGCF_in_syscall + pushq $0 1: jmp hypercall_iret ENDPATCH(xen_sysret32) RELOC(xen_sysret32, 1b+1) @@ -151,7 +151,7 @@ ENTRY(xen_syscall32_target) ENTRY(xen_sysenter_target) lea 16(%rsp), %rsp /* strip %rcx, %r11 */ mov $-ENOSYS, %rax - pushq $VGCF_in_syscall + pushq $0 jmp hypercall_iret ENDPROC(xen_syscall32_target) ENDPROC(xen_sysenter_target) diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 355fa6b..f9153a3 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -25,6 +25,7 @@ extern struct shared_info *HYPERVISOR_shared_info; void xen_setup_mfn_list_list(void); void xen_setup_shared_info(void); +void xen_build_mfn_list_list(void); void xen_setup_machphys_mapping(void); pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); void xen_ident_map_ISA(void); @@ -41,6 +42,7 @@ void __init xen_build_dynamic_phys_to_machine(void); void xen_init_irq_ops(void); void xen_setup_timer(int cpu); +void xen_setup_runstate_info(int cpu); void xen_teardown_timer(int cpu); cycle_t xen_clocksource_read(void); void xen_setup_cpu_clockevents(void); |