diff options
author | Len Brown <len.brown@intel.com> | 2006-01-07 03:50:18 -0500 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2006-01-07 03:50:18 -0500 |
commit | ed03f430cdc8c802652467e9097606fedc2c7abc (patch) | |
tree | 30941ec1e6f93e99358fefe18175e5dd800a4379 /arch/x86_64 | |
parent | ed349a8a0a780ed27e2a765f16cee54d9b63bfee (diff) | |
parent | 6f957eaf79356a32e838f5f262ee9a60544b1d5b (diff) | |
download | op-kernel-dev-ed03f430cdc8c802652467e9097606fedc2c7abc.zip op-kernel-dev-ed03f430cdc8c802652467e9097606fedc2c7abc.tar.gz |
Pull pnpacpi into acpica branch
Diffstat (limited to 'arch/x86_64')
-rw-r--r-- | arch/x86_64/Kconfig.debug | 10 | ||||
-rw-r--r-- | arch/x86_64/boot/.gitignore | 3 | ||||
-rw-r--r-- | arch/x86_64/boot/tools/.gitignore | 1 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_binfmt.c | 3 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32entry.S | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/kprobes.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/process.c | 5 | ||||
-rw-r--r-- | arch/x86_64/kernel/setup.c | 6 | ||||
-rw-r--r-- | arch/x86_64/kernel/smpboot.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/syscall.c | 2 | ||||
-rw-r--r-- | arch/x86_64/kernel/time.c | 6 | ||||
-rw-r--r-- | arch/x86_64/mm/init.c | 25 | ||||
-rw-r--r-- | arch/x86_64/mm/ioremap.c | 37 | ||||
-rw-r--r-- | arch/x86_64/mm/numa.c | 4 | ||||
-rw-r--r-- | arch/x86_64/mm/pageattr.c | 9 | ||||
-rw-r--r-- | arch/x86_64/pci/Makefile | 2 | ||||
-rw-r--r-- | arch/x86_64/pci/mmconfig.c | 65 |
17 files changed, 151 insertions, 33 deletions
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index e2c6e64a..fcb06a5 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug @@ -9,6 +9,16 @@ config INIT_DEBUG Fill __init and __initdata at the end of boot. This helps debugging illegal uses of __init and __initdata after initialization. +config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + depends on DEBUG_KERNEL + help + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const data. + This option may have a slight performance impact because a portion + of the kernel code won't be covered by a 2MB TLB anymore. + If in doubt, say "N". + config IOMMU_DEBUG depends on GART_IOMMU && DEBUG_KERNEL bool "Enable IOMMU debugging" diff --git a/arch/x86_64/boot/.gitignore b/arch/x86_64/boot/.gitignore new file mode 100644 index 0000000..495f20c --- /dev/null +++ b/arch/x86_64/boot/.gitignore @@ -0,0 +1,3 @@ +bootsect +bzImage +setup diff --git a/arch/x86_64/boot/tools/.gitignore b/arch/x86_64/boot/tools/.gitignore new file mode 100644 index 0000000..378eac2 --- /dev/null +++ b/arch/x86_64/boot/tools/.gitignore @@ -0,0 +1 @@ +build diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 830feb2..2b760d0 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -217,8 +217,7 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr if (!tsk_used_math(tsk)) return 0; if (!regs) - regs = (struct pt_regs *)tsk->thread.rsp0; - --regs; + regs = ((struct pt_regs *)tsk->thread.rsp0) - 1; if (tsk == current) unlazy_fpu(tsk); set_fs(KERNEL_DS); diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index e0eb0c7..df0773c 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -341,7 +341,7 @@ ENTRY(ia32_ptregs_common) jmp ia32_sysret /* misbalances the return cache */ CFI_ENDPROC - .data + .section .rodata,"a" .align 8 .globl ia32_sys_call_table ia32_sys_call_table: diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index dddeb67..afe11f4 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -329,7 +329,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - p->nmissed++; + kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; return 1; diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7519fc5..3060ed9 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); DECLARE_PER_CPU(int, cpu_state); #include <asm/nmi.h> -/* We don't actually take CPU down, just spin without interrupts. */ +/* We halt the CPU with physical CPU hotplug */ static inline void play_dead(void) { idle_task_exit(); @@ -166,8 +166,9 @@ static inline void play_dead(void) /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; + local_irq_disable(); while (1) - safe_halt(); + halt(); } #else static inline void play_dead(void) diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 750e01d..64c4534 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -42,6 +42,7 @@ #include <linux/edd.h> #include <linux/mmzone.h> #include <linux/kexec.h> +#include <linux/cpufreq.h> #include <asm/mtrr.h> #include <asm/uaccess.h> @@ -1256,8 +1257,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { + unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 683c33f..ecbd7b8 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -1181,7 +1181,7 @@ int __cpu_disable(void) if (cpu == 0) return -EBUSY; - disable_APIC_timer(); + clear_local_APIC(); /* * HACK: diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c index e263685..7c176b3 100644 --- a/arch/x86_64/kernel/syscall.c +++ b/arch/x86_64/kernel/syscall.c @@ -19,7 +19,7 @@ typedef void (*sys_call_ptr_t)(void); extern void sys_ni_syscall(void); -sys_call_ptr_t sys_call_table[__NR_syscall_max+1] __cacheline_aligned = { +const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */ [0 ... __NR_syscall_max] = &sys_ni_syscall, #include <asm-x86_64/unistd.h> diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index fdaddc4..7410279 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -59,7 +59,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ -static int hpet_use_timer; +static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -908,12 +908,14 @@ void __init time_init(void) if (!hpet_init()) vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; + else + vxtime.hpet_address = 0; if (hpet_use_timer) { cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; #ifdef CONFIG_X86_PM_TIMER - } else if (pmtmr_ioport) { + } else if (pmtmr_ioport && !vxtime.hpet_address) { vxtime_hz = PM_TIMER_FREQUENCY; timename = "PM"; pit_init(); diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 286f6a6..1faae5f 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -348,7 +348,7 @@ size_zones(unsigned long *z, unsigned long *h, } /* Compute holes */ - w = 0; + w = start_pfn; for (i = 0; i < MAX_NR_ZONES; i++) { unsigned long s = w; w += z[i]; @@ -498,6 +498,29 @@ void free_initmem(void) printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); } +#ifdef CONFIG_DEBUG_RODATA + +extern char __start_rodata, __end_rodata; +void mark_rodata_ro(void) +{ + unsigned long addr = (unsigned long)&__start_rodata; + + for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE) + change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); + + printk ("Write protecting the kernel read-only data: %luk\n", + (&__end_rodata - &__start_rodata) >> 10); + + /* + * change_page_attr_addr() requires a global_flush_tlb() call after it. + * We do this after the printk so that if something went wrong in the + * change, the printk gets out at least to give a better debug hint + * of who is the culprit. + */ + global_flush_tlb(); +} +#endif + #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index ecf7acb..ae20706 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -247,9 +247,15 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) return __ioremap(phys_addr, size, _PAGE_PCD); } +/** + * iounmap - Free a IO remapping + * @addr: virtual address from ioremap_* + * + * Caller must ensure there is only one unmapping for the same pointer. + */ void iounmap(volatile void __iomem *addr) { - struct vm_struct *p; + struct vm_struct *p, *o; if (addr <= high_memory) return; @@ -257,12 +263,31 @@ void iounmap(volatile void __iomem *addr) addr < phys_to_virt(ISA_END_ADDRESS)) return; - write_lock(&vmlist_lock); - p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK)); - if (!p) + addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr); + /* Use the vm area unlocked, assuming the caller + ensures there isn't another iounmap for the same address + in parallel. Reuse of the virtual address is prevented by + leaving it in the global lists until we're done with it. + cpa takes care of the direct mappings. */ + read_lock(&vmlist_lock); + for (p = vmlist; p; p = p->next) { + if (p->addr == addr) + break; + } + read_unlock(&vmlist_lock); + + if (!p) { printk("iounmap: bad address %p\n", addr); - else if (p->flags >> 20) + dump_stack(); + return; + } + + /* Reset the direct mapping. Can block */ + if (p->flags >> 20) ioremap_change_attr(p->phys_addr, p->size, 0); - write_unlock(&vmlist_lock); + + /* Finally remove it */ + o = remove_vm_area((void *)addr); + BUG_ON(p != o || o == NULL); kfree(p); } diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index a828a01..15b67d2 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -53,6 +53,8 @@ static int __init populate_memnodemap( int res = -1; unsigned long addr, end; + if (shift >= 64) + return -1; memset(memnodemap, 0xff, sizeof(memnodemap)); for (i = 0; i < numnodes; i++) { addr = nodes[i].start; @@ -65,7 +67,7 @@ static int __init populate_memnodemap( if (memnodemap[addr >> shift] != 0xff) return -1; memnodemap[addr >> shift] = i; - addr += (1 << shift); + addr += (1UL << shift); } while (addr < end); res = 1; } diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index b90e8fe..35f1f1a 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -128,6 +128,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, pte_t *kpte; struct page *kpte_page; unsigned kpte_flags; + pgprot_t ref_prot2; kpte = lookup_address(address); if (!kpte) return 0; kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); @@ -140,10 +141,14 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, * split_large_page will take the reference for this change_page_attr * on the split page. */ - struct page *split = split_large_page(address, prot, ref_prot); + + struct page *split; + ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE)); + + split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; - set_pte(kpte,mk_pte(split, ref_prot)); + set_pte(kpte,mk_pte(split, ref_prot2)); kpte_page = split; } get_page(kpte_page); diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile index bb34e5e..a8f75a2 100644 --- a/arch/x86_64/pci/Makefile +++ b/arch/x86_64/pci/Makefile @@ -11,7 +11,7 @@ obj-y += fixup.o obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o common.o # mmconfig has a 64bit special -obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o +obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o obj-$(CONFIG_NUMA) += k8-bus.o diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index a0838c4..f16c0d5 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -8,18 +8,21 @@ #include <linux/pci.h> #include <linux/init.h> #include <linux/acpi.h> +#include <linux/bitmap.h> #include "pci.h" #define MMCONFIG_APER_SIZE (256*1024*1024) +static DECLARE_BITMAP(fallback_slots, 32); + /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { struct acpi_table_mcfg_config *cfg; - char *virt; + char __iomem *virt; }; static struct mmcfg_virt *pci_mmcfg_virt; -static char *get_virt(unsigned int seg, int bus) +static char __iomem *get_virt(unsigned int seg, unsigned bus) { int cfg_num = -1; struct acpi_table_mcfg_config *cfg; @@ -27,10 +30,9 @@ static char *get_virt(unsigned int seg, int bus) while (1) { ++cfg_num; if (cfg_num >= pci_mmcfg_config_num) { - /* something bad is going on, no cfg table is found. */ - /* so we fall back to the old way we used to do this */ - /* and just rely on the first entry to be correct. */ - return pci_mmcfg_virt[0].virt; + /* Not found - fall back to type 1. This happens + e.g. on the internal devices of a K8 northbridge. */ + return NULL; } cfg = pci_mmcfg_virt[cfg_num].cfg; if (cfg->pci_segment_group_number != seg) @@ -41,20 +43,30 @@ static char *get_virt(unsigned int seg, int bus) } } -static inline char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { - - return get_virt(seg, bus) + ((bus << 20) | (devfn << 12)); + char __iomem *addr; + if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) + return NULL; + addr = get_virt(seg, bus); + if (!addr) + return NULL; + return addr + ((bus << 20) | (devfn << 12)); } static int pci_mmcfg_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { - char *addr = pci_dev_base(seg, bus, devfn); + char __iomem *addr; + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) return -EINVAL; + addr = pci_dev_base(seg, bus, devfn); + if (!addr) + return pci_conf1_read(seg,bus,devfn,reg,len,value); + switch (len) { case 1: *value = readb(addr + reg); @@ -73,11 +85,16 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, static int pci_mmcfg_write(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 value) { - char *addr = pci_dev_base(seg, bus, devfn); + char __iomem *addr; + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) return -EINVAL; + addr = pci_dev_base(seg, bus, devfn); + if (!addr) + return pci_conf1_write(seg,bus,devfn,reg,len,value); + switch (len) { case 1: writeb(value, addr + reg); @@ -98,6 +115,30 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; +/* K8 systems have some devices (typically in the builtin northbridge) + that are only accessible using type1 + Normally this can be expressed in the MCFG by not listing them + and assigning suitable _SEGs, but this isn't implemented in some BIOS. + Instead try to discover all devices on bus 0 that are unreachable using MM + and fallback for them. + We only do this for bus 0/seg 0 */ +static __init void unreachable_devices(void) +{ + int i; + for (i = 0; i < 32; i++) { + u32 val1; + char __iomem *addr; + + pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); + if (val1 == 0xffffffff) + continue; + addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); + if (addr == NULL|| readl(addr) != val1) { + set_bit(i, &fallback_slots); + } + } +} + static int __init pci_mmcfg_init(void) { int i; @@ -128,6 +169,8 @@ static int __init pci_mmcfg_init(void) printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address); } + unreachable_devices(); + raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; |