From 95235ca2c20ac0b31a8eb39e2d599bcc3e9c9a10 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Fri, 2 Dec 2005 10:43:20 -0800 Subject: [CPUFREQ] CPU frequency display in /proc/cpuinfo What is the value shown in "cpu MHz" of /proc/cpuinfo when CPUs are capable of changing frequency? Today the answer is: It depends. On i386: SMP kernel - It is always the boot frequency UP kernel - Scales with the frequency change and shows that was last set. On x86_64: There is one single variable cpu_khz that gets written by all the CPUs. So, the frequency set by last CPU will be seen on /proc/cpuinfo of all the CPUs in the system. What you see also depends on whether you have constant_tsc capable CPU or not. On ia64: It is always boot time frequency of a particular CPU that gets displayed. The patch below changes this to: Show the last known frequency of the particular CPU, when cpufreq is present. If cpu doesnot support changing of frequency through cpufreq, then boot frequency will be shown. The patch affects i386, x86_64 and ia64 architectures. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- arch/x86_64/kernel/setup.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 750e01d..64c4534 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -1256,8 +1257,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { + unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); } /* Cache size */ -- cgit v1.1 From bf8d5c52c3b6b27061e3b7d779057fd9a6cac164 Mon Sep 17 00:00:00 2001 From: Keshavamurthy Anil S Date: Mon, 12 Dec 2005 00:37:34 -0800 Subject: [PATCH] kprobes: increment kprobe missed count for multiprobes When multiple probes are registered at the same address and if due to some recursion (probe getting triggered within a probe handler), we skip calling pre_handlers and just increment nmissed field. The below patch make sure it walks the list for multiple probes case. Without the below patch we get incorrect results of nmissed count for multiple probe case. Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index dddeb67..afe11f4 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c @@ -329,7 +329,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs) */ save_previous_kprobe(kcb); set_current_kprobe(p, regs, kcb); - p->nmissed++; + kprobes_inc_nmissed_count(p); prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_REENTER; return 1; -- cgit v1.1 From 68e188911263adb06e28fa2c9dc9e017774de68d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 12 Dec 2005 22:17:07 -0800 Subject: [PATCH] x86_64: Make sure hpet_address is 0 when any part of HPET initialization fails Otherwise TSC->HPET fallback could see incorrect state and crash later. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index fdaddc4..4434e154 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -59,7 +59,7 @@ static int notsc __initdata = 0; unsigned int cpu_khz; /* TSC clocks / usec, not used here */ static unsigned long hpet_period; /* fsecs / HPET clock */ unsigned long hpet_tick; /* HPET clocks / interrupt */ -static int hpet_use_timer; +static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ unsigned long vxtime_hz = PIT_TICK_RATE; int report_lost_ticks; /* command line option */ unsigned long long monotonic_base; @@ -908,6 +908,8 @@ void __init time_init(void) if (!hpet_init()) vxtime_hz = (1000000000000000L + hpet_period / 2) / hpet_period; + else + vxtime.hpet_address = 0; if (hpet_use_timer) { cpu_khz = hpet_calibrate_tsc(); -- cgit v1.1 From 5e9ef02ec00c70840661d174dc2f4862db471bb6 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 12 Dec 2005 22:17:08 -0800 Subject: [PATCH] i386/x86-64 disable LAPIC completely for offline CPU Disabling LAPIC timer isn't sufficient. In some situations, such as we enabled NMI watchdog, there is still unexpected interrupt (such as NMI) invoked in offline CPU. This also avoids offline CPU receives spurious interrupt and anything similar. Signed-off-by: Shaohua Li Signed-off-by: Andi Kleen Acked-by: "Seth, Rohit" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/smpboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 683c33f..ecbd7b8 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -1181,7 +1181,7 @@ int __cpu_disable(void) if (cpu == 0) return -EBUSY; - disable_APIC_timer(); + clear_local_APIC(); /* * HACK: -- cgit v1.1 From bf5421c309bb89e5106452bc840983b1b4754d61 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 12 Dec 2005 22:17:09 -0800 Subject: [PATCH] i386/x86-64: Don't call change_page_attr with a spinlock held It's illegal because it can sleep. Use a two step lookup scheme instead. First look up the vm_struct, then change the direct mapping, then finally unmap it. That's ok because nobody can change the particular virtual address range as long as the vm_struct is still in the global list. Also added some LinuxDoc documentation to iounmap. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/ioremap.c | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index ecf7acb..0d260e4 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -247,9 +247,15 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) return __ioremap(phys_addr, size, _PAGE_PCD); } +/** + * iounmap - Free a IO remapping + * @addr: virtual address from ioremap_* + * + * Caller must ensure there is only one unmapping for the same pointer. + */ void iounmap(volatile void __iomem *addr) { - struct vm_struct *p; + struct vm_struct *p, *o; if (addr <= high_memory) return; @@ -257,12 +263,31 @@ void iounmap(volatile void __iomem *addr) addr < phys_to_virt(ISA_END_ADDRESS)) return; - write_lock(&vmlist_lock); - p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK)); - if (!p) + addr = (volatile void *)(PAGE_MASK & (unsigned long __force)addr); + /* Use the vm area unlocked, assuming the caller + ensures there isn't another iounmap for the same address + in parallel. Reuse of the virtual address is prevented by + leaving it in the global lists until we're done with it. + cpa takes care of the direct mappings. */ + read_lock(&vmlist_lock); + for (p = vmlist; p; p = p->next) { + if (p->addr == addr) + break; + } + read_unlock(&vmlist_lock); + + if (!p) { printk("iounmap: bad address %p\n", addr); - else if (p->flags >> 20) + dump_stack(); + return; + } + + /* Reset the direct mapping. Can block */ + if (p->flags >> 20) ioremap_change_attr(p->phys_addr, p->size, 0); - write_unlock(&vmlist_lock); + + /* Finally remove it */ + o = remove_vm_area((void *)addr); + BUG_ON(p != o || o == NULL); kfree(p); } -- cgit v1.1 From 928cf8c62763349efc550a12f6518e52c3390906 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 12 Dec 2005 22:17:10 -0800 Subject: [PATCH] i386/x86-64 Fall back to type 1 access when no entry found When there is no entry for a bus in MCFG fall back to type1. This is especially important on K8 systems where always some devices can't be accessed using mmconfig (in particular the builtin northbridge doesn't support it for its own devices) Cc: Cc: Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/pci/mmconfig.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index a0838c4..22ff1b0 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -19,7 +19,7 @@ struct mmcfg_virt { }; static struct mmcfg_virt *pci_mmcfg_virt; -static char *get_virt(unsigned int seg, int bus) +static char *get_virt(unsigned int seg, unsigned bus) { int cfg_num = -1; struct acpi_table_mcfg_config *cfg; @@ -27,10 +27,9 @@ static char *get_virt(unsigned int seg, int bus) while (1) { ++cfg_num; if (cfg_num >= pci_mmcfg_config_num) { - /* something bad is going on, no cfg table is found. */ - /* so we fall back to the old way we used to do this */ - /* and just rely on the first entry to be correct. */ - return pci_mmcfg_virt[0].virt; + /* Not found - fall back to type 1. This happens + e.g. on the internal devices of a K8 northbridge. */ + return NULL; } cfg = pci_mmcfg_virt[cfg_num].cfg; if (cfg->pci_segment_group_number != seg) @@ -43,18 +42,25 @@ static char *get_virt(unsigned int seg, int bus) static inline char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { - - return get_virt(seg, bus) + ((bus << 20) | (devfn << 12)); + char *addr = get_virt(seg, bus); + if (!addr) + return NULL; + return addr + ((bus << 20) | (devfn << 12)); } static int pci_mmcfg_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { - char *addr = pci_dev_base(seg, bus, devfn); + char *addr; + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) return -EINVAL; + addr = pci_dev_base(seg, bus, devfn); + if (!addr) + return pci_conf1_read(seg,bus,devfn,reg,len,value); + switch (len) { case 1: *value = readb(addr + reg); @@ -73,11 +79,16 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, static int pci_mmcfg_write(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 value) { - char *addr = pci_dev_base(seg, bus, devfn); + char *addr; + /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) return -EINVAL; + addr = pci_dev_base(seg, bus, devfn); + if (!addr) + return pci_conf1_write(seg,bus,devfn,reg,len,value); + switch (len) { case 1: writeb(value, addr + reg); -- cgit v1.1 From d6ece5491ae71ded1237f59def88bcd1b19b6f60 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 12 Dec 2005 22:17:11 -0800 Subject: [PATCH] i386/x86-64 Correct for broken MCFG tables on K8 systems They report all busses as MMCONFIG capable, but it never works for the internal devices in the CPU's builtin northbridge. It just probes all func 0 devices on bus 0 (the internal northbridge is currently always on bus 0) and if they are not accessible using MCFG they are put into a special fallback bitmap. On systems where it isn't we assume the BIOS vendor supplied correct MCFG. Requires the earlier patch for mmconfig type1 fallback Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/pci/mmconfig.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index 22ff1b0..9c4f907 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -8,10 +8,13 @@ #include #include #include +#include #include "pci.h" #define MMCONFIG_APER_SIZE (256*1024*1024) +static DECLARE_BITMAP(fallback_slots, 32); + /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { struct acpi_table_mcfg_config *cfg; @@ -40,9 +43,12 @@ static char *get_virt(unsigned int seg, unsigned bus) } } -static inline char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +static char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { - char *addr = get_virt(seg, bus); + char *addr; + if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) + return NULL; + addr = get_virt(seg, bus); if (!addr) return NULL; return addr + ((bus << 20) | (devfn << 12)); @@ -109,6 +115,30 @@ static struct pci_raw_ops pci_mmcfg = { .write = pci_mmcfg_write, }; +/* K8 systems have some devices (typically in the builtin northbridge) + that are only accessible using type1 + Normally this can be expressed in the MCFG by not listing them + and assigning suitable _SEGs, but this isn't implemented in some BIOS. + Instead try to discover all devices on bus 0 that are unreachable using MM + and fallback for them. + We only do this for bus 0/seg 0 */ +static __init void unreachable_devices(void) +{ + int i; + for (i = 0; i < 32; i++) { + u32 val1; + char *addr; + + pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); + if (val1 == 0xffffffff) + continue; + addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); + if (addr == NULL|| readl(addr) != val1) { + set_bit(i, &fallback_slots); + } + } +} + static int __init pci_mmcfg_init(void) { int i; @@ -139,6 +169,8 @@ static int __init pci_mmcfg_init(void) printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address); } + unreachable_devices(); + raw_pci_ops = &pci_mmcfg; pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; -- cgit v1.1 From df818a52fb5e64c72b21029fb9ba80583106932c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 12 Dec 2005 22:17:12 -0800 Subject: [PATCH] x86_64: Fix 32bit thread coredumps When a register set is passed in don't try to fix up the pointer. Noticed by Al Viro Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/ia32_binfmt.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 830feb2..2b760d0 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c @@ -217,8 +217,7 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr if (!tsk_used_math(tsk)) return 0; if (!regs) - regs = (struct pt_regs *)tsk->thread.rsp0; - --regs; + regs = ((struct pt_regs *)tsk->thread.rsp0) - 1; if (tsk == current) unlazy_fpu(tsk); set_fs(KERNEL_DS); -- cgit v1.1 From fd4954714e8e7db9f6eb5878fa6111c46445ca81 Mon Sep 17 00:00:00 2001 From: john stultz Date: Mon, 12 Dec 2005 22:17:13 -0800 Subject: [PATCH] x86_64: Fix collision between pmtimer and pit/hpet On systems that do not support the HPET legacy functions (basically the IBM x460, but there could be others), in time_init() we accidentally fall into a PM timer conditional and set the vxtime_hz value to the PM timer's frequency. We then use this value with the HPET for timekeeping. This patch (which mimics the behavior in time_init_gtod) corrects the collision. Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 4434e154..7410279 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -915,7 +915,7 @@ void __init time_init(void) cpu_khz = hpet_calibrate_tsc(); timename = "HPET"; #ifdef CONFIG_X86_PM_TIMER - } else if (pmtmr_ioport) { + } else if (pmtmr_ioport && !vxtime.hpet_address) { vxtime_hz = PM_TIMER_FREQUENCY; timename = "PM"; pit_init(); -- cgit v1.1 From 8309cf66fd90ccba9894adde2f3a8d7e1507e4d2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Dec 2005 22:17:14 -0800 Subject: [PATCH] x86_64: Bug correction in populate_memnodemap() As reported by Keith Mannthey, there are problems in populate_memnodemap() The bug was that the compute_hash_shift() was returning 31, with incorrect initialization of memnodemap[] To correct the bug, we must use (1UL << shift) instead of (1 << shift) to avoid an integer overflow, and we must check that shift < 64 to avoid an infinite loop. Signed-off-by: Eric Dumazet Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/numa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index a828a01..15b67d2 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c @@ -53,6 +53,8 @@ static int __init populate_memnodemap( int res = -1; unsigned long addr, end; + if (shift >= 64) + return -1; memset(memnodemap, 0xff, sizeof(memnodemap)); for (i = 0; i < numnodes; i++) { addr = nodes[i].start; @@ -65,7 +67,7 @@ static int __init populate_memnodemap( if (memnodemap[addr >> shift] != 0xff) return -1; memnodemap[addr >> shift] = i; - addr += (1 << shift); + addr += (1UL << shift); } while (addr < end); res = 1; } -- cgit v1.1 From 8b8a4e33e4a320735f353a092013b314f142493d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 Dec 2005 09:17:44 +0000 Subject: [PATCH] i386,amd64: mmconfig __iomem annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86_64/pci/mmconfig.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index 9c4f907..f16c0d5 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c @@ -18,11 +18,11 @@ static DECLARE_BITMAP(fallback_slots, 32); /* Static virtual mapping of the MMCONFIG aperture */ struct mmcfg_virt { struct acpi_table_mcfg_config *cfg; - char *virt; + char __iomem *virt; }; static struct mmcfg_virt *pci_mmcfg_virt; -static char *get_virt(unsigned int seg, unsigned bus) +static char __iomem *get_virt(unsigned int seg, unsigned bus) { int cfg_num = -1; struct acpi_table_mcfg_config *cfg; @@ -43,9 +43,9 @@ static char *get_virt(unsigned int seg, unsigned bus) } } -static char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) +static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) { - char *addr; + char __iomem *addr; if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) return NULL; addr = get_virt(seg, bus); @@ -57,7 +57,7 @@ static char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn static int pci_mmcfg_read(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 *value) { - char *addr; + char __iomem *addr; /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) @@ -85,7 +85,7 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, static int pci_mmcfg_write(unsigned int seg, unsigned int bus, unsigned int devfn, int reg, int len, u32 value) { - char *addr; + char __iomem *addr; /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) @@ -127,7 +127,7 @@ static __init void unreachable_devices(void) int i; for (i = 0; i < 32; i++) { u32 val1; - char *addr; + char __iomem *addr; pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); if (val1 == 0xffffffff) -- cgit v1.1 From b16b88e55d808a6324d5ff02d8c686f7884870f8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 15 Dec 2005 09:17:50 +0000 Subject: [PATCH] i386,amd64: ioremap.c __iomem annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/x86_64/mm/ioremap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 0d260e4..ae20706 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c @@ -263,7 +263,7 @@ void iounmap(volatile void __iomem *addr) addr < phys_to_virt(ISA_END_ADDRESS)) return; - addr = (volatile void *)(PAGE_MASK & (unsigned long __force)addr); + addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr); /* Use the vm area unlocked, assuming the caller ensures there isn't another iounmap for the same address in parallel. Reuse of the virtual address is prevented by -- cgit v1.1 From 391eadeec836463a4e6e3843953bbe40f6522593 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 20 Dec 2005 07:23:47 +0100 Subject: [PATCH] Fix build with CONFIG_PCI_MMCONFIG Now needs to include the type 1 functions ("direct") too. Reported by Pavel Roskin Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/x86_64/pci/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile index bb34e5e..a8f75a2 100644 --- a/arch/x86_64/pci/Makefile +++ b/arch/x86_64/pci/Makefile @@ -11,7 +11,7 @@ obj-y += fixup.o obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o common.o # mmconfig has a 64bit special -obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o +obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o obj-$(CONFIG_NUMA) += k8-bus.o -- cgit v1.1 From 576fc0978b6b3673fce6d4b405f36449e508826c Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Thu, 29 Dec 2005 13:06:11 +0100 Subject: [PATCH] x86_64: Fix incorrect node_present_pages on NUMA Currently, we do not pass the correct start_pfn to e820_hole_size, to calculate holes. Following patch fixes that. The bug results in incorrect number of node_present_pages for each pgdat and causes ugly output in /sys and probably VM inbalances. Signed-off-by: Alok N Kataria Signed-off-by: Ravikiran Thirumalai Signed-off-by: Andi Kleen Sighed-off-by: Shair Fultheim Sighed-off-by: Linus Torvalds --- arch/x86_64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index 286f6a6..c016dfe 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -348,7 +348,7 @@ size_zones(unsigned long *z, unsigned long *h, } /* Compute holes */ - w = 0; + w = start_pfn; for (i = 0; i < MAX_NR_ZONES; i++) { unsigned long s = w; w += z[i]; -- cgit v1.1 From 02959a875caec8cabd36111046ad537251ef405f Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Tue, 27 Dec 2005 23:39:51 -0500 Subject: gitignore: x86_64 files Add filters for x86_64 generated files. Signed-off-by: Brian Gerst Signed-off-by: Sam Ravnborg --- arch/x86_64/boot/.gitignore | 3 +++ arch/x86_64/boot/tools/.gitignore | 1 + arch/x86_64/ia32/.gitignore | 1 + 3 files changed, 5 insertions(+) create mode 100644 arch/x86_64/boot/.gitignore create mode 100644 arch/x86_64/boot/tools/.gitignore create mode 100644 arch/x86_64/ia32/.gitignore (limited to 'arch/x86_64') diff --git a/arch/x86_64/boot/.gitignore b/arch/x86_64/boot/.gitignore new file mode 100644 index 0000000..495f20c --- /dev/null +++ b/arch/x86_64/boot/.gitignore @@ -0,0 +1,3 @@ +bootsect +bzImage +setup diff --git a/arch/x86_64/boot/tools/.gitignore b/arch/x86_64/boot/tools/.gitignore new file mode 100644 index 0000000..378eac2 --- /dev/null +++ b/arch/x86_64/boot/tools/.gitignore @@ -0,0 +1 @@ +build diff --git a/arch/x86_64/ia32/.gitignore b/arch/x86_64/ia32/.gitignore new file mode 100644 index 0000000..48ab174 --- /dev/null +++ b/arch/x86_64/ia32/.gitignore @@ -0,0 +1 @@ +vsyscall*.so -- cgit v1.1 From c728252c7a072628bd3932ff87943d1e12423359 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 6 Jan 2006 00:12:03 -0800 Subject: [PATCH] x86/x86_64: mark rodata section read only: generic x86-64 bugfix Bug fix required for the .rodata work on x86-64: when change_page_attr() and friends need to break up a 2Mb page into 4Kb pages, it always set the NX bit on the PMD, which causes the cpu to consider the entire 2Mb region to be NX regardless of the actual PTE perms. This is fine in general, with one big exception: the 2Mb page that covers the last part of the kernel .text! The fix is to not invent a new permission for the new PMD entry, but to just inherit the existing one minus the PSE bit. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/mm/pageattr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index b90e8fe..35f1f1a 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c @@ -128,6 +128,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, pte_t *kpte; struct page *kpte_page; unsigned kpte_flags; + pgprot_t ref_prot2; kpte = lookup_address(address); if (!kpte) return 0; kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); @@ -140,10 +141,14 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, * split_large_page will take the reference for this change_page_attr * on the split page. */ - struct page *split = split_large_page(address, prot, ref_prot); + + struct page *split; + ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE)); + + split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; - set_pte(kpte,mk_pte(split, ref_prot)); + set_pte(kpte,mk_pte(split, ref_prot2)); kpte_page = split; } get_page(kpte_page); -- cgit v1.1 From 67df197b1a07944c2e0e40ded3d4fd07d108e110 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 6 Jan 2006 00:12:04 -0800 Subject: [PATCH] x86/x86_64: mark rodata section read-only: x86-64 support x86-64 specific parts to make the .rodata section read only Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/Kconfig.debug | 10 ++++++++++ arch/x86_64/mm/init.c | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'arch/x86_64') diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index e2c6e64a..fcb06a5 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug @@ -9,6 +9,16 @@ config INIT_DEBUG Fill __init and __initdata at the end of boot. This helps debugging illegal uses of __init and __initdata after initialization. +config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + depends on DEBUG_KERNEL + help + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const data. + This option may have a slight performance impact because a portion + of the kernel code won't be covered by a 2MB TLB anymore. + If in doubt, say "N". + config IOMMU_DEBUG depends on GART_IOMMU && DEBUG_KERNEL bool "Enable IOMMU debugging" diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index c016dfe..1faae5f 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c @@ -498,6 +498,29 @@ void free_initmem(void) printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); } +#ifdef CONFIG_DEBUG_RODATA + +extern char __start_rodata, __end_rodata; +void mark_rodata_ro(void) +{ + unsigned long addr = (unsigned long)&__start_rodata; + + for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE) + change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); + + printk ("Write protecting the kernel read-only data: %luk\n", + (&__end_rodata - &__start_rodata) >> 10); + + /* + * change_page_attr_addr() requires a global_flush_tlb() call after it. + * We do this after the printk so that if something went wrong in the + * change, the printk gets out at least to give a better debug hint + * of who is the culprit. + */ + global_flush_tlb(); +} +#endif + #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { -- cgit v1.1 From bb152f53120d66c98c1f16518407df6a84f23714 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 6 Jan 2006 00:12:05 -0800 Subject: [PATCH] x86/x86_64: mark rodata section read-only: make some datastructures const Mark some key kernel datastructures readonly. This patch was previously posted on Jun 28th but was back then not merged because nothing was enforcing rodata anyway.. well that changed now :) Patch by Christoph Lameter and Dave Jones Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/ia32/ia32entry.S | 2 +- arch/x86_64/kernel/syscall.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index e0eb0c7..df0773c 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -341,7 +341,7 @@ ENTRY(ia32_ptregs_common) jmp ia32_sysret /* misbalances the return cache */ CFI_ENDPROC - .data + .section .rodata,"a" .align 8 .globl ia32_sys_call_table ia32_sys_call_table: diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c index e263685..7c176b3 100644 --- a/arch/x86_64/kernel/syscall.c +++ b/arch/x86_64/kernel/syscall.c @@ -19,7 +19,7 @@ typedef void (*sys_call_ptr_t)(void); extern void sys_ni_syscall(void); -sys_call_ptr_t sys_call_table[__NR_syscall_max+1] __cacheline_aligned = { +const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */ [0 ... __NR_syscall_max] = &sys_ni_syscall, #include -- cgit v1.1 From 1fa744e6e91a895750b9980d13fcfc5791a0cd91 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 6 Jan 2006 00:12:20 -0800 Subject: [PATCH] cpu hotplug/x86_64: disable interrupt in play_dead With physical CPU hotplug, the CPU is hot removed and it should not receive any interrupts. Disabling interrupt is much safer. This basically is what we do in ia64 & x86. Signed-off-by: Shaohua Li Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/process.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86_64') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 7519fc5..3060ed9 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); DECLARE_PER_CPU(int, cpu_state); #include -/* We don't actually take CPU down, just spin without interrupts. */ +/* We halt the CPU with physical CPU hotplug */ static inline void play_dead(void) { idle_task_exit(); @@ -166,8 +166,9 @@ static inline void play_dead(void) /* Ack it */ __get_cpu_var(cpu_state) = CPU_DEAD; + local_irq_disable(); while (1) - safe_halt(); + halt(); } #else static inline void play_dead(void) -- cgit v1.1 From 20ede2741551d4a1d24313292beb0da915a55911 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Thu, 5 Jan 2006 12:10:52 -0500 Subject: gitignore: ignore shared objects Many arches make shared objects for VDSOs. Generally exclude them. Signed-off-by: Brian Gerst Signed-off-by: Sam Ravnborg --- arch/x86_64/ia32/.gitignore | 1 - 1 file changed, 1 deletion(-) delete mode 100644 arch/x86_64/ia32/.gitignore (limited to 'arch/x86_64') diff --git a/arch/x86_64/ia32/.gitignore b/arch/x86_64/ia32/.gitignore deleted file mode 100644 index 48ab174..0000000 --- a/arch/x86_64/ia32/.gitignore +++ /dev/null @@ -1 +0,0 @@ -vsyscall*.so -- cgit v1.1