summaryrefslogtreecommitdiffstats
path: root/arch/x86_64
diff options
context:
space:
mode:
authorLen Brown <len.brown@intel.com>2006-01-07 03:50:18 -0500
committerLen Brown <len.brown@intel.com>2006-01-07 03:50:18 -0500
commited03f430cdc8c802652467e9097606fedc2c7abc (patch)
tree30941ec1e6f93e99358fefe18175e5dd800a4379 /arch/x86_64
parented349a8a0a780ed27e2a765f16cee54d9b63bfee (diff)
parent6f957eaf79356a32e838f5f262ee9a60544b1d5b (diff)
downloadop-kernel-dev-ed03f430cdc8c802652467e9097606fedc2c7abc.zip
op-kernel-dev-ed03f430cdc8c802652467e9097606fedc2c7abc.tar.gz
Pull pnpacpi into acpica branch
Diffstat (limited to 'arch/x86_64')
-rw-r--r--arch/x86_64/Kconfig.debug10
-rw-r--r--arch/x86_64/boot/.gitignore3
-rw-r--r--arch/x86_64/boot/tools/.gitignore1
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c3
-rw-r--r--arch/x86_64/ia32/ia32entry.S2
-rw-r--r--arch/x86_64/kernel/kprobes.c2
-rw-r--r--arch/x86_64/kernel/process.c5
-rw-r--r--arch/x86_64/kernel/setup.c6
-rw-r--r--arch/x86_64/kernel/smpboot.c2
-rw-r--r--arch/x86_64/kernel/syscall.c2
-rw-r--r--arch/x86_64/kernel/time.c6
-rw-r--r--arch/x86_64/mm/init.c25
-rw-r--r--arch/x86_64/mm/ioremap.c37
-rw-r--r--arch/x86_64/mm/numa.c4
-rw-r--r--arch/x86_64/mm/pageattr.c9
-rw-r--r--arch/x86_64/pci/Makefile2
-rw-r--r--arch/x86_64/pci/mmconfig.c65
17 files changed, 151 insertions, 33 deletions
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index e2c6e64a..fcb06a5 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -9,6 +9,16 @@ config INIT_DEBUG
Fill __init and __initdata at the end of boot. This helps debugging
illegal uses of __init and __initdata after initialization.
+config DEBUG_RODATA
+ bool "Write protect kernel read-only data structures"
+ depends on DEBUG_KERNEL
+ help
+ Mark the kernel read-only data as write-protected in the pagetables,
+ in order to catch accidental (and incorrect) writes to such const data.
+ This option may have a slight performance impact because a portion
+ of the kernel code won't be covered by a 2MB TLB anymore.
+ If in doubt, say "N".
+
config IOMMU_DEBUG
depends on GART_IOMMU && DEBUG_KERNEL
bool "Enable IOMMU debugging"
diff --git a/arch/x86_64/boot/.gitignore b/arch/x86_64/boot/.gitignore
new file mode 100644
index 0000000..495f20c
--- /dev/null
+++ b/arch/x86_64/boot/.gitignore
@@ -0,0 +1,3 @@
+bootsect
+bzImage
+setup
diff --git a/arch/x86_64/boot/tools/.gitignore b/arch/x86_64/boot/tools/.gitignore
new file mode 100644
index 0000000..378eac2
--- /dev/null
+++ b/arch/x86_64/boot/tools/.gitignore
@@ -0,0 +1 @@
+build
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 830feb2..2b760d0 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -217,8 +217,7 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr
if (!tsk_used_math(tsk))
return 0;
if (!regs)
- regs = (struct pt_regs *)tsk->thread.rsp0;
- --regs;
+ regs = ((struct pt_regs *)tsk->thread.rsp0) - 1;
if (tsk == current)
unlazy_fpu(tsk);
set_fs(KERNEL_DS);
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index e0eb0c7..df0773c 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -341,7 +341,7 @@ ENTRY(ia32_ptregs_common)
jmp ia32_sysret /* misbalances the return cache */
CFI_ENDPROC
- .data
+ .section .rodata,"a"
.align 8
.globl ia32_sys_call_table
ia32_sys_call_table:
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index dddeb67..afe11f4 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -329,7 +329,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
*/
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
- p->nmissed++;
+ kprobes_inc_nmissed_count(p);
prepare_singlestep(p, regs);
kcb->kprobe_status = KPROBE_REENTER;
return 1;
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 7519fc5..3060ed9 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -157,7 +157,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
DECLARE_PER_CPU(int, cpu_state);
#include <asm/nmi.h>
-/* We don't actually take CPU down, just spin without interrupts. */
+/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
idle_task_exit();
@@ -166,8 +166,9 @@ static inline void play_dead(void)
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
+ local_irq_disable();
while (1)
- safe_halt();
+ halt();
}
#else
static inline void play_dead(void)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 750e01d..64c4534 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -42,6 +42,7 @@
#include <linux/edd.h>
#include <linux/mmzone.h>
#include <linux/kexec.h>
+#include <linux/cpufreq.h>
#include <asm/mtrr.h>
#include <asm/uaccess.h>
@@ -1256,8 +1257,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "stepping\t: unknown\n");
if (cpu_has(c,X86_FEATURE_TSC)) {
+ unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
+ if (!freq)
+ freq = cpu_khz;
seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
- cpu_khz / 1000, (cpu_khz % 1000));
+ freq / 1000, (freq % 1000));
}
/* Cache size */
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 683c33f..ecbd7b8 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -1181,7 +1181,7 @@ int __cpu_disable(void)
if (cpu == 0)
return -EBUSY;
- disable_APIC_timer();
+ clear_local_APIC();
/*
* HACK:
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c
index e263685..7c176b3 100644
--- a/arch/x86_64/kernel/syscall.c
+++ b/arch/x86_64/kernel/syscall.c
@@ -19,7 +19,7 @@ typedef void (*sys_call_ptr_t)(void);
extern void sys_ni_syscall(void);
-sys_call_ptr_t sys_call_table[__NR_syscall_max+1] __cacheline_aligned = {
+const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm-x86_64/unistd.h>
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index fdaddc4..7410279 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -59,7 +59,7 @@ static int notsc __initdata = 0;
unsigned int cpu_khz; /* TSC clocks / usec, not used here */
static unsigned long hpet_period; /* fsecs / HPET clock */
unsigned long hpet_tick; /* HPET clocks / interrupt */
-static int hpet_use_timer;
+static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */
unsigned long vxtime_hz = PIT_TICK_RATE;
int report_lost_ticks; /* command line option */
unsigned long long monotonic_base;
@@ -908,12 +908,14 @@ void __init time_init(void)
if (!hpet_init())
vxtime_hz = (1000000000000000L + hpet_period / 2) /
hpet_period;
+ else
+ vxtime.hpet_address = 0;
if (hpet_use_timer) {
cpu_khz = hpet_calibrate_tsc();
timename = "HPET";
#ifdef CONFIG_X86_PM_TIMER
- } else if (pmtmr_ioport) {
+ } else if (pmtmr_ioport && !vxtime.hpet_address) {
vxtime_hz = PM_TIMER_FREQUENCY;
timename = "PM";
pit_init();
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 286f6a6..1faae5f 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -348,7 +348,7 @@ size_zones(unsigned long *z, unsigned long *h,
}
/* Compute holes */
- w = 0;
+ w = start_pfn;
for (i = 0; i < MAX_NR_ZONES; i++) {
unsigned long s = w;
w += z[i];
@@ -498,6 +498,29 @@ void free_initmem(void)
printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
}
+#ifdef CONFIG_DEBUG_RODATA
+
+extern char __start_rodata, __end_rodata;
+void mark_rodata_ro(void)
+{
+ unsigned long addr = (unsigned long)&__start_rodata;
+
+ for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE)
+ change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
+
+ printk ("Write protecting the kernel read-only data: %luk\n",
+ (&__end_rodata - &__start_rodata) >> 10);
+
+ /*
+ * change_page_attr_addr() requires a global_flush_tlb() call after it.
+ * We do this after the printk so that if something went wrong in the
+ * change, the printk gets out at least to give a better debug hint
+ * of who is the culprit.
+ */
+ global_flush_tlb();
+}
+#endif
+
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c
index ecf7acb..ae20706 100644
--- a/arch/x86_64/mm/ioremap.c
+++ b/arch/x86_64/mm/ioremap.c
@@ -247,9 +247,15 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
return __ioremap(phys_addr, size, _PAGE_PCD);
}
+/**
+ * iounmap - Free a IO remapping
+ * @addr: virtual address from ioremap_*
+ *
+ * Caller must ensure there is only one unmapping for the same pointer.
+ */
void iounmap(volatile void __iomem *addr)
{
- struct vm_struct *p;
+ struct vm_struct *p, *o;
if (addr <= high_memory)
return;
@@ -257,12 +263,31 @@ void iounmap(volatile void __iomem *addr)
addr < phys_to_virt(ISA_END_ADDRESS))
return;
- write_lock(&vmlist_lock);
- p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK));
- if (!p)
+ addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
+ /* Use the vm area unlocked, assuming the caller
+ ensures there isn't another iounmap for the same address
+ in parallel. Reuse of the virtual address is prevented by
+ leaving it in the global lists until we're done with it.
+ cpa takes care of the direct mappings. */
+ read_lock(&vmlist_lock);
+ for (p = vmlist; p; p = p->next) {
+ if (p->addr == addr)
+ break;
+ }
+ read_unlock(&vmlist_lock);
+
+ if (!p) {
printk("iounmap: bad address %p\n", addr);
- else if (p->flags >> 20)
+ dump_stack();
+ return;
+ }
+
+ /* Reset the direct mapping. Can block */
+ if (p->flags >> 20)
ioremap_change_attr(p->phys_addr, p->size, 0);
- write_unlock(&vmlist_lock);
+
+ /* Finally remove it */
+ o = remove_vm_area((void *)addr);
+ BUG_ON(p != o || o == NULL);
kfree(p);
}
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index a828a01..15b67d2 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -53,6 +53,8 @@ static int __init populate_memnodemap(
int res = -1;
unsigned long addr, end;
+ if (shift >= 64)
+ return -1;
memset(memnodemap, 0xff, sizeof(memnodemap));
for (i = 0; i < numnodes; i++) {
addr = nodes[i].start;
@@ -65,7 +67,7 @@ static int __init populate_memnodemap(
if (memnodemap[addr >> shift] != 0xff)
return -1;
memnodemap[addr >> shift] = i;
- addr += (1 << shift);
+ addr += (1UL << shift);
} while (addr < end);
res = 1;
}
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index b90e8fe..35f1f1a 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -128,6 +128,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
pte_t *kpte;
struct page *kpte_page;
unsigned kpte_flags;
+ pgprot_t ref_prot2;
kpte = lookup_address(address);
if (!kpte) return 0;
kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
@@ -140,10 +141,14 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
* split_large_page will take the reference for this change_page_attr
* on the split page.
*/
- struct page *split = split_large_page(address, prot, ref_prot);
+
+ struct page *split;
+ ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
+
+ split = split_large_page(address, prot, ref_prot2);
if (!split)
return -ENOMEM;
- set_pte(kpte,mk_pte(split, ref_prot));
+ set_pte(kpte,mk_pte(split, ref_prot2));
kpte_page = split;
}
get_page(kpte_page);
diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile
index bb34e5e..a8f75a2 100644
--- a/arch/x86_64/pci/Makefile
+++ b/arch/x86_64/pci/Makefile
@@ -11,7 +11,7 @@ obj-y += fixup.o
obj-$(CONFIG_ACPI) += acpi.o
obj-y += legacy.o irq.o common.o
# mmconfig has a 64bit special
-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
obj-$(CONFIG_NUMA) += k8-bus.o
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index a0838c4..f16c0d5 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -8,18 +8,21 @@
#include <linux/pci.h>
#include <linux/init.h>
#include <linux/acpi.h>
+#include <linux/bitmap.h>
#include "pci.h"
#define MMCONFIG_APER_SIZE (256*1024*1024)
+static DECLARE_BITMAP(fallback_slots, 32);
+
/* Static virtual mapping of the MMCONFIG aperture */
struct mmcfg_virt {
struct acpi_table_mcfg_config *cfg;
- char *virt;
+ char __iomem *virt;
};
static struct mmcfg_virt *pci_mmcfg_virt;
-static char *get_virt(unsigned int seg, int bus)
+static char __iomem *get_virt(unsigned int seg, unsigned bus)
{
int cfg_num = -1;
struct acpi_table_mcfg_config *cfg;
@@ -27,10 +30,9 @@ static char *get_virt(unsigned int seg, int bus)
while (1) {
++cfg_num;
if (cfg_num >= pci_mmcfg_config_num) {
- /* something bad is going on, no cfg table is found. */
- /* so we fall back to the old way we used to do this */
- /* and just rely on the first entry to be correct. */
- return pci_mmcfg_virt[0].virt;
+ /* Not found - fall back to type 1. This happens
+ e.g. on the internal devices of a K8 northbridge. */
+ return NULL;
}
cfg = pci_mmcfg_virt[cfg_num].cfg;
if (cfg->pci_segment_group_number != seg)
@@ -41,20 +43,30 @@ static char *get_virt(unsigned int seg, int bus)
}
}
-static inline char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
+static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
{
-
- return get_virt(seg, bus) + ((bus << 20) | (devfn << 12));
+ char __iomem *addr;
+ if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots))
+ return NULL;
+ addr = get_virt(seg, bus);
+ if (!addr)
+ return NULL;
+ return addr + ((bus << 20) | (devfn << 12));
}
static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 *value)
{
- char *addr = pci_dev_base(seg, bus, devfn);
+ char __iomem *addr;
+ /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095)))
return -EINVAL;
+ addr = pci_dev_base(seg, bus, devfn);
+ if (!addr)
+ return pci_conf1_read(seg,bus,devfn,reg,len,value);
+
switch (len) {
case 1:
*value = readb(addr + reg);
@@ -73,11 +85,16 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
unsigned int devfn, int reg, int len, u32 value)
{
- char *addr = pci_dev_base(seg, bus, devfn);
+ char __iomem *addr;
+ /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
return -EINVAL;
+ addr = pci_dev_base(seg, bus, devfn);
+ if (!addr)
+ return pci_conf1_write(seg,bus,devfn,reg,len,value);
+
switch (len) {
case 1:
writeb(value, addr + reg);
@@ -98,6 +115,30 @@ static struct pci_raw_ops pci_mmcfg = {
.write = pci_mmcfg_write,
};
+/* K8 systems have some devices (typically in the builtin northbridge)
+ that are only accessible using type1
+ Normally this can be expressed in the MCFG by not listing them
+ and assigning suitable _SEGs, but this isn't implemented in some BIOS.
+ Instead try to discover all devices on bus 0 that are unreachable using MM
+ and fallback for them.
+ We only do this for bus 0/seg 0 */
+static __init void unreachable_devices(void)
+{
+ int i;
+ for (i = 0; i < 32; i++) {
+ u32 val1;
+ char __iomem *addr;
+
+ pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1);
+ if (val1 == 0xffffffff)
+ continue;
+ addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0));
+ if (addr == NULL|| readl(addr) != val1) {
+ set_bit(i, &fallback_slots);
+ }
+ }
+}
+
static int __init pci_mmcfg_init(void)
{
int i;
@@ -128,6 +169,8 @@ static int __init pci_mmcfg_init(void)
printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address);
}
+ unreachable_devices();
+
raw_pci_ops = &pci_mmcfg;
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
OpenPOWER on IntegriCloud