diff options
author | Len Brown <len.brown@intel.com> | 2005-12-06 17:31:30 -0500 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2005-12-06 17:31:30 -0500 |
commit | 3d5271f9883cba7b54762bc4fe027d4172f06db7 (patch) | |
tree | ab8a881a14478598a0c8bda0d26c62cdccfffd6d /arch/ia64/kernel | |
parent | 378b2556f4e09fa6f87ff0cb5c4395ff28257d02 (diff) | |
parent | 9115a6c787596e687df03010d97fccc5e0762506 (diff) | |
download | op-kernel-dev-3d5271f9883cba7b54762bc4fe027d4172f06db7.zip op-kernel-dev-3d5271f9883cba7b54762bc4fe027d4172f06db7.tar.gz |
Pull release into acpica branch
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/acpi.c | 13 | ||||
-rw-r--r-- | arch/ia64/kernel/cyclone.c | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/efi.c | 510 | ||||
-rw-r--r-- | arch/ia64/kernel/ia64_ksyms.c | 1 | ||||
-rw-r--r-- | arch/ia64/kernel/irq.c | 12 | ||||
-rw-r--r-- | arch/ia64/kernel/ivt.S | 142 | ||||
-rw-r--r-- | arch/ia64/kernel/kprobes.c | 144 | ||||
-rw-r--r-- | arch/ia64/kernel/mca.c | 129 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_asm.S | 96 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_drv.c | 39 | ||||
-rw-r--r-- | arch/ia64/kernel/module.c | 6 | ||||
-rw-r--r-- | arch/ia64/kernel/patch.c | 16 | ||||
-rw-r--r-- | arch/ia64/kernel/perfmon.c | 5 | ||||
-rw-r--r-- | arch/ia64/kernel/process.c | 42 | ||||
-rw-r--r-- | arch/ia64/kernel/ptrace.c | 28 | ||||
-rw-r--r-- | arch/ia64/kernel/setup.c | 71 | ||||
-rw-r--r-- | arch/ia64/kernel/signal.c | 11 | ||||
-rw-r--r-- | arch/ia64/kernel/smp.c | 10 | ||||
-rw-r--r-- | arch/ia64/kernel/smpboot.c | 7 | ||||
-rw-r--r-- | arch/ia64/kernel/time.c | 4 | ||||
-rw-r--r-- | arch/ia64/kernel/traps.c | 62 | ||||
-rw-r--r-- | arch/ia64/kernel/uncached.c | 17 |
22 files changed, 884 insertions, 482 deletions
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 7e92647..9ad94dd 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -838,7 +838,7 @@ EXPORT_SYMBOL(acpi_unmap_lsapic); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ #ifdef CONFIG_ACPI_NUMA -acpi_status __devinit +static acpi_status __devinit acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; @@ -890,7 +890,16 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret) map_iosapic_to_node(gsi_base, node); return AE_OK; } -#endif /* CONFIG_NUMA */ + +static int __init +acpi_map_iosapics (void) +{ + acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL); + return 0; +} + +fs_initcall(acpi_map_iosapics); +#endif /* CONFIG_ACPI_NUMA */ int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) { diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c index 768c7e4..6ade379 100644 --- a/arch/ia64/kernel/cyclone.c +++ b/arch/ia64/kernel/cyclone.c @@ -2,6 +2,7 @@ #include <linux/smp.h> #include <linux/time.h> #include <linux/errno.h> +#include <linux/timex.h> #include <asm/io.h> /* IBM Summit (EXA) Cyclone counter code*/ diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index 179f230..a3aa45c 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -239,57 +239,30 @@ is_available_memory (efi_memory_desc_t *md) return 0; } -/* - * Trim descriptor MD so its starts at address START_ADDR. If the descriptor covers - * memory that is normally available to the kernel, issue a warning that some memory - * is being ignored. - */ -static void -trim_bottom (efi_memory_desc_t *md, u64 start_addr) -{ - u64 num_skipped_pages; +typedef struct kern_memdesc { + u64 attribute; + u64 start; + u64 num_pages; +} kern_memdesc_t; - if (md->phys_addr >= start_addr || !md->num_pages) - return; - - num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT; - if (num_skipped_pages > md->num_pages) - num_skipped_pages = md->num_pages; - - if (is_available_memory(md)) - printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " - "at 0x%lx\n", __FUNCTION__, - (num_skipped_pages << EFI_PAGE_SHIFT) >> 10, - md->phys_addr, start_addr - IA64_GRANULE_SIZE); - /* - * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory - * descriptor list to become unsorted. In such a case, md->num_pages will be - * zero, so the Right Thing will happen. - */ - md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT; - md->num_pages -= num_skipped_pages; -} +static kern_memdesc_t *kern_memmap; static void -trim_top (efi_memory_desc_t *md, u64 end_addr) +walk (efi_freemem_callback_t callback, void *arg, u64 attr) { - u64 num_dropped_pages, md_end_addr; - - md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); - - if (md_end_addr <= end_addr || !md->num_pages) - return; + kern_memdesc_t *k; + u64 start, end, voff; - num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT; - if (num_dropped_pages > md->num_pages) - num_dropped_pages = md->num_pages; - - if (is_available_memory(md)) - printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " - "at 0x%lx\n", __FUNCTION__, - (num_dropped_pages << EFI_PAGE_SHIFT) >> 10, - md->phys_addr, end_addr); - md->num_pages -= num_dropped_pages; + voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET; + for (k = kern_memmap; k->start != ~0UL; k++) { + if (k->attribute != attr) + continue; + start = PAGE_ALIGN(k->start); + end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK; + if (start < end) + if ((*callback)(start + voff, end + voff, arg) < 0) + return; + } } /* @@ -299,148 +272,19 @@ trim_top (efi_memory_desc_t *md, u64 end_addr) void efi_memmap_walk (efi_freemem_callback_t callback, void *arg) { - int prev_valid = 0; - struct range { - u64 start; - u64 end; - } prev, curr; - void *efi_map_start, *efi_map_end, *p, *q; - efi_memory_desc_t *md, *check_md; - u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0; - unsigned long total_mem = 0; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - - /* skip over non-WB memory descriptors; that's all we're interested in... */ - if (!(md->attribute & EFI_MEMORY_WB)) - continue; - - /* - * granule_addr is the base of md's first granule. - * [granule_addr - first_non_wb_addr) is guaranteed to - * be contiguous WB memory. - */ - granule_addr = GRANULEROUNDDOWN(md->phys_addr); - first_non_wb_addr = max(first_non_wb_addr, granule_addr); - - if (first_non_wb_addr < md->phys_addr) { - trim_bottom(md, granule_addr + IA64_GRANULE_SIZE); - granule_addr = GRANULEROUNDDOWN(md->phys_addr); - first_non_wb_addr = max(first_non_wb_addr, granule_addr); - } - - for (q = p; q < efi_map_end; q += efi_desc_size) { - check_md = q; - - if ((check_md->attribute & EFI_MEMORY_WB) && - (check_md->phys_addr == first_non_wb_addr)) - first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT; - else - break; /* non-WB or hole */ - } - - last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr); - if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) - trim_top(md, last_granule_addr); - - if (is_available_memory(md)) { - if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) { - if (md->phys_addr >= max_addr) - continue; - md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT; - first_non_wb_addr = max_addr; - } - - if (total_mem >= mem_limit) - continue; - - if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) { - unsigned long limit_addr = md->phys_addr; - - limit_addr += mem_limit - total_mem; - limit_addr = GRANULEROUNDDOWN(limit_addr); - - if (md->phys_addr > limit_addr) - continue; - - md->num_pages = (limit_addr - md->phys_addr) >> - EFI_PAGE_SHIFT; - first_non_wb_addr = max_addr = md->phys_addr + - (md->num_pages << EFI_PAGE_SHIFT); - } - total_mem += (md->num_pages << EFI_PAGE_SHIFT); - - if (md->num_pages == 0) - continue; - - curr.start = PAGE_OFFSET + md->phys_addr; - curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); - - if (!prev_valid) { - prev = curr; - prev_valid = 1; - } else { - if (curr.start < prev.start) - printk(KERN_ERR "Oops: EFI memory table not ordered!\n"); - - if (prev.end == curr.start) { - /* merge two consecutive memory ranges */ - prev.end = curr.end; - } else { - start = PAGE_ALIGN(prev.start); - end = prev.end & PAGE_MASK; - if ((end > start) && (*callback)(start, end, arg) < 0) - return; - prev = curr; - } - } - } - } - if (prev_valid) { - start = PAGE_ALIGN(prev.start); - end = prev.end & PAGE_MASK; - if (end > start) - (*callback)(start, end, arg); - } + walk(callback, arg, EFI_MEMORY_WB); } /* - * Walk the EFI memory map to pull out leftover pages in the lower - * memory regions which do not end up in the regular memory map and - * stick them into the uncached allocator - * - * The regular walk function is significantly more complex than the - * uncached walk which means it really doesn't make sense to try and - * marge the two. + * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that + * has memory that is available for uncached allocator. */ -void __init -efi_memmap_walk_uc (efi_freemem_callback_t callback) +void +efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg) { - void *efi_map_start, *efi_map_end, *p; - efi_memory_desc_t *md; - u64 efi_desc_size, start, end; - - efi_map_start = __va(ia64_boot_param->efi_memmap); - efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; - efi_desc_size = ia64_boot_param->efi_memdesc_size; - - for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { - md = p; - if (md->attribute == EFI_MEMORY_UC) { - start = PAGE_ALIGN(md->phys_addr); - end = PAGE_ALIGN((md->phys_addr+(md->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK); - if ((*callback)(start, end, NULL) < 0) - return; - } - } + walk(callback, arg, EFI_MEMORY_UC); } - /* * Look for the PAL_CODE region reported by EFI and maps it using an * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor @@ -862,3 +706,307 @@ efi_uart_console_only(void) printk(KERN_ERR "Malformed %s value\n", name); return 0; } + +#define efi_md_size(md) (md->num_pages << EFI_PAGE_SHIFT) + +static inline u64 +kmd_end(kern_memdesc_t *kmd) +{ + return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT)); +} + +static inline u64 +efi_md_end(efi_memory_desc_t *md) +{ + return (md->phys_addr + efi_md_size(md)); +} + +static inline int +efi_wb(efi_memory_desc_t *md) +{ + return (md->attribute & EFI_MEMORY_WB); +} + +static inline int +efi_uc(efi_memory_desc_t *md) +{ + return (md->attribute & EFI_MEMORY_UC); +} + +/* + * Look for the first granule aligned memory descriptor memory + * that is big enough to hold EFI memory map. Make sure this + * descriptor is atleast granule sized so it does not get trimmed + */ +struct kern_memdesc * +find_memmap_space (void) +{ + u64 contig_low=0, contig_high=0; + u64 as = 0, ae; + void *efi_map_start, *efi_map_end, *p, *q; + efi_memory_desc_t *md, *pmd = NULL, *check_md; + u64 space_needed, efi_desc_size; + unsigned long total_mem = 0; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + /* + * Worst case: we need 3 kernel descriptors for each efi descriptor + * (if every entry has a WB part in the middle, and UC head and tail), + * plus one for the end marker. + */ + space_needed = sizeof(kern_memdesc_t) * + (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1); + + for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { + md = p; + if (!efi_wb(md)) { + continue; + } + if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) { + contig_low = GRANULEROUNDUP(md->phys_addr); + contig_high = efi_md_end(md); + for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) { + check_md = q; + if (!efi_wb(check_md)) + break; + if (contig_high != check_md->phys_addr) + break; + contig_high = efi_md_end(check_md); + } + contig_high = GRANULEROUNDDOWN(contig_high); + } + if (!is_available_memory(md) || md->type == EFI_LOADER_DATA) + continue; + + /* Round ends inward to granule boundaries */ + as = max(contig_low, md->phys_addr); + ae = min(contig_high, efi_md_end(md)); + + /* keep within max_addr= command line arg */ + ae = min(ae, max_addr); + if (ae <= as) + continue; + + /* avoid going over mem= command line arg */ + if (total_mem + (ae - as) > mem_limit) + ae -= total_mem + (ae - as) - mem_limit; + + if (ae <= as) + continue; + + if (ae - as > space_needed) + break; + } + if (p >= efi_map_end) + panic("Can't allocate space for kernel memory descriptors"); + + return __va(as); +} + +/* + * Walk the EFI memory map and gather all memory available for kernel + * to use. We can allocate partial granules only if the unavailable + * parts exist, and are WB. + */ +void +efi_memmap_init(unsigned long *s, unsigned long *e) +{ + struct kern_memdesc *k, *prev = 0; + u64 contig_low=0, contig_high=0; + u64 as, ae, lim; + void *efi_map_start, *efi_map_end, *p, *q; + efi_memory_desc_t *md, *pmd = NULL, *check_md; + u64 efi_desc_size; + unsigned long total_mem = 0; + + k = kern_memmap = find_memmap_space(); + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { + md = p; + if (!efi_wb(md)) { + if (efi_uc(md) && (md->type == EFI_CONVENTIONAL_MEMORY || + md->type == EFI_BOOT_SERVICES_DATA)) { + k->attribute = EFI_MEMORY_UC; + k->start = md->phys_addr; + k->num_pages = md->num_pages; + k++; + } + continue; + } + if (pmd == NULL || !efi_wb(pmd) || efi_md_end(pmd) != md->phys_addr) { + contig_low = GRANULEROUNDUP(md->phys_addr); + contig_high = efi_md_end(md); + for (q = p + efi_desc_size; q < efi_map_end; q += efi_desc_size) { + check_md = q; + if (!efi_wb(check_md)) + break; + if (contig_high != check_md->phys_addr) + break; + contig_high = efi_md_end(check_md); + } + contig_high = GRANULEROUNDDOWN(contig_high); + } + if (!is_available_memory(md)) + continue; + + /* + * Round ends inward to granule boundaries + * Give trimmings to uncached allocator + */ + if (md->phys_addr < contig_low) { + lim = min(efi_md_end(md), contig_low); + if (efi_uc(md)) { + if (k > kern_memmap && (k-1)->attribute == EFI_MEMORY_UC && + kmd_end(k-1) == md->phys_addr) { + (k-1)->num_pages += (lim - md->phys_addr) >> EFI_PAGE_SHIFT; + } else { + k->attribute = EFI_MEMORY_UC; + k->start = md->phys_addr; + k->num_pages = (lim - md->phys_addr) >> EFI_PAGE_SHIFT; + k++; + } + } + as = contig_low; + } else + as = md->phys_addr; + + if (efi_md_end(md) > contig_high) { + lim = max(md->phys_addr, contig_high); + if (efi_uc(md)) { + if (lim == md->phys_addr && k > kern_memmap && + (k-1)->attribute == EFI_MEMORY_UC && + kmd_end(k-1) == md->phys_addr) { + (k-1)->num_pages += md->num_pages; + } else { + k->attribute = EFI_MEMORY_UC; + k->start = lim; + k->num_pages = (efi_md_end(md) - lim) >> EFI_PAGE_SHIFT; + k++; + } + } + ae = contig_high; + } else + ae = efi_md_end(md); + + /* keep within max_addr= command line arg */ + ae = min(ae, max_addr); + if (ae <= as) + continue; + + /* avoid going over mem= command line arg */ + if (total_mem + (ae - as) > mem_limit) + ae -= total_mem + (ae - as) - mem_limit; + + if (ae <= as) + continue; + if (prev && kmd_end(prev) == md->phys_addr) { + prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT; + total_mem += ae - as; + continue; + } + k->attribute = EFI_MEMORY_WB; + k->start = as; + k->num_pages = (ae - as) >> EFI_PAGE_SHIFT; + total_mem += ae - as; + prev = k++; + } + k->start = ~0L; /* end-marker */ + + /* reserve the memory we are using for kern_memmap */ + *s = (u64)kern_memmap; + *e = (u64)++k; +} + +void +efi_initialize_iomem_resources(struct resource *code_resource, + struct resource *data_resource) +{ + struct resource *res; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + char *name; + unsigned long flags; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + res = NULL; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (md->num_pages == 0) /* should not happen */ + continue; + + flags = IORESOURCE_MEM; + switch (md->type) { + + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + continue; + + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_CONVENTIONAL_MEMORY: + if (md->attribute & EFI_MEMORY_WP) { + name = "System ROM"; + flags |= IORESOURCE_READONLY; + } else { + name = "System RAM"; + } + break; + + case EFI_ACPI_MEMORY_NVS: + name = "ACPI Non-volatile Storage"; + flags |= IORESOURCE_BUSY; + break; + + case EFI_UNUSABLE_MEMORY: + name = "reserved"; + flags |= IORESOURCE_BUSY | IORESOURCE_DISABLED; + break; + + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_ACPI_RECLAIM_MEMORY: + default: + name = "reserved"; + flags |= IORESOURCE_BUSY; + break; + } + + if ((res = kzalloc(sizeof(struct resource), GFP_KERNEL)) == NULL) { + printk(KERN_ERR "failed to alocate resource for iomem\n"); + return; + } + + res->name = name; + res->start = md->phys_addr; + res->end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - 1; + res->flags = flags; + + if (insert_resource(&iomem_resource, res) < 0) + kfree(res); + else { + /* + * We don't know which region contains + * kernel data so we try it repeatedly and + * let the resource manager test it. + */ + insert_resource(res, code_resource); + insert_resource(res, data_resource); + } + } +} diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c index 0157281..5db9d3b 100644 --- a/arch/ia64/kernel/ia64_ksyms.c +++ b/arch/ia64/kernel/ia64_ksyms.c @@ -42,6 +42,7 @@ EXPORT_SYMBOL(clear_page); #ifdef CONFIG_VIRTUAL_MEM_MAP #include <linux/bootmem.h> +EXPORT_SYMBOL(min_low_pfn); /* defined by bootmem.c, but not exported by generic code */ EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */ #endif diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 205d980..d33244c 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -57,9 +57,9 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for (j=0; j<NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "CPU%d ",j); + for_each_online_cpu(j) { + seq_printf(p, "CPU%d ",j); + } seq_putc(p, '\n'); } @@ -72,9 +72,9 @@ int show_interrupts(struct seq_file *p, void *v) #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else - for (j = 0; j < NR_CPUS; j++) - if (cpu_online(j)) - seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + for_each_online_cpu(j) { + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + } #endif seq_printf(p, " %14s", irq_desc[i].handler->typename); seq_printf(p, " %s", action->name); diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S index c13ca0d..301f2e9 100644 --- a/arch/ia64/kernel/ivt.S +++ b/arch/ia64/kernel/ivt.S @@ -91,16 +91,17 @@ ENTRY(vhpt_miss) * (the "original") TLB miss, which may either be caused by an instruction * fetch or a data access (or non-access). * - * What we do here is normal TLB miss handing for the _original_ miss, followed - * by inserting the TLB entry for the virtual page table page that the VHPT - * walker was attempting to access. The latter gets inserted as long - * as both L1 and L2 have valid mappings for the faulting address. - * The TLB entry for the original miss gets inserted only if - * the L3 entry indicates that the page is present. + * What we do here is normal TLB miss handing for the _original_ miss, + * followed by inserting the TLB entry for the virtual page table page + * that the VHPT walker was attempting to access. The latter gets + * inserted as long as page table entry above pte level have valid + * mappings for the faulting address. The TLB entry for the original + * miss gets inserted only if the pte entry indicates that the page is + * present. * * do_page_fault gets invoked in the following cases: * - the faulting virtual address uses unimplemented address bits - * - the faulting virtual address has no L1, L2, or L3 mapping + * - the faulting virtual address has no valid page table mapping */ mov r16=cr.ifa // get address that caused the TLB miss #ifdef CONFIG_HUGETLB_PAGE @@ -114,7 +115,7 @@ ENTRY(vhpt_miss) shl r21=r16,3 // shift bit 60 into sign bit shr.u r17=r16,61 // get the region number into r17 ;; - shr r22=r21,3 + shr.u r22=r21,3 #ifdef CONFIG_HUGETLB_PAGE extr.u r26=r25,2,6 ;; @@ -126,7 +127,7 @@ ENTRY(vhpt_miss) #endif ;; cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? - shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address + shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit ;; (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place @@ -137,24 +138,38 @@ ENTRY(vhpt_miss) (p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT (p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r22,PMD_SHIFT // shift L2 index into position +#ifdef CONFIG_PGTABLE_4 + shr.u r28=r22,PUD_SHIFT // shift pud index into position +#else + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +#endif + ;; + ld8 r17=[r17] // get *pgd (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? +#ifdef CONFIG_PGTABLE_4 + dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) ;; - ld8 r17=[r17] // fetch the L1 entry (may be 0) + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +(p7) ld8 r29=[r28] // get *pud (may be 0) ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry +(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL? + dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) +#else + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr) +#endif ;; -(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift L3 index into position +(p7) ld8 r20=[r17] // get *pmd (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift pte index into position ;; -(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL? - dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry +(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL? + dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) ;; -(p7) ld8 r18=[r21] // read the L3 PTE - mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss +(p7) ld8 r18=[r21] // read *pte + mov r19=cr.isr // cr.isr bit 32 tells us if this is an insn miss ;; (p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? mov r22=cr.iha // get the VHPT address that caused the TLB miss @@ -188,18 +203,33 @@ ENTRY(vhpt_miss) dv_serialize_data /* - * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g + * Re-check pagetable entry. If they changed, we may have received a ptc.g * between reading the pagetable and the "itc". If so, flush the entry we - * inserted and retry. + * inserted and retry. At this point, we have: + * + * r28 = equivalent of pud_offset(pgd, ifa) + * r17 = equivalent of pmd_offset(pud, ifa) + * r21 = equivalent of pte_offset(pmd, ifa) + * + * r29 = *pud + * r20 = *pmd + * r18 = *pte */ - ld8 r25=[r21] // read L3 PTE again - ld8 r26=[r17] // read L2 entry again + ld8 r25=[r21] // read *pte again + ld8 r26=[r17] // read *pmd again +#ifdef CONFIG_PGTABLE_4 + ld8 r19=[r28] // read *pud again +#endif + cmp.ne p6,p7=r0,r0 ;; - cmp.ne p6,p7=r26,r20 // did L2 entry change + cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change +#ifdef CONFIG_PGTABLE_4 + cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change +#endif mov r27=PAGE_SHIFT<<2 ;; (p6) ptc.l r22,r27 // purge PTE page translation -(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change +(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change ;; (p6) ptc.l r16,r27 // purge translation #endif @@ -214,19 +244,19 @@ END(vhpt_miss) ENTRY(itlb_miss) DBG_FAULT(1) /* - * The ITLB handler accesses the L3 PTE via the virtually mapped linear + * The ITLB handler accesses the PTE via the virtually mapped linear * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the L3 PTE read - * and go on normally after that. + * mode, walk the page table, and then re-execute the PTE read and + * go on normally after that. */ mov r16=cr.ifa // get virtual address mov r29=b0 // save b0 mov r31=pr // save predicates .itlb_fault: - mov r17=cr.iha // get virtual address of L3 PTE + mov r17=cr.iha // get virtual address of PTE movl r30=1f // load nested fault continuation point ;; -1: ld8 r18=[r17] // read L3 PTE +1: ld8 r18=[r17] // read *pte ;; mov b0=r29 tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? @@ -241,7 +271,7 @@ ENTRY(itlb_miss) */ dv_serialize_data - ld8 r19=[r17] // read L3 PTE again and see if same + ld8 r19=[r17] // read *pte again and see if same mov r20=PAGE_SHIFT<<2 // setup page size for purge ;; cmp.ne p7,p0=r18,r19 @@ -258,19 +288,19 @@ END(itlb_miss) ENTRY(dtlb_miss) DBG_FAULT(2) /* - * The DTLB handler accesses the L3 PTE via the virtually mapped linear + * The DTLB handler accesses the PTE via the virtually mapped linear * page table. If a nested TLB miss occurs, we switch into physical - * mode, walk the page table, and then re-execute the L3 PTE read - * and go on normally after that. + * mode, walk the page table, and then re-execute the PTE read and + * go on normally after that. */ mov r16=cr.ifa // get virtual address mov r29=b0 // save b0 mov r31=pr // save predicates dtlb_fault: - mov r17=cr.iha // get virtual address of L3 PTE + mov r17=cr.iha // get virtual address of PTE movl r30=1f // load nested fault continuation point ;; -1: ld8 r18=[r17] // read L3 PTE +1: ld8 r18=[r17] // read *pte ;; mov b0=r29 tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? @@ -285,7 +315,7 @@ dtlb_fault: */ dv_serialize_data - ld8 r19=[r17] // read L3 PTE again and see if same + ld8 r19=[r17] // read *pte again and see if same mov r20=PAGE_SHIFT<<2 // setup page size for purge ;; cmp.ne p7,p0=r18,r19 @@ -399,7 +429,7 @@ ENTRY(nested_dtlb_miss) * r30: continuation address * r31: saved pr * - * Output: r17: physical address of L3 PTE of faulting address + * Output: r17: physical address of PTE of faulting address * r29: saved b0 * r30: continuation address * r31: saved pr @@ -429,21 +459,33 @@ ENTRY(nested_dtlb_miss) (p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT (p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 ;; -(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 -(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] cmp.eq p7,p6=0,r21 // unused address bits all zeroes? - shr.u r18=r22,PMD_SHIFT // shift L2 index into position +#ifdef CONFIG_PGTABLE_4 + shr.u r18=r22,PUD_SHIFT // shift pud index into position +#else + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +#endif ;; - ld8 r17=[r17] // fetch the L1 entry (may be 0) + ld8 r17=[r17] // get *pgd (may be 0) ;; -(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? - dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry +(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) ;; -(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) - shr.u r19=r22,PAGE_SHIFT // shift L3 index into position +#ifdef CONFIG_PGTABLE_4 +(p7) ld8 r17=[r17] // get *pud (may be 0) + shr.u r18=r22,PMD_SHIFT // shift pmd index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) + ;; +#endif +(p7) ld8 r17=[r17] // get *pmd (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift pte index into position ;; -(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? - dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr); (p6) br.cond.spnt page_fault mov b0=r30 br.sptk.many b0 // return to continuation point diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index 471086b..2895d6e 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -26,7 +26,6 @@ #include <linux/config.h> #include <linux/kprobes.h> #include <linux/ptrace.h> -#include <linux/spinlock.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/preempt.h> @@ -38,13 +37,8 @@ extern void jprobe_inst_return(void); -/* kprobe_status settings */ -#define KPROBE_HIT_ACTIVE 0x00000001 -#define KPROBE_HIT_SS 0x00000002 - -static struct kprobe *current_kprobe, *kprobe_prev; -static unsigned long kprobe_status, kprobe_status_prev; -static struct pt_regs jprobe_saved_regs; +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); enum instruction_type {A, I, M, F, B, L, X, u}; static enum instruction_type bundle_encoding[32][3] = { @@ -313,21 +307,22 @@ static int __kprobes valid_kprobe_addr(int template, int slot, return 0; } -static inline void save_previous_kprobe(void) +static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { - kprobe_prev = current_kprobe; - kprobe_status_prev = kprobe_status; + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; } -static inline void restore_previous_kprobe(void) +static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - current_kprobe = kprobe_prev; - kprobe_status = kprobe_status_prev; + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; } -static inline void set_current_kprobe(struct kprobe *p) +static inline void set_current_kprobe(struct kprobe *p, + struct kprobe_ctlblk *kcb) { - current_kprobe = p; + __get_cpu_var(current_kprobe) = p; } static void kretprobe_trampoline(void) @@ -347,11 +342,12 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) struct kretprobe_instance *ri = NULL; struct hlist_head *head; struct hlist_node *node, *tmp; - unsigned long orig_ret_address = 0; + unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = ((struct fnptr *)kretprobe_trampoline)->ip; - head = kretprobe_inst_table_head(current); + spin_lock_irqsave(&kretprobe_lock, flags); + head = kretprobe_inst_table_head(current); /* * It is possible to have multiple instances associated with a given @@ -367,9 +363,9 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * kretprobe_trampoline */ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { - if (ri->task != current) + if (ri->task != current) /* another task is sharing our hash bucket */ - continue; + continue; if (ri->rp && ri->rp->handler) ri->rp->handler(ri, regs); @@ -389,17 +385,19 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); regs->cr_iip = orig_ret_address; - unlock_kprobes(); + reset_current_kprobe(); + spin_unlock_irqrestore(&kretprobe_lock, flags); preempt_enable_no_resched(); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption. - */ - return 1; + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; } +/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -606,17 +604,22 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) int ret = 0; struct pt_regs *regs = args->regs; kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); + struct kprobe_ctlblk *kcb; + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Handle recursion cases */ if (kprobe_running()) { p = get_kprobe(addr); if (p) { - if ( (kprobe_status == KPROBE_HIT_SS) && + if ((kcb->kprobe_status == KPROBE_HIT_SS) && (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) { ia64_psr(regs)->ss = 0; - unlock_kprobes(); goto no_kprobe; } /* We have reentered the pre_kprobe_handler(), since @@ -625,17 +628,17 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) * just single step on the instruction of the new probe * without calling any user handlers. */ - save_previous_kprobe(); - set_current_kprobe(p); + save_previous_kprobe(kcb); + set_current_kprobe(p, kcb); p->nmissed++; prepare_ss(p, regs); - kprobe_status = KPROBE_REENTER; + kcb->kprobe_status = KPROBE_REENTER; return 1; } else if (args->err == __IA64_BREAK_JPROBE) { /* * jprobe instrumented function just completed */ - p = current_kprobe; + p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } @@ -645,10 +648,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) } } - lock_kprobes(); p = get_kprobe(addr); if (!p) { - unlock_kprobes(); if (!is_ia64_break_inst(regs)) { /* * The breakpoint instruction was removed right @@ -665,8 +666,8 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) goto no_kprobe; } - kprobe_status = KPROBE_HIT_ACTIVE; - set_current_kprobe(p); + set_current_kprobe(p, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (p->pre_handler && p->pre_handler(p, regs)) /* @@ -678,7 +679,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) ss_probe: prepare_ss(p, regs); - kprobe_status = KPROBE_HIT_SS; + kcb->kprobe_status = KPROBE_HIT_SS; return 1; no_kprobe: @@ -688,23 +689,25 @@ no_kprobe: static int __kprobes post_kprobes_handler(struct pt_regs *regs) { - if (!kprobe_running()) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) return 0; - if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { - kprobe_status = KPROBE_HIT_SSDONE; - current_kprobe->post_handler(current_kprobe, regs, 0); + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); } - resume_execution(current_kprobe, regs); + resume_execution(cur, regs); /*Restore back the original saved kprobes variables and continue. */ - if (kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); goto out; } - - unlock_kprobes(); + reset_current_kprobe(); out: preempt_enable_no_resched(); @@ -713,16 +716,15 @@ out: static int __kprobes kprobes_fault_handler(struct pt_regs *regs, int trapnr) { - if (!kprobe_running()) - return 0; + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (current_kprobe->fault_handler && - current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; - if (kprobe_status & KPROBE_HIT_SS) { - resume_execution(current_kprobe, regs); - unlock_kprobes(); + if (kcb->kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs); + reset_current_kprobe(); preempt_enable_no_resched(); } @@ -733,31 +735,42 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + switch(val) { case DIE_BREAK: - if (pre_kprobes_handler(args)) - return NOTIFY_STOP; + /* err is break number from ia64_bad_break() */ + if (args->err == 0x80200 || args->err == 0x80300 || args->err == 0) + if (pre_kprobes_handler(args)) + ret = NOTIFY_STOP; break; - case DIE_SS: - if (post_kprobes_handler(args->regs)) - return NOTIFY_STOP; + case DIE_FAULT: + /* err is vector number from ia64_fault() */ + if (args->err == 36) + if (post_kprobes_handler(args->regs)) + ret = NOTIFY_STOP; break; case DIE_PAGE_FAULT: - if (kprobes_fault_handler(args->regs, args->trapnr)) - return NOTIFY_STOP; + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); + if (kprobe_running() && + kprobes_fault_handler(args->regs, args->trapnr)) + ret = NOTIFY_STOP; + preempt_enable(); default: break; } - return NOTIFY_DONE; + return ret; } int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr = ((struct fnptr *)(jp->entry))->ip; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* save architectural state */ - jprobe_saved_regs = *regs; + kcb->jprobe_saved_regs = *regs; /* after rfi, execute the jprobe instrumented function */ regs->cr_iip = addr & ~0xFULL; @@ -775,7 +788,10 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { - *regs = jprobe_saved_regs; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + *regs = kcb->jprobe_saved_regs; + preempt_enable_no_resched(); return 1; } diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6dc726a..355af15 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -51,6 +51,9 @@ * * 2005-08-12 Keith Owens <kaos@sgi.com> * Convert MCA/INIT handlers to use per event stacks and SAL/OS state. + * + * 2005-10-07 Keith Owens <kaos@sgi.com> + * Add notify_die() hooks. */ #include <linux/config.h> #include <linux/types.h> @@ -58,7 +61,6 @@ #include <linux/sched.h> #include <linux/interrupt.h> #include <linux/irq.h> -#include <linux/kallsyms.h> #include <linux/smp_lock.h> #include <linux/bootmem.h> #include <linux/acpi.h> @@ -69,6 +71,7 @@ #include <linux/workqueue.h> #include <asm/delay.h> +#include <asm/kdebug.h> #include <asm/machvec.h> #include <asm/meminit.h> #include <asm/page.h> @@ -132,6 +135,14 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); static int mca_init; + +static void inline +ia64_mca_spin(const char *func) +{ + printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + while (1) + cpu_relax(); +} /* * IA64_MCA log support */ @@ -508,9 +519,7 @@ ia64_mca_wakeup_all(void) int cpu; /* Clear the Rendez checkin flag for all cpus */ - for(cpu = 0; cpu < NR_CPUS; cpu++) { - if (!cpu_online(cpu)) - continue; + for_each_online_cpu(cpu) { if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) ia64_mca_wakeup(cpu); } @@ -528,13 +537,16 @@ ia64_mca_wakeup_all(void) * Outputs : None */ static irqreturn_t -ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) +ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs) { unsigned long flags; int cpu = smp_processor_id(); /* Mask all interrupts */ local_irq_save(flags); + if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; /* Register with the SAL monarch that the slave has @@ -542,10 +554,18 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) */ ia64_sal_mc_rendez(); + if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); + /* Wait for the monarch cpu to exit. */ while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ + if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); + /* Enable all interrupts */ local_irq_restore(flags); return IRQ_HANDLED; @@ -935,6 +955,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); monarch_cpu = cpu; + if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); ia64_wait_for_slaves(cpu); /* Wakeup all the processors which are spinning in the rendezvous loop. @@ -944,6 +967,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, * spinning in SAL does not work. */ ia64_mca_wakeup_all(); + if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); /* Get the MCA error record and log it */ ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); @@ -962,6 +988,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); sos->os_status = IA64_MCA_CORRECTED; } + if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, 0, 0, recover) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); set_curr_task(cpu, previous_current); monarch_cpu = -1; @@ -1016,6 +1045,11 @@ ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) cmc_polling_enabled = 1; spin_unlock(&cmc_history_lock); + /* If we're being hit with CMC interrupts, we won't + * ever execute the schedule_work() below. Need to + * disable CMC interrupts on this processor now. + */ + ia64_mca_cmc_vector_disable(NULL); schedule_work(&cmc_disable_work); /* @@ -1185,6 +1219,37 @@ ia64_mca_cpe_poll (unsigned long dummy) #endif /* CONFIG_ACPI */ +static int +default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data) +{ + int c; + struct task_struct *g, *t; + if (val != DIE_INIT_MONARCH_PROCESS) + return NOTIFY_DONE; + printk(KERN_ERR "Processes interrupted by INIT -"); + for_each_online_cpu(c) { + struct ia64_sal_os_state *s; + t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); + s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); + g = s->prev_task; + if (g) { + if (g->pid) + printk(" %d", g->pid); + else + printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); + } + } + printk("\n\n"); + if (read_trylock(&tasklist_lock)) { + do_each_thread (g, t) { + printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); + show_stack(t, NULL); + } while_each_thread (g, t); + read_unlock(&tasklist_lock); + } + return NOTIFY_DONE; +} + /* * C portion of the OS INIT handler * @@ -1209,8 +1274,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, static atomic_t slaves; static atomic_t monarchs; task_t *previous_current; - int cpu = smp_processor_id(), c; - struct task_struct *g, *t; + int cpu = smp_processor_id(); oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ console_loglevel = 15; /* make sure printks make it to console */ @@ -1250,8 +1314,17 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; while (monarch_cpu == -1) cpu_relax(); /* spin until monarch enters */ + if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); + if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); while (monarch_cpu != -1) cpu_relax(); /* spin until monarch leaves */ + if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); printk("Slave on cpu %d returning to normal service.\n", cpu); set_curr_task(cpu, previous_current); ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; @@ -1260,6 +1333,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, } monarch_cpu = cpu; + if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); /* * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be @@ -1270,27 +1346,16 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, printk("Delaying for 5 seconds...\n"); udelay(5*1000000); ia64_wait_for_slaves(cpu); - printk(KERN_ERR "Processes interrupted by INIT -"); - for_each_online_cpu(c) { - struct ia64_sal_os_state *s; - t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); - s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); - g = s->prev_task; - if (g) { - if (g->pid) - printk(" %d", g->pid); - else - printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); - } - } - printk("\n\n"); - if (read_trylock(&tasklist_lock)) { - do_each_thread (g, t) { - printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); - show_stack(t, NULL); - } while_each_thread (g, t); - read_unlock(&tasklist_lock); - } + /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through + * to default_monarch_init_process() above and just print all the + * tasks. + */ + if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); + if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, 0, 0, 0) + == NOTIFY_STOP) + ia64_mca_spin(__FUNCTION__); printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); atomic_dec(&monarchs); set_curr_task(cpu, previous_current); @@ -1459,6 +1524,10 @@ ia64_mca_init(void) s64 rc; struct ia64_sal_retval isrv; u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ + static struct notifier_block default_init_monarch_nb = { + .notifier_call = default_monarch_init_process, + .priority = 0/* we need to notified last */ + }; IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); @@ -1552,6 +1621,10 @@ ia64_mca_init(void) "(status %ld)\n", rc); return; } + if (register_die_notifier(&default_init_monarch_nb)) { + printk(KERN_ERR "Failed to register default monarch INIT process\n"); + return; + } IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 499a065..db32fc1 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -489,24 +489,27 @@ ia64_state_save: ;; st8 [temp1]=r17,16 // pal_min_state st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT + mov r6=IA64_KR(CURRENT_STACK) + ;; + st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK + st8 [temp2]=r0,16 // prev_task, starts off as NULL mov r6=cr.ifa ;; - st8 [temp1]=r0,16 // prev_task, starts off as NULL - st8 [temp2]=r12,16 // cr.isr + st8 [temp1]=r12,16 // cr.isr + st8 [temp2]=r6,16 // cr.ifa mov r12=cr.itir ;; - st8 [temp1]=r6,16 // cr.ifa - st8 [temp2]=r12,16 // cr.itir + st8 [temp1]=r12,16 // cr.itir + st8 [temp2]=r11,16 // cr.iipa mov r12=cr.iim ;; - st8 [temp1]=r11,16 // cr.iipa - st8 [temp2]=r12,16 // cr.iim - mov r6=cr.iha + st8 [temp1]=r12,16 // cr.iim (p1) mov r12=IA64_MCA_COLD_BOOT (p2) mov r12=IA64_INIT_WARM_BOOT + mov r6=cr.iha ;; - st8 [temp1]=r6,16 // cr.iha - st8 [temp2]=r12 // os_status, default is cold boot + st8 [temp2]=r6,16 // cr.iha + st8 [temp1]=r12 // os_status, default is cold boot mov r6=IA64_MCA_SAME_CONTEXT ;; st8 [temp1]=r6 // context, default is same context @@ -823,9 +826,12 @@ ia64_state_restore: ld8 r12=[temp1],16 // sal_ra ld8 r9=[temp2],16 // sal_gp ;; - ld8 r22=[temp1],24 // pal_min_state, virtual. skip prev_task + ld8 r22=[temp1],16 // pal_min_state, virtual ld8 r21=[temp2],16 // prev_IA64_KR_CURRENT ;; + ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK + ld8 r20=[temp2],16 // prev_task + ;; ld8 temp3=[temp1],16 // cr.isr ld8 temp4=[temp2],16 // cr.ifa ;; @@ -846,6 +852,45 @@ ia64_state_restore: ld8 r8=[temp1] // os_status ld8 r10=[temp2] // context + /* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To + * avoid any dependencies on the algorithm in ia64_switch_to(), just + * purge any existing CURRENT_STACK mapping and insert the new one. + * + * r16 contains prev_IA64_KR_CURRENT_STACK, r21 contains + * prev_IA64_KR_CURRENT, these values may have been changed by the C + * code. Do not use r8, r9, r10, r22, they contain values ready for + * the return to SAL. + */ + + mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r15=r15,IA64_GRANULE_SHIFT + ;; + dep r15=-1,r15,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r15,r18 + ;; + srlz.d + + extr.u r19=r21,61,3 // r21 = prev_IA64_KR_CURRENT + shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region? + or r21=r20,r21 // construct PA | page properties +(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:( + ;; + mov cr.itir=r18 + mov cr.ifa=r21 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d +1: + br.sptk b0 //EndStub////////////////////////////////////////////////////////////////////// @@ -982,6 +1027,7 @@ ia64_set_kernel_registers: add temp4=temp4, temp1 // &struct ia64_sal_os_state.os_gp add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack add r13=temp1, r3 // set current to start of MCA/INIT stack + add r20=temp1, r3 // physical start of MCA/INIT stack ;; ld8 r1=[temp4] // OS GP from SAL OS state ;; @@ -991,7 +1037,35 @@ ia64_set_kernel_registers: ;; mov IA64_KR(CURRENT)=r13 - // FIXME: do I need to wire IA64_KR_CURRENT_STACK and IA64_TR_CURRENT_STACK? + /* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid + * any dependencies on the algorithm in ia64_switch_to(), just purge + * any existing CURRENT_STACK mapping and insert the new one. + */ + + mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r16=r16,IA64_GRANULE_SHIFT + ;; + dep r16=-1,r16,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r16,r18 + ;; + srlz.d + + shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + or r21=r20,r21 // construct PA | page properties + ;; + mov cr.itir=r18 + mov cr.ifa=r13 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d br.sptk b0 diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 80f83d6..3492e32 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -56,8 +56,9 @@ static struct page *page_isolate[MAX_PAGE_ISOLATE]; static int num_page_isolate = 0; typedef enum { - ISOLATE_NG = 0, - ISOLATE_OK = 1 + ISOLATE_NG, + ISOLATE_OK, + ISOLATE_NONE } isolate_status_t; /* @@ -74,7 +75,7 @@ static struct { * @paddr: poisoned memory location * * Return value: - * ISOLATE_OK / ISOLATE_NG + * one of isolate_status_t, ISOLATE_OK/NG/NONE. */ static isolate_status_t @@ -85,7 +86,10 @@ mca_page_isolate(unsigned long paddr) /* whether physical address is valid or not */ if (!ia64_phys_addr_valid(paddr)) - return ISOLATE_NG; + return ISOLATE_NONE; + + if (!pfn_valid(paddr >> PAGE_SHIFT)) + return ISOLATE_NONE; /* convert physical address to physical page number */ p = pfn_to_page(paddr>>PAGE_SHIFT); @@ -104,6 +108,7 @@ mca_page_isolate(unsigned long paddr) return ISOLATE_NG; /* add attribute 'Reserved' and register the page */ + get_page(p); SetPageReserved(p); page_isolate[num_page_isolate++] = p; @@ -122,10 +127,15 @@ mca_handler_bh(unsigned long paddr) current->pid, current->comm); spin_lock(&mca_bh_lock); - if (mca_page_isolate(paddr) == ISOLATE_OK) { + switch (mca_page_isolate(paddr)) { + case ISOLATE_OK: printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); - } else { + break; + case ISOLATE_NG: printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr); + break; + default: + break; } spin_unlock(&mca_bh_lock); @@ -537,9 +547,20 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, (pal_processor_state_info_t*)peidx_psp(peidx); /* - * We cannot recover errors with other than bus_check. + * Processor recovery status must key off of the PAL recovery + * status in the Processor State Parameter. + */ + + /* + * The machine check is corrected. */ - if (psp->cc || psp->rc || psp->uc) + if (psp->cm == 1) + return 1; + + /* + * The error was not contained. Software must be reset. + */ + if (psp->us || psp->ci == 0) return 0; /* @@ -560,8 +581,6 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, return 0; if (pbci->eb && pbci->bsi > 0) return 0; - if (psp->ci == 0) - return 0; /* * This is a local MCA and estimated as recoverble external bus error. diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c index f1aca7c..7a2f0a7 100644 --- a/arch/ia64/kernel/module.c +++ b/arch/ia64/kernel/module.c @@ -947,8 +947,8 @@ void percpu_modcopy (void *pcpudst, const void *src, unsigned long size) { unsigned int i; - for (i = 0; i < NR_CPUS; i++) - if (cpu_possible(i)) - memcpy(pcpudst + __per_cpu_offset[i], src, size); + for_each_cpu(i) { + memcpy(pcpudst + __per_cpu_offset[i], src, size); + } } #endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c index 367804a..6a4ac7d 100644 --- a/arch/ia64/kernel/patch.c +++ b/arch/ia64/kernel/patch.c @@ -64,22 +64,30 @@ ia64_patch (u64 insn_addr, u64 mask, u64 val) void ia64_patch_imm64 (u64 insn_addr, u64 val) { - ia64_patch(insn_addr, + /* The assembler may generate offset pointing to either slot 1 + or slot 2 for a long (2-slot) instruction, occupying slots 1 + and 2. */ + insn_addr &= -16UL; + ia64_patch(insn_addr + 2, 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); - ia64_patch(insn_addr - 1, 0x1ffffffffffUL, val >> 22); + ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); } void ia64_patch_imm60 (u64 insn_addr, u64 val) { - ia64_patch(insn_addr, + /* The assembler may generate offset pointing to either slot 1 + or slot 2 for a long (2-slot) instruction, occupying slots 1 + and 2. */ + insn_addr &= -16UL; + ia64_patch(insn_addr + 2, 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); - ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18); + ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18); } /* diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index d71731e..410d480 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2352,7 +2352,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned lon insert_vm_struct(mm, vma); mm->total_vm += size >> PAGE_SHIFT; - vm_stat_account(vma); + vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, + vma_pages(vma)); up_write(&task->mm->mmap_sem); /* @@ -4939,7 +4940,7 @@ abort_locked: if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; error_args: - if (args_k) kfree(args_k); + kfree(args_k); DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 051e050..2e33665 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -4,6 +4,9 @@ * Copyright (C) 1998-2003 Hewlett-Packard Co * David Mosberger-Tang <davidm@hpl.hp.com> * 04/11/17 Ashok Raj <ashok.raj@intel.com> Added CPU Hotplug Support + * + * 2005-10-07 Keith Owens <kaos@sgi.com> + * Add notify_die() hooks. */ #define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */ #include <linux/config.h> @@ -34,6 +37,7 @@ #include <asm/elf.h> #include <asm/ia32.h> #include <asm/irq.h> +#include <asm/kdebug.h> #include <asm/pgalloc.h> #include <asm/processor.h> #include <asm/sal.h> @@ -197,11 +201,12 @@ void default_idle (void) { local_irq_enable(); - while (!need_resched()) + while (!need_resched()) { if (can_do_pal_halt) safe_halt(); else cpu_relax(); + } } #ifdef CONFIG_HOTPLUG_CPU @@ -263,16 +268,20 @@ void __attribute__((noreturn)) cpu_idle (void) { void (*mark_idle)(int) = ia64_mark_idle; + int cpu = smp_processor_id(); /* endless idle loop with no priority at all */ while (1) { + if (can_do_pal_halt) + clear_thread_flag(TIF_POLLING_NRFLAG); + else + set_thread_flag(TIF_POLLING_NRFLAG); + + if (!need_resched()) { + void (*idle)(void); #ifdef CONFIG_SMP - if (!need_resched()) min_xtp(); #endif - while (!need_resched()) { - void (*idle)(void); - if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; @@ -284,17 +293,17 @@ cpu_idle (void) if (!idle) idle = default_idle; (*idle)(); - } - - if (mark_idle) - (*mark_idle)(0); - + if (mark_idle) + (*mark_idle)(0); #ifdef CONFIG_SMP - normal_xtp(); + normal_xtp(); #endif + } + preempt_enable_no_resched(); schedule(); + preempt_disable(); check_pgt_cache(); - if (cpu_is_offline(smp_processor_id())) + if (cpu_is_offline(cpu)) play_dead(); } } @@ -709,13 +718,6 @@ kernel_thread_helper (int (*fn)(void *), void *arg) void flush_thread (void) { - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(current); - /* drop floating-point and debug-register state if it exists: */ current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); ia64_drop_fpu(current); @@ -804,12 +806,14 @@ cpu_halt (void) void machine_restart (char *restart_cmd) { + (void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0); (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL); } void machine_halt (void) { + (void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0); cpu_halt(); } diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index bbb8bc7..4b19d04 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -587,8 +587,9 @@ thread_matches (struct task_struct *thread, unsigned long addr) static struct task_struct * find_thread_for_addr (struct task_struct *child, unsigned long addr) { - struct task_struct *g, *p; + struct task_struct *p; struct mm_struct *mm; + struct list_head *this, *next; int mm_users; if (!(mm = get_task_mm(child))) @@ -600,28 +601,21 @@ find_thread_for_addr (struct task_struct *child, unsigned long addr) goto out; /* not multi-threaded */ /* - * First, traverse the child's thread-list. Good for scalability with - * NPTL-threads. + * Traverse the current process' children list. Every task that + * one attaches to becomes a child. And it is only attached children + * of the debugger that are of interest (ptrace_check_attach checks + * for this). */ - p = child; - do { - if (thread_matches(p, addr)) { - child = p; - goto out; - } - if (mm_users-- <= 1) - goto out; - } while ((p = next_thread(p)) != child); - - do_each_thread(g, p) { - if (child->mm != mm) + list_for_each_safe(this, next, ¤t->children) { + p = list_entry(this, struct task_struct, sibling); + if (p->mm != mm) continue; - if (thread_matches(p, addr)) { child = p; goto out; } - } while_each_thread(g, p); + } + out: mmput(mm); return child; diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 1f5c26d..5add0bc 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -78,7 +78,27 @@ struct screen_info screen_info; unsigned long vga_console_iobase; unsigned long vga_console_membase; +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; +extern void efi_initialize_iomem_resources(struct resource *, + struct resource *); +extern char _text[], _end[], _etext[]; + unsigned long ia64_max_cacheline_size; + +int dma_get_cache_alignment(void) +{ + return ia64_max_cacheline_size; +} +EXPORT_SYMBOL(dma_get_cache_alignment); + unsigned long ia64_iobase; /* virtual address for I/O accesses */ EXPORT_SYMBOL(ia64_iobase); struct io_space io_space[MAX_IO_SPACES]; @@ -171,6 +191,22 @@ sort_regions (struct rsvd_region *rsvd_region, int max) } } +/* + * Request address space for all standard resources + */ +static int __init register_memory(void) +{ + code_resource.start = ia64_tpa(_text); + code_resource.end = ia64_tpa(_etext) - 1; + data_resource.start = ia64_tpa(_etext); + data_resource.end = ia64_tpa(_end) - 1; + efi_initialize_iomem_resources(&code_resource, &data_resource); + + return 0; +} + +__initcall(register_memory); + /** * reserve_memory - setup reserved memory areas * @@ -211,6 +247,9 @@ reserve_memory (void) } #endif + efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); + n++; + /* end of memory marker */ rsvd_region[n].start = ~0UL; rsvd_region[n].end = ~0UL; @@ -244,28 +283,31 @@ find_initrd (void) static void __init io_port_init (void) { - extern unsigned long ia64_iobase; unsigned long phys_iobase; /* - * Set `iobase' to the appropriate address in region 6 (uncached access range). + * Set `iobase' based on the EFI memory map or, failing that, the + * value firmware left in ar.k0. * - * The EFI memory map is the "preferred" location to get the I/O port space base, - * rather the relying on AR.KR0. This should become more clear in future SAL - * specs. We'll fall back to getting it out of AR.KR0 if no appropriate entry is - * found in the memory map. + * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute + * the port's virtual address, so ia32_load_state() loads it with a + * user virtual address. But in ia64 mode, glibc uses the + * *physical* address in ar.k0 to mmap the appropriate area from + * /dev/mem, and the inX()/outX() interfaces use MMIO. In both + * cases, user-mode can only use the legacy 0-64K I/O port space. + * + * ar.k0 is not involved in kernel I/O port accesses, which can use + * any of the I/O port spaces and are done via MMIO using the + * virtual mmio_base from the appropriate io_space[]. */ phys_iobase = efi_get_iobase(); - if (phys_iobase) - /* set AR.KR0 since this is all we use it for anyway */ - ia64_set_kr(IA64_KR_IO_BASE, phys_iobase); - else { + if (!phys_iobase) { phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); - printk(KERN_INFO "No I/O port range found in EFI memory map, falling back " - "to AR.KR0\n"); - printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase); + printk(KERN_INFO "No I/O port range found in EFI memory map, " + "falling back to AR.KR0 (0x%lx)\n", phys_iobase); } ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); + ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); /* setup legacy IO port space */ io_space[0].mmio_base = ia64_iobase; @@ -419,6 +461,7 @@ setup_arch (char **cmdline_p) #endif cpu_init(); /* initialize the bootstrap CPU */ + mmu_context_init(); /* initialize context_id bitmap */ #ifdef CONFIG_ACPI acpi_boot_init(); @@ -526,7 +569,7 @@ show_cpuinfo (struct seq_file *m, void *v) c->itc_freq / 1000000, c->itc_freq % 1000000, lpj*HZ/500000, (lpj*HZ/5000) % 100); #ifdef CONFIG_SMP - seq_printf(m, "siblings : %u\n", c->num_log); + seq_printf(m, "siblings : %u\n", cpus_weight(cpu_core_map[cpunum])); if (c->threads_per_core > 1 || c->cores_per_socket > 1) seq_printf(m, "physical id: %u\n" diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 774f34b..58ce07e 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -387,15 +387,14 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct sigscratch *scr) { extern char __kernel_sigtramp[]; - unsigned long tramp_addr, new_rbs = 0; + unsigned long tramp_addr, new_rbs = 0, new_sp; struct sigframe __user *frame; long err; - frame = (void __user *) scr->pt.r12; + new_sp = scr->pt.r12; tramp_addr = (unsigned long) __kernel_sigtramp; - if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) { - frame = (void __user *) ((current->sas_ss_sp + current->sas_ss_size) - & ~(STACK_ALIGN - 1)); + if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags(new_sp) == 0) { + new_sp = current->sas_ss_sp + current->sas_ss_size; /* * We need to check for the register stack being on the signal stack * separately, because it's switched separately (memory stack is switched @@ -404,7 +403,7 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, if (!rbs_on_sig_stack(scr->pt.ar_bspstore)) new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); } - frame = (void __user *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); + frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN); if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) return force_sigsegv_info(sig, frame); diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c index 0166a98..657ac99 100644 --- a/arch/ia64/kernel/smp.c +++ b/arch/ia64/kernel/smp.c @@ -185,8 +185,8 @@ send_IPI_allbutself (int op) { unsigned int i; - for (i = 0; i < NR_CPUS; i++) { - if (cpu_online(i) && i != smp_processor_id()) + for_each_online_cpu(i) { + if (i != smp_processor_id()) send_IPI_single(i, op); } } @@ -199,9 +199,9 @@ send_IPI_all (int op) { int i; - for (i = 0; i < NR_CPUS; i++) - if (cpu_online(i)) - send_IPI_single(i, op); + for_each_online_cpu(i) { + send_IPI_single(i, op); + } } /* diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 7d72c0d..8f44e7d 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -399,6 +399,7 @@ start_secondary (void *unused) Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); efi_map_pal_code(); cpu_init(); + preempt_disable(); smp_callin(); cpu_idle(); @@ -694,9 +695,9 @@ smp_cpus_done (unsigned int dummy) * Allow the user to impress friends. */ - for (cpu = 0; cpu < NR_CPUS; cpu++) - if (cpu_online(cpu)) - bogosum += cpu_data(cpu)->loops_per_jiffy; + for_each_online_cpu(cpu) { + bogosum += cpu_data(cpu)->loops_per_jiffy; + } printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 8b8a5a4..5b7e736 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -32,10 +32,6 @@ extern unsigned long wall_jiffies; -u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - #define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ #ifdef CONFIG_IA64_DEBUG_IRQ diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index f970359..d3e0ecb 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -30,17 +30,20 @@ fpswa_interface_t *fpswa_interface; EXPORT_SYMBOL(fpswa_interface); struct notifier_block *ia64die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); -int register_die_notifier(struct notifier_block *nb) +int +register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&ia64die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + return notifier_chain_register(&ia64die_chain, nb); } +EXPORT_SYMBOL_GPL(register_die_notifier); + +int +unregister_die_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ia64die_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_die_notifier); void __init trap_init (void) @@ -105,6 +108,7 @@ die (const char *str, struct pt_regs *regs, long err) if (++die.lock_owner_depth < 3) { printk("%s[%d]: %s %ld [%d]\n", current->comm, current->pid, str, err, ++die_counter); + (void) notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); show_regs(regs); } else printk(KERN_ERR "Recursive die() failure, output suppressed\n"); @@ -128,24 +132,6 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) siginfo_t siginfo; int sig, code; - /* break.b always sets cr.iim to 0, which causes problems for - * debuggers. Get the real break number from the original instruction, - * but only for kernel code. User space break.b is left alone, to - * preserve the existing behaviour. All break codings have the same - * format, so there is no need to check the slot type. - */ - if (break_num == 0 && !user_mode(regs)) { - struct ia64_psr *ipsr = ia64_psr(regs); - unsigned long *bundle = (unsigned long *)regs->cr_iip; - unsigned long slot; - switch (ipsr->ri) { - case 0: slot = (bundle[0] >> 5); break; - case 1: slot = (bundle[0] >> 46) | (bundle[1] << 18); break; - default: slot = (bundle[1] >> 23); break; - } - break_num = ((slot >> 36 & 1) << 20) | (slot >> 6 & 0xfffff); - } - /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); siginfo.si_imm = break_num; @@ -155,9 +141,8 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) switch (break_num) { case 0: /* unknown error (used by GCC for __builtin_abort()) */ if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) - == NOTIFY_STOP) { + == NOTIFY_STOP) return; - } die_if_kernel("bugcheck!", regs, break_num); sig = SIGILL; code = ILL_ILLOPC; break; @@ -210,15 +195,6 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) sig = SIGILL; code = __ILL_BNDMOD; break; - case 0x80200: - case 0x80300: - if (notify_die(DIE_BREAK, "kprobe", regs, break_num, TRAP_BRKPT, SIGTRAP) - == NOTIFY_STOP) { - return; - } - sig = SIGTRAP; code = TRAP_BRKPT; - break; - default: if (break_num < 0x40000 || break_num > 0x100000) die_if_kernel("Bad break", regs, break_num); @@ -226,6 +202,9 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) if (break_num < 0x80000) { sig = SIGILL; code = __ILL_BREAK; } else { + if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) + return; sig = SIGTRAP; code = TRAP_BRKPT; } } @@ -578,12 +557,11 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, #endif break; case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; - case 36: - if (notify_die(DIE_SS, "ss", ®s, vector, - vector, SIGTRAP) == NOTIFY_STOP) - return; - siginfo.si_code = TRAP_TRACE; ifa = 0; break; + case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; } + if (notify_die(DIE_FAULT, "ia64_fault", ®s, vector, siginfo.si_code, SIGTRAP) + == NOTIFY_STOP) + return; siginfo.si_signo = SIGTRAP; siginfo.si_errno = 0; siginfo.si_addr = (void __user *) ifa; diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index 4e9d06c..c6d4044 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -205,23 +205,18 @@ EXPORT_SYMBOL(uncached_free_page); static int __init uncached_build_memmap(unsigned long start, unsigned long end, void *arg) { - long length; - unsigned long vstart, vend; + long length = end - start; int node; - length = end - start; - vstart = start + __IA64_UNCACHED_OFFSET; - vend = end + __IA64_UNCACHED_OFFSET; - dprintk(KERN_ERR "uncached_build_memmap(%lx %lx)\n", start, end); - memset((char *)vstart, 0, length); + memset((char *)start, 0, length); - node = paddr_to_nid(start); + node = paddr_to_nid(start - __IA64_UNCACHED_OFFSET); - for (; vstart < vend ; vstart += PAGE_SIZE) { - dprintk(KERN_INFO "sticking %lx into the pool!\n", vstart); - gen_pool_free(uncached_pool[node], vstart, PAGE_SIZE); + for (; start < end ; start += PAGE_SIZE) { + dprintk(KERN_INFO "sticking %lx into the pool!\n", start); + gen_pool_free(uncached_pool[node], start, PAGE_SIZE); } return 0; |