diff options
Diffstat (limited to 'arch/powerpc/kernel')
41 files changed, 1084 insertions, 341 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c002b04..8773263 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -98,11 +98,16 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o + +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o + obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o ifneq ($(CONFIG_PPC_INDIRECT_IO),y) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 957ceb7..c09138d 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -133,7 +133,6 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_event_pending)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_PPC_MM_SLICES DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct, diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 01fe9ce..a3c684b 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -19,6 +19,7 @@ #include <linux/notifier.h> #include <linux/of.h> #include <linux/percpu.h> +#include <linux/slab.h> #include <asm/prom.h> #include "cacheinfo.h" diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 2fc82ba..8af4949 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1808,7 +1808,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", /* xxx - galak, e500mc? */ + .oprofile_cpu_type = "ppc/e500mc", .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500, diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 59c9285..4ff4da2c 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -1,7 +1,8 @@ /* * Contains routines needed to support swiotlb for ppc. * - * Copyright (C) 2009 Becky Bruce, Freescale Semiconductor + * Copyright (C) 2009-2010 Freescale Semiconductor, Inc. + * Author: Becky Bruce * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -70,7 +71,7 @@ static int ppc_swiotlb_bus_notify(struct notifier_block *nb, sd->max_direct_dma_addr = 0; /* May need to bounce if the device can't address all of DRAM */ - if (dma_get_mask(dev) < lmb_end_of_DRAM()) + if ((dma_get_mask(dev) + 1) < lmb_end_of_DRAM()) set_dma_ops(dev, &swiotlb_dma_ops); return NOTIFY_DONE; diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 6215062..6c1df57 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -8,6 +8,7 @@ #include <linux/device.h> #include <linux/dma-mapping.h> #include <linux/dma-debug.h> +#include <linux/gfp.h> #include <linux/lmb.h> #include <asm/bug.h> #include <asm/abs_addr.h> diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c new file mode 100644 index 0000000..7c07de0 --- /dev/null +++ b/arch/powerpc/kernel/e500-pmu.c @@ -0,0 +1,129 @@ +/* + * Performance counter support for e500 family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/string.h> +#include <linux/perf_event.h> +#include <asm/reg.h> +#include <asm/cputable.h> + +/* + * Map of generic hardware event types to hardware events + * Zero if unsupported + */ +static int e500_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 1, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12, + [PERF_COUNT_HW_BRANCH_MISSES] = 15, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + /* + * D-cache misses are not split into read/write/prefetch; + * use raw event 41. + */ + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 27, 0 }, + [C(OP_WRITE)] = { 28, 0 }, + [C(OP_PREFETCH)] = { 29, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 2, 60 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * Assuming LL means L2, it's not a good match for this model. + * It allocates only on L1 castout or explicit prefetch, and + * does not have separate read/write events (but it does have + * separate instruction/data events). + */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * There are data/instruction MMU misses, but that's a miss on + * the chip's internal level-one TLB which is probably not + * what the user wants. Instead, unified level-two TLB misses + * are reported here. + */ + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 26, 66 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 12, 15 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + +static int num_events = 128; + +/* Upper half of event id is PMLCb, for threshold events */ +static u64 e500_xlate_event(u64 event_id) +{ + u32 event_low = (u32)event_id; + u64 ret; + + if (event_low >= num_events) + return 0; + + ret = FSL_EMB_EVENT_VALID; + + if (event_low >= 76 && event_low <= 81) { + ret |= FSL_EMB_EVENT_RESTRICTED; + ret |= event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH); + } else if (event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) { + /* Threshold requested on non-threshold event */ + return 0; + } + + return ret; +} + +static struct fsl_emb_pmu e500_pmu = { + .name = "e500 family", + .n_counter = 4, + .n_restricted = 2, + .xlate_event = e500_xlate_event, + .n_generic = ARRAY_SIZE(e500_generic_events), + .generic_events = e500_generic_events, + .cache_events = &e500_cache_events, +}; + +static int init_e500_pmu(void) +{ + if (!cur_cpu_spec->oprofile_cpu_type) + return -ENODEV; + + if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) + num_events = 256; + else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) + return -ENODEV; + + return register_fsl_emb_pmu(&e500_pmu); +} + +arch_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 07109d8..42e9d90 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -556,15 +556,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_EVENTS - /* check paca->perf_event_pending if we're enabling ints */ - lbz r3,PACAPERFPEND(r13) - and. r3,r3,r5 - beq 27f - bl .perf_event_do_pending -27: -#endif /* CONFIG_PERF_EVENTS */ - /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 9258074..bed9a29 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -219,7 +219,8 @@ generic_secondary_common_init: * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ - LOAD_REG_ADDR(r13, paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ + ld r13,0(r13) /* Get base vaddr of paca array */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ @@ -536,7 +537,8 @@ _GLOBAL(pmac_secondary_start) mtmsrd r3 /* RI on */ /* Set up a paca value for this processor. */ - LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r4,paca) /* Load paca pointer */ + ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ @@ -615,6 +617,17 @@ _GLOBAL(start_secondary_prolog) std r3,0(r1) /* Zero the stack frame pointer */ bl .start_secondary b . +/* + * Reset stack pointer and call start_secondary + * to continue with online operation when woken up + * from cede in cpu offline. + */ +_GLOBAL(start_secondary_resume) + ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary + b . #endif /* diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 25793bb..7255265 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -747,9 +747,6 @@ finish_tlb_load: #else rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ #endif -#ifdef CONFIG_SMP - ori r12, r12, MAS2_M -#endif mtspr SPRN_MAS2, r12 #ifdef CONFIG_PTE_64BIT @@ -887,13 +884,17 @@ KernelSPE: lwz r3,_MSR(r1) oris r3,r3,MSR_SPE@h stw r3,_MSR(r1) /* enable use of SPE after return */ +#ifdef CONFIG_PRINTK lis r3,87f@h ori r3,r3,87f@l mr r4,r2 /* current */ lwz r5,_NIP(r1) bl printk +#endif b ret_from_except +#ifdef CONFIG_PRINTK 87: .string "SPE used in kernel (task=%p, pc=%x) \n" +#endif .align 4,0 #endif /* CONFIG_SPE */ diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index a4c8b38..71cf280 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -42,6 +42,7 @@ #include <linux/dma-mapping.h> #include <linux/interrupt.h> #include <linux/of.h> +#include <linux/slab.h> #include <linux/of_platform.h> #include <asm/ibmebus.h> #include <asm/abs_addr.h> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 5547ae6..ec94f90 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -42,12 +42,7 @@ #define DBG(...) -#ifdef CONFIG_IOMMU_VMERGE -static int novmerge = 0; -#else -static int novmerge = 1; -#endif - +static int novmerge; static int protect4gb = 1; static void __iommu_free(struct iommu_table *, dma_addr_t, unsigned int); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 64f6f20..066bd31 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -53,7 +53,6 @@ #include <linux/bootmem.h> #include <linux/pci.h> #include <linux/debugfs.h> -#include <linux/perf_event.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -145,11 +144,6 @@ notrace void raw_local_irq_restore(unsigned long en) } #endif /* CONFIG_PPC_STD_MMU_64 */ - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } - /* * if (get_paca()->hard_enabled) return; * But again we need to take care that gcc gets hard_enabled directly diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 3fd1af9..b36f074 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -31,6 +31,7 @@ #include <linux/preempt.h> #include <linux/module.h> #include <linux/kdebug.h> +#include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 9ddfaef..035ada5 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -469,7 +469,7 @@ static int __init serial_dev_init(void) return -ENODEV; /* - * Before we register the platfrom serial devices, we need + * Before we register the platform serial devices, we need * to fixup their interrupts and their IO ports. */ DBG("Fixing serial ports interrupts and IO ports ...\n"); diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index d09d1c6..c2c70e1 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -24,6 +24,7 @@ #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/seq_file.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/iseries/hv_lp_config.h> #include <asm/lppaca.h> diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index 2d29752..22e507c 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -127,3 +127,29 @@ _GLOBAL(__setup_cpu_power7) _GLOBAL(__restore_cpu_power7) /* place holder */ blr + +/* + * Get a minimal set of registers for our caller's nth caller. + * r3 = regs pointer, r5 = n. + * + * We only get R1 (stack pointer), NIP (next instruction pointer) + * and LR (link register). These are all we can get in the + * general case without doing complicated stack unwinding, but + * fortunately they are enough to do a stack backtrace, which + * is all we need them for. + */ +_GLOBAL(perf_arch_fetch_caller_regs) + mr r6,r1 + cmpwi r5,0 + mflr r4 + ble 2f + mtctr r5 +1: PPC_LL r6,0(r6) + bdnz 1b + PPC_LL r4,PPC_LR_STKOFF(r6) +2: PPC_LL r7,0(r6) + PPC_LL r7,PPC_LR_STKOFF(r7) + PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3) + PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3) + blr diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index 666d08d..6c1dfc3 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -17,7 +17,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mod_devicetable.h> -#include <linux/slab.h> #include <linux/pci.h> #include <linux/of.h> #include <linux/of_device.h> diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d16b1ea..0c40c6f 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -9,11 +9,15 @@ #include <linux/threads.h> #include <linux/module.h> +#include <linux/lmb.h> +#include <asm/firmware.h> #include <asm/lppaca.h> #include <asm/paca.h> #include <asm/sections.h> #include <asm/pgtable.h> +#include <asm/iseries/lpar_map.h> +#include <asm/iseries/hv_types.h> /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -70,37 +74,82 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -struct paca_struct paca[NR_CPUS]; +struct paca_struct *paca; EXPORT_SYMBOL(paca); -void __init initialise_pacas(void) -{ - int cpu; +struct paca_struct boot_paca; - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ +void __init initialise_paca(struct paca_struct *new_paca, int cpu) +{ + /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB + * of the TOC can be addressed using a single machine instruction. + */ unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct paca_struct *new_paca = &paca[cpu]; - #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = &lppaca[cpu]; #else - new_paca->kernel_pgd = swapper_pg_dir; + new_paca->kernel_pgd = swapper_pg_dir; #endif - new_paca->lock_token = 0x8000; - new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; - new_paca->kernelbase = (unsigned long) _stext; - new_paca->kernel_msr = MSR_KERNEL; - new_paca->hw_cpu_id = 0xffff; - new_paca->__current = &init_task; + new_paca->lock_token = 0x8000; + new_paca->paca_index = cpu; + new_paca->kernel_toc = kernel_toc; + new_paca->kernelbase = (unsigned long) _stext; + new_paca->kernel_msr = MSR_KERNEL; + new_paca->hw_cpu_id = 0xffff; + new_paca->__current = &init_task; #ifdef CONFIG_PPC_STD_MMU_64 - new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->slb_shadow_ptr = &slb_shadow[cpu]; #endif /* CONFIG_PPC_STD_MMU_64 */ +} + +static int __initdata paca_size; + +void __init allocate_pacas(void) +{ + int nr_cpus, cpu, limit; + + /* + * We can't take SLB misses on the paca, and we want to access them + * in real mode, so allocate them within the RMA and also within + * the first segment. On iSeries they must be within the area mapped + * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. + */ + limit = min(0x10000000ULL, lmb.rmo_size); + if (firmware_has_feature(FW_FEATURE_ISERIES)) + limit = min(limit, HvPagesToMap * HVPAGESIZE); + + nr_cpus = NR_CPUS; + /* On iSeries we know we can never have more than 64 cpus */ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + nr_cpus = min(64, nr_cpus); + + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + + paca = __va(lmb_alloc_base(paca_size, PAGE_SIZE, limit)); + memset(paca, 0, paca_size); + + printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", + paca_size, nr_cpus, paca); + + /* Can't use for_each_*_cpu, as they aren't functional yet */ + for (cpu = 0; cpu < nr_cpus; cpu++) + initialise_paca(&paca[cpu], cpu); +} + +void __init free_unused_pacas(void) +{ + int new_size; + + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * num_possible_cpus()); + + if (new_size >= paca_size) + return; + + lmb_free(__pa(paca) + new_size, paca_size - new_size); + + printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", + paca_size - new_size); - } + paca_size = new_size; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2597f95..0c0567e 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -26,6 +26,7 @@ #include <linux/syscalls.h> #include <linux/irq.h> #include <linux/vmalloc.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/io.h> @@ -63,21 +64,6 @@ struct dma_map_ops *get_pci_dma_ops(void) } EXPORT_SYMBOL(get_pci_dma_ops); -int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_mask(&dev->dev, mask); -} - -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ - int rc; - - rc = dma_set_mask(&dev->dev, mask); - dev->dev.coherent_dma_mask = dev->dma_mask; - - return rc; -} - struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index c13668c..e7db5b4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -14,6 +14,7 @@ #include <linux/irq.h> #include <linux/list.h> #include <linux/of.h> +#include <linux/slab.h> #include <asm/processor.h> #include <asm/io.h> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c index d5e36e5..d56b35e 100644 --- a/arch/powerpc/kernel/pci_dn.c +++ b/arch/powerpc/kernel/pci_dn.c @@ -23,6 +23,7 @@ #include <linux/pci.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/gfp.h> #include <asm/io.h> #include <asm/prom.h> diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index b6cf8f1..43b83c3 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -35,6 +35,9 @@ struct cpu_hw_events { u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long amasks[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; unsigned long avalues[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES]; + + unsigned int group_flag; + int n_txn_start; }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); @@ -718,66 +721,6 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static void event_sched_in(struct perf_event *event) -{ - event->state = PERF_EVENT_STATE_ACTIVE; - event->oncpu = smp_processor_id(); - event->tstamp_running += event->ctx->time - event->tstamp_stopped; - if (is_software_event(event)) - event->pmu->enable(event); -} - -/* - * Called to enable a whole group of events. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. - */ -int hw_perf_group_sched_in(struct perf_event *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx) -{ - struct cpu_hw_events *cpuhw; - long i, n, n0; - struct perf_event *sub; - - if (!ppmu) - return 0; - cpuhw = &__get_cpu_var(cpu_hw_events); - n0 = cpuhw->n_events; - n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->event[n0], &cpuhw->events[n0], - &cpuhw->flags[n0]); - if (n < 0) - return -EAGAIN; - if (check_excludes(cpuhw->event, cpuhw->flags, n0, n)) - return -EAGAIN; - i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n + n0); - if (i < 0) - return -EAGAIN; - cpuhw->n_events = n0 + n; - cpuhw->n_added += n; - - /* - * OK, this group can go on; update event states etc., - * and enable any software events - */ - for (i = n0; i < n0 + n; ++i) - cpuhw->event[i]->hw.config = cpuhw->events[i]; - cpuctx->active_oncpu += n; - n = 1; - event_sched_in(group_leader); - list_for_each_entry(sub, &group_leader->sibling_list, group_entry) { - if (sub->state != PERF_EVENT_STATE_OFF) { - event_sched_in(sub); - ++n; - } - } - ctx->nr_active += n; - - return 1; -} - /* * Add a event to the PMU. * If all events are not already frozen, then we disable and @@ -805,12 +748,22 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->event[n0] = event; cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + + /* + * If group events scheduling transaction was started, + * skip the schedulability test here, it will be peformed + * at commit time(->commit_txn) as a whole + */ + if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED) + goto nocheck; + if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1)) goto out; if (power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n0 + 1)) goto out; - event->hw.config = cpuhw->events[n0]; + +nocheck: ++cpuhw->n_events; ++cpuhw->n_added; @@ -896,11 +849,65 @@ static void power_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } +/* + * Start group events scheduling transaction + * Set the flag to make pmu::enable() not perform the + * schedulability test, it will be performed at commit time + */ +void power_pmu_start_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpuhw->group_flag |= PERF_EVENT_TXN_STARTED; + cpuhw->n_txn_start = cpuhw->n_events; +} + +/* + * Stop group events scheduling transaction + * Clear the flag and pmu::enable() will perform the + * schedulability test. + */ +void power_pmu_cancel_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + + cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED; +} + +/* + * Commit group events scheduling transaction + * Perform the group schedulability test as a whole + * Return 0 if success + */ +int power_pmu_commit_txn(const struct pmu *pmu) +{ + struct cpu_hw_events *cpuhw; + long i, n; + + if (!ppmu) + return -EAGAIN; + cpuhw = &__get_cpu_var(cpu_hw_events); + n = cpuhw->n_events; + if (check_excludes(cpuhw->event, cpuhw->flags, 0, n)) + return -EAGAIN; + i = power_check_constraints(cpuhw, cpuhw->events, cpuhw->flags, n); + if (i < 0) + return -EAGAIN; + + for (i = cpuhw->n_txn_start; i < n; ++i) + cpuhw->event[i]->hw.config = cpuhw->events[i]; + + return 0; +} + struct pmu power_pmu = { .enable = power_pmu_enable, .disable = power_pmu_disable, .read = power_pmu_read, .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, }; /* @@ -1164,10 +1171,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Finally record data if requested. */ if (record) { - struct perf_sample_data data = { - .addr = ~0ULL, - .period = event->hw.last_period, - }; + struct perf_sample_data data; + + perf_sample_data_init(&data, ~0ULL); + data.period = event->hw.last_period; if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); @@ -1287,7 +1294,7 @@ static void perf_event_interrupt(struct pt_regs *regs) irq_exit(); } -void hw_perf_event_setup(int cpu) +static void power_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -1297,6 +1304,23 @@ void hw_perf_event_setup(int cpu) cpuhw->mmcr[0] = MMCR0_FC; } +static int __cpuinit +power_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) +{ + unsigned int cpu = (long)hcpu; + + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_UP_PREPARE: + power_pmu_setup(cpu); + break; + + default: + break; + } + + return NOTIFY_OK; +} + int register_power_pmu(struct power_pmu *pmu) { if (ppmu) @@ -1314,5 +1338,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_cpu_notifier(power_pmu_notifier); + return 0; } diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c new file mode 100644 index 0000000..369872f --- /dev/null +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -0,0 +1,654 @@ +/* + * Performance event support - Freescale Embedded Performance Monitor + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <asm/reg_fsl_emb.h> +#include <asm/pmc.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/ptrace.h> + +struct cpu_hw_events { + int n_events; + int disabled; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +static struct fsl_emb_pmu *ppmu; + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ +#ifdef __powerpc64__ + return !regs->softe; +#else + return 0; +#endif +} + +static void perf_event_interrupt(struct pt_regs *regs); + +/* + * Read one performance monitor counter (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 0: + val = mfpmr(PMRN_PMC0); + break; + case 1: + val = mfpmr(PMRN_PMC1); + break; + case 2: + val = mfpmr(PMRN_PMC2); + break; + case 3: + val = mfpmr(PMRN_PMC3); + break; + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMC0, val); + break; + case 1: + mtpmr(PMRN_PMC1, val); + break; + case 2: + mtpmr(PMRN_PMC2, val); + break; + case 3: + mtpmr(PMRN_PMC3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } + + isync(); +} + +/* + * Write one local control A register + */ +static void write_pmlca(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCA0, val); + break; + case 1: + mtpmr(PMRN_PMLCA1, val); + break; + case 2: + mtpmr(PMRN_PMLCA2, val); + break; + case 3: + mtpmr(PMRN_PMLCA3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCA%d\n", idx); + } + + isync(); +} + +/* + * Write one local control B register + */ +static void write_pmlcb(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCB0, val); + break; + case 1: + mtpmr(PMRN_PMLCB1, val); + break; + case 2: + mtpmr(PMRN_PMLCB2, val); + break; + case 3: + mtpmr(PMRN_PMLCB3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCB%d\n", idx); + } + + isync(); +} + +static void fsl_emb_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The counters are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. + */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + if (atomic_read(&num_events)) { + /* + * Set the 'freeze all counters' bit, and disable + * interrupts. The barrier is to make sure the + * mtpmr has been executed and the PMU has frozen + * the events before we return. + */ + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. + * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) + goto out; + + cpuhw->disabled = 0; + ppc_set_pmu_inuse(cpuhw->n_events != 0); + + if (cpuhw->n_events > 0) { + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + } + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *ctrs[]) +{ + int n = 0; + struct perf_event *event; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + n++; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = event; + n++; + } + } + return n; +} + +/* perf must be disabled, context locked on entry */ +static int fsl_emb_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int ret = -EAGAIN; + int num_counters = ppmu->n_counter; + u64 val; + int i; + + cpuhw = &get_cpu_var(cpu_hw_events); + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_counters = ppmu->n_restricted; + + /* + * Allocate counters from top-down, so that restricted-capable + * counters are kept free as long as possible. + */ + for (i = num_counters - 1; i >= 0; i--) { + if (cpuhw->event[i]) + continue; + + break; + } + + if (i < 0) + goto out; + + event->hw.idx = i; + cpuhw->event[i] = event; + ++cpuhw->n_events; + + val = 0; + if (event->hw.sample_period) { + s64 left = atomic64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&event->hw.prev_count, val); + write_pmc(i, val); + perf_event_update_userpage(event); + + write_pmlcb(i, event->hw.config >> 32); + write_pmlca(i, event->hw.config_base); + + ret = 0; + out: + put_cpu_var(cpu_hw_events); + return ret; +} + +/* perf must be disabled, context locked on entry */ +static void fsl_emb_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int i = event->hw.idx; + + if (i < 0) + goto out; + + fsl_emb_pmu_read(event); + + cpuhw = &get_cpu_var(cpu_hw_events); + + WARN_ON(event != cpuhw->event[event->hw.idx]); + + write_pmlca(i, 0); + write_pmlcb(i, 0); + write_pmc(i, 0); + + cpuhw->event[i] = NULL; + event->hw.idx = -1; + + /* + * TODO: if at least one restricted event exists, and we + * just freed up a non-restricted-capable counter, and + * there is a restricted-capable counter occupied by + * a non-restricted event, migrate that event to the + * vacated counter. + */ + + cpuhw->n_events--; + + out: + put_cpu_var(cpu_hw_events); +} + +/* + * Re-enable interrupts on a event after they were throttled + * because they were coming too fast. + * + * Context is locked on entry, but perf is not disabled. + */ +static void fsl_emb_pmu_unthrottle(struct perf_event *event) +{ + s64 val, left; + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + local_irq_save(flags); + perf_disable(); + fsl_emb_pmu_read(event); + left = event->hw.sample_period; + event->hw.last_period = left; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + perf_enable(); + local_irq_restore(flags); +} + +static struct pmu fsl_emb_pmu = { + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Translate a generic cache event_id config to a raw event_id code. + */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + u64 ev; + struct perf_event *events[MAX_HWEVENTS]; + int n; + int err; + int num_restricted; + int i; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + ev = event->attr.config; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) + return ERR_PTR(-EOPNOTSUPP); + ev = ppmu->generic_events[ev]; + break; + + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(event->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + + case PERF_TYPE_RAW: + ev = event->attr.config; + break; + + default: + return ERR_PTR(-EINVAL); + } + + event->hw.config = ppmu->xlate_event(ev); + if (!(event->hw.config & FSL_EMB_EVENT_VALID)) + return ERR_PTR(-EINVAL); + + /* + * If this is in a group, check if it can go on with all the + * other hardware events in the group. We assume the event + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + ppmu->n_counter - 1, events); + if (n < 0) + return ERR_PTR(-EINVAL); + } + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { + num_restricted = 0; + for (i = 0; i < n; i++) { + if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_restricted++; + } + + if (num_restricted >= ppmu->n_restricted) + return ERR_PTR(-EINVAL); + } + + event->hw.idx = -1; + + event->hw.config_base = PMLCA_CE | PMLCA_FCM1 | + (u32)((ev << 16) & PMLCA_EVENT_MASK); + + if (event->attr.exclude_user) + event->hw.config_base |= PMLCA_FCU; + if (event->attr.exclude_kernel) + event->hw.config_base |= PMLCA_FCS; + if (event->attr.exclude_idle) + return ERR_PTR(-ENOTSUPP); + + event->hw.last_period = event->hw.sample_period; + atomic64_set(&event->hw.period_left, event->hw.last_period); + + /* + * See if we need to reserve the PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware(perf_event_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + event->destroy = hw_perf_event_destroy; + + if (err) + return ERR_PTR(err); + return &fsl_emb_pmu; +} + +/* + * A counter has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_event *event, unsigned long val, + struct pt_regs *regs, int nmi) +{ + u64 period = event->hw.sample_period; + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&event->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + + /* + * See if the total period for this event has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&event->hw.period_left) - delta; + if (period) { + if (left <= 0) { + left += period; + if (left <= 0) + left = period; + record = 1; + } + if (left < 0x80000000LL) + val = 0x80000000LL - left; + } + + /* + * Finally record data if requested. + */ + if (record) { + struct perf_sample_data data = { + .period = event->hw.last_period, + }; + + if (perf_event_overflow(event, nmi, &data, regs)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the event to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each event counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } + } + + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); +} + +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < ppmu->n_counter; ++i) { + event = cpuhw->event[i]; + + val = read_pmc(i); + if ((int)val < 0) { + if (event) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } else { + /* + * Disabled counter is negative, + * reset it just in case. + */ + write_pmc(i, 0); + } + } + } + + /* PMM will keep counters frozen until we return from the interrupt. */ + mtmsr(mfmsr() | MSR_PMM); + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + memset(cpuhw, 0, sizeof(*cpuhw)); +} + +int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + + return 0; +} diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index 1ed3b8d..c8ae371 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -19,7 +19,6 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/proc_fs.h> -#include <linux/slab.h> #include <linux/kernel.h> #include <asm/machdep.h> diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 43238b2..05131d6 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -43,6 +43,7 @@ #include <asm/smp.h> #include <asm/system.h> #include <asm/mmu.h> +#include <asm/paca.h> #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> @@ -721,6 +722,8 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); + allocate_pacas(); + DBG("Scanning CPUs ...\n"); /* Retreive CPU related informations from the flat tree diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 5f306c4..97d4bd9 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -653,6 +653,7 @@ static void __init early_cmdline_parse(void) #else #define OV5_CMO 0x00 #endif +#define OV5_TYPE1_AFFINITY 0x80 /* Type 1 NUMA affinity */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ @@ -706,7 +707,7 @@ static unsigned char ibm_architecture_vec[] = { OV5_DONATE_DEDICATE_CPU | OV5_MSI, 0, OV5_CMO, - 0, + OV5_TYPE1_AFFINITY, 0, 0, 0, diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d9b0586..ed2cfe1 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -940,7 +940,7 @@ static int del_instruction_bp(struct task_struct *child, int slot) { switch (slot) { case 1: - if (child->thread.iac1 == 0) + if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC12MODE) { @@ -952,7 +952,7 @@ static int del_instruction_bp(struct task_struct *child, int slot) child->thread.dbcr0 &= ~DBCR0_IAC1; break; case 2: - if (child->thread.iac2 == 0) + if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC12MODE) @@ -963,7 +963,7 @@ static int del_instruction_bp(struct task_struct *child, int slot) break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case 3: - if (child->thread.iac3 == 0) + if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC34MODE) { @@ -975,7 +975,7 @@ static int del_instruction_bp(struct task_struct *child, int slot) child->thread.dbcr0 &= ~DBCR0_IAC3; break; case 4: - if (child->thread.iac4 == 0) + if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC34MODE) @@ -1054,7 +1054,7 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) static int del_dac(struct task_struct *child, int slot) { if (slot == 1) { - if (child->thread.dac1 == 0) + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) return -ENOENT; child->thread.dac1 = 0; @@ -1070,7 +1070,7 @@ static int del_dac(struct task_struct *child, int slot) child->thread.dvc1 = 0; #endif } else if (slot == 2) { - if (child->thread.dac1 == 0) + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) return -ENOENT; #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index fd0d294..7436784 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -23,6 +23,7 @@ #include <linux/completion.h> #include <linux/cpumask.h> #include <linux/lmb.h> +#include <linux/slab.h> #include <asm/prom.h> #include <asm/rtas.h> diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index a85117d..bfc2aba 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/proc_fs.h> #include <asm/delay.h> #include <asm/uaccess.h> diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 2e4832a..4190eae 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -20,6 +20,7 @@ #include <linux/spinlock.h> #include <linux/cpu.h> #include <linux/workqueue.h> +#include <linux/slab.h> #include <asm/uaccess.h> #include <asm/io.h> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 03dd6a2..48f0a00 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -36,6 +36,7 @@ #include <linux/lmb.h> #include <linux/of_platform.h> #include <asm/io.h> +#include <asm/paca.h> #include <asm/prom.h> #include <asm/processor.h> #include <asm/vdso_datapage.h> @@ -493,6 +494,8 @@ void __init smp_setup_cpu_maps(void) * here will have to be reworked */ cpu_init_thread_core_maps(nthreads); + + free_unused_pacas(); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index b152de3..8f58986 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -39,7 +39,6 @@ #include <asm/serial.h> #include <asm/udbg.h> #include <asm/mmu_context.h> -#include <asm/swiotlb.h> #include "setup.h" @@ -343,11 +342,6 @@ void __init setup_arch(char **cmdline_p) ppc_md.setup_arch(); if ( ppc_md.progress ) ppc_md.progress("arch: exit", 0x3eab); -#ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) - swiotlb_init(1); -#endif - paging_init(); /* Initialize the MMU context management stuff */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6568406..9143891 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -61,7 +61,6 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/swiotlb.h> #include <asm/mmu_context.h> #include "setup.h" @@ -144,9 +143,9 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /* Put the paca pointer into r13 and SPRG_PACA */ -void __init setup_paca(int cpu) +static void __init setup_paca(struct paca_struct *new_paca) { - local_paca = &paca[cpu]; + local_paca = new_paca; mtspr(SPRN_SPRG_PACA, local_paca); #ifdef CONFIG_PPC_BOOK3E mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); @@ -176,14 +175,12 @@ void __init early_setup(unsigned long dt_ptr) { /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Fill in any unititialised pacas */ - initialise_pacas(); - /* Identify CPU type */ identify_cpu(0, mfspr(SPRN_PVR)); /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ - setup_paca(0); + initialise_paca(&boot_paca, 0); + setup_paca(&boot_paca); /* Initialize lockdep early or else spinlocks will blow */ lockdep_init(); @@ -203,7 +200,7 @@ void __init early_setup(unsigned long dt_ptr) early_init_devtree(__va(dt_ptr)); /* Now we know the logical id of our boot cpu, setup the paca. */ - setup_paca(boot_cpuid); + setup_paca(&paca[boot_cpuid]); /* Fix up paca fields required for the boot cpu */ get_paca()->cpu_start = 1; @@ -543,11 +540,6 @@ void __init setup_arch(char **cmdline_p) if (ppc_md.setup_arch) ppc_md.setup_arch(); -#ifdef CONFIG_SWIOTLB - if (ppc_swiotlb_enable) - swiotlb_init(1); -#endif - paging_init(); /* Initialize the MMU context management stuff */ diff --git a/arch/powerpc/kernel/smp-tbsync.c b/arch/powerpc/kernel/smp-tbsync.c index a5e5452..03e45c4 100644 --- a/arch/powerpc/kernel/smp-tbsync.c +++ b/arch/powerpc/kernel/smp-tbsync.c @@ -10,6 +10,7 @@ #include <linux/smp.h> #include <linux/unistd.h> #include <linux/init.h> +#include <linux/slab.h> #include <asm/atomic.h> #include <asm/smp.h> #include <asm/time.h> diff --git a/arch/powerpc/kernel/softemu8xx.c b/arch/powerpc/kernel/softemu8xx.c index 23c8c5e..af0e829 100644 --- a/arch/powerpc/kernel/softemu8xx.c +++ b/arch/powerpc/kernel/softemu8xx.c @@ -21,7 +21,6 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/interrupt.h> diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c index c5a4732..19471a1 100644 --- a/arch/powerpc/kernel/sys_ppc32.c +++ b/arch/powerpc/kernel/sys_ppc32.c @@ -41,6 +41,7 @@ #include <linux/ptrace.h> #include <linux/elf.h> #include <linux/ipc.h> +#include <linux/slab.h> #include <asm/ptrace.h> #include <asm/types.h> diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 3370e62..f2496f2 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -42,100 +42,6 @@ #include <asm/time.h> #include <asm/unistd.h> -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. - */ -int sys_ipc(uint call, int first, unsigned long second, long third, - void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - ret = -ENOSYS; - switch (call) { - case SEMOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - break; - case SEMTIMEDOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) fifth); - break; - case SEMGET: - ret = sys_semget (first, (int)second, third); - break; - case SEMCTL: { - union semun fourth; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr))) - break; - ret = sys_semctl(first, (int)second, third, fourth); - break; - } - case MSGSND: - ret = sys_msgsnd(first, (struct msgbuf __user *)ptr, - (size_t)second, third); - break; - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp)) ? -EFAULT : 0)) - break; - ret = sys_msgrcv(first, tmp.msgp, (size_t) second, - tmp.msgtyp, third); - break; - } - default: - ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, - (size_t)second, fifth, third); - break; - } - break; - case MSGGET: - ret = sys_msgget((key_t)first, (int)second); - break; - case MSGCTL: - ret = sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - break; - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, (int)second, &raddr); - if (ret) - break; - ret = put_user(raddr, (ulong __user *) third); - break; - } - case SHMDT: - ret = sys_shmdt((char __user *)ptr); - break; - case SHMGET: - ret = sys_shmget(first, (size_t)second, third); - break; - case SHMCTL: - ret = sys_shmctl(first, (int)second, - (struct shmid_ds __user *)ptr); - break; - } - - return ret; -} - static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) @@ -210,76 +116,6 @@ long ppc64_personality(unsigned long personality) } #endif -#ifdef CONFIG_PPC64 -#define OVERRIDE_MACHINE (personality(current->personality) == PER_LINUX32) -#else -#define OVERRIDE_MACHINE 0 -#endif - -static inline int override_machine(char __user *mach) -{ - if (OVERRIDE_MACHINE) { - /* change ppc64 to ppc */ - if (__put_user(0, mach+3) || __put_user(0, mach+4)) - return -EFAULT; - } - return 0; -} - -long ppc_newuname(struct new_utsname __user * name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_uname(struct old_utsname __user *name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_olduname(struct oldold_utsname __user *name) -{ - int error; - - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= override_machine(name->machine); - up_read(&uts_sem); - - return error? -EFAULT: 0; -} - long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 1b16b9a..0441bbd 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -532,25 +532,60 @@ void __init iSeries_time_init_early(void) } #endif /* CONFIG_PPC_ISERIES */ -#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_PPC32) -DEFINE_PER_CPU(u8, perf_event_pending); +#ifdef CONFIG_PERF_EVENTS -void set_perf_event_pending(void) +/* + * 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable... + */ +#ifdef CONFIG_PPC64 +static inline unsigned long test_perf_event_pending(void) { - get_cpu_var(perf_event_pending) = 1; - set_dec(1); - put_cpu_var(perf_event_pending); + unsigned long x; + + asm volatile("lbz %0,%1(13)" + : "=r" (x) + : "i" (offsetof(struct paca_struct, perf_event_pending))); + return x; } +static inline void set_perf_event_pending_flag(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (1), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +static inline void clear_perf_event_pending(void) +{ + asm volatile("stb %0,%1(13)" : : + "r" (0), + "i" (offsetof(struct paca_struct, perf_event_pending))); +} + +#else /* 32-bit */ + +DEFINE_PER_CPU(u8, perf_event_pending); + +#define set_perf_event_pending_flag() __get_cpu_var(perf_event_pending) = 1 #define test_perf_event_pending() __get_cpu_var(perf_event_pending) #define clear_perf_event_pending() __get_cpu_var(perf_event_pending) = 0 -#else /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* 32 vs 64 bit */ + +void set_perf_event_pending(void) +{ + preempt_disable(); + set_perf_event_pending_flag(); + set_dec(1); + preempt_enable(); +} + +#else /* CONFIG_PERF_EVENTS */ #define test_perf_event_pending() 0 #define clear_perf_event_pending() -#endif /* CONFIG_PERF_EVENTS && CONFIG_PPC32 */ +#endif /* CONFIG_PERF_EVENTS */ /* * For iSeries shared processors, we have to let the hypervisor @@ -582,10 +617,6 @@ void timer_interrupt(struct pt_regs * regs) set_dec(DECREMENTER_MAX); #ifdef CONFIG_PPC32 - if (test_perf_event_pending()) { - clear_perf_event_pending(); - perf_event_do_pending(); - } if (atomic_read(&ppc_n_lost_interrupts) != 0) do_IRQ(regs); #endif @@ -604,6 +635,11 @@ void timer_interrupt(struct pt_regs * regs) calculate_steal_time(); + if (test_perf_event_pending()) { + clear_perf_event_pending(); + perf_event_do_pending(); + } + #ifdef CONFIG_PPC_ISERIES if (firmware_has_feature(FW_FEATURE_ISERIES)) get_lppaca()->int_dword.fields.decr_int = 0; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 696626a..29d128e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -21,7 +21,6 @@ #include <linux/stddef.h> #include <linux/unistd.h> #include <linux/ptrace.h> -#include <linux/slab.h> #include <linux/user.h> #include <linux/interrupt.h> #include <linux/init.h> diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 77f6421..8223717 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -17,6 +17,7 @@ #include <linux/types.h> #include <linux/device.h> #include <linux/init.h> +#include <linux/slab.h> #include <linux/console.h> #include <linux/module.h> #include <linux/mm.h> |