463 files changed, 8360 insertions, 3829 deletions
@@ -3554,12 +3554,12 @@ E: cvance@nai.com D: portions of the Linux Security Module (LSM) framework and security modules N: Petr Vandrovec -E: vandrove@vc.cvut.cz +E: petr@vandrovec.name D: Small contributions to ncpfs D: Matrox framebuffer driver -S: Chudenicka 8 -S: 10200 Prague 10, Hostivar -S: Czech Republic +S: 21513 Conradia Ct +S: Cupertino, CA 95014 +S: USA N: Thibaut Varene E: T-Bone@parisc-linux.org diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 1762b81..741fe66 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -542,9 +542,11 @@ Kprobes does not use mutexes or allocate memory except during registration and unregistration. Probe handlers are run with preemption disabled. Depending on the -architecture, handlers may also run with interrupts disabled. In any -case, your handler should not yield the CPU (e.g., by attempting to -acquire a semaphore). +architecture and optimization state, handlers may also run with +interrupts disabled (e.g., kretprobe handlers and optimized kprobe +handlers run without interrupt disabled on x86/x86-64). In any case, +your handler should not yield the CPU (e.g., by attempting to acquire +a semaphore). Since a return probe is implemented by replacing the return address with the trampoline's address, stack backtraces and calls diff --git a/MAINTAINERS b/MAINTAINERS index 50b8148..f46d8e6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -962,6 +962,13 @@ W: http://www.fluff.org/ben/linux/ S: Maintained F: arch/arm/mach-s3c6410/ +ARM/S5P ARM ARCHITECTURES +M: Kukjin Kim <kgene.kim@samsung.com> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/mach-s5p*/ + ARM/SHMOBILE ARM ARCHITECTURE M: Paul Mundt <lethal@linux-sh.org> M: Magnus Damm <magnus.damm@gmail.com> @@ -1220,7 +1227,7 @@ F: drivers/auxdisplay/ F: include/linux/cfag12864b.h AVR32 ARCHITECTURE -M: Haavard Skinnemoen <hskinnemoen@atmel.com> +M: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com> W: http://www.atmel.com/products/AVR32/ W: http://avr32linux.org/ W: http://avrfreaks.net/ @@ -1228,7 +1235,7 @@ S: Supported F: arch/avr32/ AVR32/AT32AP MACHINE SUPPORT -M: Haavard Skinnemoen <hskinnemoen@atmel.com> +M: Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com> S: Supported F: arch/avr32/mach-at32ap/ @@ -2199,6 +2206,12 @@ W: http://acpi4asus.sf.net S: Maintained F: drivers/platform/x86/eeepc-laptop.c +EFIFB FRAMEBUFFER DRIVER +L: linux-fbdev@vger.kernel.org +M: Peter Jones <pjones@redhat.com> +S: Maintained +F: drivers/video/efifb.c + EFS FILESYSTEM W: http://aeschi.ch.eu.org/efs/ S: Orphan @@ -2662,6 +2675,8 @@ M: Guenter Roeck <guenter.roeck@ericsson.com> L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/ +T: quilt kernel.org/pub/linux/kernel/people/groeck/linux-staging/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/groeck/linux-staging.git S: Maintained F: Documentation/hwmon/ F: drivers/hwmon/ @@ -3773,9 +3788,8 @@ W: http://www.syskonnect.com S: Supported MATROX FRAMEBUFFER DRIVER -M: Petr Vandrovec <vandrove@vc.cvut.cz> L: linux-fbdev@vger.kernel.org -S: Maintained +S: Orphan F: drivers/video/matrox/matroxfb_* F: include/linux/matroxfb.h @@ -3899,10 +3913,8 @@ F: Documentation/serial/moxa-smartio F: drivers/char/mxser.* MSI LAPTOP SUPPORT -M: Lennart Poettering <mzxreary@0pointer.de> +M: Lee, Chun-Yi <jlee@novell.com> L: 
platform-driver-x86@vger.kernel.org -W: https://tango.0pointer.de/mailman/listinfo/s270-linux -W: http://0pointer.de/lennart/tchibo.html S: Maintained F: drivers/platform/x86/msi-laptop.c @@ -3919,8 +3931,10 @@ S: Supported F: drivers/mfd/ MULTIMEDIA CARD (MMC), SECURE DIGITAL (SD) AND SDIO SUBSYSTEM -S: Orphan +M: Chris Ball <cjb@laptop.org> L: linux-mmc@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git +S: Maintained F: drivers/mmc/ F: include/linux/mmc/ @@ -3962,8 +3976,8 @@ S: Maintained F: drivers/net/natsemi.c NCP FILESYSTEM -M: Petr Vandrovec <vandrove@vc.cvut.cz> -S: Maintained +M: Petr Vandrovec <petr@vandrovec.name> +S: Odd Fixes F: fs/ncpfs/ NCR DUAL 700 SCSI DRIVER (MICROCHANNEL) @@ -5091,8 +5105,10 @@ S: Maintained F: drivers/mmc/host/sdricoh_cs.c SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) DRIVER -S: Orphan +M: Chris Ball <cjb@laptop.org> L: linux-mmc@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git +S: Maintained F: drivers/mmc/host/sdhci.* SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF) @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 36 -EXTRAVERSION = -rc5 +EXTRAVERSION = -rc7 NAME = Sheep on Meth # *DOCUMENTATION* @@ -568,6 +568,12 @@ endif ifdef CONFIG_FUNCTION_TRACER KBUILD_CFLAGS += -pg +ifdef CONFIG_DYNAMIC_FTRACE + ifdef CONFIG_HAVE_C_RECORDMCOUNT + BUILD_C_RECORDMCOUNT := y + export BUILD_C_RECORDMCOUNT + endif +endif endif # We trigger additional mismatches with less inlining @@ -591,6 +597,11 @@ KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) # conserve stack if available KBUILD_CFLAGS += $(call cc-option,-fconserve-stack) +# check for 'asm goto' +ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y) + KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO +endif + # Add user supplied CPPFLAGS, AFLAGS and CFLAGS as the last assignments # But warn user when we do so warn-assign = \ diff --git a/arch/Kconfig b/arch/Kconfig index fe48fc7..53d7f61 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -158,4 +158,7 @@ config HAVE_PERF_EVENTS_NMI subsystem. Also has support for calculating CPU cycle events to determine how many clock cycles in a given period. +config HAVE_ARCH_JUMP_LABEL + bool + source "kernel/gcov/Kconfig" diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index ab1ee0a..6d159ce 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -73,8 +73,6 @@ ldq $20, HAE_REG($19); \ stq $21, HAE_CACHE($19); \ stq $21, 0($20); \ - ldq $0, 0($sp); \ - ldq $1, 8($sp); \ 99:; \ ldq $19, 72($sp); \ ldq $20, 80($sp); \ @@ -316,7 +314,7 @@ ret_from_sys_call: cmovne $26, 0, $19 /* $19 = 0 => non-restartable */ ldq $0, SP_OFF($sp) and $0, 8, $0 - beq $0, restore_all + beq $0, ret_to_kernel ret_to_user: /* Make sure need_resched and sigpending don't change between sampling and the rti. */ @@ -329,6 +327,11 @@ restore_all: RESTORE_ALL call_pal PAL_rti +ret_to_kernel: + lda $16, 7 + call_pal PAL_swpipl + br restore_all + .align 3 $syscall_error: /* @@ -657,7 +660,7 @@ kernel_thread: /* We don't actually care for a3 success widgetry in the kernel. Not for positive errno values. 
*/ stq $0, 0($sp) /* $0 */ - br restore_all + br ret_to_kernel .end kernel_thread /* @@ -912,15 +915,6 @@ sys_execve: .end sys_execve .align 4 - .globl osf_sigprocmask - .ent osf_sigprocmask -osf_sigprocmask: - .prologue 0 - mov $sp, $18 - jmp $31, sys_osf_sigprocmask -.end osf_sigprocmask - - .align 4 .globl alpha_ni_syscall .ent alpha_ni_syscall alpha_ni_syscall: diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 85d8e4f..1cc4968 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -307,7 +307,7 @@ again: new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; + delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf; /* It is possible on very rare occasions that the PMC has overflowed * but the interrupt is yet to come. Detect and fix this situation. @@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) struct hw_perf_event *hwc = &pe->hw; int idx = hwc->idx; - if (cpuc->current_idx[j] != PMC_NO_INDEX) { - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); - continue; + if (cpuc->current_idx[j] == PMC_NO_INDEX) { + alpha_perf_event_set_period(pe, hwc, idx); + cpuc->current_idx[j] = idx; } - alpha_perf_event_set_period(pe, hwc, idx); - cpuc->current_idx[j] = idx; - cpuc->idx_mask |= (1<<cpuc->current_idx[j]); + if (!(hwc->state & PERF_HES_STOPPED)) + cpuc->idx_mask |= (1<<cpuc->current_idx[j]); } cpuc->config = cpuc->event[0]->hw.config_base; } @@ -420,12 +419,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. */ -static int alpha_pmu_enable(struct perf_event *event) +static int alpha_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; int n0; int ret; - unsigned long flags; + unsigned long irq_flags; /* * The Sparc code has the IRQ disable first followed by the perf @@ -435,8 +435,8 @@ static int alpha_pmu_enable(struct perf_event *event) * nevertheless we disable the PMCs first to enable a potential * final PMI to occur before we disable interrupts. */ - perf_disable(); - local_irq_save(flags); + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); /* Default to error to be returned */ ret = -EAGAIN; @@ -455,8 +455,12 @@ static int alpha_pmu_enable(struct perf_event *event) } } - local_irq_restore(flags); - perf_enable(); + hwc->state = PERF_HES_UPTODATE; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_STOPPED; + + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); return ret; } @@ -467,15 +471,15 @@ static int alpha_pmu_enable(struct perf_event *event) * - this function is called from outside this module via the pmu struct * returned from perf event initialisation. 
*/ -static void alpha_pmu_disable(struct perf_event *event) +static void alpha_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - unsigned long flags; + unsigned long irq_flags; int j; - perf_disable(); - local_irq_save(flags); + perf_pmu_disable(event->pmu); + local_irq_save(irq_flags); for (j = 0; j < cpuc->n_events; j++) { if (event == cpuc->event[j]) { @@ -501,8 +505,8 @@ static void alpha_pmu_disable(struct perf_event *event) } } - local_irq_restore(flags); - perf_enable(); + local_irq_restore(irq_flags); + perf_pmu_enable(event->pmu); } @@ -514,13 +518,44 @@ static void alpha_pmu_read(struct perf_event *event) } -static void alpha_pmu_unthrottle(struct perf_event *event) +static void alpha_pmu_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!(hwc->state & PERF_HES_STOPPED)) { + cpuc->idx_mask &= ~(1UL<<hwc->idx); + hwc->state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + alpha_perf_event_update(event, hwc, hwc->idx, 0); + hwc->state |= PERF_HES_UPTODATE; + } + + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_DISABLE, (1UL<<hwc->idx)); +} + + +static void alpha_pmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + alpha_perf_event_set_period(event, hwc, hwc->idx); + } + + hwc->state = 0; + cpuc->idx_mask |= 1UL<<hwc->idx; - wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); + if (cpuc->enabled) + wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx)); } @@ -642,39 +677,36 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static const struct pmu pmu = { - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - - /* * Main entry point to initialise a HW performance event. */ -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int alpha_pmu_event_init(struct perf_event *event) { int err; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!alpha_pmu) - return ERR_PTR(-ENODEV); + return -ENODEV; /* Do the real initialisation work. */ err = __hw_perf_event_init(event); - if (err) - return ERR_PTR(err); - - return &pmu; + return err; } - - /* * Main entry point - enable HW performance counters. */ -void hw_perf_enable(void) +static void alpha_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -700,7 +732,7 @@ void hw_perf_enable(void) * Main entry point - disable HW performance counters. 
*/ -void hw_perf_disable(void) +static void alpha_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -713,6 +745,17 @@ void hw_perf_disable(void) wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); } +static struct pmu pmu = { + .pmu_enable = alpha_pmu_enable, + .pmu_disable = alpha_pmu_disable, + .event_init = alpha_pmu_event_init, + .add = alpha_pmu_add, + .del = alpha_pmu_del, + .start = alpha_pmu_start, + .stop = alpha_pmu_stop, + .read = alpha_pmu_read, +}; + /* * Main entry point - don't know when this is called but it @@ -766,7 +809,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, wrperfmon(PERFMON_CMD_DISABLE, cpuc->idx_mask); /* la_ptr is the counter that overflowed. */ - if (unlikely(la_ptr >= perf_max_events)) { + if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) { /* This should never occur! */ irq_err_count++; pr_warning("PMI: silly index %ld\n", la_ptr); @@ -807,7 +850,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, /* Interrupts coming too quickly; "throttle" the * counter, i.e., disable it for a little while. */ - cpuc->idx_mask &= ~(1UL<<idx); + alpha_pmu_stop(event, 0); } } wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask); @@ -837,6 +880,7 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; - perf_max_events = alpha_pmu->num_pmcs; + + perf_pmu_register(&pmu); } diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 842dba3..3ec3506 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -356,7 +356,7 @@ dump_elf_thread(elf_greg_t *dest, struct pt_regs *pt, struct thread_info *ti) dest[27] = pt->r27; dest[28] = pt->r28; dest[29] = pt->gp; - dest[30] = rdusp(); + dest[30] = ti == current_thread_info() ? rdusp() : ti->pcb.usp; dest[31] = pt->pc; /* Once upon a time this was the PS value. Which is stupid diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 0f6b51a..6f7feb5 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -41,46 +41,20 @@ static void do_signal(struct pt_regs *, struct switch_stack *, /* * The OSF/1 sigprocmask calling sequence is different from the * C sigprocmask() sequence.. - * - * how: - * 1 - SIG_BLOCK - * 2 - SIG_UNBLOCK - * 3 - SIG_SETMASK - * - * We change the range to -1 .. 1 in order to let gcc easily - * use the conditional move instructions. - * - * Note that we don't need to acquire the kernel lock for SMP - * operation, as all of this is local to this thread. */ -SYSCALL_DEFINE3(osf_sigprocmask, int, how, unsigned long, newmask, - struct pt_regs *, regs) +SYSCALL_DEFINE2(osf_sigprocmask, int, how, unsigned long, newmask) { - unsigned long oldmask = -EINVAL; - - if ((unsigned long)how-1 <= 2) { - long sign = how-2; /* -1 .. 
1 */ - unsigned long block, unblock; - - newmask &= _BLOCKABLE; - spin_lock_irq(&current->sighand->siglock); - oldmask = current->blocked.sig[0]; - - unblock = oldmask & ~newmask; - block = oldmask | newmask; - if (!sign) - block = unblock; - if (sign <= 0) - newmask = block; - if (_NSIG_WORDS > 1 && sign > 0) - sigemptyset(&current->blocked); - current->blocked.sig[0] = newmask; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - - regs->r0 = 0; /* special no error return */ + sigset_t oldmask; + sigset_t mask; + unsigned long res; + + siginitset(&mask, newmask & _BLOCKABLE); + res = sigprocmask(how, &mask, &oldmask); + if (!res) { + force_successful_syscall_return(); + res = oldmask.sig[0]; } - return oldmask; + return res; } SYSCALL_DEFINE3(osf_sigaction, int, sig, @@ -94,9 +68,9 @@ SYSCALL_DEFINE3(osf_sigaction, int, sig, old_sigset_t mask; if (!access_ok(VERIFY_READ, act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags)) + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) return -EFAULT; - __get_user(mask, &act->sa_mask); siginitset(&new_ka.sa.sa_mask, mask); new_ka.ka_restorer = NULL; } @@ -106,9 +80,9 @@ SYSCALL_DEFINE3(osf_sigaction, int, sig, if (!ret && oact) { if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags)) + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); } return ret; diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index ce594ef..a6a1de9 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -58,7 +58,7 @@ sys_call_table: .quad sys_open /* 45 */ .quad alpha_ni_syscall .quad sys_getxgid - .quad osf_sigprocmask + .quad sys_osf_sigprocmask .quad alpha_ni_syscall .quad alpha_ni_syscall /* 50 */ .quad sys_acct diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 553b7cf..88c97bc 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -271,7 +271,6 @@ config ARCH_AT91 bool "Atmel AT91" select ARCH_REQUIRE_GPIOLIB select HAVE_CLK - select ARCH_USES_GETTIMEOFFSET help This enables support for systems based on the Atmel AT91RM9200, AT91SAM9 and AT91CAP9 processors. @@ -1051,6 +1050,32 @@ config ARM_ERRATA_460075 ACTLR register. Note that setting specific bits in the ACTLR register may not be available in non-secure mode. +config ARM_ERRATA_742230 + bool "ARM errata: DMB operation may be faulty" + depends on CPU_V7 && SMP + help + This option enables the workaround for the 742230 Cortex-A9 + (r1p0..r2p2) erratum. Under rare circumstances, a DMB instruction + between two write operations may not ensure the correct visibility + ordering of the two writes. This workaround sets a specific bit in + the diagnostic register of the Cortex-A9 which causes the DMB + instruction to behave as a DSB, ensuring the correct behaviour of + the two writes. + +config ARM_ERRATA_742231 + bool "ARM errata: Incorrect hazard handling in the SCU may lead to data corruption" + depends on CPU_V7 && SMP + help + This option enables the workaround for the 742231 Cortex-A9 + (r2p0..r2p2) erratum.
Under certain conditions, specific to the + Cortex-A9 MPCore micro-architecture, two CPUs working in SMP mode, + accessing some data located in the same cache line, may get corrupted + data due to bad handling of the address hazard when the line gets + replaced from one of the CPUs at the same time as another CPU is + accessing it. This workaround sets specific bits in the diagnostic + register of the Cortex-A9 which reduces the linefill issuing + capabilities of the processor. + config PL310_ERRATA_588369 bool "Clean & Invalidate maintenance operations do not invalidate clean lines" depends on CACHE_L2X0 && ARCH_OMAP4 diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index b23f6bc..65a7c1c 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -116,5 +116,5 @@ CFLAGS_font.o := -Dstatic= $(obj)/font.c: $(FONTC) $(call cmd,shipped) -$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile .config +$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG) @sed "$(SEDFLAGS)" < $< > $@ diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 7974baa..1bec96e 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -271,6 +271,14 @@ int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) ((dma_addr + size - PHYS_OFFSET) >= SZ_64M); } +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (mask >= PHYS_OFFSET + SZ_64M - 1) + return 0; + + return -EIO; +} + int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) { it8152_io.start = IT8152_IO_BASE + 0x12000; diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index ab68cf1..e90b167 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -317,6 +317,10 @@ static inline pte_t pte_mkspecial(pte_t pte) { return pte; } #ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_BUFFERABLE) +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); #else #define pgprot_dmacoherent(prot) \ __pgprot_modify(prot, L_PTE_MT_MASK|L_PTE_EXEC, L_PTE_MT_UNCACHED) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 1b560825..7885722b 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -48,6 +48,8 @@ work_pending: beq no_work_pending mov r0, sp @ 'regs' mov r2, why @ 'syscall' + tst r1, #_TIF_SIGPENDING @ delivering a signal? + movne why, #0 @ prevent further restarts bl do_notify_resume b ret_slow_syscall @ Check work again diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ef3bc33..6cc6521 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -227,46 +227,56 @@ again: } static void -armpmu_disable(struct perf_event *event) +armpmu_read(struct perf_event *event) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - - WARN_ON(idx < 0); - - clear_bit(idx, cpuc->active_mask); - armpmu->disable(hwc, idx); - - barrier(); - armpmu_event_update(event, hwc, idx); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + /* Don't read disabled counters! 
*/ + if (hwc->idx < 0) + return; - perf_event_update_userpage(event); + armpmu_event_update(event, hwc, hwc->idx); } static void -armpmu_read(struct perf_event *event) +armpmu_stop(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; - /* Don't read disabled counters! */ - if (hwc->idx < 0) + if (!armpmu) return; - armpmu_event_update(event, hwc, hwc->idx); + /* + * ARM pmu always has to update the counter, so ignore + * PERF_EF_UPDATE, see comments in armpmu_start(). + */ + if (!(hwc->state & PERF_HES_STOPPED)) { + armpmu->disable(hwc, hwc->idx); + barrier(); /* why? */ + armpmu_event_update(event, hwc, hwc->idx); + hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + } } static void -armpmu_unthrottle(struct perf_event *event) +armpmu_start(struct perf_event *event, int flags) { struct hw_perf_event *hwc = &event->hw; + if (!armpmu) + return; + + /* + * ARM pmu always has to reprogram the period, so ignore + * PERF_EF_RELOAD, see the comment below. + */ + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + + hwc->state = 0; /* * Set the period again. Some counters can't be stopped, so when we - * were throttled we simply disabled the IRQ source and the counter + * were stopped we simply disabled the IRQ source and the counter * may have been left counting. If we don't do this step then we may * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. @@ -275,14 +285,33 @@ armpmu_unthrottle(struct perf_event *event) armpmu->enable(hwc, hwc->idx); } +static void +armpmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + WARN_ON(idx < 0); + + clear_bit(idx, cpuc->active_mask); + armpmu_stop(event, PERF_EF_UPDATE); + cpuc->events[idx] = NULL; + clear_bit(idx, cpuc->used_mask); + + perf_event_update_userpage(event); +} + static int -armpmu_enable(struct perf_event *event) +armpmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; + perf_pmu_disable(event->pmu); + /* If we don't have a space for the counter then finish early. */ idx = armpmu->get_event_idx(cpuc, hwc); if (idx < 0) { @@ -299,25 +328,19 @@ armpmu_enable(struct perf_event *event) cpuc->events[idx] = event; set_bit(idx, cpuc->active_mask); - /* Set the period for the event. */ - armpmu_event_set_period(event, hwc, idx); - - /* Enable the event. */ - armpmu->enable(hwc, idx); + hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + if (flags & PERF_EF_START) + armpmu_start(event, PERF_EF_RELOAD); /* Propagate our changes to the userspace mapping. 
*/ perf_event_update_userpage(event); out: + perf_pmu_enable(event->pmu); return err; } -static struct pmu pmu = { - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; +static struct pmu pmu; static int validate_event(struct cpu_hw_events *cpuc, @@ -497,20 +520,29 @@ __hw_perf_event_init(struct perf_event *event) return err; } -const struct pmu * -hw_perf_event_init(struct perf_event *event) +static int armpmu_event_init(struct perf_event *event) { int err = 0; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!armpmu) - return ERR_PTR(-ENODEV); + return -ENODEV; event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > perf_max_events) { + if (atomic_read(&active_events) > armpmu->num_events) { atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } mutex_lock(&pmu_reserve_mutex); @@ -524,17 +556,16 @@ hw_perf_event_init(struct perf_event *event) } if (err) - return ERR_PTR(err); + return err; err = __hw_perf_event_init(event); if (err) hw_perf_event_destroy(event); - return err ? ERR_PTR(err) : &pmu; + return err; } -void -hw_perf_enable(void) +static void armpmu_enable(struct pmu *pmu) { /* Enable all of the perf events on hardware. */ int idx; @@ -555,13 +586,23 @@ hw_perf_enable(void) armpmu->start(); } -void -hw_perf_disable(void) +static void armpmu_disable(struct pmu *pmu) { if (armpmu) armpmu->stop(); } +static struct pmu pmu = { + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, +}; + /* * ARMv6 Performance counter handling code. * @@ -2939,14 +2980,12 @@ init_hw_perf_events(void) armpmu = &armv6pmu; memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, sizeof(armv6_perf_cache_map)); - perf_max_events = armv6pmu.num_events; break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; memcpy(armpmu_perf_cache_map, armv6mpcore_perf_cache_map, sizeof(armv6mpcore_perf_cache_map)); - perf_max_events = armv6mpcore_pmu.num_events; break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; @@ -2958,7 +2997,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; @@ -2970,7 +3008,6 @@ init_hw_perf_events(void) /* Reset PMNC and read the nb of CNTx counters supported */ armv7pmu.num_events = armv7_reset_read_pmnc(); - perf_max_events = armv7pmu.num_events; break; } /* Intel CPUs [xscale]. 
*/ @@ -2981,13 +3018,11 @@ init_hw_perf_events(void) armpmu = &xscale1pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale1pmu.num_events; break; case 2: armpmu = &xscale2pmu; memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, sizeof(xscale_perf_cache_map)); - perf_max_events = xscale2pmu.num_events; break; } } @@ -2997,9 +3032,10 @@ init_hw_perf_events(void) arm_pmu_names[armpmu->id], armpmu->num_events); } else { pr_info("no hardware support available\n"); - perf_max_events = -1; } + perf_pmu_register(&pmu); + return 0; } arch_initcall(init_hw_perf_events); @@ -3007,13 +3043,6 @@ arch_initcall(init_hw_perf_events); /* * Callchain handling code. */ -static inline void -callchain_store(struct perf_callchain_entry *entry, - u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} /* * The registers we're interested in are at the end of the variable @@ -3045,7 +3074,7 @@ user_backtrace(struct frame_tail *tail, if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail))) return NULL; - callchain_store(entry, buftail.lr); + perf_callchain_store(entry, buftail.lr); /* * Frame pointers should strictly progress back up the stack @@ -3057,16 +3086,11 @@ user_backtrace(struct frame_tail *tail, return buftail.fp - 1; } -static void -perf_callchain_user(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct frame_tail *tail; - callchain_store(entry, PERF_CONTEXT_USER); - - if (!user_mode(regs)) - regs = task_pt_regs(current); tail = (struct frame_tail *)regs->ARM_fp - 1; @@ -3084,56 +3108,18 @@ callchain_trace(struct stackframe *fr, void *data) { struct perf_callchain_entry *entry = data; - callchain_store(entry, fr->pc); + perf_callchain_store(entry, fr->pc); return 0; } -static void -perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; - callchain_store(entry, PERF_CONTEXT_KERNEL); fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || !current->pid) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); - -struct perf_callchain_entry * -perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - perf_do_callchain(regs, entry); - return entry; -} diff --git a/arch/arm/mach-at91/at91sam9g45_devices.c b/arch/arm/mach-at91/at91sam9g45_devices.c index 5e71ccd..1276bab 100644 --- a/arch/arm/mach-at91/at91sam9g45_devices.c +++ b/arch/arm/mach-at91/at91sam9g45_devices.c @@ -426,7 +426,7 @@ static struct i2c_gpio_platform_data pdata_i2c0 = { .sda_is_open_drain = 1, .scl_pin = AT91_PIN_PA21, .scl_is_open_drain = 1, - .udelay = 2, /* ~100 kHz */ + .udelay = 5, /* ~100 kHz */ }; static struct platform_device at91sam9g45_twi0_device = { @@ -440,7 +440,7 @@ static struct i2c_gpio_platform_data pdata_i2c1 = { .sda_is_open_drain = 1, .scl_pin = 
AT91_PIN_PB11, .scl_is_open_drain = 1, - .udelay = 2, /* ~100 kHz */ + .udelay = 5, /* ~100 kHz */ }; static struct platform_device at91sam9g45_twi1_device = { diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 3d996b6..9be261b 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -769,8 +769,7 @@ static struct map_desc dm355_io_desc[] = { .virtual = SRAM_VIRT, .pfn = __phys_to_pfn(0x00010000), .length = SZ_32K, - /* MT_MEMORY_NONCACHED requires supersection alignment */ - .type = MT_DEVICE, + .type = MT_MEMORY_NONCACHED, }, }; diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 6b6f4c6..7781e35 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -969,8 +969,7 @@ static struct map_desc dm365_io_desc[] = { .virtual = SRAM_VIRT, .pfn = __phys_to_pfn(0x00010000), .length = SZ_32K, - /* MT_MEMORY_NONCACHED requires supersection alignment */ - .type = MT_DEVICE, + .type = MT_MEMORY_NONCACHED, }, }; diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index 40fec31..5e5b0a7 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -653,8 +653,7 @@ static struct map_desc dm644x_io_desc[] = { .virtual = SRAM_VIRT, .pfn = __phys_to_pfn(0x00008000), .length = SZ_16K, - /* MT_MEMORY_NONCACHED requires supersection alignment */ - .type = MT_DEVICE, + .type = MT_MEMORY_NONCACHED, }, }; diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index e4a3df1..26e8a9c 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -737,8 +737,7 @@ static struct map_desc dm646x_io_desc[] = { .virtual = SRAM_VIRT, .pfn = __phys_to_pfn(0x00010000), .length = SZ_32K, - /* MT_MEMORY_NONCACHED requires supersection alignment */ - .type = MT_DEVICE, + .type = MT_MEMORY_NONCACHED, }, }; diff --git a/arch/arm/mach-dove/include/mach/io.h b/arch/arm/mach-dove/include/mach/io.h index 3b3e472..eb4936f 100644 --- a/arch/arm/mach-dove/include/mach/io.h +++ b/arch/arm/mach-dove/include/mach/io.h @@ -13,8 +13,8 @@ #define IO_SPACE_LIMIT 0xffffffff -#define __io(a) ((void __iomem *)(((a) - DOVE_PCIE0_IO_PHYS_BASE) +\ - DOVE_PCIE0_IO_VIRT_BASE)) -#define __mem_pci(a) (a) +#define __io(a) ((void __iomem *)(((a) - DOVE_PCIE0_IO_BUS_BASE) + \ + DOVE_PCIE0_IO_VIRT_BASE)) +#define __mem_pci(a) (a) #endif diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 61cd4d6..24498a9 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -503,6 +503,14 @@ struct pci_bus * __devinit ixp4xx_scan_bus(int nr, struct pci_sys_data *sys) return pci_scan_bus(sys->busnr, &ixp4xx_ops, sys); } +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (mask >= SZ_64M - 1) + return 0; + + return -EIO; +} + EXPORT_SYMBOL(ixp4xx_pci_read); EXPORT_SYMBOL(ixp4xx_pci_write); diff --git a/arch/arm/mach-ixp4xx/include/mach/hardware.h b/arch/arm/mach-ixp4xx/include/mach/hardware.h index f91ca6d..8138371 100644 --- a/arch/arm/mach-ixp4xx/include/mach/hardware.h +++ b/arch/arm/mach-ixp4xx/include/mach/hardware.h @@ -26,6 +26,8 @@ #define PCIBIOS_MAX_MEM 0x4BFFFFFF #endif +#define ARCH_HAS_DMA_SET_COHERENT_MASK + #define pcibios_assign_all_busses() 1 /* Register locations and bits */ diff --git a/arch/arm/mach-kirkwood/include/mach/kirkwood.h b/arch/arm/mach-kirkwood/include/mach/kirkwood.h index 93fc2ec..6e924b3 100644 --- a/arch/arm/mach-kirkwood/include/mach/kirkwood.h +++ 
b/arch/arm/mach-kirkwood/include/mach/kirkwood.h @@ -38,7 +38,7 @@ #define KIRKWOOD_PCIE1_IO_PHYS_BASE 0xf3000000 #define KIRKWOOD_PCIE1_IO_VIRT_BASE 0xfef00000 -#define KIRKWOOD_PCIE1_IO_BUS_BASE 0x00000000 +#define KIRKWOOD_PCIE1_IO_BUS_BASE 0x00100000 #define KIRKWOOD_PCIE1_IO_SIZE SZ_1M #define KIRKWOOD_PCIE_IO_PHYS_BASE 0xf2000000 diff --git a/arch/arm/mach-kirkwood/pcie.c b/arch/arm/mach-kirkwood/pcie.c index 55e7f00..513ad31 100644 --- a/arch/arm/mach-kirkwood/pcie.c +++ b/arch/arm/mach-kirkwood/pcie.c @@ -117,7 +117,7 @@ static void __init pcie0_ioresources_init(struct pcie_port *pp) * IORESOURCE_IO */ pp->res[0].name = "PCIe 0 I/O Space"; - pp->res[0].start = KIRKWOOD_PCIE_IO_PHYS_BASE; + pp->res[0].start = KIRKWOOD_PCIE_IO_BUS_BASE; pp->res[0].end = pp->res[0].start + KIRKWOOD_PCIE_IO_SIZE - 1; pp->res[0].flags = IORESOURCE_IO; @@ -139,7 +139,7 @@ static void __init pcie1_ioresources_init(struct pcie_port *pp) * IORESOURCE_IO */ pp->res[0].name = "PCIe 1 I/O Space"; - pp->res[0].start = KIRKWOOD_PCIE1_IO_PHYS_BASE; + pp->res[0].start = KIRKWOOD_PCIE1_IO_BUS_BASE; pp->res[0].end = pp->res[0].start + KIRKWOOD_PCIE1_IO_SIZE - 1; pp->res[0].flags = IORESOURCE_IO; diff --git a/arch/arm/mach-mmp/include/mach/system.h b/arch/arm/mach-mmp/include/mach/system.h index 4f5b0e0..1a8a25e 100644 --- a/arch/arm/mach-mmp/include/mach/system.h +++ b/arch/arm/mach-mmp/include/mach/system.h @@ -9,6 +9,8 @@ #ifndef __ASM_MACH_SYSTEM_H #define __ASM_MACH_SYSTEM_H +#include <mach/cputype.h> + static inline void arch_idle(void) { cpu_do_idle(); @@ -16,6 +18,9 @@ static inline void arch_idle(void) static inline void arch_reset(char mode, const char *cmd) { - cpu_reset(0); + if (cpu_is_pxa168()) + cpu_reset(0xffff0000); + else + cpu_reset(0); } #endif /* __ASM_MACH_SYSTEM_H */ diff --git a/arch/arm/mach-pxa/cpufreq-pxa2xx.c b/arch/arm/mach-pxa/cpufreq-pxa2xx.c index 50d5939..58093d9 100644 --- a/arch/arm/mach-pxa/cpufreq-pxa2xx.c +++ b/arch/arm/mach-pxa/cpufreq-pxa2xx.c @@ -312,8 +312,7 @@ static int pxa_set_target(struct cpufreq_policy *policy, freqs.cpu = policy->cpu; if (freq_debug) - pr_debug(KERN_INFO "Changing CPU frequency to %d Mhz, " - "(SDRAM %d Mhz)\n", + pr_debug("Changing CPU frequency to %d Mhz, (SDRAM %d Mhz)\n", freqs.new / 1000, (pxa_freq_settings[idx].div2) ? 
(new_freq_mem / 2000) : (new_freq_mem / 1000)); diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h index 7f64d24..814f145 100644 --- a/arch/arm/mach-pxa/include/mach/hardware.h +++ b/arch/arm/mach-pxa/include/mach/hardware.h @@ -264,23 +264,35 @@ * <= 0x2 for pxa21x/pxa25x/pxa26x/pxa27x * == 0x3 for pxa300/pxa310/pxa320 */ +#if defined(CONFIG_PXA25x) || defined(CONFIG_PXA27x) #define __cpu_is_pxa2xx(id) \ ({ \ unsigned int _id = (id) >> 13 & 0x7; \ _id <= 0x2; \ }) +#else +#define __cpu_is_pxa2xx(id) (0) +#endif +#ifdef CONFIG_PXA3xx #define __cpu_is_pxa3xx(id) \ ({ \ unsigned int _id = (id) >> 13 & 0x7; \ _id == 0x3; \ }) +#else +#define __cpu_is_pxa3xx(id) (0) +#endif +#if defined(CONFIG_CPU_PXA930) || defined(CONFIG_CPU_PXA935) #define __cpu_is_pxa93x(id) \ ({ \ unsigned int _id = (id) >> 4 & 0xfff; \ _id == 0x683 || _id == 0x693; \ }) +#else +#define __cpu_is_pxa93x(id) (0) +#endif #define cpu_is_pxa2xx() \ ({ \ @@ -309,7 +321,7 @@ extern unsigned long get_clock_tick_rate(void); #define PCIBIOS_MIN_IO 0 #define PCIBIOS_MIN_MEM 0 #define pcibios_assign_all_busses() 1 +#define ARCH_HAS_DMA_SET_COHERENT_MASK #endif - #endif /* _ASM_ARCH_HARDWARE_H */ diff --git a/arch/arm/mach-pxa/include/mach/io.h b/arch/arm/mach-pxa/include/mach/io.h index 262691f..fdca3be 100644 --- a/arch/arm/mach-pxa/include/mach/io.h +++ b/arch/arm/mach-pxa/include/mach/io.h @@ -6,6 +6,8 @@ #ifndef __ASM_ARM_ARCH_IO_H #define __ASM_ARM_ARCH_IO_H +#include <mach/hardware.h> + #define IO_SPACE_LIMIT 0xffffffff /* diff --git a/arch/arm/mach-pxa/palm27x.c b/arch/arm/mach-pxa/palm27x.c index 77ad6d3..405b92a 100644 --- a/arch/arm/mach-pxa/palm27x.c +++ b/arch/arm/mach-pxa/palm27x.c @@ -469,9 +469,13 @@ static struct i2c_board_info __initdata palm27x_pi2c_board_info[] = { }, }; +static struct i2c_pxa_platform_data palm27x_i2c_power_info = { + .use_pio = 1, +}; + void __init palm27x_pmic_init(void) { i2c_register_board_info(1, ARRAY_AND_SIZE(palm27x_pi2c_board_info)); - pxa27x_set_i2c_power_info(NULL); + pxa27x_set_i2c_power_info(&palm27x_i2c_power_info); } #endif diff --git a/arch/arm/mach-pxa/vpac270.c b/arch/arm/mach-pxa/vpac270.c index c9b747c..37d6173 100644 --- a/arch/arm/mach-pxa/vpac270.c +++ b/arch/arm/mach-pxa/vpac270.c @@ -240,6 +240,7 @@ static void __init vpac270_onenand_init(void) {} #if defined(CONFIG_MMC_PXA) || defined(CONFIG_MMC_PXA_MODULE) static struct pxamci_platform_data vpac270_mci_platform_data = { .ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, + .gpio_power = -1, .gpio_card_detect = GPIO53_VPAC270_SD_DETECT_N, .gpio_card_ro = GPIO52_VPAC270_SD_READONLY, .detect_delay_ms = 200, diff --git a/arch/arm/mach-u300/include/mach/gpio.h b/arch/arm/mach-u300/include/mach/gpio.h index 7b1fc98..d5a71ab 100644 --- a/arch/arm/mach-u300/include/mach/gpio.h +++ b/arch/arm/mach-u300/include/mach/gpio.h @@ -273,6 +273,9 @@ extern void gpio_pullup(unsigned gpio, int value); extern int gpio_get_value(unsigned gpio); extern void gpio_set_value(unsigned gpio, int value); +#define gpio_get_value_cansleep gpio_get_value +#define gpio_set_value_cansleep gpio_set_value + /* wrappers to sleep-enable the previous two functions */ static inline unsigned gpio_to_irq(unsigned gpio) { diff --git a/arch/arm/mach-vexpress/ct-ca9x4.c b/arch/arm/mach-vexpress/ct-ca9x4.c index 577df6c..efb1270 100644 --- a/arch/arm/mach-vexpress/ct-ca9x4.c +++ b/arch/arm/mach-vexpress/ct-ca9x4.c @@ -227,7 +227,13 @@ static void ct_ca9x4_init(void) int i; #ifdef CONFIG_CACHE_L2X0 - 
l2x0_init(MMIO_P2V(CT_CA9X4_L2CC), 0x00000000, 0xfe0fffff); + void __iomem *l2x0_base = MMIO_P2V(CT_CA9X4_L2CC); + + /* set RAM latencies to 1 cycle for this core tile. */ + writel(0, l2x0_base + L2X0_TAG_LATENCY_CTRL); + writel(0, l2x0_base + L2X0_DATA_LATENCY_CTRL); + + l2x0_init(l2x0_base, 0x00400000, 0xfe0fffff); #endif clkdev_add_table(lookups, ARRAY_SIZE(lookups)); diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index d073b64..724ba3b 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -885,8 +885,23 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (ai_usermode & UM_SIGNAL) force_sig(SIGBUS, current); - else - set_cr(cr_no_alignment); + else { + /* + * We're about to disable the alignment trap and return to + * user space. But if an interrupt occurs before actually + * reaching user space, then the IRQ vector entry code will + * notice that we were still in kernel space and therefore + * the alignment trap won't be re-enabled in that case as it + * is presumed to be always on from kernel space. + * Let's prevent that race by disabling interrupts here (they + * are disabled on the way back to user space anyway in + * entry-common.S) and disable the alignment trap only if + * there is no work pending for this thread. + */ + raw_local_irq_disable(); + if (!(current_thread_info()->flags & _TIF_WORK_MASK)) + set_cr(cr_no_alignment); + } return 0; } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6e1c4f6..6a3a2d0 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -15,6 +15,7 @@ #include <linux/nodemask.h> #include <linux/memblock.h> #include <linux/sort.h> +#include <linux/fs.h> #include <asm/cputype.h> #include <asm/sections.h> @@ -246,6 +247,9 @@ static struct mem_type mem_types[] = { .domain = DOMAIN_USER, }, [MT_MEMORY] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_USER | L_PTE_EXEC, + .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, @@ -254,6 +258,9 @@ static struct mem_type mem_types[] = { .domain = DOMAIN_KERNEL, }, [MT_MEMORY_NONCACHED] = { + .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | + L_PTE_USER | L_PTE_EXEC | L_PTE_MT_BUFFERABLE, + .prot_l1 = PMD_TYPE_TABLE, .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE, .domain = DOMAIN_KERNEL, }, @@ -411,9 +418,12 @@ static void __init build_mem_type_table(void) * Enable CPU-specific coherency if supported. * (Only available on XSC3 at the moment.) */ - if (arch_is_coherent() && cpu_is_xsc3()) + if (arch_is_coherent() && cpu_is_xsc3()) { mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; - + mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; + } /* * ARMv6 and above have extended page tables. 
*/ @@ -438,7 +448,9 @@ static void __init build_mem_type_table(void) mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_S; mem_types[MT_DEVICE_CACHED].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED; mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S; + mem_types[MT_MEMORY_NONCACHED].prot_pte |= L_PTE_SHARED; #endif } @@ -475,6 +487,8 @@ static void __init build_mem_type_table(void) mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask; mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd; + mem_types[MT_MEMORY].prot_pte |= kern_pgprot; + mem_types[MT_MEMORY_NONCACHED].prot_sect |= ecc_mask; mem_types[MT_ROM].prot_sect |= cp->pmd; switch (cp->pmd) { @@ -498,6 +512,19 @@ static void __init build_mem_type_table(void) } } +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) + return pgprot_noncached(vma_prot); + else if (file->f_flags & O_SYNC) + return pgprot_writecombine(vma_prot); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); +#endif + #define vectors_base() (vectors_high() ? 0xffff0000 : 0) static void __init *early_alloc(unsigned long sz) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 6a8506d..7563ff0 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -186,13 +186,14 @@ cpu_v7_name: * It is assumed that: * - cache type register is implemented */ -__v7_setup: +__v7_ca9mp_setup: #ifdef CONFIG_SMP mrc p15, 0, r0, c1, c0, 1 tst r0, #(1 << 6) @ SMP/nAMP mode enabled? orreq r0, r0, #(1 << 6) | (1 << 0) @ Enable SMP/nAMP mode and mcreq p15, 0, r0, c1, c0, 1 @ TLB ops broadcasting #endif +__v7_setup: adr r12, __v7_setup_stack @ the local stack stmia r12, {r0-r5, r7, r9, r11, lr} bl v7_flush_dcache_all @@ -201,11 +202,16 @@ __v7_setup: mrc p15, 0, r0, c0, c0, 0 @ read main ID register and r10, r0, #0xff000000 @ ARM? 
teq r10, #0x41000000 - bne 2f + bne 3f and r5, r0, #0x00f00000 @ variant and r6, r0, #0x0000000f @ revision - orr r0, r6, r5, lsr #20-4 @ combine variant and revision + orr r6, r6, r5, lsr #20-4 @ combine variant and revision + ubfx r0, r0, #4, #12 @ primary part number + /* Cortex-A8 Errata */ + ldr r10, =0x00000c08 @ Cortex-A8 primary part number + teq r0, r10 + bne 2f #ifdef CONFIG_ARM_ERRATA_430973 teq r5, #0x00100000 @ only present in r1p* mrceq p15, 0, r10, c1, c0, 1 @ read aux control register @@ -213,21 +219,42 @@ __v7_setup: mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif #ifdef CONFIG_ARM_ERRATA_458693 - teq r0, #0x20 @ only present in r2p0 + teq r6, #0x20 @ only present in r2p0 mrceq p15, 0, r10, c1, c0, 1 @ read aux control register orreq r10, r10, #(1 << 5) @ set L1NEON to 1 orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 mcreq p15, 0, r10, c1, c0, 1 @ write aux control register #endif #ifdef CONFIG_ARM_ERRATA_460075 - teq r0, #0x20 @ only present in r2p0 + teq r6, #0x20 @ only present in r2p0 mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register tsteq r10, #1 << 22 orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register #endif + b 3f + + /* Cortex-A9 Errata */ +2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number + teq r0, r10 + bne 3f +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register + orrle r10, r10, #1 << 4 @ set bit #4 + mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register + orreq r10, r10, #1 << 12 @ set bit #12 + orreq r10, r10, #1 << 22 @ set bit #22 + mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register +#endif -2: mov r10, #0 +3: mov r10, #0 #ifdef HARVARD_CACHE mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #endif @@ -323,6 +350,29 @@ cpu_elf_name: .section ".proc.info.init", #alloc, #execinstr + .type __v7_ca9mp_proc_info, #object +__v7_ca9mp_proc_info: + .long 0x410fc090 @ Required ID value + .long 0xff0ffff0 @ Mask for ID + .long PMD_TYPE_SECT | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ | \ + PMD_FLAGS + .long PMD_TYPE_SECT | \ + PMD_SECT_XN | \ + PMD_SECT_AP_WRITE | \ + PMD_SECT_AP_READ + b __v7_ca9mp_setup + .long cpu_arch_name + .long cpu_elf_name + .long HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP + .long cpu_v7_name + .long v7_processor_functions + .long v7wbi_tlb_fns + .long v6_user_fns + .long v7_cache_fns + .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info + /* * Match any ARMv7 processor core. 
*/ diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c index ea3ca86..aedf9c1 100644 --- a/arch/arm/plat-nomadik/timer.c +++ b/arch/arm/plat-nomadik/timer.c @@ -1,5 +1,5 @@ /* - * linux/arch/arm/mach-nomadik/timer.c + * linux/arch/arm/plat-nomadik/timer.c * * Copyright (C) 2008 STMicroelectronics * Copyright (C) 2010 Alessandro Rubini @@ -75,7 +75,7 @@ static void nmdk_clkevt_mode(enum clock_event_mode mode, cr = readl(mtu_base + MTU_CR(1)); writel(0, mtu_base + MTU_LR(1)); writel(cr | MTU_CRn_ENA, mtu_base + MTU_CR(1)); - writel(0x2, mtu_base + MTU_IMSC); + writel(1 << 1, mtu_base + MTU_IMSC); break; case CLOCK_EVT_MODE_SHUTDOWN: case CLOCK_EVT_MODE_UNUSED: @@ -131,25 +131,23 @@ void __init nmdk_timer_init(void) { unsigned long rate; struct clk *clk0; - struct clk *clk1; - u32 cr; + u32 cr = MTU_CRn_32BITS; clk0 = clk_get_sys("mtu0", NULL); BUG_ON(IS_ERR(clk0)); - clk1 = clk_get_sys("mtu1", NULL); - BUG_ON(IS_ERR(clk1)); - clk_enable(clk0); - clk_enable(clk1); /* - * Tick rate is 2.4MHz for Nomadik and 110MHz for ux500: - * use a divide-by-16 counter if it's more than 16MHz + * Tick rate is 2.4MHz for Nomadik and 2.4Mhz, 100MHz or 133 MHz + * for ux500. + * Use a divide-by-16 counter if the tick rate is more than 32MHz. + * At 32 MHz, the timer (with 32 bit counter) can be programmed + * to wake-up at a max 127s a head in time. Dividing a 2.4 MHz timer + * with 16 gives too low timer resolution. */ - cr = MTU_CRn_32BITS;; rate = clk_get_rate(clk0); - if (rate > 16 << 20) { + if (rate > 32000000) { rate /= 16; cr |= MTU_CRn_PRESCALE_16; } else { @@ -170,15 +168,8 @@ void __init nmdk_timer_init(void) pr_err("timer: failed to initialize clock source %s\n", nmdk_clksrc.name); - /* Timer 1 is used for events, fix according to rate */ - cr = MTU_CRn_32BITS; - rate = clk_get_rate(clk1); - if (rate > 16 << 20) { - rate /= 16; - cr |= MTU_CRn_PRESCALE_16; - } else { - cr |= MTU_CRn_PRESCALE_1; - } + /* Timer 1 is used for events */ + clockevents_calc_mult_shift(&nmdk_clkevt, rate, MTU_MIN_RANGE); writel(cr | MTU_CRn_ONESHOT, mtu_base + MTU_CR(1)); /* off, currently */ diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index e39a417..a92cb49 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -33,7 +33,7 @@ config OMAP_DEBUG_DEVICES config OMAP_DEBUG_LEDS bool depends on OMAP_DEBUG_DEVICES - default y if LEDS + default y if LEDS_CLASS config OMAP_RESET_CLOCKS bool "Reset unused clocks during boot" diff --git a/arch/arm/plat-omap/mcbsp.c b/arch/arm/plat-omap/mcbsp.c index e31496e..0c8612f 100644 --- a/arch/arm/plat-omap/mcbsp.c +++ b/arch/arm/plat-omap/mcbsp.c @@ -156,7 +156,7 @@ static irqreturn_t omap_mcbsp_rx_irq_handler(int irq, void *dev_id) /* Writing zero to RSYNC_ERR clears the IRQ */ MCBSP_WRITE(mcbsp_rx, SPCR1, MCBSP_READ_CACHE(mcbsp_rx, SPCR1)); } else { - complete(&mcbsp_rx->tx_irq_completion); + complete(&mcbsp_rx->rx_irq_completion); } return IRQ_HANDLED; diff --git a/arch/arm/plat-omap/sram.c b/arch/arm/plat-omap/sram.c index 226b2e8..10b3b4c 100644 --- a/arch/arm/plat-omap/sram.c +++ b/arch/arm/plat-omap/sram.c @@ -220,20 +220,7 @@ void __init omap_map_sram(void) if (omap_sram_size == 0) return; - if (cpu_is_omap24xx()) { - omap_sram_io_desc[0].virtual = OMAP2_SRAM_VA; - - base = OMAP2_SRAM_PA; - base = ROUND_DOWN(base, PAGE_SIZE); - omap_sram_io_desc[0].pfn = __phys_to_pfn(base); - } - if (cpu_is_omap34xx()) { - omap_sram_io_desc[0].virtual = OMAP3_SRAM_VA; - base = OMAP3_SRAM_PA; - base = ROUND_DOWN(base, PAGE_SIZE); 
- omap_sram_io_desc[0].pfn = __phys_to_pfn(base); - /* * SRAM must be marked as non-cached on OMAP3 since the * CORE DPLL M2 divider change code (in SRAM) runs with the @@ -244,13 +231,11 @@ void __init omap_map_sram(void) omap_sram_io_desc[0].type = MT_MEMORY_NONCACHED; } - if (cpu_is_omap44xx()) { - omap_sram_io_desc[0].virtual = OMAP4_SRAM_VA; - base = OMAP4_SRAM_PA; - base = ROUND_DOWN(base, PAGE_SIZE); - omap_sram_io_desc[0].pfn = __phys_to_pfn(base); - } - omap_sram_io_desc[0].length = 1024 * 1024; /* Use section desc */ + omap_sram_io_desc[0].virtual = omap_sram_base; + base = omap_sram_start; + base = ROUND_DOWN(base, PAGE_SIZE); + omap_sram_io_desc[0].pfn = __phys_to_pfn(base); + omap_sram_io_desc[0].length = ROUND_DOWN(omap_sram_size, PAGE_SIZE); iotable_init(omap_sram_io_desc, ARRAY_SIZE(omap_sram_io_desc)); printk(KERN_INFO "SRAM: Mapped pa 0x%08lx to va 0x%08lx size: 0x%lx\n", diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 98f94d0..a727f54 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -314,10 +314,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, vfree(module->arch.syminfo); module->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, module); + return 0; } void module_arch_cleanup(struct module *module) { - module_bug_cleanup(module); } diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index 0865e29..db4953d 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c @@ -112,10 +112,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h index 9c1acb2..b2eeb0d 100644 --- a/arch/m32r/include/asm/signal.h +++ b/arch/m32r/include/asm/signal.h @@ -157,7 +157,6 @@ typedef struct sigaltstack { #undef __HAVE_ARCH_SIG_BITOPS struct pt_regs; -extern int do_signal(struct pt_regs *regs, sigset_t *oldset); #define ptrace_signal_deliver(regs, cookie) do { } while (0) diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h index 7612577..c705456 100644 --- a/arch/m32r/include/asm/unistd.h +++ b/arch/m32r/include/asm/unistd.h @@ -351,6 +351,7 @@ #define __ARCH_WANT_SYS_OLD_GETRLIMIT /*will be unused*/ #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_RT_SIGACTION +#define __ARCH_WANT_SYS_RT_SIGSUSPEND #define __IGNORE_lchown #define __IGNORE_setuid diff --git a/arch/m32r/kernel/entry.S b/arch/m32r/kernel/entry.S index 4038698..225412b 100644 --- a/arch/m32r/kernel/entry.S +++ b/arch/m32r/kernel/entry.S @@ -235,10 +235,9 @@ work_resched: work_notifysig: ; deal with pending signals and ; notify-resume requests mv r0, sp ; arg1 : struct pt_regs *regs - ldi r1, #0 ; arg2 : sigset_t *oldset - mv r2, r9 ; arg3 : __u32 thread_info_flags + mv r1, r9 ; arg2 : __u32 thread_info_flags bl do_notify_resume - bra restore_all + bra resume_userspace ; perform syscall exit tracing ALIGN diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index e555091..0021ade 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -592,16 +592,17 @@ void user_enable_single_step(struct task_struct *child) if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0) != sizeof(insn)) - break; + return -EIO; compute_next_pc(insn, pc, &next_pc, child); if (next_pc & 0x80000000) - break; + return -EIO; 
if (embed_debug_trap(child, next_pc)) - break; + return -EIO; invalidate_cache(); + return 0; } void user_disable_single_step(struct task_struct *child) diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c index 144b0f12..7bbe386 100644 --- a/arch/m32r/kernel/signal.c +++ b/arch/m32r/kernel/signal.c @@ -28,37 +28,6 @@ #define DEBUG_SIG 0 -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -int do_signal(struct pt_regs *, sigset_t *); - -asmlinkage int -sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize, - unsigned long r2, unsigned long r3, unsigned long r4, - unsigned long r5, unsigned long r6, struct pt_regs *regs) -{ - sigset_t newset; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - - if (copy_from_user(&newset, unewset, sizeof(newset))) - return -EFAULT; - sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - - spin_lock_irq(&current->sighand->siglock); - current->saved_sigmask = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); - - current->state = TASK_INTERRUPTIBLE; - schedule(); - set_thread_flag(TIF_RESTORE_SIGMASK); - return -ERESTARTNOHAND; -} - asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, unsigned long r2, unsigned long r3, unsigned long r4, @@ -218,7 +187,7 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size) return (void __user *)((sp - frame_size) & -8ul); } -static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, +static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; @@ -275,22 +244,34 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, current->comm, current->pid, frame, regs->pc); #endif - return; + return 0; give_sigsegv: force_sigsegv(sig, current); + return -EFAULT; +} + +static int prev_insn(struct pt_regs *regs) +{ + u16 inst; + if (get_user(&inst, (u16 __user *)(regs->bpc - 2))) + return -EFAULT; + if ((inst & 0xfff0) == 0x10f0) /* trap ? */ + regs->bpc -= 2; + else + regs->bpc -= 4; + regs->syscall_nr = -1; + return 0; } /* * OK, we're invoking a handler */ -static void +static int handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, struct pt_regs *regs) { - unsigned short inst; - /* Are we from a system call? */ if (regs->syscall_nr >= 0) { /* If so, check system call restarting.. */ @@ -308,16 +289,14 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, /* fallthrough */ case -ERESTARTNOINTR: regs->r0 = regs->orig_r0; - inst = *(unsigned short *)(regs->bpc - 2); - if ((inst & 0xfff0) == 0x10f0) /* trap ? */ - regs->bpc -= 2; - else - regs->bpc -= 4; + if (prev_insn(regs) < 0) + return -EFAULT; } } /* Set up the stack frame */ - setup_rt_frame(sig, ka, info, oldset, regs); + if (setup_rt_frame(sig, ka, info, oldset, regs)) + return -EFAULT; spin_lock_irq(&current->sighand->siglock); sigorsets(&current->blocked,&current->blocked,&ka->sa.sa_mask); @@ -325,6 +304,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigaddset(&current->blocked,sig); recalc_sigpending(); spin_unlock_irq(&current->sighand->siglock); + return 0; } /* @@ -332,12 +312,12 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info, * want to handle. Thus you cannot kill init even with a SIGKILL even by * mistake.
*/ -int do_signal(struct pt_regs *regs, sigset_t *oldset) +static void do_signal(struct pt_regs *regs) { siginfo_t info; int signr; struct k_sigaction ka; - unsigned short inst; + sigset_t *oldset; /* * We want the common case to go fast, which @@ -346,12 +326,14 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) * if so. */ if (!user_mode(regs)) - return 1; + return; if (try_to_freeze()) goto no_signal; - if (!oldset) + if (test_thread_flag(TIF_RESTORE_SIGMASK)) + oldset = &current->saved_sigmask; + else oldset = &current->blocked; signr = get_signal_to_deliver(&info, &ka, regs, NULL); @@ -363,8 +345,10 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) */ /* Whee! Actually deliver the signal. */ - handle_signal(signr, &ka, &info, oldset, regs); - return 1; + if (handle_signal(signr, &ka, &info, oldset, regs) == 0) + clear_thread_flag(TIF_RESTORE_SIGMASK); + + return; } no_signal: @@ -375,31 +359,24 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset) regs->r0 == -ERESTARTSYS || regs->r0 == -ERESTARTNOINTR) { regs->r0 = regs->orig_r0; - inst = *(unsigned short *)(regs->bpc - 2); - if ((inst & 0xfff0) == 0x10f0) /* trap ? */ - regs->bpc -= 2; - else - regs->bpc -= 4; - } - if (regs->r0 == -ERESTART_RESTARTBLOCK){ + prev_insn(regs); + } else if (regs->r0 == -ERESTART_RESTARTBLOCK){ regs->r0 = regs->orig_r0; regs->r7 = __NR_restart_syscall; - inst = *(unsigned short *)(regs->bpc - 2); - if ((inst & 0xfff0) == 0x10f0) /* trap ? */ - regs->bpc -= 2; - else - regs->bpc -= 4; + prev_insn(regs); } } - return 0; + if (test_thread_flag(TIF_RESTORE_SIGMASK)) { + clear_thread_flag(TIF_RESTORE_SIGMASK); + sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); + } } /* * notification of userspace execution resumption * - triggered by current->work.notify_resume */ -void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, - __u32 thread_info_flags) +void do_notify_resume(struct pt_regs *regs, __u32 thread_info_flags) { /* Pending single-step?
*/ if (thread_info_flags & _TIF_SINGLESTEP) @@ -407,7 +384,7 @@ void do_notify_resume(struct pt_regs *regs, sigset_t *oldset, /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) - do_signal(regs,oldset); + do_signal(regs); if (thread_info_flags & _TIF_NOTIFY_RESUME) { clear_thread_flag(TIF_NOTIFY_RESUME); diff --git a/arch/m68k/mac/macboing.c b/arch/m68k/mac/macboing.c index 8f06408..05285d0 100644 --- a/arch/m68k/mac/macboing.c +++ b/arch/m68k/mac/macboing.c @@ -162,7 +162,7 @@ static void mac_init_asc( void ) void mac_mksound( unsigned int freq, unsigned int length ) { __u32 cfreq = ( freq << 5 ) / 468; - __u32 flags; + unsigned long flags; int i; if ( mac_special_bell == NULL ) @@ -224,7 +224,7 @@ static void mac_nosound( unsigned long ignored ) */ static void mac_quadra_start_bell( unsigned int freq, unsigned int length, unsigned int volume ) { - __u32 flags; + unsigned long flags; /* if the bell is already ringing, ring longer */ if ( mac_bell_duration > 0 ) @@ -271,7 +271,7 @@ static void mac_quadra_start_bell( unsigned int freq, unsigned int length, unsig static void mac_quadra_ring_bell( unsigned long ignored ) { int i, count = mac_asc_samplespersec / HZ; - __u32 flags; + unsigned long flags; /* * we neither want a sound buffer overflow nor underflow, so we need to match diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 3ad59dd..5526faa 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -13,6 +13,7 @@ config MIPS select HAVE_KPROBES select HAVE_KRETPROBES select RTC_LIB if !MACH_LOONGSON + select GENERIC_ATOMIC64 if !64BIT mainmenu "Linux/MIPS Kernel Configuration" @@ -1646,8 +1647,16 @@ config MIPS_MT_SMP select SYS_SUPPORTS_SMP select SMP_UP help - This is a kernel model which is also known a VSMP or lately - has been marketesed into SMVP. + This is a kernel model which is known a VSMP but lately has been + marketesed into SMVP. + Virtual SMP uses the processor's VPEs to implement virtual + processors. In currently available configuration of the 34K processor + this allows for a dual processor. Both processors will share the same + primary caches; each will obtain the half of the TLB for it's own + exclusive use. For a layman this model can be described as similar to + what Intel calls Hyperthreading. + + For further information see http://www.linux-mips.org/wiki/34K#VSMP config MIPS_MT_SMTC bool "SMTC: Use all TCs on all VPEs for SMP" @@ -1664,6 +1673,14 @@ config MIPS_MT_SMTC help This is a kernel model which is known a SMTC or lately has been marketesed into SMVP. + is presenting the available TC's of the core as processors to Linux. + On currently available 34K processors this means a Linux system will + see up to 5 processors. The implementation of the SMTC kernel differs + significantly from VSMP and cannot efficiently coexist in the same + kernel binary so the choice between VSMP and SMTC is a compile time + decision. 
+ + For further information see http://www.linux-mips.org/wiki/34K#SMTC endchoice diff --git a/arch/mips/alchemy/common/prom.c b/arch/mips/alchemy/common/prom.c index c29511b..5340210 100644 --- a/arch/mips/alchemy/common/prom.c +++ b/arch/mips/alchemy/common/prom.c @@ -43,7 +43,7 @@ int prom_argc; char **prom_argv; char **prom_envp; -void prom_init_cmdline(void) +void __init prom_init_cmdline(void) { int i; @@ -104,7 +104,7 @@ static inline void str2eaddr(unsigned char *ea, unsigned char *str) } } -int prom_get_ethernet_addr(char *ethernet_addr) +int __init prom_get_ethernet_addr(char *ethernet_addr) { char *ethaddr_str; @@ -123,7 +123,6 @@ int prom_get_ethernet_addr(char *ethernet_addr) return 0; } -EXPORT_SYMBOL(prom_get_ethernet_addr); void __init prom_free_prom_memory(void) { diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index ed9bb70..5fd7f7a 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -59,7 +59,7 @@ $(obj)/piggy.o: $(obj)/dummy.o $(obj)/vmlinux.bin.z FORCE hostprogs-y := calc_vmlinuz_load_addr VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \ - $(objtree)/$(KBUILD_IMAGE) $(VMLINUX_LOAD_ADDRESS)) + $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS)) vmlinuzobjs-y += $(obj)/piggy.o diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index 094c17e..47323ca 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -83,3 +83,7 @@ config ARCH_SPARSEMEM_ENABLE def_bool y select SPARSEMEM_STATIC depends on CPU_CAVIUM_OCTEON + +config CAVIUM_OCTEON_HELPER + def_bool y + depends on OCTEON_ETHERNET || PCI diff --git a/arch/mips/cavium-octeon/cpu.c b/arch/mips/cavium-octeon/cpu.c index c664c8c..a5b4279 100644 --- a/arch/mips/cavium-octeon/cpu.c +++ b/arch/mips/cavium-octeon/cpu.c @@ -41,7 +41,7 @@ static int cnmips_cu2_call(struct notifier_block *nfb, unsigned long action, return NOTIFY_OK; /* Let default notifier send signals */ } -static int cnmips_cu2_setup(void) +static int __init cnmips_cu2_setup(void) { return cu2_notifier(cnmips_cu2_call, 0); } diff --git a/arch/mips/cavium-octeon/executive/Makefile b/arch/mips/cavium-octeon/executive/Makefile index 2fd66db..7f41c5b 100644 --- a/arch/mips/cavium-octeon/executive/Makefile +++ b/arch/mips/cavium-octeon/executive/Makefile @@ -11,4 +11,4 @@ obj-y += cvmx-bootmem.o cvmx-l2c.o cvmx-sysinfo.o octeon-model.o -obj-$(CONFIG_PCI) += cvmx-helper-errata.o cvmx-helper-jtag.o +obj-$(CONFIG_CAVIUM_OCTEON_HELPER) += cvmx-helper-errata.o cvmx-helper-jtag.o diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index c63c56b..47d87da 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -782,6 +782,10 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) */ #define atomic64_add_negative(i, v) (atomic64_add_return(i, (v)) < 0) +#else /* !CONFIG_64BIT */ + +#include <asm-generic/atomic64.h> + #endif /* CONFIG_64BIT */ /* diff --git a/arch/mips/include/asm/cop2.h b/arch/mips/include/asm/cop2.h index 2cb2f0c..3532e2c 100644 --- a/arch/mips/include/asm/cop2.h +++ b/arch/mips/include/asm/cop2.h @@ -24,7 +24,7 @@ extern int cu2_notifier_call_chain(unsigned long val, void *v); #define cu2_notifier(fn, pri) \ ({ \ - static struct notifier_block fn##_nb __cpuinitdata = { \ + static struct notifier_block fn##_nb = { \ .notifier_call = fn, \ .priority = pri \ }; \ diff --git a/arch/mips/include/asm/gic.h b/arch/mips/include/asm/gic.h index 
9b9436a..86548da 100644 --- a/arch/mips/include/asm/gic.h +++ b/arch/mips/include/asm/gic.h @@ -321,6 +321,7 @@ struct gic_intrmask_regs { */ struct gic_intr_map { unsigned int cpunum; /* Directed to this CPU */ +#define GIC_UNUSED 0xdead /* Dummy data */ unsigned int pin; /* Directed to this Pin */ unsigned int polarity; /* Polarity : +/- */ unsigned int trigtype; /* Trigger : Edge/Levl */ diff --git a/arch/mips/include/asm/mach-tx49xx/kmalloc.h b/arch/mips/include/asm/mach-tx49xx/kmalloc.h index b74caf6..ff9a8b86 100644 --- a/arch/mips/include/asm/mach-tx49xx/kmalloc.h +++ b/arch/mips/include/asm/mach-tx49xx/kmalloc.h @@ -1,6 +1,6 @@ #ifndef __ASM_MACH_TX49XX_KMALLOC_H #define __ASM_MACH_TX49XX_KMALLOC_H -#define ARCH_KMALLOC_MINALIGN L1_CACHE_BYTES +#define ARCH_DMA_MINALIGN L1_CACHE_BYTES #endif /* __ASM_MACH_TX49XX_KMALLOC_H */ diff --git a/arch/mips/include/asm/mips-boards/maltaint.h b/arch/mips/include/asm/mips-boards/maltaint.h index cea872f..d11aa02 100644 --- a/arch/mips/include/asm/mips-boards/maltaint.h +++ b/arch/mips/include/asm/mips-boards/maltaint.h @@ -88,9 +88,6 @@ #define GIC_EXT_INTR(x) x -/* Dummy data */ -#define X 0xdead - /* External Interrupts used for IPI */ #define GIC_IPI_EXT_INTR_RESCHED_VPE0 16 #define GIC_IPI_EXT_INTR_CALLFNC_VPE0 17 diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index a16beaf..e59cd1a 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -150,6 +150,20 @@ typedef struct { unsigned long pgprot; } pgprot_t; ((unsigned long)(x) - PAGE_OFFSET + PHYS_OFFSET) #endif #define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - PHYS_OFFSET)) + +/* + * RELOC_HIDE was originally added by 6007b903dfe5f1d13e0c711ac2894bdd4a61b1ad + * (lmo) rsp. 8431fd094d625b94d364fe393076ccef88e6ce18 (kernel.org). 
The + * discussion can be found in lkml posting + * <a2ebde260608230500o3407b108hc03debb9da6e62c@mail.gmail.com> which is + * archived at http://lists.linuxcoding.com/kernel/2006-q3/msg17360.html + * + * It is unclear if the misscompilations mentioned in + * http://lkml.org/lkml/2010/8/8/138 also affect MIPS so we keep this one + * until GCC 3.x has been retired before we can apply + * https://patchwork.linux-mips.org/patch/1541/ + */ + #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h index 2376f2e..70df9c0 100644 --- a/arch/mips/include/asm/thread_info.h +++ b/arch/mips/include/asm/thread_info.h @@ -146,7 +146,8 @@ register struct thread_info *__current_thread_info __asm__("$28"); #define _TIF_LOAD_WATCH (1<<TIF_LOAD_WATCH) /* work to do on interrupt/exception return */ -#define _TIF_WORK_MASK (0x0000ffef & ~_TIF_SECCOMP) +#define _TIF_WORK_MASK (0x0000ffef & \ + ~(_TIF_SECCOMP | _TIF_SYSCALL_AUDIT)) /* work to do on any return to u-space */ #define _TIF_ALLWORK_MASK (0x8000ffff & ~_TIF_SECCOMP) diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index baa318a..550725b 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -356,16 +356,19 @@ #define __NR_perf_event_open (__NR_Linux + 333) #define __NR_accept4 (__NR_Linux + 334) #define __NR_recvmmsg (__NR_Linux + 335) +#define __NR_fanotify_init (__NR_Linux + 336) +#define __NR_fanotify_mark (__NR_Linux + 337) +#define __NR_prlimit64 (__NR_Linux + 338) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 335 +#define __NR_Linux_syscalls 338 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 335 +#define __NR_O32_Linux_syscalls 338 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -668,16 +671,19 @@ #define __NR_perf_event_open (__NR_Linux + 292) #define __NR_accept4 (__NR_Linux + 293) #define __NR_recvmmsg (__NR_Linux + 294) +#define __NR_fanotify_init (__NR_Linux + 295) +#define __NR_fanotify_mark (__NR_Linux + 296) +#define __NR_prlimit64 (__NR_Linux + 297) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 294 +#define __NR_Linux_syscalls 297 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 294 +#define __NR_64_Linux_syscalls 297 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -985,16 +991,19 @@ #define __NR_accept4 (__NR_Linux + 297) #define __NR_recvmmsg (__NR_Linux + 298) #define __NR_getdents64 (__NR_Linux + 299) +#define __NR_fanotify_init (__NR_Linux + 300) +#define __NR_fanotify_mark (__NR_Linux + 301) +#define __NR_prlimit64 (__NR_Linux + 302) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 299 +#define __NR_Linux_syscalls 302 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 299 +#define __NR_N32_Linux_syscalls 302 #ifdef __KERNEL__ diff --git a/arch/mips/kernel/irq-gic.c b/arch/mips/kernel/irq-gic.c index b181f2f..82ba9f6 100644 --- a/arch/mips/kernel/irq-gic.c +++ b/arch/mips/kernel/irq-gic.c @@ -7,7 +7,6 @@ #include <asm/io.h> #include <asm/gic.h> #include <asm/gcmpregs.h> -#include <asm/mips-boards/maltaint.h> #include <asm/irq.h> #include <linux/hardirq.h> #include <asm-generic/bitops/find.h> @@ -131,7 +130,7 @@ static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask) int 
i; irq -= _irqbase; - pr_debug(KERN_DEBUG "%s(%d) called\n", __func__, irq); + pr_debug("%s(%d) called\n", __func__, irq); cpumask_and(&tmp, cpumask, cpu_online_mask); if (cpus_empty(tmp)) return -1; @@ -222,7 +221,7 @@ static void __init gic_basic_init(int numintrs, int numvpes, /* Setup specifics */ for (i = 0; i < mapsize; i++) { cpu = intrmap[i].cpunum; - if (cpu == X) + if (cpu == GIC_UNUSED) continue; if (cpu == 0 && i != 0 && intrmap[i].flags == 0) continue; diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c index 1f4e2fa..f4546e9 100644 --- a/arch/mips/kernel/kgdb.c +++ b/arch/mips/kernel/kgdb.c @@ -283,7 +283,7 @@ static int kgdb_mips_notify(struct notifier_block *self, unsigned long cmd, struct pt_regs *regs = args->regs; int trap = (regs->cp0_cause & 0x7c) >> 2; - /* Userpace events, ignore. */ + /* Userspace events, ignore. */ if (user_mode(regs)) return NOTIFY_DONE; diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index 80e2ba6..29811f0 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -251,7 +251,7 @@ void sp_work_handle_request(void) memset(&tz, 0, sizeof(tz)); if ((ret.retval = sp_syscall(__NR_gettimeofday, (int)&tv, (int)&tz, 0, 0)) == 0) - ret.retval = tv.tv_sec; + ret.retval = tv.tv_sec; break; case MTSP_SYSCALL_EXIT: diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index c2dab14..6343b4a 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -341,3 +341,10 @@ asmlinkage long sys32_lookup_dcookie(u32 a0, u32 a1, char __user *buf, { return sys_lookup_dcookie(merge_64(a0, a1), buf, len); } + +SYSCALL_DEFINE6(32_fanotify_mark, int, fanotify_fd, unsigned int, flags, + u64, a3, u64, a4, int, dfd, const char __user *, pathname) +{ + return sys_fanotify_mark(fanotify_fd, flags, merge_64(a3, a4), + dfd, pathname); +} diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 17202bb..584415e 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -583,7 +583,10 @@ einval: li v0, -ENOSYS sys sys_rt_tgsigqueueinfo 4 sys sys_perf_event_open 5 sys sys_accept4 4 - sys sys_recvmmsg 5 + sys sys_recvmmsg 5 /* 4335 */ + sys sys_fanotify_init 2 + sys sys_fanotify_mark 6 + sys sys_prlimit64 4 .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index a8a6c59..5573f8e 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -416,9 +416,12 @@ sys_call_table: PTR sys_pipe2 PTR sys_inotify_init1 PTR sys_preadv - PTR sys_pwritev /* 5390 */ + PTR sys_pwritev /* 5290 */ PTR sys_rt_tgsigqueueinfo PTR sys_perf_event_open PTR sys_accept4 - PTR sys_recvmmsg + PTR sys_recvmmsg + PTR sys_fanotify_init /* 5295 */ + PTR sys_fanotify_mark + PTR sys_prlimit64 .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index a3d6613..1e38ec9 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -419,5 +419,8 @@ EXPORT(sysn32_call_table) PTR sys_perf_event_open PTR sys_accept4 PTR compat_sys_recvmmsg - PTR sys_getdents + PTR sys_getdents64 + PTR sys_fanotify_init /* 6300 */ + PTR sys_fanotify_mark + PTR sys_prlimit64 .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 813689e..171979f 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -538,5 +538,8 @@ sys_call_table: 
PTR compat_sys_rt_tgsigqueueinfo PTR sys_perf_event_open PTR sys_accept4 - PTR compat_sys_recvmmsg + PTR compat_sys_recvmmsg /* 4335 */ + PTR sys_fanotify_init + PTR sys_32_fanotify_mark + PTR sys_prlimit64 .size sys_call_table,.-sys_call_table diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 7ba8908..469d401 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -44,27 +44,39 @@ static inline int cpu_is_noncoherent_r10000(struct device *dev) static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) { + gfp_t dma_flag; + /* ignore region specifiers */ gfp &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM); -#ifdef CONFIG_ZONE_DMA +#ifdef CONFIG_ISA if (dev == NULL) - gfp |= __GFP_DMA; - else if (dev->coherent_dma_mask < DMA_BIT_MASK(24)) - gfp |= __GFP_DMA; + dma_flag = __GFP_DMA; else #endif -#ifdef CONFIG_ZONE_DMA32 +#if defined(CONFIG_ZONE_DMA32) && defined(CONFIG_ZONE_DMA) if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) - gfp |= __GFP_DMA32; + dma_flag = __GFP_DMA; + else if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA32; + else +#endif +#if defined(CONFIG_ZONE_DMA32) && !defined(CONFIG_ZONE_DMA) + if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA32; + else +#endif +#if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) + if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + dma_flag = __GFP_DMA; else #endif - ; + dma_flag = 0; /* Don't invoke OOM killer */ gfp |= __GFP_NORETRY; - return gfp; + return gfp | dma_flag; } void *dma_alloc_noncoherent(struct device *dev, size_t size, diff --git a/arch/mips/mm/sc-rm7k.c b/arch/mips/mm/sc-rm7k.c index 1ef75cd..274af3b 100644 --- a/arch/mips/mm/sc-rm7k.c +++ b/arch/mips/mm/sc-rm7k.c @@ -30,7 +30,7 @@ #define tc_lsize 32 extern unsigned long icache_way_size, dcache_way_size; -unsigned long tcache_size; +static unsigned long tcache_size; #include <asm/r4kcache.h> diff --git a/arch/mips/mti-malta/malta-int.c b/arch/mips/mti-malta/malta-int.c index 15949b0..b79b24a 100644 --- a/arch/mips/mti-malta/malta-int.c +++ b/arch/mips/mti-malta/malta-int.c @@ -385,6 +385,8 @@ static int __initdata msc_nr_eicirqs = ARRAY_SIZE(msc_eicirqmap); */ #define GIC_CPU_NMI GIC_MAP_TO_NMI_MSK +#define X GIC_UNUSED + static struct gic_intr_map gic_intr_map[GIC_NUM_INTRS] = { { X, X, X, X, 0 }, { X, X, X, X, 0 }, @@ -404,6 +406,7 @@ static struct gic_intr_map gic_intr_map[GIC_NUM_INTRS] = { { X, X, X, X, 0 }, /* The remainder of this table is initialised by fill_ipi_map */ }; +#undef X /* * GCMP needs to be detected before any SMP initialisation diff --git a/arch/mips/pci/pci-rc32434.c b/arch/mips/pci/pci-rc32434.c index 71f7d27..f31218e 100644 --- a/arch/mips/pci/pci-rc32434.c +++ b/arch/mips/pci/pci-rc32434.c @@ -118,7 +118,7 @@ static int __init rc32434_pcibridge_init(void) if (!((pcicvalue == PCIM_H_EA) || (pcicvalue == PCIM_H_IA_FIX) || (pcicvalue == PCIM_H_IA_RR))) { - pr_err(KERN_ERR "PCI init error!!!\n"); + pr_err("PCI init error!!!\n"); /* Not in Host Mode, return ERROR */ return -1; } diff --git a/arch/mips/pnx8550/common/reset.c b/arch/mips/pnx8550/common/reset.c index fadd8744..e7a12ff 100644 --- a/arch/mips/pnx8550/common/reset.c +++ b/arch/mips/pnx8550/common/reset.c @@ -22,29 +22,19 @@ */ #include <linux/kernel.h> +#include <asm/processor.h> #include <asm/reboot.h> #include <glb.h> void pnx8550_machine_restart(char *command) { - char head[] = "************* Machine restart *************"; - char foot[] = "*******************************************"; - - 
printk("\n\n"); - printk("%s\n", head); - if (command != NULL) - printk("* %s\n", command); - printk("%s\n", foot); - PNX8550_RST_CTL = PNX8550_RST_DO_SW_RST; } void pnx8550_machine_halt(void) { - printk("*** Machine halt. (Not implemented) ***\n"); -} - -void pnx8550_machine_power_off(void) -{ - printk("*** Machine power off. (Not implemented) ***\n"); + while (1) { + if (cpu_wait) + cpu_wait(); + } } diff --git a/arch/mips/pnx8550/common/setup.c b/arch/mips/pnx8550/common/setup.c index 64246c9..43cb394 100644 --- a/arch/mips/pnx8550/common/setup.c +++ b/arch/mips/pnx8550/common/setup.c @@ -44,7 +44,6 @@ extern void __init board_setup(void); extern void pnx8550_machine_restart(char *); extern void pnx8550_machine_halt(void); -extern void pnx8550_machine_power_off(void); extern struct resource ioport_resource; extern struct resource iomem_resource; extern char *prom_getcmdline(void); @@ -100,7 +99,7 @@ void __init plat_mem_setup(void) _machine_restart = pnx8550_machine_restart; _machine_halt = pnx8550_machine_halt; - pm_power_off = pnx8550_machine_power_off; + pm_power_off = pnx8550_machine_halt; /* Clear the Global 2 Register, PCI Inta Output Enable Registers Bit 1:Enable DAC Powerdown diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 444b9f9..7c2a2f7 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -8,7 +8,6 @@ mainmenu "Linux Kernel Configuration" config MN10300 def_bool y select HAVE_OPROFILE - select HAVE_ARCH_TRACEHOOK config AM33 def_bool y diff --git a/arch/mn10300/Kconfig.debug b/arch/mn10300/Kconfig.debug index ff80e86..ce83c74 100644 --- a/arch/mn10300/Kconfig.debug +++ b/arch/mn10300/Kconfig.debug @@ -101,7 +101,7 @@ config GDBSTUB_DEBUG_BREAKPOINT choice prompt "GDB stub port" - default GDBSTUB_TTYSM0 + default GDBSTUB_ON_TTYSM0 depends on GDBSTUB help Select the serial port used for GDB-stub. diff --git a/arch/mn10300/include/asm/bitops.h b/arch/mn10300/include/asm/bitops.h index f49ac49..3f50e96 100644 --- a/arch/mn10300/include/asm/bitops.h +++ b/arch/mn10300/include/asm/bitops.h @@ -229,9 +229,9 @@ int ffs(int x) #include <asm-generic/bitops/hweight.h> #define ext2_set_bit_atomic(lock, nr, addr) \ - test_and_set_bit((nr) ^ 0x18, (addr)) + test_and_set_bit((nr), (addr)) #define ext2_clear_bit_atomic(lock, nr, addr) \ - test_and_clear_bit((nr) ^ 0x18, (addr)) + test_and_clear_bit((nr), (addr)) #include <asm-generic/bitops/ext2-non-atomic.h> #include <asm-generic/bitops/minix-le.h> diff --git a/arch/mn10300/include/asm/signal.h b/arch/mn10300/include/asm/signal.h index 7e891fc..1865d72 100644 --- a/arch/mn10300/include/asm/signal.h +++ b/arch/mn10300/include/asm/signal.h @@ -78,7 +78,7 @@ typedef unsigned long sigset_t; /* These should not be considered constants from userland. 
*/ #define SIGRTMIN 32 -#define SIGRTMAX (_NSIG-1) +#define SIGRTMAX _NSIG /* * SA_FLAGS values: diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6aea7fd..196a111 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c @@ -206,7 +206,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } /* @@ -214,5 +214,4 @@ int module_finalize(const Elf_Ehdr *hdr, */ void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c index 717db14..d4de05a 100644 --- a/arch/mn10300/kernel/signal.c +++ b/arch/mn10300/kernel/signal.c @@ -65,10 +65,10 @@ asmlinkage long sys_sigaction(int sig, old_sigset_t mask; if (verify_area(VERIFY_READ, act, sizeof(*act)) || __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) + __get_user(new_ka.sa.sa_restorer, &act->sa_restorer) || + __get_user(new_ka.sa.sa_flags, &act->sa_flags) || + __get_user(mask, &act->sa_mask)) return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); siginitset(&new_ka.sa.sa_mask, mask); } @@ -77,10 +77,10 @@ asmlinkage long sys_sigaction(int sig, if (!ret && oact) { if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) + __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer) || + __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || + __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); } return ret; @@ -102,6 +102,9 @@ static int restore_sigcontext(struct pt_regs *regs, { unsigned int err = 0; + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + if (is_using_fpu(current)) fpu_kill_state(current); @@ -330,8 +333,6 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, regs->d0 = sig; regs->d1 = (unsigned long) &frame->sc; - set_fs(USER_DS); - /* the tracer may want to single-step inside the handler */ if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); @@ -345,7 +346,7 @@ static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, return 0; give_sigsegv: - force_sig(SIGSEGV, current); + force_sigsegv(sig, current); return -EFAULT; } @@ -413,8 +414,6 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->d0 = sig; regs->d1 = (long) &frame->info; - set_fs(USER_DS); - /* the tracer may want to single-step inside the handler */ if (test_thread_flag(TIF_SINGLESTEP)) ptrace_notify(SIGTRAP); @@ -428,10 +427,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, return 0; give_sigsegv: - force_sig(SIGSEGV, current); + force_sigsegv(sig, current); return -EFAULT; } +static inline void stepback(struct pt_regs *regs) +{ + regs->pc -= 2; + regs->orig_d0 = -1; +} + /* * handle the actual delivery of a signal to userspace */ @@ -459,7 +464,7 @@ static int handle_signal(int sig, /* fallthrough */ case -ERESTARTNOINTR: regs->d0 = regs->orig_d0; - regs->pc -= 2; + stepback(regs); } } @@ -527,12 +532,12 @@ static void do_signal(struct pt_regs *regs) case -ERESTARTSYS: case -ERESTARTNOINTR: regs->d0 = regs->orig_d0; - regs->pc -= 
2; + stepback(regs); break; case -ERESTART_RESTARTBLOCK: regs->d0 = __NR_restart_syscall; - regs->pc -= 2; + stepback(regs); break; } } diff --git a/arch/mn10300/mm/Makefile b/arch/mn10300/mm/Makefile index 28b9d98..1557277 100644 --- a/arch/mn10300/mm/Makefile +++ b/arch/mn10300/mm/Makefile @@ -2,13 +2,11 @@ # Makefile for the MN10300-specific memory management code # +cacheflush-y := cache.o cache-mn10300.o +cacheflush-$(CONFIG_MN10300_CACHE_WBACK) += cache-flush-mn10300.o + +cacheflush-$(CONFIG_MN10300_CACHE_DISABLED) := cache-disabled.o + obj-y := \ init.o fault.o pgtable.o extable.o tlb-mn10300.o mmu-context.o \ - misalignment.o dma-alloc.o - -ifneq ($(CONFIG_MN10300_CACHE_DISABLED),y) -obj-y += cache.o cache-mn10300.o -ifeq ($(CONFIG_MN10300_CACHE_WBACK),y) -obj-y += cache-flush-mn10300.o -endif -endif + misalignment.o dma-alloc.o $(cacheflush-y) diff --git a/arch/mn10300/mm/cache-disabled.c b/arch/mn10300/mm/cache-disabled.c new file mode 100644 index 0000000..f669ea4 --- /dev/null +++ b/arch/mn10300/mm/cache-disabled.c @@ -0,0 +1,21 @@ +/* Handle the cache being disabled + * + * Copyright (C) 2010 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#include <linux/mm.h> + +/* + * allow userspace to flush the instruction cache + */ +asmlinkage long sys_cacheflush(unsigned long start, unsigned long end) +{ + if (end < start) + return -EINVAL; + return 0; +} diff --git a/arch/mn10300/mm/cache.c b/arch/mn10300/mm/cache.c index 1b76719..9261217 100644 --- a/arch/mn10300/mm/cache.c +++ b/arch/mn10300/mm/cache.c @@ -54,13 +54,30 @@ EXPORT_SYMBOL(flush_icache_page); void flush_icache_range(unsigned long start, unsigned long end) { #ifdef CONFIG_MN10300_CACHE_WBACK - unsigned long addr, size, off; + unsigned long addr, size, base, off; struct page *page; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *ppte, pte; + if (end > 0x80000000UL) { + /* addresses above 0xa0000000 do not go through the cache */ + if (end > 0xa0000000UL) { + end = 0xa0000000UL; + if (start >= end) + return; + } + + /* kernel addresses between 0x80000000 and 0x9fffffff do not + * require page tables, so we just map such addresses directly */ + base = (start >= 0x80000000UL) ? 
start : 0x80000000UL; + mn10300_dcache_flush_range(base, end); + if (base == start) + goto invalidate; + end = base; + } + for (; start < end; start += size) { /* work out how much of the page to flush */ off = start & (PAGE_SIZE - 1); @@ -104,6 +121,7 @@ void flush_icache_range(unsigned long start, unsigned long end) } #endif +invalidate: mn10300_icache_inv(); } EXPORT_SYMBOL(flush_icache_range); diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 159a2b8..6e81bb5 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -941,11 +941,10 @@ int module_finalize(const Elf_Ehdr *hdr, nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; DEBUGP("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { deregister_unwind_table(mod); - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663..49cee9d 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -63,11 +63,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *sect; - int err; - - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); @@ -101,5 +96,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/perf_callchain.c b/arch/powerpc/kernel/perf_callchain.c index 95ad9da..d05ae42 100644 --- a/arch/powerpc/kernel/perf_callchain.c +++ b/arch/powerpc/kernel/perf_callchain.c @@ -23,18 +23,6 @@ #include "ppc32.h" #endif -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} /* * Is sp valid as the address of the next kernel stack frame after prev_sp? 
@@ -58,8 +46,8 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp) return 0; } -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -69,8 +57,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); + perf_callchain_store(entry, regs->nip); if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) return; @@ -89,7 +76,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); } else { if (level == 0) @@ -111,7 +98,7 @@ static void perf_callchain_kernel(struct pt_regs *regs, ++level; } - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); if (!valid_next_sp(next_sp, sp)) return; sp = next_sp; @@ -233,8 +220,8 @@ static int sane_signal_64_frame(unsigned long sp) puc == (unsigned long) &sf->uc; } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long sp, next_sp; unsigned long next_ip; @@ -246,8 +233,7 @@ static void perf_callchain_user_64(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); for (;;) { fp = (unsigned long __user *) sp; @@ -276,14 +262,14 @@ static void perf_callchain_user_64(struct pt_regs *regs, read_user_stack_64(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if (level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } @@ -315,8 +301,8 @@ static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) return __get_user_inatomic(*ret, ptr); } -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static inline void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { } @@ -435,8 +421,8 @@ static unsigned int __user *signal_frame_32_regs(unsigned int sp, return mctx->mc_gregs; } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned int sp, next_sp; unsigned int next_ip; @@ -447,8 +433,7 @@ static void perf_callchain_user_32(struct pt_regs *regs, next_ip = regs->nip; lr = regs->link; sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); while (entry->nr < PERF_MAX_STACK_DEPTH) { fp = (unsigned int __user *) (unsigned long) sp; @@ -470,45 +455,24 @@ static void perf_callchain_user_32(struct pt_regs *regs, read_user_stack_32(&uregs[PT_R1], &sp)) return; level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); continue; } if 
(level == 0) next_ip = lr; - callchain_store(entry, next_ip); + perf_callchain_store(entry, next_ip); ++level; sp = next_sp; } } -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, cpu_perf_callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(cpu_perf_callchain); - - entry->nr = 0; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; + if (current_is_64bit()) + perf_callchain_user_64(entry, regs); + else + perf_callchain_user_32(entry, regs); } diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index d301a30..9cb4924 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + if (!event->hw.idx) return; /* @@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void power_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -565,7 +568,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. */ -void hw_perf_enable(void) +static void power_pmu_enable(struct pmu *pmu) { struct perf_event *event; struct cpu_hw_events *cpuhw; @@ -672,6 +675,8 @@ void hw_perf_enable(void) } local64_set(&event->hw.prev_count, val); event->hw.idx = idx; + if (event->hw.state & PERF_HES_STOPPED) + val = 0; write_pmc(idx, val); perf_event_update_userpage(event); } @@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count, * re-enable the PMU in order to get hw_perf_enable to do the * actual work of reconfiguring the PMU. */ -static int power_pmu_enable(struct perf_event *event) +static int power_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -735,7 +740,7 @@ static int power_pmu_enable(struct perf_event *event) int ret = -EAGAIN; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); /* * Add the event to the list (if there is room) @@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event) cpuhw->events[n0] = event->hw.config; cpuhw->flags[n0] = event->hw.event_base; + if (!(ef_flags & PERF_EF_START)) + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -769,7 +777,7 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } @@ -777,14 +785,14 @@ nocheck: /* * Remove a event from the PMU. 
*/ -static void power_pmu_disable(struct perf_event *event) +static void power_pmu_del(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuhw; long i; unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); power_pmu_read(event); @@ -821,34 +829,60 @@ static void power_pmu_disable(struct perf_event *event) cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } /* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. + * POWER-PMU does not support disabling individual counters, hence + * program their cycle counter to their max value and ignore the interrupts. */ -static void power_pmu_unthrottle(struct perf_event *event) + +static void power_pmu_start(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + s64 left; + + if (!event->hw.idx || !event->hw.sample_period) + return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} + +static void power_pmu_stop(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; if (!event->hw.idx || !event->hw.sample_period) return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); + power_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -857,10 +891,11 @@ static void power_pmu_unthrottle(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -void power_pmu_start_txn(const struct pmu *pmu) +void power_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; cpuhw->n_txn_start = cpuhw->n_events; } @@ -870,11 +905,12 @@ void power_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. 
*/ -void power_pmu_cancel_txn(const struct pmu *pmu) +void power_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); } /* @@ -882,7 +918,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -int power_pmu_commit_txn(const struct pmu *pmu) +int power_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw; long i, n; @@ -901,19 +937,10 @@ int power_pmu_commit_txn(const struct pmu *pmu) cpuhw->event[i]->hw.config = cpuhw->events[i]; cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1014,7 +1041,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1053,27 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1081,7 +1110,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1124,19 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1161,23 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .pmu_enable = power_pmu_enable, + .pmu_disable = power_pmu_disable, + .event_init = power_pmu_event_init, + .add = power_pmu_add, + .del = power_pmu_del, + .start = power_pmu_start, + .stop = power_pmu_stop, + .read = power_pmu_read, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. 
Note that interrupts are hard-disabled @@ -1149,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -1171,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. */ @@ -1183,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + power_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } /* @@ -1342,6 +1379,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index 1ba4547..7ecca59 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event) { s64 val, delta, prev; + if (event->hw.state & PERF_HES_STOPPED) + return; + /* * Performance monitor interrupts come even when interrupts * are soft-disabled, as long as interrupts are hard-enabled. @@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event) * Disable all events to prevent PMU interrupts and to allow * events to be added or removed. */ -void hw_perf_disable(void) +static void fsl_emb_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -216,7 +219,7 @@ void hw_perf_disable(void) * If we were previously disabled and events were added, then * put the new config on the PMU. 
*/ -void hw_perf_enable(void) +static void fsl_emb_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuhw; unsigned long flags; @@ -262,8 +265,8 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -/* perf must be disabled, context locked on entry */ -static int fsl_emb_pmu_enable(struct perf_event *event) +/* context locked on entry */ +static int fsl_emb_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int ret = -EAGAIN; @@ -271,6 +274,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event) u64 val; int i; + perf_pmu_disable(event->pmu); cpuhw = &get_cpu_var(cpu_hw_events); if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) @@ -301,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event) val = 0x80000000L - left; } local64_set(&event->hw.prev_count, val); + + if (!(flags & PERF_EF_START)) { + event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE; + val = 0; + } + write_pmc(i, val); perf_event_update_userpage(event); @@ -310,15 +320,17 @@ static int fsl_emb_pmu_enable(struct perf_event *event) ret = 0; out: put_cpu_var(cpu_hw_events); + perf_pmu_enable(event->pmu); return ret; } -/* perf must be disabled, context locked on entry */ -static void fsl_emb_pmu_disable(struct perf_event *event) +/* context locked on entry */ +static void fsl_emb_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuhw; int i = event->hw.idx; + perf_pmu_disable(event->pmu); if (i < 0) goto out; @@ -346,44 +358,57 @@ static void fsl_emb_pmu_disable(struct perf_event *event) cpuhw->n_events--; out: + perf_pmu_enable(event->pmu); put_cpu_var(cpu_hw_events); } -/* - * Re-enable interrupts on a event after they were throttled - * because they were coming too fast. - * - * Context is locked on entry, but perf is not disabled. - */ -static void fsl_emb_pmu_unthrottle(struct perf_event *event) +static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags) { - s64 val, left; unsigned long flags; + s64 left; if (event->hw.idx < 0 || !event->hw.sample_period) return; + + if (!(event->hw.state & PERF_HES_STOPPED)) + return; + + if (ef_flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + local_irq_save(flags); - perf_disable(); - fsl_emb_pmu_read(event); - left = event->hw.sample_period; - event->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); + perf_pmu_disable(event->pmu); + + event->hw.state = 0; + left = local64_read(&event->hw.period_left); + write_pmc(event->hw.idx, left); + perf_event_update_userpage(event); - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; +static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags) +{ + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + + if (event->hw.state & PERF_HES_STOPPED) + return; + + local_irq_save(flags); + perf_pmu_disable(event->pmu); + + fsl_emb_pmu_read(event); + event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; + write_pmc(event->hw.idx, 0); + + perf_event_update_userpage(event); + perf_pmu_enable(event->pmu); + local_irq_restore(flags); +} /* * Release the PMU if this is the last perf_event. 
@@ -428,7 +453,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +466,14 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +481,12 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +498,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +509,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +522,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +548,20 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .pmu_enable = fsl_emb_pmu_enable, + .pmu_disable = fsl_emb_pmu_disable, + .event_init = fsl_emb_pmu_event_init, + .add = fsl_emb_pmu_add, + .del = fsl_emb_pmu_del, + .start = fsl_emb_pmu_start, + .stop = fsl_emb_pmu_stop, + .read = fsl_emb_pmu_read, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -540,6 +574,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, s64 prev, delta, left; int record = 0; + if (event->hw.state & PERF_HES_STOPPED) { + write_pmc(event->hw.idx, 0); + return; + } + /* we don't have to worry about interrupts here */ prev = local64_read(&event->hw.prev_count); delta = (val - prev) & 0xfffffffful; @@ -562,6 +601,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val, val = 0x80000000LL - left; } + write_pmc(event->hw.idx, val); + local64_set(&event->hw.prev_count, val); + local64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + /* * Finally record data if requested. 
*/ @@ -571,23 +615,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val, perf_sample_data_init(&data, 0); data.period = event->hw.last_period; - if (perf_event_overflow(event, nmi, &data, regs)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the event to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each event counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } + if (perf_event_overflow(event, nmi, &data, regs)) + fsl_emb_pmu_stop(event, 0); } - - write_pmc(event->hw.idx, val); - local64_set(&event->hw.prev_count, val); - local64_set(&event->hw.period_left, left); - perf_event_update_userpage(event); } static void perf_event_interrupt(struct pt_regs *regs) @@ -651,5 +681,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c index 5b243bd..3dc2a8d 100644 --- a/arch/powerpc/platforms/512x/clock.c +++ b/arch/powerpc/platforms/512x/clock.c @@ -57,7 +57,7 @@ static struct clk *mpc5121_clk_get(struct device *dev, const char *id) int id_match = 0; if (dev == NULL || id == NULL) - return NULL; + return clk; mutex_lock(&clocks_mutex); list_for_each_entry(p, &clocks, node) { diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 45c0cb9..18c1048 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -99,7 +99,7 @@ static void __init efika_pcisetup(void) if (bus_range == NULL || len < 2 * sizeof(int)) { printk(KERN_WARNING EFIKA_PLATFORM_NAME ": Can't get bus-range for %s\n", pcictrl->full_name); - return; + goto out_put; } if (bus_range[1] == bus_range[0]) @@ -111,12 +111,12 @@ static void __init efika_pcisetup(void) printk(" controlled by %s\n", pcictrl->full_name); printk("\n"); - hose = pcibios_alloc_controller(of_node_get(pcictrl)); + hose = pcibios_alloc_controller(pcictrl); if (!hose) { printk(KERN_WARNING EFIKA_PLATFORM_NAME ": Can't allocate PCI controller structure for %s\n", pcictrl->full_name); - return; + goto out_put; } hose->first_busno = bus_range[0]; @@ -124,6 +124,9 @@ static void __init efika_pcisetup(void) hose->ops = &rtas_pci_ops; pci_process_bridge_OF_ranges(hose, pcictrl, 0); + return; +out_put: + of_node_put(pcictrl); } #else diff --git a/arch/powerpc/platforms/52xx/mpc52xx_common.c b/arch/powerpc/platforms/52xx/mpc52xx_common.c index 6e90531..41f3a7e 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_common.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_common.c @@ -325,12 +325,16 @@ int mpc5200_psc_ac97_gpio_reset(int psc_number) clrbits32(&simple_gpio->simple_dvo, sync | out); clrbits8(&wkup_gpio->wkup_dvo, reset); - /* wait at lease 1 us */ - udelay(2); + /* wait for 1 us */ + udelay(1); /* Deassert reset */ setbits8(&wkup_gpio->wkup_dvo, reset); + /* wait at least 200ns */ + /* 7 ~= (200ns * timebase) / ns2sec */ + __delay(7); + /* Restore pin-muxing */ out_be32(&simple_gpio->port_config, mux); diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 22cfd63..f7167ee 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr, { vfree(me->arch.syminfo); me->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - 
module_bug_cleanup(mod); } diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index 43adddf..ae0be69 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -149,13 +149,11 @@ int module_finalize(const Elf_Ehdr *hdr, int ret = 0; ret |= module_dwarf_finalize(hdr, sechdrs, me); - ret |= module_bug_finalize(hdr, sechdrs, me); return ret; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); module_dwarf_cleanup(mod); } diff --git a/arch/sh/kernel/perf_callchain.c b/arch/sh/kernel/perf_callchain.c index a9dd3ab..d5ca1ef 100644 --- a/arch/sh/kernel/perf_callchain.c +++ b/arch/sh/kernel/perf_callchain.c @@ -14,11 +14,6 @@ #include <asm/unwinder.h> #include <asm/ptrace.h> -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} static void callchain_warning(void *data, char *msg) { @@ -39,7 +34,7 @@ static void callchain_address(void *data, unsigned long addr, int reliable) struct perf_callchain_entry *entry = data; if (reliable) - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops callchain_ops = { @@ -49,47 +44,10 @@ static const struct stacktrace_ops callchain_ops = { .address = callchain_address, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->pc); + perf_callchain_store(entry, regs->pc); unwind_stack(NULL, regs, NULL, &callchain_ops, entry); } - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - /* - * Only the kernel side is implemented for now. - */ - if (!is_user) - perf_callchain_kernel(regs, entry); -} - -/* - * No need for separate IRQ and NMI entries. 
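Editor's note: the per-arch callchain_store() helper deleted above (and again in the sparc and x86 hunks below) is replaced by a generic perf_callchain_store(). The removed sh copy shows the semantics the arch code now relies on; for reference, a sketch of the assumed helper (the real definition lives in the generic perf headers, outside this diff):

	static inline void perf_callchain_store(struct perf_callchain_entry *entry,
						u64 ip)
	{
		if (entry->nr < PERF_MAX_STACK_DEPTH)
			entry->ip[entry->nr++] = ip;
	}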
- */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 55fe89b..5a4b334 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -224,50 +224,80 @@ again: local64_add(delta, &event->count); } -static void sh_pmu_disable(struct perf_event *event) +static void sh_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - clear_bit(idx, cpuc->active_mask); - sh_pmu->disable(hwc, idx); + if (!(event->hw.state & PERF_HES_STOPPED)) { + sh_pmu->disable(hwc, idx); + cpuc->events[idx] = NULL; + event->hw.state |= PERF_HES_STOPPED; + } + + if ((flags & PERF_EF_UPDATE) && !(event->hw.state & PERF_HES_UPTODATE)) { + sh_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} - barrier(); +static void sh_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; - sh_perf_event_update(event, &event->hw, idx); + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + + cpuc->events[idx] = event; + event->hw.state = 0; + sh_pmu->enable(hwc, idx); +} + +static void sh_pmu_del(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + sh_pmu_stop(event, PERF_EF_UPDATE); + __clear_bit(event->hw.idx, cpuc->used_mask); perf_event_update_userpage(event); } -static int sh_pmu_enable(struct perf_event *event) +static int sh_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; + int ret = -EAGAIN; + + perf_pmu_disable(event->pmu); - if (test_and_set_bit(idx, cpuc->used_mask)) { + if (__test_and_set_bit(idx, cpuc->used_mask)) { idx = find_first_zero_bit(cpuc->used_mask, sh_pmu->num_events); if (idx == sh_pmu->num_events) - return -EAGAIN; + goto out; - set_bit(idx, cpuc->used_mask); + __set_bit(idx, cpuc->used_mask); hwc->idx = idx; } sh_pmu->disable(hwc, idx); - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); - - sh_pmu->enable(hwc, idx); + event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (flags & PERF_EF_START) + sh_pmu_start(event, PERF_EF_RELOAD); perf_event_update_userpage(event); - - return 0; + ret = 0; +out: + perf_pmu_enable(event->pmu); + return ret; } static void sh_pmu_read(struct perf_event *event) @@ -275,24 +305,56 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static const struct pmu pmu = { - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, - .read = sh_pmu_read, -}; - -const struct pmu *hw_perf_event_init(struct perf_event *event) +static int sh_pmu_event_init(struct perf_event *event) { - int err = __hw_perf_event_init(event); + int err; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_HARDWARE: + err = __hw_perf_event_init(event); + break; + + default: + return -ENOENT; + } + if 
(unlikely(err)) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; +} + +static void sh_pmu_enable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->enable_all(); +} + +static void sh_pmu_disable(struct pmu *pmu) +{ + if (!sh_pmu_initialized()) + return; + + sh_pmu->disable_all(); } +static struct pmu pmu = { + .pmu_enable = sh_pmu_enable, + .pmu_disable = sh_pmu_disable, + .event_init = sh_pmu_event_init, + .add = sh_pmu_add, + .del = sh_pmu_del, + .start = sh_pmu_start, + .stop = sh_pmu_stop, + .read = sh_pmu_read, +}; + static void sh_pmu_setup(int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -317,32 +379,17 @@ sh_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -void hw_perf_enable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->enable_all(); -} - -void hw_perf_disable(void) -{ - if (!sh_pmu_initialized()) - return; - - sh_pmu->disable_all(); -} - -int __cpuinit register_sh_pmu(struct sh_pmu *pmu) +int __cpuinit register_sh_pmu(struct sh_pmu *_pmu) { if (sh_pmu) return -EBUSY; - sh_pmu = pmu; + sh_pmu = _pmu; - pr_info("Performance Events: %s support registered\n", pmu->name); + pr_info("Performance Events: %s support registered\n", _pmu->name); - WARN_ON(pmu->num_events > MAX_HWEVENTS); + WARN_ON(_pmu->num_events > MAX_HWEVENTS); + perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 491e9d6..9212cd4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -30,6 +30,7 @@ config SPARC select PERF_USE_VMALLOC select HAVE_DMA_ATTRS select HAVE_DMA_API_DEBUG + select HAVE_ARCH_JUMP_LABEL config SPARC32 def_bool !64BIT diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h new file mode 100644 index 0000000..62e66d7 --- /dev/null +++ b/arch/sparc/include/asm/jump_label.h @@ -0,0 +1,32 @@ +#ifndef _ASM_SPARC_JUMP_LABEL_H +#define _ASM_SPARC_JUMP_LABEL_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <asm/system.h> + +#define JUMP_LABEL_NOP_SIZE 4 + +#define JUMP_LABEL(key, label) \ + do { \ + asm goto("1:\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + ".pushsection __jump_table, \"a\"\n\t"\ + ".word 1b, %l[" #label "], %c0\n\t" \ + ".popsection \n\t" \ + : : "i" (key) : : label);\ + } while (0) + +#endif /* __KERNEL__ */ + +typedef u32 jump_label_t; + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 0c2dc1f..599398f 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -119,3 +119,5 @@ obj-$(CONFIG_COMPAT) += $(audit--y) pc--$(CONFIG_PERF_EVENTS) := perf_event.o obj-$(CONFIG_SPARC64) += $(pc--y) + +obj-$(CONFIG_SPARC64) += jump_label.o diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c new file mode 100644 index 0000000..ea2dafc --- /dev/null +++ b/arch/sparc/kernel/jump_label.c @@ -0,0 +1,47 @@ +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/cpu.h> + +#include <linux/jump_label.h> +#include <linux/memory.h> + +#ifdef HAVE_JUMP_LABEL + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + u32 val; + u32 *insn = (u32 *) (unsigned long) entry->code; + + if (type == JUMP_LABEL_ENABLE) { + s32 off = (s32)entry->target - (s32)entry->code; + +#ifdef CONFIG_SPARC64 + /* ba,pt 
%xcc, . + (off << 2) */ + val = 0x10680000 | ((u32) off >> 2); +#else + /* ba . + (off << 2) */ + val = 0x10800000 | ((u32) off >> 2); +#endif + } else { + val = 0x01000000; + } + + get_online_cpus(); + mutex_lock(&text_mutex); + *insn = val; + flushi(insn); + mutex_unlock(&text_mutex); + put_online_cpus(); +} + +void arch_jump_label_text_poke_early(jump_label_t addr) +{ + u32 *insn_p = (u32 *) (unsigned long) addr; + + *insn_p = 0x01000000; + flushi(insn_p); +} + +#endif diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index f848aad..ee3c7dd 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -18,6 +18,9 @@ #include <asm/spitfire.h> #ifdef CONFIG_SPARC64 + +#include <linux/jump_label.h> + static void *module_map(unsigned long size) { struct vm_struct *area; @@ -227,6 +230,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { + /* make jump label nops */ + jump_label_apply_nops(me); + /* Cheetah's I-cache is fully coherent. */ if (tlb_type == spitfire) { unsigned long va; diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 6318e62..0d6deb5 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -658,13 +658,16 @@ static u64 maybe_change_configuration(struct cpu_hw_events *cpuc, u64 pcr) enc = perf_event_get_enc(cpuc->events[i]); pcr &= ~mask_for_index(idx); - pcr |= event_encoding(enc, idx); + if (hwc->state & PERF_HES_STOPPED) + pcr |= nop_for_index(idx); + else + pcr |= event_encoding(enc, idx); } out: return pcr; } -void hw_perf_enable(void) +static void sparc_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 pcr; @@ -691,7 +694,7 @@ void hw_perf_enable(void) pcr_ops->write(cpuc->pcr); } -void hw_perf_disable(void) +static void sparc_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; @@ -710,19 +713,65 @@ void hw_perf_disable(void) pcr_ops->write(cpuc->pcr); } -static void sparc_pmu_disable(struct perf_event *event) +static int active_event_index(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + int i; + + for (i = 0; i < cpuc->n_events; i++) { + if (cpuc->event[i] == event) + break; + } + BUG_ON(i == cpuc->n_events); + return cpuc->current_idx[i]; +} + +static void sparc_pmu_start(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + sparc_perf_event_set_period(event, &event->hw, idx); + } + + event->hw.state = 0; + + sparc_pmu_enable_event(cpuc, &event->hw, idx); +} + +static void sparc_pmu_stop(struct perf_event *event, int flags) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx = active_event_index(cpuc, event); + + if (!(event->hw.state & PERF_HES_STOPPED)) { + sparc_pmu_disable_event(cpuc, &event->hw, idx); + event->hw.state |= PERF_HES_STOPPED; + } + + if (!(event->hw.state & PERF_HES_UPTODATE) && (flags & PERF_EF_UPDATE)) { + sparc_perf_event_update(event, &event->hw, idx); + event->hw.state |= PERF_HES_UPTODATE; + } +} + +static void sparc_pmu_del(struct perf_event *event, int _flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; unsigned long flags; int i; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if 
(event == cpuc->event[i]) { - int idx = cpuc->current_idx[i]; + /* Absorb the final count and turn off the + * event. + */ + sparc_pmu_stop(event, PERF_EF_UPDATE); /* Shift remaining entries down into * the existing slot. @@ -734,13 +783,6 @@ static void sparc_pmu_disable(struct perf_event *event) cpuc->current_idx[i]; } - /* Absorb the final count and turn off the - * event. - */ - sparc_pmu_disable_event(cpuc, hwc, idx); - barrier(); - sparc_perf_event_update(event, hwc, idx); - perf_event_update_userpage(event); cpuc->n_events--; @@ -748,23 +790,10 @@ static void sparc_pmu_disable(struct perf_event *event) } } - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); } -static int active_event_index(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - int i; - - for (i = 0; i < cpuc->n_events; i++) { - if (cpuc->event[i] == event) - break; - } - BUG_ON(i == cpuc->n_events); - return cpuc->current_idx[i]; -} - static void sparc_pmu_read(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -774,15 +803,6 @@ static void sparc_pmu_read(struct perf_event *event) sparc_perf_event_update(event, hwc, idx); } -static void sparc_pmu_unthrottle(struct perf_event *event) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int idx = active_event_index(cpuc, event); - struct hw_perf_event *hwc = &event->hw; - - sparc_pmu_enable_event(cpuc, hwc, idx); -} - static atomic_t active_events = ATOMIC_INIT(0); static DEFINE_MUTEX(pmc_grab_mutex); @@ -877,7 +897,7 @@ static int sparc_check_constraints(struct perf_event **evts, if (!n_ev) return 0; - if (n_ev > perf_max_events) + if (n_ev > MAX_HWEVENTS) return -1; msk0 = perf_event_get_msk(events[0]); @@ -984,23 +1004,27 @@ static int collect_events(struct perf_event *group, int max_count, return n; } -static int sparc_pmu_enable(struct perf_event *event) +static int sparc_pmu_add(struct perf_event *event, int ef_flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n0, ret = -EAGAIN; unsigned long flags; local_irq_save(flags); - perf_disable(); + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - if (n0 >= perf_max_events) + if (n0 >= MAX_HWEVENTS) goto out; cpuc->event[n0] = event; cpuc->events[n0] = event->hw.event_base; cpuc->current_idx[n0] = PIC_NO_INDEX; + event->hw.state = PERF_HES_UPTODATE; + if (!(ef_flags & PERF_EF_START)) + event->hw.state |= PERF_HES_STOPPED; + /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed @@ -1020,12 +1044,12 @@ nocheck: ret = 0; out: - perf_enable(); + perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } -static int __hw_perf_event_init(struct perf_event *event) +static int sparc_pmu_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; @@ -1038,22 +1062,33 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - pmap = NULL; - if (attr->type == PERF_TYPE_HARDWARE) { + switch (attr->type) { + case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) return -EINVAL; pmap = sparc_pmu->event_map(attr->config); - } else if (attr->type == PERF_TYPE_HW_CACHE) { + break; + + case PERF_TYPE_HW_CACHE: pmap = sparc_map_cache_event(attr->config); if (IS_ERR(pmap)) return PTR_ERR(pmap); - } else if (attr->type != PERF_TYPE_RAW) - return -EOPNOTSUPP; + break; + + case PERF_TYPE_RAW: + pmap = NULL; + break; + + default: + return 
-ENOENT; + + } if (pmap) { hwc->event_base = perf_event_encode(pmap); } else { - /* User gives us "(encoding << 16) | pic_mask" for + /* + * User gives us "(encoding << 16) | pic_mask" for * PERF_TYPE_RAW events. */ hwc->event_base = attr->config; @@ -1071,7 +1106,7 @@ static int __hw_perf_event_init(struct perf_event *event) n = 0; if (event->group_leader != event) { n = collect_events(event->group_leader, - perf_max_events - 1, + MAX_HWEVENTS - 1, evts, events, current_idx_dmy); if (n < 0) return -EINVAL; @@ -1107,10 +1142,11 @@ static int __hw_perf_event_init(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void sparc_pmu_start_txn(const struct pmu *pmu) +static void sparc_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuhw->group_flag |= PERF_EVENT_TXN; } @@ -1119,11 +1155,12 @@ static void sparc_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. */ -static void sparc_pmu_cancel_txn(const struct pmu *pmu) +static void sparc_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); cpuhw->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); } /* @@ -1131,7 +1168,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int sparc_pmu_commit_txn(const struct pmu *pmu) +static int sparc_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int n; @@ -1147,28 +1184,24 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu) return -EAGAIN; cpuc->group_flag &= ~PERF_EVENT_TXN; + perf_pmu_enable(pmu); return 0; } -static const struct pmu pmu = { - .enable = sparc_pmu_enable, - .disable = sparc_pmu_disable, +static struct pmu pmu = { + .pmu_enable = sparc_pmu_enable, + .pmu_disable = sparc_pmu_disable, + .event_init = sparc_pmu_event_init, + .add = sparc_pmu_add, + .del = sparc_pmu_del, + .start = sparc_pmu_start, + .stop = sparc_pmu_stop, .read = sparc_pmu_read, - .unthrottle = sparc_pmu_unthrottle, .start_txn = sparc_pmu_start_txn, .cancel_txn = sparc_pmu_cancel_txn, .commit_txn = sparc_pmu_commit_txn, }; -const struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = __hw_perf_event_init(event); - - if (err) - return ERR_PTR(err); - return &pmu; -} - void perf_event_print_debug(void) { unsigned long flags; @@ -1244,7 +1277,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, continue; if (perf_event_overflow(event, 1, &data, regs)) - sparc_pmu_disable_event(cpuc, hwc, idx); + sparc_pmu_stop(event, 0); } return NOTIFY_STOP; @@ -1285,28 +1318,21 @@ void __init init_hw_perf_events(void) pr_cont("Supported PMU type is '%s'\n", sparc_pmu_type); - /* All sparc64 PMUs currently have 2 events. 
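Editor's note: the sh and sparc ->add()/->del()/->start()/->stop() callbacks above (and the x86 ones further down) all follow the same state convention. A condensed summary, written as a comment because the flag definitions themselves live in the generic perf headers rather than in this diff:

	/*
	 * PERF_HES_STOPPED  - the hardware counter is not currently counting
	 * PERF_HES_UPTODATE - event->count already reflects the hardware value
	 * PERF_HES_ARCH     - arch-private bit; used in these hunks to remember
	 *                     "was stopped, do not restart it when the PMU as a
	 *                     whole is re-enabled"
	 *
	 * ->add(event, PERF_EF_START)    schedule the event and start it at once
	 * ->start(event, PERF_EF_RELOAD) reprogram the sample period, then count
	 * ->stop(event, PERF_EF_UPDATE)  stop and fold the hardware delta into
	 *                                event->count
	 * ->del(event, 0)                implies a final stop(PERF_EF_UPDATE)
	 */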
*/ - perf_max_events = 2; - + perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ksp, fp; #ifdef CONFIG_FUNCTION_GRAPH_TRACER int graph = 0; #endif - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->tpc); + stack_trace_flush(); + + perf_callchain_store(entry, regs->tpc); ksp = regs->u_regs[UREG_I6]; fp = ksp + STACK_BIAS; @@ -1330,13 +1356,13 @@ static void perf_callchain_kernel(struct pt_regs *regs, pc = sf->callers_pc; fp = (unsigned long)sf->fp + STACK_BIAS; } - callchain_store(entry, pc); + perf_callchain_store(entry, pc); #ifdef CONFIG_FUNCTION_GRAPH_TRACER if ((pc + 8UL) == (unsigned long) &return_to_handler) { int index = current->curr_ret_stack; if (current->ret_stack && index >= graph) { pc = current->ret_stack[index - graph].ret; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); graph++; } } @@ -1344,13 +1370,12 @@ static void perf_callchain_kernel(struct pt_regs *regs, } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_64(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] + STACK_BIAS; do { @@ -1363,17 +1388,16 @@ static void perf_callchain_user_64(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp + STACK_BIAS; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) +static void perf_callchain_user_32(struct perf_callchain_entry *entry, + struct pt_regs *regs) { unsigned long ufp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->tpc); + perf_callchain_store(entry, regs->tpc); ufp = regs->u_regs[UREG_I6] & 0xffffffffUL; do { @@ -1386,34 +1410,16 @@ static void perf_callchain_user_32(struct pt_regs *regs, pc = sf.callers_pc; ufp = (unsigned long)sf.fp; - callchain_store(entry, pc); + perf_callchain_store(entry, pc); } while (entry->nr < PERF_MAX_STACK_DEPTH); } -/* Like powerpc we can't get PMU interrupts within the PMU handler, - * so no need for separate NMI and IRQ chains as on x86. 
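Editor's note: with the sh and sparc hunks (and the x86 ones later), the per-cpu callchain buffers, the entry->nr = 0 reset, and the explicit PERF_CONTEXT_KERNEL/PERF_CONTEXT_USER markers all disappear from arch code, leaving each architecture with just two entry points. The apparent contract, inferred from the fact that all of that bookkeeping is deleted on every arch (the generic side is not part of this diff):

	/* Allocation of the entry, the nr = 0 reset, the context markers and
	 * the user/kernel dispatch are now expected to happen in generic code;
	 * the arch hooks only walk the stacks and call perf_callchain_store(). */
	void perf_callchain_kernel(struct perf_callchain_entry *entry,
				   struct pt_regs *regs);
	void perf_callchain_user(struct perf_callchain_entry *entry,
				 struct pt_regs *regs);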
- */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - if (!user_mode(regs)) { - stack_trace_flush(); - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - if (regs) { - flushw_user(); - if (test_thread_flag(TIF_32BIT)) - perf_callchain_user_32(regs, entry); - else - perf_callchain_user_64(regs, entry); - } - return entry; + flushw_user(); + if (test_thread_flag(TIF_32BIT)) + perf_callchain_user_32(entry, regs); + else + perf_callchain_user_64(entry, regs); } diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S index 84f296c..8f58bdf 100644 --- a/arch/tile/kernel/intvec_32.S +++ b/arch/tile/kernel/intvec_32.S @@ -1506,13 +1506,6 @@ handle_ill: } STD_ENDPROC(handle_ill) - .pushsection .rodata, "a" - .align 8 -bpt_code: - bpt - ENDPROC(bpt_code) - .popsection - /* Various stub interrupt handlers and syscall handlers */ STD_ENTRY_LOCAL(_kernel_double_fault) diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c index 2ab233b..47d0c37 100644 --- a/arch/um/drivers/net_kern.c +++ b/arch/um/drivers/net_kern.c @@ -255,18 +255,6 @@ static void uml_net_tx_timeout(struct net_device *dev) netif_wake_queue(dev); } -static int uml_net_set_mac(struct net_device *dev, void *addr) -{ - struct uml_net_private *lp = netdev_priv(dev); - struct sockaddr *hwaddr = addr; - - spin_lock_irq(&lp->lock); - eth_mac_addr(dev, hwaddr->sa_data); - spin_unlock_irq(&lp->lock); - - return 0; -} - static int uml_net_change_mtu(struct net_device *dev, int new_mtu) { dev->mtu = new_mtu; @@ -373,7 +361,7 @@ static const struct net_device_ops uml_netdev_ops = { .ndo_start_xmit = uml_net_start_xmit, .ndo_set_multicast_list = uml_net_set_multicast_list, .ndo_tx_timeout = uml_net_tx_timeout, - .ndo_set_mac_address = uml_net_set_mac, + .ndo_set_mac_address = eth_mac_addr, .ndo_change_mtu = uml_net_change_mtu, .ndo_validate_addr = eth_validate_addr, }; @@ -472,7 +460,8 @@ static void eth_configure(int n, void *init, char *mac, ((*transport->user->init)(&lp->user, dev) != 0)) goto out_unregister; - eth_mac_addr(dev, device->mac); + /* don't use eth_mac_addr, it will not work here */ + memcpy(dev->dev_addr, device->mac, ETH_ALEN); dev->mtu = transport->user->mtu; dev->netdev_ops = ¨_netdev_ops; dev->ethtool_ops = ¨_net_ethtool_ops; diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index cd145ed..49b5e1e 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -62,7 +62,7 @@ static long execve1(const char *file, return error; } -long um_execve(const char *file, char __user *__user *argv, char __user *__user *env) +long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env) { long err; @@ -72,8 +72,8 @@ long um_execve(const char *file, char __user *__user *argv, char __user *__user return err; } -long sys_execve(const char __user *file, char __user *__user *argv, - char __user *__user *env) +long sys_execve(const char __user *file, const char __user *const __user *argv, + const char __user *const __user *env) { long error; char *filename; diff --git a/arch/um/kernel/internal.h b/arch/um/kernel/internal.h index 1303a10..5bf97db 100644 --- a/arch/um/kernel/internal.h +++ b/arch/um/kernel/internal.h @@ -1 +1 @@ -extern long 
um_execve(const char *file, char __user *__user *argv, char __user *__user *env); +extern long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env); diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 5ddb246..f958cb8 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -60,8 +60,8 @@ int kernel_execve(const char *filename, fs = get_fs(); set_fs(KERNEL_DS); - ret = um_execve(filename, (char __user *__user *)argv, - (char __user *__user *) envp); + ret = um_execve(filename, (const char __user *const __user *)argv, + (const char __user *const __user *) envp); set_fs(fs); return ret; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cea0cd9..9815221 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -33,6 +33,7 @@ config X86 select HAVE_KRETPROBES select HAVE_OPTPROBES select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_C_RECORDMCOUNT select HAVE_DYNAMIC_FTRACE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER @@ -59,6 +60,8 @@ config X86 select ANON_INODES select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER + select HAVE_ARCH_JUMP_LABEL + select HAVE_TEXT_POKE_SMP config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS) @@ -2125,6 +2128,10 @@ config HAVE_ATOMIC_IOMAP def_bool y depends on X86_32 +config HAVE_TEXT_POKE_SMP + bool + select STOP_MACHINE if SMP + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/x86/boot/early_serial_console.c b/arch/x86/boot/early_serial_console.c index 030f4b9..5df2869 100644 --- a/arch/x86/boot/early_serial_console.c +++ b/arch/x86/boot/early_serial_console.c @@ -58,7 +58,19 @@ static void parse_earlyprintk(void) if (arg[pos] == ',') pos++; - if (!strncmp(arg, "ttyS", 4)) { + /* + * make sure we have + * "serial,0x3f8,115200" + * "serial,ttyS0,115200" + * "ttyS0,115200" + */ + if (pos == 7 && !strncmp(arg + pos, "0x", 2)) { + port = simple_strtoull(arg + pos, &e, 16); + if (port == 0 || arg + pos == e) + port = DEFAULT_SERIAL_PORT; + else + pos = e - arg; + } else if (!strncmp(arg + pos, "ttyS", 4)) { static const int bases[] = { 0x3f8, 0x2f8 }; int idx = 0; diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index bc6abb7..76561d2 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,6 +4,7 @@ #include <linux/types.h> #include <linux/stddef.h> #include <linux/stringify.h> +#include <linux/jump_label.h> #include <asm/asm.h> /* @@ -160,6 +161,8 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, #define __parainstructions_end NULL #endif +extern void *text_poke_early(void *addr, const void *opcode, size_t len); + /* * Clear and restore the kernel write-protection flag on the local CPU. * Allows the kernel to edit read-only pages. 
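Editor's note on the early_serial_console.c hunk a few files up: the new hex-port branch only triggers when pos == 7, which can only happen after the "serial," prefix (6 characters plus the comma) has been consumed, so "earlyprintk=serial,0x3f8,115200" selects the port directly while a bare "earlyprintk=0x3f8,115200" still falls through to the ttyS/default handling. The relevant condition, re-quoted with that made explicit:

	/* pos == 7 only if "serial," was skipped above, so the raw-port
	 * form requires that prefix */
	if (pos == 7 && !strncmp(arg + pos, "0x", 2)) {
		port = simple_strtoull(arg + pos, &e, 16);
		if (port == 0 || arg + pos == e)
			port = DEFAULT_SERIAL_PORT;
		else
			pos = e - arg;
	}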
@@ -180,4 +183,12 @@ static inline void apply_paravirt(struct paravirt_patch_site *start, extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_smp(void *addr, const void *opcode, size_t len); +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) +#define IDEAL_NOP_SIZE_5 5 +extern unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; +extern void arch_init_ideal_nop5(void); +#else +static inline void arch_init_ideal_nop5(void) {} +#endif + #endif /* _ASM_X86_ALTERNATIVE_H */ diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index d2544f1..cb03037 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { } #endif /* !CONFIG_AMD_IOMMU_STATS */ +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 7014e88..0861618 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -368,6 +368,9 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* flags read from acpi table */ + u8 acpi_flags; + /* * Capability pointer. There could be more than one IOMMU per PCI * device function if there are more than one AMD IOMMU capability @@ -411,6 +414,15 @@ struct amd_iommu { /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; + + /* + * This array is required to work around a potential BIOS bug. + * The BIOS may miss to restore parts of the PCI configuration + * space when the system resumes from S3. The result is that the + * IOMMU does not execute commands anymore which leads to system + * failure. 
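Editor's note: the cache_cfg[] array introduced by the comment above is filled and restored by the amd_iommu_init.c hunks later in this diff (init_iommu_from_pci() and the new iommu_apply_quirks()). Condensed into one place for readability; this is illustrative only, the real patch open-codes the four registers and the helper names here are placeholders:

	static const int rd890_cfg_regs[] = { 0xf0, 0xf4, 0xf8, 0xfc };

	/* at probe time, from init_iommu_from_pci(): */
	static void rd890_save_cache_cfg(struct amd_iommu *iommu)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(rd890_cfg_regs); i++)
			pci_read_config_dword(iommu->dev, rd890_cfg_regs[i],
					      &iommu->cache_cfg[i]);
	}

	/* on every enable, including resume, from iommu_apply_quirks(): */
	static void rd890_restore_cache_cfg(struct amd_iommu *iommu)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(rd890_cfg_regs); i++)
			pci_write_config_dword(iommu->dev, rd890_cfg_regs[i],
					       iommu->cache_cfg[i]);
	}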
+ */ + u32 cache_cfg[4]; }; /* diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 545776e..bafd80d 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -309,7 +309,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr) static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) { return ((1UL << (nr % BITS_PER_LONG)) & - (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; + (addr[nr / BITS_PER_LONG])) != 0; } static inline int variable_test_bit(int nr, volatile const unsigned long *addr) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index c6fbb7b..3f76523 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -168,6 +168,7 @@ #define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */ #define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */ #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */ +#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h new file mode 100644 index 0000000..f52d42e --- /dev/null +++ b/arch/x86/include/asm/jump_label.h @@ -0,0 +1,37 @@ +#ifndef _ASM_X86_JUMP_LABEL_H +#define _ASM_X86_JUMP_LABEL_H + +#ifdef __KERNEL__ + +#include <linux/types.h> +#include <asm/nops.h> + +#define JUMP_LABEL_NOP_SIZE 5 + +# define JUMP_LABEL_INITIAL_NOP ".byte 0xe9 \n\t .long 0\n\t" + +# define JUMP_LABEL(key, label) \ + do { \ + asm goto("1:" \ + JUMP_LABEL_INITIAL_NOP \ + ".pushsection __jump_table, \"a\" \n\t"\ + _ASM_PTR "1b, %l[" #label "], %c0 \n\t" \ + ".popsection \n\t" \ + : : "i" (key) : : label); \ + } while (0) + +#endif /* __KERNEL__ */ + +#ifdef CONFIG_X86_64 +typedef u64 jump_label_t; +#else +typedef u32 jump_label_t; +#endif + +struct jump_entry { + jump_label_t code; + jump_label_t target; + jump_label_t key; +}; + +#endif diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index def5007..a70cd21 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -36,19 +36,6 @@ #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) -/* Non HT mask */ -#define P4_ESCR_MASK \ - (P4_ESCR_EVENT_MASK | \ - P4_ESCR_EVENTMASK_MASK | \ - P4_ESCR_TAG_MASK | \ - P4_ESCR_TAG_ENABLE | \ - P4_ESCR_T0_OS | \ - P4_ESCR_T0_USR) - -/* HT mask */ -#define P4_ESCR_MASK_HT \ - (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) - #define P4_CCCR_OVF 0x80000000U #define P4_CCCR_CASCADE 0x40000000U #define P4_CCCR_OVF_PMI_T0 0x04000000U @@ -70,23 +57,6 @@ #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) -/* Non HT mask */ -#define P4_CCCR_MASK \ - (P4_CCCR_OVF | \ - P4_CCCR_CASCADE | \ - P4_CCCR_OVF_PMI_T0 | \ - P4_CCCR_FORCE_OVF | \ - P4_CCCR_EDGE | \ - P4_CCCR_THRESHOLD_MASK | \ - P4_CCCR_COMPLEMENT | \ - P4_CCCR_COMPARE | \ - P4_CCCR_ESCR_SELECT_MASK | \ - P4_CCCR_ENABLE) - -/* HT mask */ -#define P4_CCCR_MASK_HT \ - (P4_CCCR_MASK | P4_CCCR_OVF_PMI_T1 | P4_CCCR_THREAD_ANY) - #define P4_GEN_ESCR_EMASK(class, name, bit) \ class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_EMASK_BIT(class, name) class##__##name @@ -127,6 +97,28 @@ #define P4_CONFIG_HT_SHIFT 63 
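Editor's note on the new asm/jump_label.h headers (sparc above, x86 in this group of hunks): the JUMP_LABEL() macro is placed at the branch site, with the slow path behind a local label, and the nop(s) it emits are later patched into a jump by arch_jump_label_transform(). A hedged usage sketch, since the generic jump-label core that defines the key and the enable/disable side is not part of this excerpt (my_key and do_trace_work() are placeholders):

	extern void do_trace_work(void);	/* hypothetical slow path */
	static int my_key;			/* whatever object the generic core keys on */

	static inline void maybe_trace(void)
	{
		JUMP_LABEL(&my_key, trace_label);
		return;				/* fast path: falls through the nop */
	trace_label:
		do_trace_work();		/* reached only once the site is patched */
	}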
#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) +/* + * The bits we allow to pass for RAW events + */ +#define P4_CONFIG_MASK_ESCR \ + P4_ESCR_EVENT_MASK | \ + P4_ESCR_EVENTMASK_MASK | \ + P4_ESCR_TAG_MASK | \ + P4_ESCR_TAG_ENABLE + +#define P4_CONFIG_MASK_CCCR \ + P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE | \ + P4_CCCR_THREAD_ANY | \ + P4_CCCR_RESERVED + +/* some dangerous bits are reserved for kernel internals */ +#define P4_CONFIG_MASK \ + (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ + (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) + static inline bool p4_is_event_cascaded(u64 config) { u32 cccr = p4_config_unpack_cccr(config); diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fedf32a..9d3f485 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -34,7 +34,7 @@ GCOV_PROFILE_paravirt.o := n obj-y := process_$(BITS).o signal.o entry_$(BITS).o obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o ldt.o dumpstack.o -obj-y += setup.o x86_init.o i8259.o irqinit.o +obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o obj-$(CONFIG_X86_VISWS) += visws_quirks.o obj-$(CONFIG_X86_32) += probe_roms_32.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index fb7a5f0..fb16f17 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -61,7 +61,7 @@ struct cstate_entry { unsigned int ecx; } states[ACPI_PROCESSOR_MAX_POWER]; }; -static struct cstate_entry *cpu_cstate_entry; /* per CPU ptr */ +static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */ static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f65ab8b..a36bb90 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -195,7 +195,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; -static void *text_poke_early(void *addr, const void *opcode, size_t len); +void *text_poke_early(void *addr, const void *opcode, size_t len); /* Replace instructions with better alternatives for this CPU type. This runs before SMP is initialized to avoid SMP problems with @@ -522,7 +522,7 @@ void __init alternative_instructions(void) * instructions. And on the local CPU you need to be protected again NMI or MCE * handlers seeing an inconsistent instruction while you patch. */ -static void *__init_or_module text_poke_early(void *addr, const void *opcode, +void *__init_or_module text_poke_early(void *addr, const void *opcode, size_t len) { unsigned long flags; @@ -637,7 +637,72 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) tpp.len = len; atomic_set(&stop_machine_first, 1); wrote_text = 0; - stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); + /* Use __stop_machine() because the caller already got online_cpus. */ + __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); return addr; } +#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL) + +unsigned char ideal_nop5[IDEAL_NOP_SIZE_5]; + +void __init arch_init_ideal_nop5(void) +{ + extern const unsigned char ftrace_test_p6nop[]; + extern const unsigned char ftrace_test_nop5[]; + extern const unsigned char ftrace_test_jmp[]; + int faulted = 0; + + /* + * There is no good nop for all x86 archs. 
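Editor's note on the text_poke_smp() change above (stop_machine() becomes __stop_machine()): the new comment states that the caller already holds the CPU hotplug lock, which matches the locking the sparc arch_jump_label_transform() earlier in this diff takes explicitly. A sketch of the calling convention being assumed (patch_one_site() is a placeholder, not a symbol from these patches):

	static void patch_one_site(void *addr, const void *new_insn, size_t len)
	{
		get_online_cpus();		/* __stop_machine() relies on this */
		mutex_lock(&text_mutex);
		text_poke_smp(addr, new_insn, len);
		mutex_unlock(&text_mutex);
		put_online_cpus();
	}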
+ * We will default to using the P6_NOP5, but first we + * will test to make sure that the nop will actually + * work on this CPU. If it faults, we will then + * go to a lesser efficient 5 byte nop. If that fails + * we then just use a jmp as our nop. This isn't the most + * efficient nop, but we can not use a multi part nop + * since we would then risk being preempted in the middle + * of that nop, and if we enabled tracing then, it might + * cause a system crash. + * + * TODO: check the cpuid to determine the best nop. + */ + asm volatile ( + "ftrace_test_jmp:" + "jmp ftrace_test_p6nop\n" + "nop\n" + "nop\n" + "nop\n" /* 2 byte jmp + 3 bytes */ + "ftrace_test_p6nop:" + P6_NOP5 + "jmp 1f\n" + "ftrace_test_nop5:" + ".byte 0x66,0x66,0x66,0x66,0x90\n" + "1:" + ".section .fixup, \"ax\"\n" + "2: movl $1, %0\n" + " jmp ftrace_test_nop5\n" + "3: movl $2, %0\n" + " jmp 1b\n" + ".previous\n" + _ASM_EXTABLE(ftrace_test_p6nop, 2b) + _ASM_EXTABLE(ftrace_test_nop5, 3b) + : "=r"(faulted) : "0" (faulted)); + + switch (faulted) { + case 0: + pr_info("converting mcount calls to 0f 1f 44 00 00\n"); + memcpy(ideal_nop5, ftrace_test_p6nop, IDEAL_NOP_SIZE_5); + break; + case 1: + pr_info("converting mcount calls to 66 66 66 66 90\n"); + memcpy(ideal_nop5, ftrace_test_nop5, IDEAL_NOP_SIZE_5); + break; + case 2: + pr_info("converting mcount calls to jmp . + 5\n"); + memcpy(ideal_nop5, ftrace_test_jmp, IDEAL_NOP_SIZE_5); + break; + } + +} +#endif diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index fa044e1..679b645 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1953,6 +1953,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, size_t size, int dir) { + dma_addr_t flush_addr; dma_addr_t i, start; unsigned int pages; @@ -1960,6 +1961,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, (dma_addr + size > dma_dom->aperture_size)) return; + flush_addr = dma_addr; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); dma_addr &= PAGE_MASK; start = dma_addr; @@ -1974,7 +1976,7 @@ static void __unmap_single(struct dma_ops_domain *dma_dom, dma_ops_free_addresses(dma_dom, dma_addr, pages); if (amd_iommu_unmap_flush || dma_dom->need_flush) { - iommu_flush_pages(&dma_dom->domain, dma_addr, size); + iommu_flush_pages(&dma_dom->domain, flush_addr, size); dma_dom->need_flush = false; } } diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 3cc63e2..5a170cb 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + + if (is_rd890_iommu(iommu->dev)) { + pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); + pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); + pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); + pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); + } } /* @@ -649,29 +656,9 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, struct ivhd_entry *e; /* - * First set the recommended feature enable bits from ACPI - * into the IOMMU control registers + * First save the recommended feature enable bits from ACPI */ - h->flags & IVHD_FLAG_HT_TUN_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : - iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); - - h->flags & IVHD_FLAG_PASSPW_EN_MASK ? 
- iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_PASSPW_EN); - - h->flags & IVHD_FLAG_RESPASSPW_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : - iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); - - h->flags & IVHD_FLAG_ISOC_EN_MASK ? - iommu_feature_enable(iommu, CONTROL_ISOC_EN) : - iommu_feature_disable(iommu, CONTROL_ISOC_EN); - - /* - * make IOMMU memory accesses cache coherent - */ - iommu_feature_enable(iommu, CONTROL_COHERENT_EN); + iommu->acpi_flags = h->flags; /* * Done. Now parse the device entries @@ -1116,6 +1103,40 @@ static void init_device_table(void) } } +static void iommu_init_flags(struct amd_iommu *iommu) +{ + iommu->acpi_flags & IVHD_FLAG_HT_TUN_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_HT_TUN_EN) : + iommu_feature_disable(iommu, CONTROL_HT_TUN_EN); + + iommu->acpi_flags & IVHD_FLAG_PASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_PASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_PASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_RESPASSPW_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_RESPASSPW_EN) : + iommu_feature_disable(iommu, CONTROL_RESPASSPW_EN); + + iommu->acpi_flags & IVHD_FLAG_ISOC_EN_MASK ? + iommu_feature_enable(iommu, CONTROL_ISOC_EN) : + iommu_feature_disable(iommu, CONTROL_ISOC_EN); + + /* + * make IOMMU memory accesses cache coherent + */ + iommu_feature_enable(iommu, CONTROL_COHERENT_EN); +} + +static void iommu_apply_quirks(struct amd_iommu *iommu) +{ + if (is_rd890_iommu(iommu->dev)) { + pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); + pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); + pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); + pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); + } +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -1126,6 +1147,8 @@ static void enable_iommus(void) for_each_iommu(iommu) { iommu_disable(iommu); + iommu_apply_quirks(iommu); + iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); iommu_enable_event_buffer(iommu); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f1efeba..5c5b8f3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -306,14 +306,19 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, old_cfg = old_desc->chip_data; - memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); + cfg->vector = old_cfg->vector; + cfg->move_in_progress = old_cfg->move_in_progress; + cpumask_copy(cfg->domain, old_cfg->domain); + cpumask_copy(cfg->old_domain, old_cfg->old_domain); init_copy_irq_2_pin(old_cfg, cfg, node); } -static void free_irq_cfg(struct irq_cfg *old_cfg) +static void free_irq_cfg(struct irq_cfg *cfg) { - kfree(old_cfg); + free_cpumask_var(cfg->domain); + free_cpumask_var(cfg->old_domain); + kfree(cfg); } void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 490dac6..f2f9ac7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -545,7 +545,7 @@ void __cpuinit cpu_detect(struct cpuinfo_x86 *c) } } -static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) +void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) { u32 tfms, xlvl; u32 ebx; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 3624e8a..f668bb1 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -33,5 +33,6 @@ 
extern const struct cpu_dev *const __x86_cpu_dev_start[], *const __x86_cpu_dev_end[]; extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +extern void get_cpu_cap(struct cpuinfo_x86 *c); #endif diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c index 994230d..4f6f679 100644 --- a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c @@ -368,16 +368,22 @@ static int __init pcc_cpufreq_do_osc(acpi_handle *handle) return -ENODEV; out_obj = output.pointer; - if (out_obj->type != ACPI_TYPE_BUFFER) - return -ENODEV; + if (out_obj->type != ACPI_TYPE_BUFFER) { + ret = -ENODEV; + goto out_free; + } errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); - if (errors) - return -ENODEV; + if (errors) { + ret = -ENODEV; + goto out_free; + } supported = *((u32 *)(out_obj->buffer.pointer + 4)); - if (!(supported & 0x1)) - return -ENODEV; + if (!(supported & 0x1)) { + ret = -ENODEV; + goto out_free; + } out_free: kfree(output.pointer); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 85f69cd..b438944 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -39,6 +39,7 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); c->cpuid_level = cpuid_eax(0); + get_cpu_cap(c); } } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 03a5b03..e2513f2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -531,7 +531,7 @@ static int x86_pmu_hw_config(struct perf_event *event) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_event_init(struct perf_event *event) +static int __x86_pmu_event_init(struct perf_event *event) { int err; @@ -584,7 +584,7 @@ static void x86_pmu_disable_all(void) } } -void hw_perf_disable(void) +static void x86_pmu_disable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -619,7 +619,7 @@ static void x86_pmu_enable_all(int added) } } -static const struct pmu pmu; +static struct pmu pmu; static inline int is_x86_event(struct perf_event *event) { @@ -801,10 +801,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } -static int x86_pmu_start(struct perf_event *event); -static void x86_pmu_stop(struct perf_event *event); +static void x86_pmu_start(struct perf_event *event, int flags); +static void x86_pmu_stop(struct perf_event *event, int flags); -void hw_perf_enable(void) +static void x86_pmu_enable(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; @@ -840,7 +840,14 @@ void hw_perf_enable(void) match_prev_assignment(hwc, cpuc, i)) continue; - x86_pmu_stop(event); + /* + * Ensure we don't accidentally enable a stopped + * counter simply because we rescheduled. + */ + if (hwc->state & PERF_HES_STOPPED) + hwc->state |= PERF_HES_ARCH; + + x86_pmu_stop(event, PERF_EF_UPDATE); } for (i = 0; i < cpuc->n_events; i++) { @@ -852,7 +859,10 @@ void hw_perf_enable(void) else if (i < n_running) continue; - x86_pmu_start(event); + if (hwc->state & PERF_HES_ARCH) + continue; + + x86_pmu_start(event, PERF_EF_RELOAD); } cpuc->n_added = 0; perf_events_lapic_init(); @@ -953,15 +963,12 @@ static void x86_pmu_enable_event(struct perf_event *event) } /* - * activate a single event + * Add a single event to the PMU. 
* * The event is added to the group of enabled events * but only if it can be scehduled with existing events. - * - * Called with PMU disabled. If successful and return value 1, - * then guaranteed to call perf_enable() and hw_perf_enable() */ -static int x86_pmu_enable(struct perf_event *event) +static int x86_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc; @@ -970,58 +977,67 @@ static int x86_pmu_enable(struct perf_event *event) hwc = &event->hw; + perf_pmu_disable(event->pmu); n0 = cpuc->n_events; - n = collect_events(cpuc, event, false); - if (n < 0) - return n; + ret = n = collect_events(cpuc, event, false); + if (ret < 0) + goto out; + + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (!(flags & PERF_EF_START)) + hwc->state |= PERF_HES_ARCH; /* * If group events scheduling transaction was started, * skip the schedulability test here, it will be peformed - * at commit time(->commit_txn) as a whole + * at commit time (->commit_txn) as a whole */ if (cpuc->group_flag & PERF_EVENT_TXN) - goto out; + goto done_collect; ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) - return ret; + goto out; /* * copy new assignment, now we know it is possible * will be used by hw_perf_enable() */ memcpy(cpuc->assign, assign, n*sizeof(int)); -out: +done_collect: cpuc->n_events = n; cpuc->n_added += n - n0; cpuc->n_txn += n - n0; - return 0; + ret = 0; +out: + perf_pmu_enable(event->pmu); + return ret; } -static int x86_pmu_start(struct perf_event *event) +static void x86_pmu_start(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (idx == -1) - return -EAGAIN; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + + if (WARN_ON_ONCE(idx == -1)) + return; + + if (flags & PERF_EF_RELOAD) { + WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE)); + x86_perf_event_set_period(event); + } + + event->hw.state = 0; - x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); __set_bit(idx, cpuc->running); x86_pmu.enable(event); perf_event_update_userpage(event); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_event *event) -{ - int ret = x86_pmu_start(event); - WARN_ON_ONCE(ret); } void perf_event_print_debug(void) @@ -1078,27 +1094,29 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void x86_pmu_stop(struct perf_event *event) +static void x86_pmu_stop(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - if (!__test_and_clear_bit(idx, cpuc->active_mask)) - return; - - x86_pmu.disable(event); - - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - x86_perf_event_update(event); + if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) { + x86_pmu.disable(event); + cpuc->events[hwc->idx] = NULL; + WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); + hwc->state |= PERF_HES_STOPPED; + } - cpuc->events[idx] = NULL; + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + /* + * Drain the remaining delta count out of a event + * that we are disabling: + */ + x86_perf_event_update(event); + hwc->state |= PERF_HES_UPTODATE; + } } -static void x86_pmu_disable(struct perf_event *event) +static void x86_pmu_del(struct perf_event *event, int flags) { struct cpu_hw_events *cpuc = 
&__get_cpu_var(cpu_hw_events); int i; @@ -1111,7 +1129,7 @@ static void x86_pmu_disable(struct perf_event *event) if (cpuc->group_flag & PERF_EVENT_TXN) return; - x86_pmu_stop(event); + x86_pmu_stop(event, PERF_EF_UPDATE); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { @@ -1134,7 +1152,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) struct perf_sample_data data; struct cpu_hw_events *cpuc; struct perf_event *event; - struct hw_perf_event *hwc; int idx, handled = 0; u64 val; @@ -1155,7 +1172,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) } event = cpuc->events[idx]; - hwc = &event->hw; val = x86_perf_event_update(event); if (val & (1ULL << (x86_pmu.cntval_bits - 1))) @@ -1171,7 +1187,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } if (handled) @@ -1388,7 +1404,6 @@ void __init init_hw_perf_events(void) x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; - perf_max_events = x86_pmu.num_counters; if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", @@ -1424,6 +1439,7 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); } @@ -1437,10 +1453,11 @@ static inline void x86_pmu_read(struct perf_event *event) * Set the flag to make pmu::enable() not perform the * schedulability test, it will be performed at commit time */ -static void x86_pmu_start_txn(const struct pmu *pmu) +static void x86_pmu_start_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + perf_pmu_disable(pmu); cpuc->group_flag |= PERF_EVENT_TXN; cpuc->n_txn = 0; } @@ -1450,7 +1467,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu) * Clear the flag and pmu::enable() will perform the * schedulability test. 
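Editor's note: across sparc and x86 the group-transaction hooks pick up explicit PMU disable/enable calls. Summarised as a comment, because the group-scheduling caller lives in the generic perf core rather than in this diff:

	/*
	 * ->start_txn()  : perf_pmu_disable() + set PERF_EVENT_TXN, so ->add()
	 *                  skips its per-event schedulability test
	 * ->commit_txn() : run the schedulability test once for the whole group,
	 *                  then perf_pmu_enable() on success
	 * ->cancel_txn() : undo the speculative ->add()s (n_added/n_events on
	 *                  x86) and perf_pmu_enable(), keeping every path balanced
	 */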
*/ -static void x86_pmu_cancel_txn(const struct pmu *pmu) +static void x86_pmu_cancel_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1460,6 +1477,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) */ cpuc->n_added -= cpuc->n_txn; cpuc->n_events -= cpuc->n_txn; + perf_pmu_enable(pmu); } /* @@ -1467,7 +1485,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu) * Perform the group schedulability test as a whole * Return 0 if success */ -static int x86_pmu_commit_txn(const struct pmu *pmu) +static int x86_pmu_commit_txn(struct pmu *pmu) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int assign[X86_PMC_IDX_MAX]; @@ -1489,22 +1507,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu) memcpy(cpuc->assign, assign, n*sizeof(int)); cpuc->group_flag &= ~PERF_EVENT_TXN; - + perf_pmu_enable(pmu); return 0; } -static const struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, -}; - /* * validate that we can schedule this event */ @@ -1579,12 +1585,22 @@ out: return ret; } -const struct pmu *hw_perf_event_init(struct perf_event *event) +int x86_pmu_event_init(struct perf_event *event) { - const struct pmu *tmp; + struct pmu *tmp; int err; - err = __hw_perf_event_init(event); + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __x86_pmu_event_init(event); if (!err) { /* * we temporarily connect event to its pmu @@ -1604,26 +1620,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } -/* - * callchain support - */ +static struct pmu pmu = { + .pmu_enable = x86_pmu_enable, + .pmu_disable = x86_pmu_disable, -static inline -void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - if (entry->nr < PERF_MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} + .event_init = x86_pmu_event_init, -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); + .add = x86_pmu_add, + .del = x86_pmu_del, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, +}; + +/* + * callchain support + */ static void backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) @@ -1645,7 +1666,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - callchain_store(entry, addr); + perf_callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { @@ -1656,11 +1677,15 @@ static const struct stacktrace_ops backtrace_ops = { .walk_stack = print_context_stack_bp, }; -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->ip); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->ip); 
dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } @@ -1689,7 +1714,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (fp < compat_ptr(regs->sp)) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = compat_ptr(frame.next_frame); } return 1; @@ -1702,19 +1727,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) } #endif -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) +void +perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stack_frame frame; const void __user *fp; - if (!user_mode(regs)) - regs = task_pt_regs(current); + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return; + } fp = (void __user *)regs->bp; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, regs->ip); + perf_callchain_store(entry, regs->ip); if (perf_callchain_user32(regs, entry)) return; @@ -1731,52 +1757,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) if ((unsigned long)fp < regs->sp) break; - callchain_store(entry, frame.return_address); + perf_callchain_store(entry, frame.return_address); fp = frame.next_frame; } } -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* TODO: We don't support guest os callchain now */ - return NULL; - } - - if (in_nmi()) - entry = &__get_cpu_var(pmc_nmi_entry); - else - entry = &__get_cpu_var(pmc_irq_entry); - - entry->nr = 0; - - perf_do_callchain(regs, entry); - - return entry; -} - unsigned long perf_instruction_pointer(struct pt_regs *regs) { unsigned long ip; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index ee05c90..c8f5c08 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -713,18 +713,18 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) struct cpu_hw_events *cpuc; int bit, loops; u64 status; - int handled = 0; + int handled; perf_sample_data_init(&data, 0); cpuc = &__get_cpu_var(cpu_hw_events); intel_pmu_disable_all(); - intel_pmu_drain_bts_buffer(); + handled = intel_pmu_drain_bts_buffer(); status = intel_pmu_get_status(); if (!status) { intel_pmu_enable_all(0); - return 0; + return handled; } loops = 0; @@ -763,7 +763,7 @@ again: data.period = event->hw.last_period; if (perf_event_overflow(event, 1, &data, regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 18018d1..4977f9c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -214,7 +214,7 @@ static void intel_pmu_disable_bts(void) update_debugctlmsr(debugctlmsr); } -static void intel_pmu_drain_bts_buffer(void) +static int intel_pmu_drain_bts_buffer(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; @@ -231,16 +231,16 
@@ static void intel_pmu_drain_bts_buffer(void) struct pt_regs regs; if (!event) - return; + return 0; if (!ds) - return; + return 0; at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; top = (struct bts_record *)(unsigned long)ds->bts_index; if (top <= at) - return; + return 0; ds->bts_index = ds->bts_buffer_base; @@ -256,7 +256,7 @@ static void intel_pmu_drain_bts_buffer(void) perf_prepare_sample(&header, &data, event, &regs); if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) - return; + return 1; for (; at < top; at++) { data.ip = at->from; @@ -270,6 +270,7 @@ static void intel_pmu_drain_bts_buffer(void) /* There's new data available. */ event->hw.interrupts++; event->pending_kill = POLL_IN; + return 1; } /* @@ -491,7 +492,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, &regs)) - x86_pmu_stop(event); + x86_pmu_stop(event, 0); } static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index b560db3..81400b9 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -18,6 +18,8 @@ struct p4_event_bind { unsigned int opcode; /* Event code and ESCR selector */ unsigned int escr_msr[2]; /* ESCR MSR for this event */ + unsigned int escr_emask; /* valid ESCR EventMask bits */ + unsigned int shared; /* event is shared across threads */ char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ }; @@ -66,231 +68,435 @@ static struct p4_event_bind p4_event_bind_map[] = { [P4_EVENT_TC_DELIVER_MODE] = { .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | + P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), + .shared = 1, .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_BPU_FETCH_REQUEST] = { .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_ITLB_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_MEMORY_CANCEL] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MEMORY_COMPLETE] = { .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | + P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_LOAD_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, + .escr_emask = +
P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_STORE_PORT_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_MOB_LOAD_REPLAY] = { .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | + P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_PAGE_WALK_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_CACHE_REFERENCE] = { .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ALLOCATION] = { .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_FSB_DATA_ACTIVITY] = 
{ .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), + .shared = 1, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), .cntr = { {0, -1, -1}, {1, -1, -1} }, }, [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), .cntr = { {2, -1, -1}, {3, -1, -1} }, }, [P4_EVENT_SSE_INPUT_ASSIST] = { .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_PACKED_SP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_PACKED_DP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_SCALAR_SP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, 
{10, 11, -1} }, }, [P4_EVENT_SCALAR_DP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_64BIT_MMX_UOP] = { .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_128BIT_MMX_UOP] = { .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_X87_FP_UOP] = { .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, [P4_EVENT_TC_MISC] = { .opcode = P4_OPCODE(P4_EVENT_TC_MISC), .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_GLOBAL_POWER_EVENTS] = { .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_TC_MS_XFER] = { .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_UOP_QUEUE_WRITES] = { .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RETIRED_BRANCH_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), .cntr = { {4, 5, -1}, {6, 7, -1} }, }, [P4_EVENT_RESOURCE_STALL] = { .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_WC_BUFFER] = { .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | + P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), + .shared = 1, .cntr = { {8, 9, -1}, {10, 11, -1} }, }, 
[P4_EVENT_B2B_CYCLES] = { .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_BNR] = { .opcode = P4_OPCODE(P4_EVENT_BNR), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_SNOOP] = { .opcode = P4_OPCODE(P4_EVENT_SNOOP), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_RESPONSE] = { .opcode = P4_OPCODE(P4_EVENT_RESPONSE), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .escr_emask = 0, .cntr = { {0, -1, -1}, {2, -1, -1} }, }, [P4_EVENT_FRONT_END_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_EXECUTION_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | + P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_REPLAY_EVENT] = { .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_INSTR_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_UOPS_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_UOP_TYPE] = { .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | + P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_BRANCH_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | + P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_MISPRED_BRANCH_RETIRED] = { .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, 
}, [P4_EVENT_X87_ASSIST] = { .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | + P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_MACHINE_CLEAR] = { .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | + P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, [P4_EVENT_INSTR_COMPLETED] = { .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .escr_emask = + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), .cntr = { {12, 13, 16}, {14, 15, 17} }, }, }; @@ -428,29 +634,73 @@ static u64 p4_pmu_event_map(int hw_event) return config; } +/* check cpu model specifics */ +static bool p4_event_match_cpu_model(unsigned int event_idx) +{ + /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ + if (event_idx == P4_EVENT_INSTR_COMPLETED) { + if (boot_cpu_data.x86_model != 3 && + boot_cpu_data.x86_model != 4 && + boot_cpu_data.x86_model != 6) + return false; + } + + /* + * For info + * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 + */ + + return true; +} + static int p4_validate_raw_event(struct perf_event *event) { - unsigned int v; + unsigned int v, emask; - /* user data may have out-of-bound event index */ + /* User data may have out-of-bound event index */ v = p4_config_unpack_event(event->attr.config); - if (v >= ARRAY_SIZE(p4_event_bind_map)) { - pr_warning("P4 PMU: Unknown event code: %d\n", v); + if (v >= ARRAY_SIZE(p4_event_bind_map)) + return -EINVAL; + + /* It may be unsupported: */ + if (!p4_event_match_cpu_model(v)) return -EINVAL; + + /* + * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as + * in Architectural Performance Monitoring, it means not + * on _which_ logical cpu to count but rather _when_, ie it + * depends on logical cpu state -- count event if one cpu active, + * none, both or any, so we just allow user to pass any value + * desired. 
+ * + * In turn we always set Tx_OS/Tx_USR bits bound to logical + * cpu without their propagation to another cpu + */ + + /* + * if an event is shared accross the logical threads + * the user needs special permissions to be able to use it + */ + if (p4_event_bind_map[v].shared) { + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; } + /* ESCR EventMask bits may be invalid */ + emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; + if (emask & ~p4_event_bind_map[v].escr_emask) + return -EINVAL; + /* - * it may have some screwed PEBS bits + * it may have some invalid PEBS bits */ - if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) { - pr_warning("P4 PMU: PEBS are not supported yet\n"); + if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) return -EINVAL; - } + v = p4_config_unpack_metric(event->attr.config); - if (v >= ARRAY_SIZE(p4_pebs_bind_map)) { - pr_warning("P4 PMU: Unknown metric code: %d\n", v); + if (v >= ARRAY_SIZE(p4_pebs_bind_map)) return -EINVAL; - } return 0; } @@ -478,27 +728,21 @@ static int p4_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) { + /* + * Clear bits we reserve to be managed by kernel itself + * and never allowed from a user space + */ + event->attr.config &= P4_CONFIG_MASK; + rc = p4_validate_raw_event(event); if (rc) goto out; /* - * We don't control raw events so it's up to the caller - * to pass sane values (and we don't count the thread number - * on HT machine but allow HT-compatible specifics to be - * passed on) - * * Note that for RAW events we allow user to use P4_CCCR_RESERVED * bits since we keep additional info here (for cache events and etc) - * - * XXX: HT wide things should check perf_paranoid_cpu() && - * CAP_SYS_ADMIN */ - event->hw.config |= event->attr.config & - (p4_config_pack_escr(P4_ESCR_MASK_HT) | - p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED)); - - event->hw.config &= ~P4_CCCR_FORCE_OVF; + event->hw.config |= event->attr.config; } rc = x86_setup_perfctr(event); @@ -660,8 +904,12 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) for (idx = 0; idx < x86_pmu.num_counters; idx++) { int overflow; - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active_mask)) { + /* catch in-flight IRQs */ + if (__test_and_clear_bit(idx, cpuc->running)) + handled++; continue; + } event = cpuc->events[idx]; hwc = &event->hw; diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 34b4dad..d490795 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -31,6 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) const struct cpuid_bit *cb; static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { + { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 }, { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 }, { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 }, { X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 }, diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index cd37469..3afb33f 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -257,14 +257,9 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) return mod_code_status; } - - - -static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; - static unsigned char *ftrace_nop_replace(void) { - return ftrace_nop; + return ideal_nop5; } static int @@ -338,62 +333,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int __init ftrace_dyn_arch_init(void *data) { - extern const unsigned char 
ftrace_test_p6nop[]; - extern const unsigned char ftrace_test_nop5[]; - extern const unsigned char ftrace_test_jmp[]; - int faulted = 0; - - /* - * There is no good nop for all x86 archs. - * We will default to using the P6_NOP5, but first we - * will test to make sure that the nop will actually - * work on this CPU. If it faults, we will then - * go to a lesser efficient 5 byte nop. If that fails - * we then just use a jmp as our nop. This isn't the most - * efficient nop, but we can not use a multi part nop - * since we would then risk being preempted in the middle - * of that nop, and if we enabled tracing then, it might - * cause a system crash. - * - * TODO: check the cpuid to determine the best nop. - */ - asm volatile ( - "ftrace_test_jmp:" - "jmp ftrace_test_p6nop\n" - "nop\n" - "nop\n" - "nop\n" /* 2 byte jmp + 3 bytes */ - "ftrace_test_p6nop:" - P6_NOP5 - "jmp 1f\n" - "ftrace_test_nop5:" - ".byte 0x66,0x66,0x66,0x66,0x90\n" - "1:" - ".section .fixup, \"ax\"\n" - "2: movl $1, %0\n" - " jmp ftrace_test_nop5\n" - "3: movl $2, %0\n" - " jmp 1b\n" - ".previous\n" - _ASM_EXTABLE(ftrace_test_p6nop, 2b) - _ASM_EXTABLE(ftrace_test_nop5, 3b) - : "=r"(faulted) : "0" (faulted)); - - switch (faulted) { - case 0: - pr_info("converting mcount calls to 0f 1f 44 00 00\n"); - memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); - break; - case 1: - pr_info("converting mcount calls to 66 66 66 66 90\n"); - memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); - break; - case 2: - pr_info("converting mcount calls to jmp . + 5\n"); - memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); - break; - } - /* The return code is retured via data */ *(unsigned long *)data = 0; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 410fdb3..7494999 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -506,7 +506,7 @@ static int hpet_assign_irq(struct hpet_dev *dev) { unsigned int irq; - irq = create_irq(); + irq = create_irq_nr(0, -1); if (!irq) return -EINVAL; diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c new file mode 100644 index 0000000..961b6b3 --- /dev/null +++ b/arch/x86/kernel/jump_label.c @@ -0,0 +1,50 @@ +/* + * jump label x86 support + * + * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> + * + */ +#include <linux/jump_label.h> +#include <linux/memory.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/cpu.h> +#include <asm/kprobes.h> +#include <asm/alternative.h> + +#ifdef HAVE_JUMP_LABEL + +union jump_code_union { + char code[JUMP_LABEL_NOP_SIZE]; + struct { + char jump; + int offset; + } __attribute__((packed)); +}; + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + union jump_code_union code; + + if (type == JUMP_LABEL_ENABLE) { + code.jump = 0xe9; + code.offset = entry->target - + (entry->code + JUMP_LABEL_NOP_SIZE); + } else + memcpy(&code, ideal_nop5, JUMP_LABEL_NOP_SIZE); + get_online_cpus(); + mutex_lock(&text_mutex); + text_poke_smp((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE); + mutex_unlock(&text_mutex); + put_online_cpus(); +} + +void arch_jump_label_text_poke_early(jump_label_t addr) +{ + text_poke_early((void *)addr, ideal_nop5, JUMP_LABEL_NOP_SIZE); +} + +#endif diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 770ebfb..1cbd54c 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -230,9 +230,6 @@ static int recover_probed_instruction(kprobe_opcode_t *buf, 
unsigned long addr) return 0; } -/* Dummy buffers for kallsyms_lookup */ -static char __dummy_buf[KSYM_NAME_LEN]; - /* Check if paddr is at an instruction boundary */ static int __kprobes can_probe(unsigned long paddr) { @@ -241,7 +238,7 @@ static int __kprobes can_probe(unsigned long paddr) struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; - if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) + if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) return 0; /* Decode instructions */ @@ -1129,7 +1126,7 @@ static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, *(unsigned long *)addr = val; } -void __kprobes kprobes_optinsn_template_holder(void) +static void __used __kprobes kprobes_optinsn_template_holder(void) { asm volatile ( ".global optprobe_template_entry\n" @@ -1221,7 +1218,8 @@ static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) } /* Check whether the address range is reserved */ if (ftrace_text_reserved(src, src + len - 1) || - alternatives_text_reserved(src, src + len - 1)) + alternatives_text_reserved(src, src + len - 1) || + jump_label_text_reserved(src, src + len - 1)) return -EBUSY; return len; @@ -1269,11 +1267,9 @@ static int __kprobes can_optimize(unsigned long paddr) unsigned long addr, size = 0, offset = 0; struct insn insn; kprobe_opcode_t buf[MAX_INSN_SIZE]; - /* Dummy buffers for lookup_symbol_attrs */ - static char __dummy_buf[KSYM_NAME_LEN]; /* Lookup symbol including addr */ - if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) + if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) return 0; /* Check there is enough space for a relative jump. */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186..8f29560 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,11 +239,13 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return module_bug_finalize(hdr, sechdrs, me); + /* make jump label nops */ + jump_label_apply_nops(me); + + return 0; } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); - module_bug_cleanup(mod); } diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c3a4fbb..00e1678 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -112,6 +112,7 @@ #include <asm/numa_64.h> #endif #include <asm/mce.h> +#include <asm/alternative.h> /* * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. @@ -726,6 +727,7 @@ void __init setup_arch(char **cmdline_p) { int acpi = 0; int k8 = 0; + unsigned long flags; #ifdef CONFIG_X86_32 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); @@ -1071,6 +1073,10 @@ void __init setup_arch(char **cmdline_p) x86_init.oem.banner(); mcheck_init(); + + local_irq_save(flags); + arch_init_ideal_nop5(); + local_irq_restore(flags); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 4c4508e..a24c6cf 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -251,6 +251,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; + WARN_ON_ONCE(in_nmi()); + /* * Synchronize this task's top level page-table * with the 'reference' page table. @@ -369,6 +371,8 @@ static noinline __kprobes int vmalloc_fault(unsigned long address) if (!(address >= VMALLOC_START && address < VMALLOC_END)) return -1; + WARN_ON_ONCE(in_nmi()); + /* * Copy kernel mappings over when needed. 
This can also * happen within a race in page table update. In the later diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index b3b531a..d87dd6d 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c @@ -631,6 +631,8 @@ bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, if (!pte) return false; + WARN_ON_ONCE(in_nmi()); + if (error_code & 2) kmemcheck_access(regs, address, KMEMCHECK_WRITE); else diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 1a5353a..b2bb5aa3 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -489,8 +489,9 @@ static void xen_hvm_setup_cpu_clockevents(void) __init void xen_hvm_init_time_ops(void) { /* vector callback is needed otherwise we cannot receive interrupts - * on cpu > 0 */ - if (!xen_have_vector_callback && num_present_cpus() > 1) + * on cpu > 0 and at this point we don't know how many cpus are + * available */ + if (!xen_have_vector_callback) return; if (!xen_feature(XENFEAT_hvm_safe_pvclock)) { printk(KERN_INFO "Xen doesn't support pvclock on HVM," diff --git a/block/blk-merge.c b/block/blk-merge.c index 3b0cd42..eafc94f 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -362,6 +362,18 @@ static int attempt_merge(struct request_queue *q, struct request *req, return 0; /* + * Don't merge file system requests and discard requests + */ + if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD)) + return 0; + + /* + * Don't merge discard requests and secure discard requests + */ + if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE)) + return 0; + + /* * not contiguous */ if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index b811f21..88681ac 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -105,7 +105,7 @@ config ACPI_EC_DEBUGFS Be aware that using this interface can confuse your Embedded Controller in a way that a normal reboot is not enough. You then - have to power of your system, and remove the laptop battery for + have to power off your system, and remove the laptop battery for some seconds. An Embedded Controller typically is available on laptops and reads sensor values like battery state and temperature. 
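The two checks added to attempt_merge() in block/blk-merge.c above amount to one rule: requests are merge candidates only when they agree on the REQ_DISCARD and REQ_SECURE bits of cmd_flags. A minimal stand-alone sketch of that rule, using a hypothetical helper name rather than anything from the block layer:

	/* Illustrative only: mirrors the new REQ_DISCARD/REQ_SECURE checks.
	 * Requests may merge only if they agree on every bit in 'must_match'. */
	static inline int cmd_flags_merge_compatible(unsigned int a_flags,
						     unsigned int b_flags,
						     unsigned int must_match)
	{
		return ((a_flags ^ b_flags) & must_match) == 0;
	}

	/* Hypothetical use, equivalent to the two early returns in the hunk:
	 *	if (!cmd_flags_merge_compatible(req->cmd_flags, next->cmd_flags,
	 *					REQ_DISCARD | REQ_SECURE))
	 *		return 0;
	 */

Expressing the test as an XOR against a mask keeps it easy to extend if further flag classes must never be mixed in a single merged request.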
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c index b76848c..6b115f6 100644 --- a/drivers/acpi/acpi_pad.c +++ b/drivers/acpi/acpi_pad.c @@ -382,31 +382,32 @@ static void acpi_pad_remove_sysfs(struct acpi_device *device) device_remove_file(&device->dev, &dev_attr_rrtime); } -/* Query firmware how many CPUs should be idle */ -static int acpi_pad_pur(acpi_handle handle, int *num_cpus) +/* + * Query firmware how many CPUs should be idle + * return -1 on failure + */ +static int acpi_pad_pur(acpi_handle handle) { struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; union acpi_object *package; - int rev, num, ret = -EINVAL; + int num = -1; if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PUR", NULL, &buffer))) - return -EINVAL; + return num; if (!buffer.length || !buffer.pointer) - return -EINVAL; + return num; package = buffer.pointer; - if (package->type != ACPI_TYPE_PACKAGE || package->package.count != 2) - goto out; - rev = package->package.elements[0].integer.value; - num = package->package.elements[1].integer.value; - if (rev != 1 || num < 0) - goto out; - *num_cpus = num; - ret = 0; -out: + + if (package->type == ACPI_TYPE_PACKAGE && + package->package.count == 2 && + package->package.elements[0].integer.value == 1) /* rev 1 */ + + num = package->package.elements[1].integer.value; + kfree(buffer.pointer); - return ret; + return num; } /* Notify firmware how many CPUs are idle */ @@ -433,7 +434,8 @@ static void acpi_pad_handle_notify(acpi_handle handle) uint32_t idle_cpus; mutex_lock(&isolated_cpus_lock); - if (acpi_pad_pur(handle, &num_cpus)) { + num_cpus = acpi_pad_pur(handle); + if (num_cpus < 0) { mutex_unlock(&isolated_cpus_lock); return; } diff --git a/drivers/acpi/acpica/aclocal.h b/drivers/acpi/acpica/aclocal.h index df85b53..7dad916 100644 --- a/drivers/acpi/acpica/aclocal.h +++ b/drivers/acpi/acpica/aclocal.h @@ -854,6 +854,7 @@ struct acpi_bit_register_info { ACPI_BITMASK_POWER_BUTTON_STATUS | \ ACPI_BITMASK_SLEEP_BUTTON_STATUS | \ ACPI_BITMASK_RT_CLOCK_STATUS | \ + ACPI_BITMASK_PCIEXP_WAKE_DISABLE | \ ACPI_BITMASK_WAKE_STATUS) #define ACPI_BITMASK_TIMER_ENABLE 0x0001 diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 74c24d5..4093522 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -109,7 +109,7 @@ void acpi_ex_enter_interpreter(void) * * DESCRIPTION: Reacquire the interpreter execution region from within the * interpreter code. Failure to enter the interpreter region is a - * fatal system error. Used in conjuction with + * fatal system error. 
Used in conjunction with * relinquish_interpreter * ******************************************************************************/ diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c index 22cfcfb..491191e 100644 --- a/drivers/acpi/acpica/rsutils.c +++ b/drivers/acpi/acpica/rsutils.c @@ -149,7 +149,7 @@ acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type) /* * 16-, 32-, and 64-bit cases must use the move macros that perform - * endian conversion and/or accomodate hardware that cannot perform + * endian conversion and/or accommodate hardware that cannot perform * misaligned memory transfers */ case ACPI_RSC_MOVE16: diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 907e350..fca34cc 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -34,6 +34,6 @@ config ACPI_APEI_ERST_DEBUG depends on ACPI_APEI help ERST is a way provided by APEI to save and retrieve hardware - error infomation to and from a persistent store. Enable this + error information to and from a persistent store. Enable this if you want to debugging and testing the ERST kernel support and firmware implementation. diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 73fd0c7..4a904a4 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -445,11 +445,15 @@ EXPORT_SYMBOL_GPL(apei_resources_sub); int apei_resources_request(struct apei_resources *resources, const char *desc) { - struct apei_res *res, *res_bak; + struct apei_res *res, *res_bak = NULL; struct resource *r; + int rc; - apei_resources_sub(resources, &apei_resources_all); + rc = apei_resources_sub(resources, &apei_resources_all); + if (rc) + return rc; + rc = -EINVAL; list_for_each_entry(res, &resources->iomem, list) { r = request_mem_region(res->start, res->end - res->start, desc); @@ -475,7 +479,11 @@ int apei_resources_request(struct apei_resources *resources, } } - apei_resources_merge(&apei_resources_all, resources); + rc = apei_resources_merge(&apei_resources_all, resources); + if (rc) { + pr_err(APEI_PFX "Fail to merge resources!\n"); + goto err_unmap_ioport; + } return 0; err_unmap_ioport: @@ -491,12 +499,13 @@ err_unmap_iomem: break; release_mem_region(res->start, res->end - res->start); } - return -EINVAL; + return rc; } EXPORT_SYMBOL_GPL(apei_resources_request); void apei_resources_release(struct apei_resources *resources) { + int rc; struct apei_res *res; list_for_each_entry(res, &resources->iomem, list) @@ -504,7 +513,9 @@ void apei_resources_release(struct apei_resources *resources) list_for_each_entry(res, &resources->ioport, list) release_region(res->start, res->end - res->start); - apei_resources_sub(&apei_resources_all, resources); + rc = apei_resources_sub(&apei_resources_all, resources); + if (rc) + pr_err(APEI_PFX "Fail to sub resources!\n"); } EXPORT_SYMBOL_GPL(apei_resources_release); diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index 465c885..cf29df6 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -426,7 +426,9 @@ DEFINE_SIMPLE_ATTRIBUTE(error_inject_fops, NULL, static int einj_check_table(struct acpi_table_einj *einj_tab) { - if (einj_tab->header_length != sizeof(struct acpi_table_einj)) + if ((einj_tab->header_length != + (sizeof(struct acpi_table_einj) - sizeof(einj_tab->header))) + && (einj_tab->header_length != sizeof(struct acpi_table_einj))) return -EINVAL; if (einj_tab->header.length < sizeof(struct acpi_table_einj)) return -EINVAL; diff --git 
a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index 5281ddd..da1228a 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -2,7 +2,7 @@ * APEI Error Record Serialization Table debug support * * ERST is a way provided by APEI to save and retrieve hardware error - * infomation to and from a persistent store. This file provide the + * information to and from a persistent store. This file provide the * debugging/testing support for ERST kernel support and firmware * implementation. * @@ -111,11 +111,13 @@ retry: goto out; } if (len > erst_dbg_buf_len) { - kfree(erst_dbg_buf); + void *p; rc = -ENOMEM; - erst_dbg_buf = kmalloc(len, GFP_KERNEL); - if (!erst_dbg_buf) + p = kmalloc(len, GFP_KERNEL); + if (!p) goto out; + kfree(erst_dbg_buf); + erst_dbg_buf = p; erst_dbg_buf_len = len; goto retry; } @@ -150,11 +152,13 @@ static ssize_t erst_dbg_write(struct file *filp, const char __user *ubuf, if (mutex_lock_interruptible(&erst_dbg_mutex)) return -EINTR; if (usize > erst_dbg_buf_len) { - kfree(erst_dbg_buf); + void *p; rc = -ENOMEM; - erst_dbg_buf = kmalloc(usize, GFP_KERNEL); - if (!erst_dbg_buf) + p = kmalloc(usize, GFP_KERNEL); + if (!p) goto out; + kfree(erst_dbg_buf); + erst_dbg_buf = p; erst_dbg_buf_len = usize; } rc = copy_from_user(erst_dbg_buf, ubuf, usize); diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 18645f4..1211c03 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -2,7 +2,7 @@ * APEI Error Record Serialization Table support * * ERST is a way provided by APEI to save and retrieve hardware error - * infomation to and from a persistent store. + * information to and from a persistent store. * * For more information about ERST, please refer to ACPI Specification * version 4.0, section 17.4. 
@@ -266,13 +266,30 @@ static int erst_exec_move_data(struct apei_exec_context *ctx, { int rc; u64 offset; + void *src, *dst; + + /* ioremap does not work in interrupt context */ + if (in_interrupt()) { + pr_warning(ERST_PFX + "MOVE_DATA can not be used in interrupt context"); + return -EBUSY; + } rc = __apei_exec_read_register(entry, &offset); if (rc) return rc; - memmove((void *)ctx->dst_base + offset, - (void *)ctx->src_base + offset, - ctx->var2); + + src = ioremap(ctx->src_base + offset, ctx->var2); + if (!src) + return -ENOMEM; + dst = ioremap(ctx->dst_base + offset, ctx->var2); + if (!dst) + return -ENOMEM; + + memmove(dst, src, ctx->var2); + + iounmap(src); + iounmap(dst); return 0; } @@ -750,7 +767,9 @@ __setup("erst_disable", setup_erst_disable); static int erst_check_table(struct acpi_table_erst *erst_tab) { - if (erst_tab->header_length != sizeof(struct acpi_table_erst)) + if ((erst_tab->header_length != + (sizeof(struct acpi_table_erst) - sizeof(erst_tab->header))) + && (erst_tab->header_length != sizeof(struct acpi_table_einj))) return -EINVAL; if (erst_tab->header.length < sizeof(struct acpi_table_erst)) return -EINVAL; diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 385a605..0d505e5 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -302,7 +302,7 @@ static int __devinit ghes_probe(struct platform_device *ghes_dev) struct ghes *ghes = NULL; int rc = -EINVAL; - generic = ghes_dev->dev.platform_data; + generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; if (!generic->enabled) return -ENODEV; diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 343168d..1a3508a 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -137,20 +137,23 @@ static int hest_parse_ghes_count(struct acpi_hest_header *hest_hdr, void *data) static int hest_parse_ghes(struct acpi_hest_header *hest_hdr, void *data) { - struct acpi_hest_generic *generic; struct platform_device *ghes_dev; struct ghes_arr *ghes_arr = data; int rc; if (hest_hdr->type != ACPI_HEST_TYPE_GENERIC_ERROR) return 0; - generic = (struct acpi_hest_generic *)hest_hdr; - if (!generic->enabled) + + if (!((struct acpi_hest_generic *)hest_hdr)->enabled) return 0; ghes_dev = platform_device_alloc("GHES", hest_hdr->source_id); if (!ghes_dev) return -ENOMEM; - ghes_dev->dev.platform_data = generic; + + rc = platform_device_add_data(ghes_dev, &hest_hdr, sizeof(void *)); + if (rc) + goto err; + rc = platform_device_add(ghes_dev); if (rc) goto err; diff --git a/drivers/acpi/atomicio.c b/drivers/acpi/atomicio.c index 8f8bd73..542e539 100644 --- a/drivers/acpi/atomicio.c +++ b/drivers/acpi/atomicio.c @@ -142,7 +142,7 @@ static void __iomem *acpi_pre_map(phys_addr_t paddr, list_add_tail_rcu(&map->list, &acpi_iomaps); spin_unlock_irqrestore(&acpi_iomaps_lock, flags); - return vaddr + (paddr - pg_off); + return map->vaddr + (paddr - map->paddr); err_unmap: iounmap(vaddr); return NULL; diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index dc58402..9841720 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -273,7 +273,6 @@ static enum power_supply_property energy_battery_props[] = { POWER_SUPPLY_PROP_CYCLE_COUNT, POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN, POWER_SUPPLY_PROP_VOLTAGE_NOW, - POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_POWER_NOW, POWER_SUPPLY_PROP_ENERGY_FULL_DESIGN, POWER_SUPPLY_PROP_ENERGY_FULL, diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c index 2bb28b9..f761960 100644 --- a/drivers/acpi/blacklist.c 
+++ b/drivers/acpi/blacklist.c @@ -183,6 +183,8 @@ static int __init dmi_disable_osi_vista(const struct dmi_system_id *d) { printk(KERN_NOTICE PREFIX "DMI detected: %s\n", d->ident); acpi_osi_setup("!Windows 2006"); + acpi_osi_setup("!Windows 2006 SP1"); + acpi_osi_setup("!Windows 2006 SP2"); return 0; } static int __init dmi_disable_osi_win7(const struct dmi_system_id *d) @@ -226,6 +228,14 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { }, }, { + .callback = dmi_disable_osi_vista, + .ident = "Toshiba Satellite L355", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "Satellite L355"), + }, + }, + { .callback = dmi_disable_osi_win7, .ident = "ASUS K50IJ", .matches = { @@ -233,6 +243,14 @@ static struct dmi_system_id acpi_osi_dmi_table[] __initdata = { DMI_MATCH(DMI_PRODUCT_NAME, "K50IJ"), }, }, + { + .callback = dmi_disable_osi_vista, + .ident = "Toshiba P305D", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_NAME, "Satellite P305D"), + }, + }, /* * BIOS invocation of _OSI(Linux) is almost always a BIOS bug. diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 5c221ab..310e3b9 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -55,7 +55,7 @@ EXPORT_SYMBOL(acpi_root_dir); static int set_power_nocheck(const struct dmi_system_id *id) { printk(KERN_NOTICE PREFIX "%s detected - " - "disable power check in power transistion\n", id->ident); + "disable power check in power transition\n", id->ident); acpi_power_nocheck = 1; return 0; } @@ -80,23 +80,15 @@ static int set_copy_dsdt(const struct dmi_system_id *id) static struct dmi_system_id dsdt_dmi_table[] __initdata = { /* - * Insyde BIOS on some TOSHIBA machines corrupt the DSDT. + * Invoke DSDT corruption work-around on all Toshiba Satellite. * https://bugzilla.kernel.org/show_bug.cgi?id=14679 */ { .callback = set_copy_dsdt, - .ident = "TOSHIBA Satellite A505", + .ident = "TOSHIBA Satellite", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), - DMI_MATCH(DMI_PRODUCT_NAME, "Satellite A505"), - }, - }, - { - .callback = set_copy_dsdt, - .ident = "TOSHIBA Satellite L505D", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), - DMI_MATCH(DMI_PRODUCT_NAME, "Satellite L505D"), + DMI_MATCH(DMI_PRODUCT_NAME, "Satellite"), }, }, {} @@ -1027,7 +1019,7 @@ static int __init acpi_init(void) /* * If the laptop falls into the DMI check table, the power state check - * will be disabled in the course of device power transistion. + * will be disabled in the course of device power transition. 
*/ dmi_check_system(power_nocheck_dmi_table); diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c index 8a3b840..d94d295 100644 --- a/drivers/acpi/fan.c +++ b/drivers/acpi/fan.c @@ -369,7 +369,9 @@ static void __exit acpi_fan_exit(void) acpi_bus_unregister_driver(&acpi_fan_driver); +#ifdef CONFIG_ACPI_PROCFS remove_proc_entry(ACPI_FAN_CLASS, acpi_root_dir); +#endif return; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index e9699aae..b618f88 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -29,12 +29,6 @@ static int set_no_mwait(const struct dmi_system_id *id) static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { { - set_no_mwait, "IFL91 board", { - DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), - DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), - DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, - { set_no_mwait, "Extensa 5220", { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), DMI_MATCH(DMI_SYS_VENDOR, "Acer"), diff --git a/drivers/acpi/processor_driver.c b/drivers/acpi/processor_driver.c index 1560218..347eb21 100644 --- a/drivers/acpi/processor_driver.c +++ b/drivers/acpi/processor_driver.c @@ -850,7 +850,7 @@ static int __init acpi_processor_init(void) printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n", acpi_idle_driver.name); } else { - printk(KERN_DEBUG "ACPI: acpi_idle yielding to %s", + printk(KERN_DEBUG "ACPI: acpi_idle yielding to %s\n", cpuidle_get_driver()->name); } diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index ba1bd26..3a73a93 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -447,8 +447,8 @@ int acpi_processor_notify_smm(struct module *calling_module) if (!try_module_get(calling_module)) return -EINVAL; - /* is_done is set to negative if an error occured, - * and to postitive if _no_ error occured, but SMM + /* is_done is set to negative if an error occurred, + * and to postitive if _no_ error occurred, but SMM * was already notified. This avoids double notification * which might lead to unexpected results... 
*/ diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index cf82989..4754ff6 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -363,6 +363,12 @@ static int __init init_old_suspend_ordering(const struct dmi_system_id *d) return 0; } +static int __init init_nvs_nosave(const struct dmi_system_id *d) +{ + acpi_nvs_nosave(); + return 0; +} + static struct dmi_system_id __initdata acpisleep_dmi_table[] = { { .callback = init_old_suspend_ordering, @@ -397,6 +403,22 @@ static struct dmi_system_id __initdata acpisleep_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "CF51-2L"), }, }, + { + .callback = init_nvs_nosave, + .ident = "Sony Vaio VGN-SR11M", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "VGN-SR11M"), + }, + }, + { + .callback = init_nvs_nosave, + .ident = "Everex StepNote Series", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Everex Systems, Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Everex StepNote Series"), + }, + }, {}, }; #endif /* CONFIG_SUSPEND */ diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 68e2e45..f8588f8 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -100,7 +100,7 @@ static const struct acpi_dlevel acpi_debug_levels[] = { ACPI_DEBUG_INIT(ACPI_LV_EVENTS), }; -static int param_get_debug_layer(char *buffer, struct kernel_param *kp) +static int param_get_debug_layer(char *buffer, const struct kernel_param *kp) { int result = 0; int i; @@ -128,7 +128,7 @@ static int param_get_debug_layer(char *buffer, struct kernel_param *kp) return result; } -static int param_get_debug_level(char *buffer, struct kernel_param *kp) +static int param_get_debug_level(char *buffer, const struct kernel_param *kp) { int result = 0; int i; @@ -149,10 +149,18 @@ static int param_get_debug_level(char *buffer, struct kernel_param *kp) return result; } -module_param_call(debug_layer, param_set_uint, param_get_debug_layer, - &acpi_dbg_layer, 0644); -module_param_call(debug_level, param_set_uint, param_get_debug_level, - &acpi_dbg_level, 0644); +static struct kernel_param_ops param_ops_debug_layer = { + .set = param_set_uint, + .get = param_get_debug_layer, +}; + +static struct kernel_param_ops param_ops_debug_level = { + .set = param_set_uint, + .get = param_get_debug_level, +}; + +module_param_cb(debug_layer, &param_ops_debug_layer, &acpi_dbg_layer, 0644); +module_param_cb(debug_level, &param_ops_debug_level, &acpi_dbg_level, 0644); static char trace_method_name[6]; module_param_string(trace_method_name, trace_method_name, 6, 0644); diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index c5fef01..b836761 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -59,8 +59,8 @@ acpi_backlight_cap_match(acpi_handle handle, u32 level, void *context, "support\n")); *cap |= ACPI_VIDEO_BACKLIGHT; if (ACPI_FAILURE(acpi_get_handle(handle, "_BQC", &h_dummy))) - printk(KERN_WARNING FW_BUG PREFIX "ACPI brightness " - "control misses _BQC function\n"); + printk(KERN_WARNING FW_BUG PREFIX "No _BQC method, " + "cannot determine initial brightness\n"); /* We have backlight support, no need to scan further */ return AE_CTRL_TERMINATE; } diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index ff1c945..99d0e5a 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -90,6 +90,10 @@ static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg); static int ahci_pci_device_resume(struct pci_dev *pdev); #endif +static struct scsi_host_template ahci_sht = { + AHCI_SHT("ahci"), +}; + static
struct ata_port_operations ahci_vt8251_ops = { .inherits = &ahci_ops, .hardreset = ahci_vt8251_hardreset, diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 474427b..e5fdeeb 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -298,7 +298,17 @@ struct ahci_host_priv { extern int ahci_ignore_sss; -extern struct scsi_host_template ahci_sht; +extern struct device_attribute *ahci_shost_attrs[]; +extern struct device_attribute *ahci_sdev_attrs[]; + +#define AHCI_SHT(drv_name) \ + ATA_NCQ_SHT(drv_name), \ + .can_queue = AHCI_MAX_CMDS - 1, \ + .sg_tablesize = AHCI_MAX_SG, \ + .dma_boundary = AHCI_DMA_BOUNDARY, \ + .shost_attrs = ahci_shost_attrs, \ + .sdev_attrs = ahci_sdev_attrs + extern struct ata_port_operations ahci_ops; void ahci_save_initial_config(struct device *dev, diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 4e97f33..84b6432 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -23,6 +23,10 @@ #include <linux/ahci_platform.h> #include "ahci.h" +static struct scsi_host_template ahci_platform_sht = { + AHCI_SHT("ahci_platform"), +}; + static int __init ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -145,7 +149,7 @@ static int __init ahci_probe(struct platform_device *pdev) ahci_print_info(host, "platform"); rc = ata_host_activate(host, irq, ahci_interrupt, IRQF_SHARED, - &ahci_sht); + &ahci_platform_sht); if (rc) goto err0; diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index 68dc678..8eea309 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -121,7 +121,7 @@ static DEVICE_ATTR(ahci_port_cmd, S_IRUGO, ahci_show_port_cmd, NULL); static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO, ahci_read_em_buffer, ahci_store_em_buffer); -static struct device_attribute *ahci_shost_attrs[] = { +struct device_attribute *ahci_shost_attrs[] = { &dev_attr_link_power_management_policy, &dev_attr_em_message_type, &dev_attr_em_message, @@ -132,22 +132,14 @@ static struct device_attribute *ahci_shost_attrs[] = { &dev_attr_em_buffer, NULL }; +EXPORT_SYMBOL_GPL(ahci_shost_attrs); -static struct device_attribute *ahci_sdev_attrs[] = { +struct device_attribute *ahci_sdev_attrs[] = { &dev_attr_sw_activity, &dev_attr_unload_heads, NULL }; - -struct scsi_host_template ahci_sht = { - ATA_NCQ_SHT("ahci"), - .can_queue = AHCI_MAX_CMDS - 1, - .sg_tablesize = AHCI_MAX_SG, - .dma_boundary = AHCI_DMA_BOUNDARY, - .shost_attrs = ahci_shost_attrs, - .sdev_attrs = ahci_sdev_attrs, -}; -EXPORT_SYMBOL_GPL(ahci_sht); +EXPORT_SYMBOL_GPL(ahci_sdev_attrs); struct ata_port_operations ahci_ops = { .inherits = &sata_pmp_port_ops, diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index b1cbeb5..37a2bb5 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2369,7 +2369,7 @@ static void pkt_release_dev(struct pktcdvd_device *pd, int flush) pkt_shrink_pktlist(pd); } -static struct pktcdvd_device *pkt_find_dev_from_minor(int dev_minor) +static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor) { if (dev_minor >= MAX_WRITERS) return NULL; diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 3822b4f..7bd7c45 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -305,6 +305,9 @@ static int num_force_kipmid; #ifdef CONFIG_PCI static int pci_registered; #endif +#ifdef CONFIG_ACPI +static int pnp_registered; +#endif #ifdef CONFIG_PPC_OF static int of_registered; #endif @@ -2126,7 +2129,7 @@ static int 
__devinit ipmi_pnp_probe(struct pnp_dev *dev, { struct acpi_device *acpi_dev; struct smi_info *info; - struct resource *res; + struct resource *res, *res_second; acpi_handle handle; acpi_status status; unsigned long long tmp; @@ -2182,13 +2185,13 @@ static int __devinit ipmi_pnp_probe(struct pnp_dev *dev, info->io.addr_data = res->start; info->io.regspacing = DEFAULT_REGSPACING; - res = pnp_get_resource(dev, + res_second = pnp_get_resource(dev, (info->io.addr_type == IPMI_IO_ADDR_SPACE) ? IORESOURCE_IO : IORESOURCE_MEM, 1); - if (res) { - if (res->start > info->io.addr_data) - info->io.regspacing = res->start - info->io.addr_data; + if (res_second) { + if (res_second->start > info->io.addr_data) + info->io.regspacing = res_second->start - info->io.addr_data; } info->io.regsize = DEFAULT_REGSPACING; info->io.regshift = 0; @@ -3359,6 +3362,7 @@ static __devinit int init_ipmi_si(void) #ifdef CONFIG_ACPI pnp_register_driver(&ipmi_pnp_driver); + pnp_registered = 1; #endif #ifdef CONFIG_DMI @@ -3526,7 +3530,8 @@ static __exit void cleanup_ipmi_si(void) pci_unregister_driver(&ipmi_pci_driver); #endif #ifdef CONFIG_ACPI - pnp_unregister_driver(&ipmi_pnp_driver); + if (pnp_registered) + pnp_unregister_driver(&ipmi_pnp_driver); #endif #ifdef CONFIG_PPC_OF diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index c2408bb..f508690 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -80,7 +80,7 @@ * Limiting Performance Impact * --------------------------- * C states, especially those with large exit latencies, can have a real - * noticable impact on workloads, which is not acceptable for most sysadmins, + * noticeable impact on workloads, which is not acceptable for most sysadmins, * and in addition, less performance has a power price of its own. * * As a general rule of thumb, menu assumes that the following heuristic diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 86c5ae9..411d5bf 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -162,7 +162,7 @@ static int mv_is_err_intr(u32 intr_cause) static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) { - u32 val = (1 << (1 + (chan->idx * 16))); + u32 val = ~(1 << (chan->idx * 16)); dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val); __raw_writel(val, XOR_INTR_CAUSE(chan)); } diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index fb64cf3..eb6b54d 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -580,7 +580,6 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg( sh_chan = to_sh_chan(chan); param = chan->private; - slave_addr = param->config->addr; /* Someone calling slave DMA on a public channel? */ if (!param || !sg_len) { @@ -589,6 +588,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg( return NULL; } + slave_addr = param->config->addr; + /* * if (param != NULL), this is a successfully requested slave channel, * therefore param->config != NULL too. 
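The shdma hunk above moves the slave_addr assignment below the NULL check so that param is validated before it is dereferenced. A minimal standalone sketch of the same rule, with hypothetical type and function names:

#include <linux/errno.h>

struct slave_config  { unsigned long addr; };
struct slave_channel { struct slave_config *config; };

static int setup_slave(struct slave_channel *param, unsigned int sg_len,
		       unsigned long *slave_addr)
{
	/* reject NULL/empty input first ... */
	if (!param || !sg_len)
		return -EINVAL;

	/* ... and only then follow the pointer chain */
	*slave_addr = param->config->addr;
	return 0;
}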
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 3630308..6b21e25 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -339,6 +339,9 @@ static void edac_mc_workq_teardown(struct mem_ctl_info *mci) { int status; + if (mci->op_state != OP_RUNNING_POLL) + return; + status = cancel_delayed_work(&mci->work); if (status == 0) { debugf0("%s() not canceled, flush the queue\n", diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index e0187d1..0fd5b85 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1140,6 +1140,7 @@ static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = { ATTR_COUNTER(0), ATTR_COUNTER(1), ATTR_COUNTER(2), + { .attr = { .name = NULL } } }; static struct mcidev_sysfs_group i7core_udimm_counters = { diff --git a/drivers/gpu/drm/drm_buffer.c b/drivers/gpu/drm/drm_buffer.c index 55d03ed..529a0db 100644 --- a/drivers/gpu/drm/drm_buffer.c +++ b/drivers/gpu/drm/drm_buffer.c @@ -98,8 +98,8 @@ EXPORT_SYMBOL(drm_buffer_alloc); * user_data: A pointer the data that is copied to the buffer. * size: The Number of bytes to copy. */ -extern int drm_buffer_copy_from_user(struct drm_buffer *buf, - void __user *user_data, int size) +int drm_buffer_copy_from_user(struct drm_buffer *buf, + void __user *user_data, int size) { int nr_pages = size / PAGE_SIZE + 1; int idx; @@ -163,7 +163,7 @@ void *drm_buffer_read_object(struct drm_buffer *buf, { int idx = drm_buffer_index(buf); int page = drm_buffer_page(buf); - void *obj = 0; + void *obj = NULL; if (idx + objsize <= PAGE_SIZE) { obj = &buf->data[page][idx]; diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index bf92d07..5663d27 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -148,7 +148,7 @@ int drm_gem_object_init(struct drm_device *dev, return -ENOMEM; kref_init(&obj->refcount); - kref_init(&obj->handlecount); + atomic_set(&obj->handle_count, 0); obj->size = size; atomic_inc(&dev->object_count); @@ -462,28 +462,6 @@ drm_gem_object_free(struct kref *kref) } EXPORT_SYMBOL(drm_gem_object_free); -/** - * Called after the last reference to the object has been lost. 
- * Must be called without holding struct_mutex - * - * Frees the object - */ -void -drm_gem_object_free_unlocked(struct kref *kref) -{ - struct drm_gem_object *obj = (struct drm_gem_object *) kref; - struct drm_device *dev = obj->dev; - - if (dev->driver->gem_free_object_unlocked != NULL) - dev->driver->gem_free_object_unlocked(obj); - else if (dev->driver->gem_free_object != NULL) { - mutex_lock(&dev->struct_mutex); - dev->driver->gem_free_object(obj); - mutex_unlock(&dev->struct_mutex); - } -} -EXPORT_SYMBOL(drm_gem_object_free_unlocked); - static void drm_gem_object_ref_bug(struct kref *list_kref) { BUG(); @@ -496,12 +474,8 @@ static void drm_gem_object_ref_bug(struct kref *list_kref) * called before drm_gem_object_free or we'll be touching * freed memory */ -void -drm_gem_object_handle_free(struct kref *kref) +void drm_gem_object_handle_free(struct drm_gem_object *obj) { - struct drm_gem_object *obj = container_of(kref, - struct drm_gem_object, - handlecount); struct drm_device *dev = obj->dev; /* Remove any name for this object */ @@ -528,6 +502,10 @@ void drm_gem_vm_open(struct vm_area_struct *vma) struct drm_gem_object *obj = vma->vm_private_data; drm_gem_object_reference(obj); + + mutex_lock(&obj->dev->struct_mutex); + drm_vm_open_locked(vma); + mutex_unlock(&obj->dev->struct_mutex); } EXPORT_SYMBOL(drm_gem_vm_open); @@ -535,7 +513,10 @@ void drm_gem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; - drm_gem_object_unreference_unlocked(obj); + mutex_lock(&obj->dev->struct_mutex); + drm_vm_close_locked(vma); + drm_gem_object_unreference(obj); + mutex_unlock(&obj->dev->struct_mutex); } EXPORT_SYMBOL(drm_gem_vm_close); diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index 2ef2c78..974e970 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -255,7 +255,7 @@ int drm_gem_one_name_info(int id, void *ptr, void *data) seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, - atomic_read(&obj->handlecount.refcount), + atomic_read(&obj->handle_count), atomic_read(&obj->refcount.refcount)); return 0; } diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index fda6746..5df4506 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -433,15 +433,7 @@ static void drm_vm_open(struct vm_area_struct *vma) mutex_unlock(&dev->struct_mutex); } -/** - * \c close method for all virtual memory types. - * - * \param vma virtual memory area. - * - * Search the \p vma private data entry in drm_device::vmalist, unlink it, and - * free it. - */ -static void drm_vm_close(struct vm_area_struct *vma) +void drm_vm_close_locked(struct vm_area_struct *vma) { struct drm_file *priv = vma->vm_file->private_data; struct drm_device *dev = priv->minor->dev; @@ -451,7 +443,6 @@ static void drm_vm_close(struct vm_area_struct *vma) vma->vm_start, vma->vm_end - vma->vm_start); atomic_dec(&dev->vma_count); - mutex_lock(&dev->struct_mutex); list_for_each_entry_safe(pt, temp, &dev->vmalist, head) { if (pt->vma == vma) { list_del(&pt->head); @@ -459,6 +450,23 @@ static void drm_vm_close(struct vm_area_struct *vma) break; } } +} + +/** + * \c close method for all virtual memory types. + * + * \param vma virtual memory area. + * + * Search the \p vma private data entry in drm_device::vmalist, unlink it, and + * free it. 
+ */ +static void drm_vm_close(struct vm_area_struct *vma) +{ + struct drm_file *priv = vma->vm_file->private_data; + struct drm_device *dev = priv->minor->dev; + + mutex_lock(&dev->struct_mutex); + drm_vm_close_locked(vma); mutex_unlock(&dev->struct_mutex); } diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index 61b4caf..fb07e73 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -116,7 +116,7 @@ static int i810_mmap_buffers(struct file *filp, struct vm_area_struct *vma) static const struct file_operations i810_buffer_fops = { .open = drm_open, .release = drm_release, - .unlocked_ioctl = drm_ioctl, + .unlocked_ioctl = i810_ioctl, .mmap = i810_mmap_buffers, .fasync = drm_fasync, }; diff --git a/drivers/gpu/drm/i830/i830_dma.c b/drivers/gpu/drm/i830/i830_dma.c index 671aa18..cc92c7e 100644 --- a/drivers/gpu/drm/i830/i830_dma.c +++ b/drivers/gpu/drm/i830/i830_dma.c @@ -118,7 +118,7 @@ static int i830_mmap_buffers(struct file *filp, struct vm_area_struct *vma) static const struct file_operations i830_buffer_fops = { .open = drm_open, .release = drm_release, - .unlocked_ioctl = drm_ioctl, + .unlocked_ioctl = i830_ioctl, .mmap = i830_mmap_buffers, .fasync = drm_fasync, }; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 9d67b48..c74e4e8 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1787,9 +1787,9 @@ unsigned long i915_chipset_val(struct drm_i915_private *dev_priv) } } - div_u64(diff, diff1); + diff = div_u64(diff, diff1); ret = ((m * diff) + c); - div_u64(ret, 10); + ret = div_u64(ret, 10); dev_priv->last_count1 = total_count; dev_priv->last_time1 = now; @@ -1858,7 +1858,7 @@ void i915_update_gfx_val(struct drm_i915_private *dev_priv) /* More magic constants... */ diff = diff * 1181; - div_u64(diff, diffms * 10); + diff = div_u64(diff, diffms * 10); dev_priv->gfx_power = diff; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cf4ffbe..90b1d67 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -136,14 +136,12 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, return -ENOMEM; ret = drm_gem_handle_create(file_priv, obj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(obj); if (ret) { - drm_gem_object_unreference_unlocked(obj); return ret; } - /* Sink the floating reference from kref_init(handlecount) */ - drm_gem_object_handle_unreference_unlocked(obj); - args->handle = handle; return 0; } @@ -471,14 +469,17 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, return -ENOENT; obj_priv = to_intel_bo(obj); - /* Bounds check source. - * - * XXX: This could use review for overflow issues... - */ - if (args->offset > obj->size || args->size > obj->size || - args->offset + args->size > obj->size) { - drm_gem_object_unreference_unlocked(obj); - return -EINVAL; + /* Bounds check source. 
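The i915_dma.c hunks above work because div_u64() returns the quotient rather than dividing in place; discarding the return value makes the call a no-op. A hedged sketch of the corrected usage, with a hypothetical helper and an arbitrary scale factor:

#include <linux/math64.h>
#include <linux/types.h>

static u64 scale_sample(u64 count, u32 divisor)
{
	u64 val = count * 1181;		/* arbitrary fixed-point factor */

	val = div_u64(val, divisor);	/* keep the quotient */
	return div_u64(val, 10);	/* and again for the final scale */
}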
*/ + if (args->offset > obj->size || args->size > obj->size - args->offset) { + ret = -EINVAL; + goto err; + } + + if (!access_ok(VERIFY_WRITE, + (char __user *)(uintptr_t)args->data_ptr, + args->size)) { + ret = -EFAULT; + goto err; } if (i915_gem_object_needs_bit17_swizzle(obj)) { @@ -490,8 +491,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, file_priv); } +err: drm_gem_object_unreference_unlocked(obj); - return ret; } @@ -580,8 +581,6 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj, user_data = (char __user *) (uintptr_t) args->data_ptr; remain = args->size; - if (!access_ok(VERIFY_READ, user_data, remain)) - return -EFAULT; mutex_lock(&dev->struct_mutex); @@ -934,14 +933,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return -ENOENT; obj_priv = to_intel_bo(obj); - /* Bounds check destination. - * - * XXX: This could use review for overflow issues... - */ - if (args->offset > obj->size || args->size > obj->size || - args->offset + args->size > obj->size) { - drm_gem_object_unreference_unlocked(obj); - return -EINVAL; + /* Bounds check destination. */ + if (args->offset > obj->size || args->size > obj->size - args->offset) { + ret = -EINVAL; + goto err; + } + + if (!access_ok(VERIFY_READ, + (char __user *)(uintptr_t)args->data_ptr, + args->size)) { + ret = -EFAULT; + goto err; } /* We can only do the GTT pwrite on untiled buffers, as otherwise @@ -975,8 +977,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, DRM_INFO("pwrite failed %d\n", ret); #endif +err: drm_gem_object_unreference_unlocked(obj); - return ret; } @@ -2400,7 +2402,7 @@ i915_gem_clear_fence_reg(struct drm_gem_object *obj) I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0); break; case 3: - if (obj_priv->fence_reg > 8) + if (obj_priv->fence_reg >= 8) fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg - 8) * 4; else case 2: @@ -3258,6 +3260,8 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, (int) reloc->offset, reloc->read_domains, reloc->write_domain); + drm_gem_object_unreference(target_obj); + i915_gem_object_unpin(obj); return -EINVAL; } if (reloc->write_domain & I915_GEM_DOMAIN_CPU || diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index e85246e..5c428fa 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -93,7 +93,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, unsigned alignmen { drm_i915_private_t *dev_priv = dev->dev_private; struct list_head eviction_list, unwind_list; - struct drm_i915_gem_object *obj_priv, *tmp_obj_priv; + struct drm_i915_gem_object *obj_priv; struct list_head *render_iter, *bsd_iter; int ret = 0; @@ -175,39 +175,34 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, unsigned alignmen return -ENOSPC; found: + /* drm_mm doesn't allow any other other operations while + * scanning, therefore store to be evicted objects on a + * temporary list. */ INIT_LIST_HEAD(&eviction_list); - list_for_each_entry_safe(obj_priv, tmp_obj_priv, - &unwind_list, evict_list) { + while (!list_empty(&unwind_list)) { + obj_priv = list_first_entry(&unwind_list, + struct drm_i915_gem_object, + evict_list); if (drm_mm_scan_remove_block(obj_priv->gtt_space)) { - /* drm_mm doesn't allow any other other operations while - * scanning, therefore store to be evicted objects on a - * temporary list. 
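The pread/pwrite bounds checks above are rewritten to avoid integer overflow: offset + size can wrap for large values, while size > obj_size - offset is safe once offset <= obj_size has been established by the first clause. A small standalone illustration with hypothetical names:

#include <linux/errno.h>
#include <linux/types.h>

static int check_range(u64 offset, u64 size, u64 obj_size)
{
	/* offset is validated first, so obj_size - offset cannot wrap */
	if (offset > obj_size || size > obj_size - offset)
		return -EINVAL;
	return 0;
}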
*/ list_move(&obj_priv->evict_list, &eviction_list); - } else - drm_gem_object_unreference(&obj_priv->base); + continue; + } + list_del(&obj_priv->evict_list); + drm_gem_object_unreference(&obj_priv->base); } /* Unbinding will emit any required flushes */ - list_for_each_entry_safe(obj_priv, tmp_obj_priv, - &eviction_list, evict_list) { -#if WATCH_LRU - DRM_INFO("%s: evicting %p\n", __func__, &obj_priv->base); -#endif - ret = i915_gem_object_unbind(&obj_priv->base); - if (ret) - return ret; - + while (!list_empty(&eviction_list)) { + obj_priv = list_first_entry(&eviction_list, + struct drm_i915_gem_object, + evict_list); + if (ret == 0) + ret = i915_gem_object_unbind(&obj_priv->base); + list_del(&obj_priv->evict_list); drm_gem_object_unreference(&obj_priv->base); } - /* The just created free hole should be on the top of the free stack - * maintained by drm_mm, so this BUG_ON actually executes in O(1). - * Furthermore all accessed data has just recently been used, so it - * should be really fast, too. */ - BUG_ON(!drm_mm_search_free(&dev_priv->mm.gtt_space, min_size, - alignment, 0)); - - return 0; + return ret; } int diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b5bf51a..9792285 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1013,8 +1013,8 @@ void intel_wait_for_vblank(struct drm_device *dev, int pipe) DRM_DEBUG_KMS("vblank wait timed out\n"); } -/** - * intel_wait_for_vblank_off - wait for vblank after disabling a pipe +/* + * intel_wait_for_pipe_off - wait for pipe to turn off * @dev: drm device * @pipe: pipe to wait for * @@ -1022,25 +1022,39 @@ void intel_wait_for_vblank(struct drm_device *dev, int pipe) * spinning on the vblank interrupt status bit, since we won't actually * see an interrupt when the pipe is disabled. * - * So this function waits for the display line value to settle (it - * usually ends up stopping at the start of the next frame). + * On Gen4 and above: + * wait for the pipe register state bit to turn off + * + * Otherwise: + * wait for the display line value to settle (it usually + * ends up stopping at the start of the next frame). + * */ -void intel_wait_for_vblank_off(struct drm_device *dev, int pipe) +static void intel_wait_for_pipe_off(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = dev->dev_private; - int pipedsl_reg = (pipe == 0 ? PIPEADSL : PIPEBDSL); - unsigned long timeout = jiffies + msecs_to_jiffies(100); - u32 last_line; - - /* Wait for the display line to settle */ - do { - last_line = I915_READ(pipedsl_reg) & DSL_LINEMASK; - mdelay(5); - } while (((I915_READ(pipedsl_reg) & DSL_LINEMASK) != last_line) && - time_after(timeout, jiffies)); - - if (time_after(jiffies, timeout)) - DRM_DEBUG_KMS("vblank wait timed out\n"); + + if (INTEL_INFO(dev)->gen >= 4) { + int pipeconf_reg = (pipe == 0 ? PIPEACONF : PIPEBCONF); + + /* Wait for the Pipe State to go off */ + if (wait_for((I915_READ(pipeconf_reg) & I965_PIPECONF_ACTIVE) == 0, + 100, 0)) + DRM_DEBUG_KMS("pipe_off wait timed out\n"); + } else { + u32 last_line; + int pipedsl_reg = (pipe == 0 ? 
PIPEADSL : PIPEBDSL); + unsigned long timeout = jiffies + msecs_to_jiffies(100); + + /* Wait for the display line to settle */ + do { + last_line = I915_READ(pipedsl_reg) & DSL_LINEMASK; + mdelay(5); + } while (((I915_READ(pipedsl_reg) & DSL_LINEMASK) != last_line) && + time_after(timeout, jiffies)); + if (time_after(jiffies, timeout)) + DRM_DEBUG_KMS("pipe_off wait timed out\n"); + } } /* Parameters have changed, update FBC info */ @@ -2328,13 +2342,13 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) I915_READ(dspbase_reg); } - /* Wait for vblank for the disable to take effect */ - intel_wait_for_vblank_off(dev, pipe); - /* Don't disable pipe A or pipe A PLLs if needed */ if (pipeconf_reg == PIPEACONF && - (dev_priv->quirks & QUIRK_PIPEA_FORCE)) + (dev_priv->quirks & QUIRK_PIPEA_FORCE)) { + /* Wait for vblank for the disable to take effect */ + intel_wait_for_vblank(dev, pipe); goto skip_pipe_off; + } /* Next, disable display pipes */ temp = I915_READ(pipeconf_reg); @@ -2343,8 +2357,8 @@ static void i9xx_crtc_dpms(struct drm_crtc *crtc, int mode) I915_READ(pipeconf_reg); } - /* Wait for vblank for the disable to take effect. */ - intel_wait_for_vblank_off(dev, pipe); + /* Wait for the pipe to turn off */ + intel_wait_for_pipe_off(dev, pipe); temp = I915_READ(dpll_reg); if ((temp & DPLL_VCO_ENABLE) != 0) { diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 1a51ee0..9ab8708 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1138,18 +1138,14 @@ static bool intel_dp_set_link_train(struct intel_dp *intel_dp, uint32_t dp_reg_value, uint8_t dp_train_pat, - uint8_t train_set[4], - bool first) + uint8_t train_set[4]) { struct drm_device *dev = intel_dp->base.enc.dev; struct drm_i915_private *dev_priv = dev->dev_private; - struct intel_crtc *intel_crtc = to_intel_crtc(intel_dp->base.enc.crtc); int ret; I915_WRITE(intel_dp->output_reg, dp_reg_value); POSTING_READ(intel_dp->output_reg); - if (first) - intel_wait_for_vblank(dev, intel_crtc->pipe); intel_dp_aux_native_write_1(intel_dp, DP_TRAINING_PATTERN_SET, @@ -1174,10 +1170,15 @@ intel_dp_link_train(struct intel_dp *intel_dp) uint8_t voltage; bool clock_recovery = false; bool channel_eq = false; - bool first = true; int tries; u32 reg; uint32_t DP = intel_dp->DP; + struct intel_crtc *intel_crtc = to_intel_crtc(intel_dp->base.enc.crtc); + + /* Enable output, wait for it to become active */ + I915_WRITE(intel_dp->output_reg, intel_dp->DP); + POSTING_READ(intel_dp->output_reg); + intel_wait_for_vblank(dev, intel_crtc->pipe); /* Write the link configuration data */ intel_dp_aux_native_write(intel_dp, DP_LINK_BW_SET, @@ -1210,9 +1211,8 @@ intel_dp_link_train(struct intel_dp *intel_dp) reg = DP | DP_LINK_TRAIN_PAT_1; if (!intel_dp_set_link_train(intel_dp, reg, - DP_TRAINING_PATTERN_1, train_set, first)) + DP_TRAINING_PATTERN_1, train_set)) break; - first = false; /* Set training pattern 1 */ udelay(100); @@ -1266,8 +1266,7 @@ intel_dp_link_train(struct intel_dp *intel_dp) /* channel eq pattern */ if (!intel_dp_set_link_train(intel_dp, reg, - DP_TRAINING_PATTERN_2, train_set, - false)) + DP_TRAINING_PATTERN_2, train_set)) break; udelay(400); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ad312ca..8828b3a 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -229,7 +229,6 @@ extern struct drm_display_mode *intel_crtc_mode_get(struct drm_device *dev, struct drm_crtc *crtc); int 
intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data, struct drm_file *file_priv); -extern void intel_wait_for_vblank_off(struct drm_device *dev, int pipe); extern void intel_wait_for_vblank(struct drm_device *dev, int pipe); extern struct drm_crtc *intel_get_crtc_from_pipe(struct drm_device *dev, int pipe); extern struct drm_crtc *intel_get_load_detect_pipe(struct intel_encoder *intel_encoder, diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 7bdc962..56ad9df 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -237,8 +237,10 @@ int intel_fbdev_destroy(struct drm_device *dev, drm_fb_helper_fini(&ifbdev->helper); drm_framebuffer_cleanup(&ifb->base); - if (ifb->obj) + if (ifb->obj) { + drm_gem_object_handle_unreference(ifb->obj); drm_gem_object_unreference(ifb->obj); + } return 0; } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e8e902d..ee73e42 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2170,8 +2170,7 @@ intel_sdvo_tv_init(struct intel_sdvo *intel_sdvo, int type) return true; err: - intel_sdvo_destroy_enhance_property(connector); - kfree(intel_sdvo_connector); + intel_sdvo_destroy(connector); return false; } @@ -2243,8 +2242,7 @@ intel_sdvo_lvds_init(struct intel_sdvo *intel_sdvo, int device) return true; err: - intel_sdvo_destroy_enhance_property(connector); - kfree(intel_sdvo_connector); + intel_sdvo_destroy(connector); return false; } @@ -2522,11 +2520,10 @@ static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo, uint16_t response; } enhancements; - if (!intel_sdvo_get_value(intel_sdvo, - SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, - &enhancements, sizeof(enhancements))) - return false; - + enhancements.response = 0; + intel_sdvo_get_value(intel_sdvo, + SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS, + &enhancements, sizeof(enhancements)); if (enhancements.response == 0) { DRM_DEBUG_KMS("No enhancement is supported\n"); return true; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 87186a4..fc73703 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -558,8 +558,10 @@ nouveau_connector_get_modes(struct drm_connector *connector) if (nv_encoder->dcb->type == OUTPUT_LVDS && (nv_encoder->dcb->lvdsconf.use_straps_for_mode || dev_priv->vbios.fp_no_ddc) && nouveau_bios_fp_mode(dev, NULL)) { - nv_connector->native_mode = drm_mode_create(dev); - nouveau_bios_fp_mode(dev, nv_connector->native_mode); + struct drm_display_mode mode; + + nouveau_bios_fp_mode(dev, &mode); + nv_connector->native_mode = drm_mode_duplicate(dev, &mode); } /* Find the native mode if this is a digital panel, if we didn't diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index dbd30b2..d204771 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -352,6 +352,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *nfbdev) if (nouveau_fb->nvbo) { nouveau_bo_unmap(nouveau_fb->nvbo); + drm_gem_object_handle_unreference_unlocked(nouveau_fb->nvbo->gem); drm_gem_object_unreference_unlocked(nouveau_fb->nvbo->gem); nouveau_fb->nvbo = NULL; } diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index ead7b8f..19620a6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ 
b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -167,11 +167,9 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data, goto out; ret = drm_gem_handle_create(file_priv, nvbo->gem, &req->info.handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(nvbo->gem); out: - drm_gem_object_handle_unreference_unlocked(nvbo->gem); - - if (ret) - drm_gem_object_unreference_unlocked(nvbo->gem); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c index 3ec181f..3c9964a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_notifier.c +++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c @@ -79,6 +79,7 @@ nouveau_notifier_takedown_channel(struct nouveau_channel *chan) mutex_lock(&dev->struct_mutex); nouveau_bo_unpin(chan->notifier_bo); mutex_unlock(&dev->struct_mutex); + drm_gem_object_handle_unreference_unlocked(chan->notifier_bo->gem); drm_gem_object_unreference_unlocked(chan->notifier_bo->gem); drm_mm_takedown(&chan->notifier_heap); } diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h index 1bc72c3..fe359a2 100644 --- a/drivers/gpu/drm/radeon/atombios.h +++ b/drivers/gpu/drm/radeon/atombios.h @@ -4999,7 +4999,7 @@ typedef struct _SW_I2C_IO_DATA_PARAMETERS #define SW_I2C_CNTL_WRITE1BIT 6 //==============================VESA definition Portion=============================== -#define VESA_OEM_PRODUCT_REV '01.00' +#define VESA_OEM_PRODUCT_REV "01.00" #define VESA_MODE_ATTRIBUTE_MODE_SUPPORT 0xBB //refer to VBE spec p.32, no TTY support #define VESA_MODE_WIN_ATTRIBUTE 7 #define VESA_WIN_SIZE 64 diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index afc18d8..7a04959 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2729,7 +2729,7 @@ int r600_ib_test(struct radeon_device *rdev) if (i < rdev->usec_timeout) { DRM_INFO("ib test succeeded in %u usecs\n", i); } else { - DRM_ERROR("radeon: ib test failed (sracth(0x%04X)=0x%08X)\n", + DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); r = -EINVAL; } @@ -3528,7 +3528,8 @@ void r600_ioctl_wait_idle(struct radeon_device *rdev, struct radeon_bo *bo) /* r7xx hw bug. 
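The atombios.h hunk above replaces a multi-character character constant with a string literal: in C, '01.00' is an int with an implementation-defined value (and draws a compiler warning), whereas "01.00" is the intended NUL-terminated string. A small hosted-C illustration of the difference:

#include <stdio.h>

int main(void)
{
	/* a four-character constant is an int, not text */
	printf("sizeof('0100')   = %zu\n", sizeof('0100'));
	/* the string literal is six bytes, including the NUL */
	printf("sizeof(\"01.00\") = %zu\n", sizeof("01.00"));
	return 0;
}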
write to HDP_DEBUG1 followed by fb read * rather than write to HDP_REG_COHERENCY_FLUSH_CNTL */ - if ((rdev->family >= CHIP_RV770) && (rdev->family <= CHIP_RV740)) { + if ((rdev->family >= CHIP_RV770) && (rdev->family <= CHIP_RV740) && + rdev->vram_scratch.ptr) { void __iomem *ptr = (void *)rdev->vram_scratch.ptr; u32 tmp; diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index ebae14c..68932ba 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -317,6 +317,15 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, *connector_type = DRM_MODE_CONNECTOR_DVID; } + /* MSI K9A2GM V2/V3 board has no HDMI or DVI */ + if ((dev->pdev->device == 0x796e) && + (dev->pdev->subsystem_vendor == 0x1462) && + (dev->pdev->subsystem_device == 0x7302)) { + if ((supported_device == ATOM_DEVICE_DFP2_SUPPORT) || + (supported_device == ATOM_DEVICE_DFP3_SUPPORT)) + return false; + } + /* a-bit f-i90hd - ciaranm on #radeonhd - this board has no DVI */ if ((dev->pdev->device == 0x7941) && (dev->pdev->subsystem_vendor == 0x147b) && diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 127a395f..b92d2f2 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -349,6 +349,8 @@ static void radeon_print_display_setup(struct drm_device *dev) DRM_INFO(" DFP4: %s\n", encoder_names[radeon_encoder->encoder_id]); if (devices & ATOM_DEVICE_DFP5_SUPPORT) DRM_INFO(" DFP5: %s\n", encoder_names[radeon_encoder->encoder_id]); + if (devices & ATOM_DEVICE_DFP6_SUPPORT) + DRM_INFO(" DFP6: %s\n", encoder_names[radeon_encoder->encoder_id]); if (devices & ATOM_DEVICE_TV1_SUPPORT) DRM_INFO(" TV1: %s\n", encoder_names[radeon_encoder->encoder_id]); if (devices & ATOM_DEVICE_CV_SUPPORT) @@ -841,8 +843,9 @@ static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb); - if (radeon_fb->obj) + if (radeon_fb->obj) { drm_gem_object_unreference_unlocked(radeon_fb->obj); + } drm_framebuffer_cleanup(fb); kfree(radeon_fb); } diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index c74a8b2..9cdf6a3 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -94,8 +94,10 @@ static void radeonfb_destroy_pinned_object(struct drm_gem_object *gobj) ret = radeon_bo_reserve(rbo, false); if (likely(ret == 0)) { radeon_bo_kunmap(rbo); + radeon_bo_unpin(rbo); radeon_bo_unreserve(rbo); } + drm_gem_object_handle_unreference(gobj); drm_gem_object_unreference_unlocked(gobj); } @@ -325,8 +327,6 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb { struct fb_info *info; struct radeon_framebuffer *rfb = &rfbdev->rfb; - struct radeon_bo *rbo; - int r; if (rfbdev->helper.fbdev) { info = rfbdev->helper.fbdev; @@ -338,14 +338,8 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb } if (rfb->obj) { - rbo = rfb->obj->driver_private; - r = radeon_bo_reserve(rbo, false); - if (likely(r == 0)) { - radeon_bo_kunmap(rbo); - radeon_bo_unpin(rbo); - radeon_bo_unreserve(rbo); - } - drm_gem_object_unreference_unlocked(rfb->obj); + radeonfb_destroy_pinned_object(rfb->obj); + rfb->obj = NULL; } drm_fb_helper_fini(&rfbdev->helper); drm_framebuffer_cleanup(&rfb->base); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index c578f26..d1e595d 100644 --- 
a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -201,11 +201,11 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, return r; } r = drm_gem_handle_create(filp, gobj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(gobj); if (r) { - drm_gem_object_unreference_unlocked(gobj); return r; } - drm_gem_object_handle_unreference_unlocked(gobj); args->handle = handle; return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 5eee3c4..8fbbe1c 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -203,6 +203,10 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) */ int radeon_driver_firstopen_kms(struct drm_device *dev) { + struct radeon_device *rdev = dev->dev_private; + + if (rdev->powered_down) + return -EINVAL; return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 7cffb3e..3451a82 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -351,6 +351,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, INIT_LIST_HEAD(&fbo->lru); INIT_LIST_HEAD(&fbo->swap); fbo->vm_node = NULL; + atomic_set(&fbo->cpu_writers, 0); fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj); kref_init(&fbo->list_kref); diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c index ca90479..b1e02ff 100644 --- a/drivers/gpu/drm/ttm/ttm_page_alloc.c +++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c @@ -69,7 +69,7 @@ struct ttm_page_pool { spinlock_t lock; bool fill_lock; struct list_head list; - int gfp_flags; + gfp_t gfp_flags; unsigned npages; char *name; unsigned long nfrees; @@ -475,7 +475,7 @@ static void ttm_handle_caching_state_failure(struct list_head *pages, * This function is reentrant if caller updates count depending on number of * pages returned in pages array. 
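The create-ioctl hunks above (i915, nouveau, radeon) all converge on the same ownership rule: once drm_gem_handle_create() has taken its own reference, the allocation reference is dropped unconditionally, so the success and error paths no longer diverge. A generic kref-based sketch of that handoff; handle_create() and the handle value are placeholders, not the DRM API:

#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/types.h>

struct object { struct kref refcount; };

static void object_release(struct kref *kref)
{
	kfree(container_of(kref, struct object, refcount));
}

/* placeholder: hand out a handle and take the handle's own reference */
static int handle_create(struct object *obj, u32 *handle)
{
	kref_get(&obj->refcount);
	*handle = 1;
	return 0;
}

static int create_ioctl(u32 *handle)
{
	struct object *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	int ret;

	if (!obj)
		return -ENOMEM;
	kref_init(&obj->refcount);	/* allocation reference */

	ret = handle_create(obj, handle);
	/* drop the allocation reference - the handle (if any) holds its own */
	kref_put(&obj->refcount, object_release);
	return ret;
}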
*/ -static int ttm_alloc_new_pages(struct list_head *pages, int gfp_flags, +static int ttm_alloc_new_pages(struct list_head *pages, gfp_t gfp_flags, int ttm_flags, enum ttm_caching_state cstate, unsigned count) { struct page **caching_array; @@ -666,7 +666,7 @@ int ttm_get_pages(struct list_head *pages, int flags, { struct ttm_page_pool *pool = ttm_get_pool(flags, cstate); struct page *p = NULL; - int gfp_flags = GFP_USER; + gfp_t gfp_flags = GFP_USER; int r; /* set zero flag for page allocation if required */ @@ -818,7 +818,7 @@ int ttm_page_alloc_init(struct ttm_mem_global *glob, unsigned max_pages) return 0; } -void ttm_page_alloc_fini() +void ttm_page_alloc_fini(void) { int i; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 72ec2e2..a96ed6d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -148,13 +148,16 @@ static struct pci_device_id vmw_pci_id_list[] = { {0, 0, 0} }; -static char *vmw_devname = "vmwgfx"; +static int enable_fbdev; static int vmw_probe(struct pci_dev *, const struct pci_device_id *); static void vmw_master_init(struct vmw_master *); static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, void *ptr); +MODULE_PARM_DESC(enable_fbdev, "Enable vmwgfx fbdev"); +module_param_named(enable_fbdev, enable_fbdev, int, 0600); + static void vmw_print_capabilities(uint32_t capabilities) { DRM_INFO("Capabilities:\n"); @@ -192,8 +195,6 @@ static int vmw_request_device(struct vmw_private *dev_priv) { int ret; - vmw_kms_save_vga(dev_priv); - ret = vmw_fifo_init(dev_priv, &dev_priv->fifo); if (unlikely(ret != 0)) { DRM_ERROR("Unable to initialize FIFO.\n"); @@ -206,9 +207,35 @@ static int vmw_request_device(struct vmw_private *dev_priv) static void vmw_release_device(struct vmw_private *dev_priv) { vmw_fifo_release(dev_priv, &dev_priv->fifo); - vmw_kms_restore_vga(dev_priv); } +int vmw_3d_resource_inc(struct vmw_private *dev_priv) +{ + int ret = 0; + + mutex_lock(&dev_priv->release_mutex); + if (unlikely(dev_priv->num_3d_resources++ == 0)) { + ret = vmw_request_device(dev_priv); + if (unlikely(ret != 0)) + --dev_priv->num_3d_resources; + } + mutex_unlock(&dev_priv->release_mutex); + return ret; +} + + +void vmw_3d_resource_dec(struct vmw_private *dev_priv) +{ + int32_t n3d; + + mutex_lock(&dev_priv->release_mutex); + if (unlikely(--dev_priv->num_3d_resources == 0)) + vmw_release_device(dev_priv); + n3d = (int32_t) dev_priv->num_3d_resources; + mutex_unlock(&dev_priv->release_mutex); + + BUG_ON(n3d < 0); +} static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) { @@ -228,6 +255,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) dev_priv->last_read_sequence = (uint32_t) -100; mutex_init(&dev_priv->hw_mutex); mutex_init(&dev_priv->cmdbuf_mutex); + mutex_init(&dev_priv->release_mutex); rwlock_init(&dev_priv->resource_lock); idr_init(&dev_priv->context_idr); idr_init(&dev_priv->surface_idr); @@ -244,6 +272,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) dev_priv->vram_start = pci_resource_start(dev->pdev, 1); dev_priv->mmio_start = pci_resource_start(dev->pdev, 2); + dev_priv->enable_fb = enable_fbdev; + mutex_lock(&dev_priv->hw_mutex); vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2); @@ -343,17 +373,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) dev->dev_private = dev_priv; - if (!dev->devname) - dev->devname = vmw_devname; - - if (dev_priv->capabilities & SVGA_CAP_IRQMASK) { - 
ret = drm_irq_install(dev); - if (unlikely(ret != 0)) { - DRM_ERROR("Failed installing irq: %d\n", ret); - goto out_no_irq; - } - } - ret = pci_request_regions(dev->pdev, "vmwgfx probe"); dev_priv->stealth = (ret != 0); if (dev_priv->stealth) { @@ -369,26 +388,52 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) goto out_no_device; } } - ret = vmw_request_device(dev_priv); + ret = vmw_kms_init(dev_priv); if (unlikely(ret != 0)) - goto out_no_device; - vmw_kms_init(dev_priv); + goto out_no_kms; vmw_overlay_init(dev_priv); - vmw_fb_init(dev_priv); + if (dev_priv->enable_fb) { + ret = vmw_3d_resource_inc(dev_priv); + if (unlikely(ret != 0)) + goto out_no_fifo; + vmw_kms_save_vga(dev_priv); + vmw_fb_init(dev_priv); + DRM_INFO("%s", vmw_fifo_have_3d(dev_priv) ? + "Detected device 3D availability.\n" : + "Detected no device 3D availability.\n"); + } else { + DRM_INFO("Delayed 3D detection since we're not " + "running the device in SVGA mode yet.\n"); + } + + if (dev_priv->capabilities & SVGA_CAP_IRQMASK) { + ret = drm_irq_install(dev); + if (unlikely(ret != 0)) { + DRM_ERROR("Failed installing irq: %d\n", ret); + goto out_no_irq; + } + } dev_priv->pm_nb.notifier_call = vmwgfx_pm_notifier; register_pm_notifier(&dev_priv->pm_nb); - DRM_INFO("%s", vmw_fifo_have_3d(dev_priv) ? "Have 3D\n" : "No 3D\n"); - return 0; -out_no_device: - if (dev_priv->capabilities & SVGA_CAP_IRQMASK) - drm_irq_uninstall(dev_priv->dev); - if (dev->devname == vmw_devname) - dev->devname = NULL; out_no_irq: + if (dev_priv->enable_fb) { + vmw_fb_close(dev_priv); + vmw_kms_restore_vga(dev_priv); + vmw_3d_resource_dec(dev_priv); + } +out_no_fifo: + vmw_overlay_close(dev_priv); + vmw_kms_close(dev_priv); +out_no_kms: + if (dev_priv->stealth) + pci_release_region(dev->pdev, 2); + else + pci_release_regions(dev->pdev); +out_no_device: ttm_object_device_release(&dev_priv->tdev); out_err4: iounmap(dev_priv->mmio_virt); @@ -415,19 +460,20 @@ static int vmw_driver_unload(struct drm_device *dev) unregister_pm_notifier(&dev_priv->pm_nb); - vmw_fb_close(dev_priv); + if (dev_priv->capabilities & SVGA_CAP_IRQMASK) + drm_irq_uninstall(dev_priv->dev); + if (dev_priv->enable_fb) { + vmw_fb_close(dev_priv); + vmw_kms_restore_vga(dev_priv); + vmw_3d_resource_dec(dev_priv); + } vmw_kms_close(dev_priv); vmw_overlay_close(dev_priv); - vmw_release_device(dev_priv); if (dev_priv->stealth) pci_release_region(dev->pdev, 2); else pci_release_regions(dev->pdev); - if (dev_priv->capabilities & SVGA_CAP_IRQMASK) - drm_irq_uninstall(dev_priv->dev); - if (dev->devname == vmw_devname) - dev->devname = NULL; ttm_object_device_release(&dev_priv->tdev); iounmap(dev_priv->mmio_virt); drm_mtrr_del(dev_priv->mmio_mtrr, dev_priv->mmio_start, @@ -500,7 +546,7 @@ static long vmw_unlocked_ioctl(struct file *filp, unsigned int cmd, struct drm_ioctl_desc *ioctl = &vmw_ioctls[nr - DRM_COMMAND_BASE]; - if (unlikely(ioctl->cmd != cmd)) { + if (unlikely(ioctl->cmd_drv != cmd)) { DRM_ERROR("Invalid command format, ioctl %d\n", nr - DRM_COMMAND_BASE); return -EINVAL; @@ -589,6 +635,16 @@ static int vmw_master_set(struct drm_device *dev, struct vmw_master *vmaster = vmw_master(file_priv->master); int ret = 0; + if (!dev_priv->enable_fb) { + ret = vmw_3d_resource_inc(dev_priv); + if (unlikely(ret != 0)) + return ret; + vmw_kms_save_vga(dev_priv); + mutex_lock(&dev_priv->hw_mutex); + vmw_write(dev_priv, SVGA_REG_TRACES, 0); + mutex_unlock(&dev_priv->hw_mutex); + } + if (active) { BUG_ON(active != &dev_priv->fbdev_master); ret = 
ttm_vt_lock(&active->lock, false, vmw_fp->tfile); @@ -617,7 +673,13 @@ static int vmw_master_set(struct drm_device *dev, return 0; out_no_active_lock: - vmw_release_device(dev_priv); + if (!dev_priv->enable_fb) { + mutex_lock(&dev_priv->hw_mutex); + vmw_write(dev_priv, SVGA_REG_TRACES, 1); + mutex_unlock(&dev_priv->hw_mutex); + vmw_kms_restore_vga(dev_priv); + vmw_3d_resource_dec(dev_priv); + } return ret; } @@ -645,11 +707,23 @@ static void vmw_master_drop(struct drm_device *dev, ttm_lock_set_kill(&vmaster->lock, true, SIGTERM); + if (!dev_priv->enable_fb) { + ret = ttm_bo_evict_mm(&dev_priv->bdev, TTM_PL_VRAM); + if (unlikely(ret != 0)) + DRM_ERROR("Unable to clean VRAM on master drop.\n"); + mutex_lock(&dev_priv->hw_mutex); + vmw_write(dev_priv, SVGA_REG_TRACES, 1); + mutex_unlock(&dev_priv->hw_mutex); + vmw_kms_restore_vga(dev_priv); + vmw_3d_resource_dec(dev_priv); + } + dev_priv->active_master = &dev_priv->fbdev_master; ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM); ttm_vt_unlock(&dev_priv->fbdev_master.lock); - vmw_fb_on(dev_priv); + if (dev_priv->enable_fb) + vmw_fb_on(dev_priv); } @@ -722,6 +796,7 @@ static struct drm_driver driver = { .irq_postinstall = vmw_irq_postinstall, .irq_uninstall = vmw_irq_uninstall, .irq_handler = vmw_irq_handler, + .get_vblank_counter = vmw_get_vblank_counter, .reclaim_buffers_locked = NULL, .get_map_ofs = drm_core_get_map_ofs, .get_reg_ofs = drm_core_get_reg_ofs, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 429f917..58de639 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -277,6 +277,7 @@ struct vmw_private { bool stealth; bool is_opened; + bool enable_fb; /** * Master management. @@ -285,6 +286,9 @@ struct vmw_private { struct vmw_master *active_master; struct vmw_master fbdev_master; struct notifier_block pm_nb; + + struct mutex release_mutex; + uint32_t num_3d_resources; }; static inline struct vmw_private *vmw_priv(struct drm_device *dev) @@ -319,6 +323,9 @@ static inline uint32_t vmw_read(struct vmw_private *dev_priv, return val; } +int vmw_3d_resource_inc(struct vmw_private *dev_priv); +void vmw_3d_resource_dec(struct vmw_private *dev_priv); + /** * GMR utilities - vmwgfx_gmr.c */ @@ -511,6 +518,7 @@ void vmw_kms_write_svga(struct vmw_private *vmw_priv, unsigned bbp, unsigned depth); int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc); /** * Overlay control - vmwgfx_overlay.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 870967a..409e172 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -615,6 +615,11 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv, if (unlikely(ret != 0)) goto err_unlock; + if (bo->mem.mem_type == TTM_PL_VRAM && + bo->mem.mm_node->start < bo->num_pages) + (void) ttm_bo_validate(bo, &vmw_sys_placement, false, + false, false); + ret = ttm_bo_validate(bo, &ne_placement, false, false, false); /* Could probably bug on */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c index e6a1eb7..0fe3176 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c @@ -106,6 +106,7 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) mutex_lock(&dev_priv->hw_mutex); dev_priv->enable_state = vmw_read(dev_priv, SVGA_REG_ENABLE); 
dev_priv->config_done_state = vmw_read(dev_priv, SVGA_REG_CONFIG_DONE); + dev_priv->traces_state = vmw_read(dev_priv, SVGA_REG_TRACES); vmw_write(dev_priv, SVGA_REG_ENABLE, 1); min = 4; @@ -175,6 +176,8 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) dev_priv->config_done_state); vmw_write(dev_priv, SVGA_REG_ENABLE, dev_priv->enable_state); + vmw_write(dev_priv, SVGA_REG_TRACES, + dev_priv->traces_state); mutex_unlock(&dev_priv->hw_mutex); vmw_fence_queue_takedown(&fifo->fence_queue); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 64d7f47..e882ba0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -898,7 +898,19 @@ int vmw_kms_save_vga(struct vmw_private *vmw_priv) save->width = vmw_read(vmw_priv, SVGA_REG_DISPLAY_WIDTH); save->height = vmw_read(vmw_priv, SVGA_REG_DISPLAY_HEIGHT); vmw_write(vmw_priv, SVGA_REG_DISPLAY_ID, SVGA_ID_INVALID); + if (i == 0 && vmw_priv->num_displays == 1 && + save->width == 0 && save->height == 0) { + + /* + * It should be fairly safe to assume that these + * values are uninitialized. + */ + + save->width = vmw_priv->vga_width - save->pos_x; + save->height = vmw_priv->vga_height - save->pos_y; + } } + return 0; } @@ -984,3 +996,8 @@ out_unlock: ttm_read_unlock(&vmaster->lock); return ret; } + +u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc) +{ + return 0; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 7083b1a..11cb39e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -27,6 +27,8 @@ #include "vmwgfx_kms.h" +#define VMWGFX_LDU_NUM_DU 8 + #define vmw_crtc_to_ldu(x) \ container_of(x, struct vmw_legacy_display_unit, base.crtc) #define vmw_encoder_to_ldu(x) \ @@ -536,6 +538,10 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv) { + struct drm_device *dev = dev_priv->dev; + int i; + int ret; + if (dev_priv->ldu_priv) { DRM_INFO("ldu system already on\n"); return -EINVAL; @@ -553,23 +559,24 @@ int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv) drm_mode_create_dirty_info_property(dev_priv->dev); - vmw_ldu_init(dev_priv, 0); - /* for old hardware without multimon only enable one display */ if (dev_priv->capabilities & SVGA_CAP_MULTIMON) { - vmw_ldu_init(dev_priv, 1); - vmw_ldu_init(dev_priv, 2); - vmw_ldu_init(dev_priv, 3); - vmw_ldu_init(dev_priv, 4); - vmw_ldu_init(dev_priv, 5); - vmw_ldu_init(dev_priv, 6); - vmw_ldu_init(dev_priv, 7); + for (i = 0; i < VMWGFX_LDU_NUM_DU; ++i) + vmw_ldu_init(dev_priv, i); + ret = drm_vblank_init(dev, VMWGFX_LDU_NUM_DU); + } else { + /* for old hardware without multimon only enable one display */ + vmw_ldu_init(dev_priv, 0); + ret = drm_vblank_init(dev, 1); } - return 0; + return ret; } int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv) { + struct drm_device *dev = dev_priv->dev; + + drm_vblank_cleanup(dev); if (!dev_priv->ldu_priv) return -ENOSYS; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 5f2d5df..c8c40e9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -211,6 +211,7 @@ static void vmw_hw_context_destroy(struct vmw_resource *res) cmd->body.cid = cpu_to_le32(res->id); vmw_fifo_commit(dev_priv, sizeof(*cmd)); + vmw_3d_resource_dec(dev_priv); } static int 
vmw_context_init(struct vmw_private *dev_priv, @@ -247,6 +248,7 @@ static int vmw_context_init(struct vmw_private *dev_priv, cmd->body.cid = cpu_to_le32(res->id); vmw_fifo_commit(dev_priv, sizeof(*cmd)); + (void) vmw_3d_resource_inc(dev_priv); vmw_resource_activate(res, vmw_hw_context_destroy); return 0; } @@ -406,6 +408,7 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) cmd->body.sid = cpu_to_le32(res->id); vmw_fifo_commit(dev_priv, sizeof(*cmd)); + vmw_3d_resource_dec(dev_priv); } void vmw_surface_res_free(struct vmw_resource *res) @@ -473,6 +476,7 @@ int vmw_surface_init(struct vmw_private *dev_priv, } vmw_fifo_commit(dev_priv, submit_size); + (void) vmw_3d_resource_inc(dev_priv); vmw_resource_activate(res, vmw_hw_surface_destroy); return 0; } diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index b87569e..f366f96 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -598,7 +598,7 @@ static inline void vga_update_device_decodes(struct vga_device *vgadev, pr_debug("vgaarb: decoding count now is: %d\n", vga_decode_count); } -void __vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes, bool userspace) +static void __vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes, bool userspace) { struct vga_device *vgadev; unsigned long flags; diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 4d4d09b..97499d0 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -409,7 +409,7 @@ config SENSORS_CORETEMP config SENSORS_PKGTEMP tristate "Intel processor package temperature sensor" - depends on X86 && PCI && EXPERIMENTAL + depends on X86 && EXPERIMENTAL help If you say yes here you get support for the package level temperature sensor inside your CPU. Check documentation/driver for details. diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index de81111..a23b17a 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -36,6 +36,7 @@ #include <linux/pci.h> #include <asm/msr.h> #include <asm/processor.h> +#include <asm/smp.h> #define DRVNAME "coretemp" @@ -423,9 +424,18 @@ static int __cpuinit coretemp_device_add(unsigned int cpu) int err; struct platform_device *pdev; struct pdev_entry *pdev_entry; -#ifdef CONFIG_SMP struct cpuinfo_x86 *c = &cpu_data(cpu); -#endif + + /* + * CPUID.06H.EAX[0] indicates whether the CPU has thermal + * sensors. We check this bit only, all the early CPUs + * without thermal sensors will be filtered out. 
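vmw_3d_resource_inc()/_dec(), paired above around context and surface creation and destruction, follow a common pattern: a mutex-guarded use count where the first user brings the hardware state up and the last one tears it down. A generic sketch under assumed helper names, with placeholder bodies:

#include <linux/mutex.h>
#include <linux/errno.h>

static DEFINE_MUTEX(res_mutex);
static unsigned int res_users;

static int hw_bring_up(void)   { return 0; }	/* placeholder for real init */
static void hw_tear_down(void) { }

static int resource_get(void)
{
	int ret = 0;

	mutex_lock(&res_mutex);
	if (res_users++ == 0) {
		ret = hw_bring_up();
		if (ret)
			--res_users;	/* undo the count on failure */
	}
	mutex_unlock(&res_mutex);
	return ret;
}

static void resource_put(void)
{
	mutex_lock(&res_mutex);
	if (--res_users == 0)
		hw_tear_down();
	mutex_unlock(&res_mutex);
}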
+ */ + if (!cpu_has(c, X86_FEATURE_DTS)) { + printk(KERN_INFO DRVNAME ": CPU (model=0x%x)" + " has no thermal sensor.\n", c->x86_model); + return 0; + } mutex_lock(&pdev_list_mutex); @@ -482,14 +492,22 @@ exit: static void coretemp_device_remove(unsigned int cpu) { - struct pdev_entry *p, *n; + struct pdev_entry *p; + unsigned int i; + mutex_lock(&pdev_list_mutex); - list_for_each_entry_safe(p, n, &pdev_list, list) { - if (p->cpu == cpu) { - platform_device_unregister(p->pdev); - list_del(&p->list); - kfree(p); - } + list_for_each_entry(p, &pdev_list, list) { + if (p->cpu != cpu) + continue; + + platform_device_unregister(p->pdev); + list_del(&p->list); + mutex_unlock(&pdev_list_mutex); + kfree(p); + for_each_cpu(i, cpu_sibling_mask(cpu)) + if (i != cpu && !coretemp_device_add(i)) + break; + return; } mutex_unlock(&pdev_list_mutex); } @@ -527,30 +545,21 @@ static int __init coretemp_init(void) if (err) goto exit; - for_each_online_cpu(i) { - struct cpuinfo_x86 *c = &cpu_data(i); - /* - * CPUID.06H.EAX[0] indicates whether the CPU has thermal - * sensors. We check this bit only, all the early CPUs - * without thermal sensors will be filtered out. - */ - if (c->cpuid_level >= 6 && (cpuid_eax(0x06) & 0x01)) - coretemp_device_add(i); - else { - printk(KERN_INFO DRVNAME ": CPU (model=0x%x)" - " has no thermal sensor.\n", c->x86_model); - } - } + for_each_online_cpu(i) + coretemp_device_add(i); + +#ifndef CONFIG_HOTPLUG_CPU if (list_empty(&pdev_list)) { err = -ENODEV; goto exit_driver_unreg; } +#endif register_hotcpu_notifier(&coretemp_cpu_notifier); return 0; -exit_driver_unreg: #ifndef CONFIG_HOTPLUG_CPU +exit_driver_unreg: platform_driver_unregister(&coretemp_driver); #endif exit: diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c index 537841e..75afb3b 100644 --- a/drivers/hwmon/f71882fg.c +++ b/drivers/hwmon/f71882fg.c @@ -111,7 +111,7 @@ static struct platform_device *f71882fg_pdev; /* Super-I/O Function prototypes */ static inline int superio_inb(int base, int reg); static inline int superio_inw(int base, int reg); -static inline void superio_enter(int base); +static inline int superio_enter(int base); static inline void superio_select(int base, int ld); static inline void superio_exit(int base); @@ -861,11 +861,20 @@ static int superio_inw(int base, int reg) return val; } -static inline void superio_enter(int base) +static inline int superio_enter(int base) { + /* Don't step on other drivers' I/O space by accident */ + if (!request_muxed_region(base, 2, DRVNAME)) { + printk(KERN_ERR DRVNAME ": I/O address 0x%04x already in use\n", + base); + return -EBUSY; + } + /* according to the datasheet the key must be send twice! 
*/ outb(SIO_UNLOCK_KEY, base); outb(SIO_UNLOCK_KEY, base); + + return 0; } static inline void superio_select(int base, int ld) @@ -877,6 +886,7 @@ static inline void superio_select(int base, int ld) static inline void superio_exit(int base) { outb(SIO_LOCK_KEY, base); + release_region(base, 2); } static inline int fan_from_reg(u16 reg) @@ -2175,21 +2185,15 @@ static int f71882fg_remove(struct platform_device *pdev) static int __init f71882fg_find(int sioaddr, unsigned short *address, struct f71882fg_sio_data *sio_data) { - int err = -ENODEV; u16 devid; - - /* Don't step on other drivers' I/O space by accident */ - if (!request_region(sioaddr, 2, DRVNAME)) { - printk(KERN_ERR DRVNAME ": I/O address 0x%04x already in use\n", - (int)sioaddr); - return -EBUSY; - } - - superio_enter(sioaddr); + int err = superio_enter(sioaddr); + if (err) + return err; devid = superio_inw(sioaddr, SIO_REG_MANID); if (devid != SIO_FINTEK_ID) { pr_debug(DRVNAME ": Not a Fintek device\n"); + err = -ENODEV; goto exit; } @@ -2213,6 +2217,7 @@ static int __init f71882fg_find(int sioaddr, unsigned short *address, default: printk(KERN_INFO DRVNAME ": Unsupported Fintek device: %04x\n", (unsigned int)devid); + err = -ENODEV; goto exit; } @@ -2223,12 +2228,14 @@ static int __init f71882fg_find(int sioaddr, unsigned short *address, if (!(superio_inb(sioaddr, SIO_REG_ENABLE) & 0x01)) { printk(KERN_WARNING DRVNAME ": Device not activated\n"); + err = -ENODEV; goto exit; } *address = superio_inw(sioaddr, SIO_REG_ADDR); if (*address == 0) { printk(KERN_WARNING DRVNAME ": Base address not set\n"); + err = -ENODEV; goto exit; } *address &= ~(REGION_LENGTH - 1); /* Ignore 3 LSB */ @@ -2239,7 +2246,6 @@ static int __init f71882fg_find(int sioaddr, unsigned short *address, (int)superio_inb(sioaddr, SIO_REG_DEVREV)); exit: superio_exit(sioaddr); - release_region(sioaddr, 2); return err; } diff --git a/drivers/hwmon/lis3lv02d.c b/drivers/hwmon/lis3lv02d.c index 6138f03..fc591ae 100644 --- a/drivers/hwmon/lis3lv02d.c +++ b/drivers/hwmon/lis3lv02d.c @@ -277,7 +277,7 @@ static irqreturn_t lis302dl_interrupt(int irq, void *dummy) wake_up_interruptible(&lis3_dev.misc_wait); kill_fasync(&lis3_dev.async_queue, SIGIO, POLL_IN); out: - if (lis3_dev.whoami == WAI_8B && lis3_dev.idev && + if (lis3_dev.pdata && lis3_dev.whoami == WAI_8B && lis3_dev.idev && lis3_dev.idev->input->users) return IRQ_WAKE_THREAD; return IRQ_HANDLED; @@ -718,7 +718,7 @@ int lis3lv02d_init_device(struct lis3lv02d *dev) * io-apic is not configurable (and generates a warning) but I keep it * in case of support for other hardware. 
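The f71882fg hunks above move the port reservation into superio_enter() and switch it to request_muxed_region(), which lets several Super-I/O drivers share the same two ports as long as each holds the region only while it is actually talking to the chip. A sketch of the enter/exit pair; the key values and driver name are hypothetical:

#include <linux/ioport.h>
#include <linux/io.h>
#include <linux/errno.h>

#define MY_SIO_UNLOCK_KEY	0x87	/* hypothetical */
#define MY_SIO_LOCK_KEY		0xaa	/* hypothetical */

static int sio_enter(int base)
{
	/* shared (muxed) reservation of the two Super-I/O ports */
	if (!request_muxed_region(base, 2, "mydrv"))
		return -EBUSY;

	outb(MY_SIO_UNLOCK_KEY, base);	/* key is written twice */
	outb(MY_SIO_UNLOCK_KEY, base);
	return 0;
}

static void sio_exit(int base)
{
	outb(MY_SIO_LOCK_KEY, base);
	release_region(base, 2);	/* drop the reservation */
}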
*/ - if (dev->whoami == WAI_8B) + if (dev->pdata && dev->whoami == WAI_8B) thread_fn = lis302dl_interrupt_thread1_8b; else thread_fn = NULL; diff --git a/drivers/hwmon/pkgtemp.c b/drivers/hwmon/pkgtemp.c index 74157fc..f119039 100644 --- a/drivers/hwmon/pkgtemp.c +++ b/drivers/hwmon/pkgtemp.c @@ -33,7 +33,6 @@ #include <linux/list.h> #include <linux/platform_device.h> #include <linux/cpu.h> -#include <linux/pci.h> #include <asm/msr.h> #include <asm/processor.h> @@ -224,7 +223,7 @@ static int __devinit pkgtemp_probe(struct platform_device *pdev) err = sysfs_create_group(&pdev->dev.kobj, &pkgtemp_group); if (err) - goto exit_free; + goto exit_dev; data->hwmon_dev = hwmon_device_register(&pdev->dev); if (IS_ERR(data->hwmon_dev)) { @@ -238,6 +237,8 @@ static int __devinit pkgtemp_probe(struct platform_device *pdev) exit_class: sysfs_remove_group(&pdev->dev.kobj, &pkgtemp_group); +exit_dev: + device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr); exit_free: kfree(data); exit: @@ -250,6 +251,7 @@ static int __devexit pkgtemp_remove(struct platform_device *pdev) hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&pdev->dev.kobj, &pkgtemp_group); + device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr); platform_set_drvdata(pdev, NULL); kfree(data); return 0; @@ -281,9 +283,10 @@ static int __cpuinit pkgtemp_device_add(unsigned int cpu) int err; struct platform_device *pdev; struct pdev_entry *pdev_entry; -#ifdef CONFIG_SMP struct cpuinfo_x86 *c = &cpu_data(cpu); -#endif + + if (!cpu_has(c, X86_FEATURE_PTS)) + return 0; mutex_lock(&pdev_list_mutex); @@ -339,17 +342,18 @@ exit: #ifdef CONFIG_HOTPLUG_CPU static void pkgtemp_device_remove(unsigned int cpu) { - struct pdev_entry *p, *n; + struct pdev_entry *p; unsigned int i; int err; mutex_lock(&pdev_list_mutex); - list_for_each_entry_safe(p, n, &pdev_list, list) { + list_for_each_entry(p, &pdev_list, list) { if (p->cpu != cpu) continue; platform_device_unregister(p->pdev); list_del(&p->list); + mutex_unlock(&pdev_list_mutex); kfree(p); for_each_cpu(i, cpu_core_mask(cpu)) { if (i != cpu) { @@ -358,7 +362,7 @@ static void pkgtemp_device_remove(unsigned int cpu) break; } } - break; + return; } mutex_unlock(&pdev_list_mutex); } @@ -399,11 +403,6 @@ static int __init pkgtemp_init(void) goto exit; for_each_online_cpu(i) { - struct cpuinfo_x86 *c = &cpu_data(i); - - if (!cpu_has(c, X86_FEATURE_PTS)) - continue; - err = pkgtemp_device_add(i); if (err) goto exit_devices_unreg; diff --git a/drivers/i2c/busses/i2c-davinci.c b/drivers/i2c/busses/i2c-davinci.c index 2222c87..b8feac5 100644 --- a/drivers/i2c/busses/i2c-davinci.c +++ b/drivers/i2c/busses/i2c-davinci.c @@ -357,9 +357,6 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) dev->terminate = 0; - /* write the data into mode register */ - davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag); - /* * First byte should be set here, not after interrupt, * because transmit-data-ready interrupt can come before @@ -371,6 +368,9 @@ i2c_davinci_xfer_msg(struct i2c_adapter *adap, struct i2c_msg *msg, int stop) dev->buf_len--; } + /* write the data into mode register; start transmitting */ + davinci_i2c_write_reg(dev, DAVINCI_I2C_MDR_REG, flag); + r = wait_for_completion_interruptible_timeout(&dev->cmd_complete, dev->adapter.timeout); if (r == 0) { diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c index 0e9f85d..56dbe54 100644 --- a/drivers/i2c/busses/i2c-octeon.c +++ b/drivers/i2c/busses/i2c-octeon.c @@ -218,7 +218,7 @@ 
static int octeon_i2c_wait(struct octeon_i2c *i2c) return result; } else if (result == 0) { dev_dbg(i2c->dev, "%s: timeout\n", __func__); - result = -ETIMEDOUT; + return -ETIMEDOUT; } return 0; diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index 72902e0..bf831bf 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -662,8 +662,8 @@ static int s3c24xx_i2c_clockrate(struct s3c24xx_i2c *i2c, unsigned int *got) unsigned long sda_delay; if (pdata->sda_delay) { - sda_delay = (freq / 1000) * pdata->sda_delay; - sda_delay /= 1000000; + sda_delay = clkin * pdata->sda_delay; + sda_delay = DIV_ROUND_UP(sda_delay, 1000000); sda_delay = DIV_ROUND_UP(sda_delay, 5); if (sda_delay > 3) sda_delay = 3; diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index a10152b..0906fc5 100755..100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -83,7 +83,7 @@ static unsigned int mwait_substates; /* Reliable LAPIC Timer States, bit 1 for C1 etc. */ static unsigned int lapic_timer_reliable_states; -static struct cpuidle_device *intel_idle_cpuidle_devices; +static struct cpuidle_device __percpu *intel_idle_cpuidle_devices; static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state); static struct cpuidle_state *cpuidle_state_table; @@ -108,7 +108,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "NHM-C3", .desc = "MWAIT 0x10", .driver_data = (void *) 0x10, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .power_usage = 500, .target_residency = 80, @@ -117,7 +117,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "NHM-C6", .desc = "MWAIT 0x20", .driver_data = (void *) 0x20, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .power_usage = 350, .target_residency = 800, @@ -149,7 +149,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "ATM-C4", .desc = "MWAIT 0x30", .driver_data = (void *) 0x30, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .power_usage = 250, .target_residency = 400, @@ -159,7 +159,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "ATM-C6", .desc = "MWAIT 0x40", .driver_data = (void *) 0x40, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .power_usage = 150, .target_residency = 800, @@ -185,6 +185,16 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) local_irq_disable(); + /* + * If the state flag indicates that the TLB will be flushed or if this + * is the deepest c-state supported, do a voluntary leave mm to avoid + * costly and mostly unnecessary wakeups for flushing the user TLB's + * associated with the active mm. 
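/*
 * Rough sketch of the rounding the s3c24xx_i2c_clockrate() change above
 * switches to: derive the SDA delay in hardware ticks from the bus input
 * clock and round up with DIV_ROUND_UP(), so the programmed delay is
 * never shorter than the platform asked for.  The kHz clock unit and the
 * divide-by-5 register granularity are assumptions for illustration.
 */
static unsigned int sda_delay_to_reg(unsigned long clkin_khz,
				     unsigned int delay_ns)
{
	unsigned long ticks;

	/* ticks = ceil(clkin_khz * delay_ns / 1,000,000) */
	ticks = DIV_ROUND_UP(clkin_khz * delay_ns, 1000000);

	/* the register field counts in units of five ticks, capped at 3 */
	return min_t(unsigned long, DIV_ROUND_UP(ticks, 5), 3);
}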
+ */ + if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED || + (&dev->states[dev->state_count - 1] == state)) + leave_mm(cpu); + if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index d88077a..13c8887 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -463,7 +463,8 @@ static int send_connect(struct iwch_ep *ep) V_MSS_IDX(mtu_idx) | V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); skb->priority = CPL_PRIORITY_SETUP; set_arp_failure_handler(skb, act_open_req_arp_failure); @@ -1280,7 +1281,8 @@ static void accept_cr(struct iwch_ep *ep, __be32 peer_ip, struct sk_buff *skb) V_MSS_IDX(mtu_idx) | V_L2T_IDX(ep->l2t->idx) | V_TX_CHANNEL(ep->l2t->smt_idx); opt0l = V_TOS((ep->tos >> 2) & M_TOS) | V_RCV_BUFSIZ(rcv_win>>10); - opt2 = V_FLAVORS_VALID(1) | V_CONG_CONTROL_FLAVOR(cong_flavor); + opt2 = F_RX_COALESCE_VALID | V_RX_COALESCE(0) | V_FLAVORS_VALID(1) | + V_CONG_CONTROL_FLAVOR(cong_flavor); rpl = cplhdr(skb); rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); diff --git a/drivers/leds/leds-ns2.c b/drivers/leds/leds-ns2.c index 74dce4b..350eb34 100644 --- a/drivers/leds/leds-ns2.c +++ b/drivers/leds/leds-ns2.c @@ -81,7 +81,7 @@ static int ns2_led_get_mode(struct ns2_led_data *led_dat, int cmd_level; int slow_level; - read_lock(&led_dat->rw_lock); + read_lock_irq(&led_dat->rw_lock); cmd_level = gpio_get_value(led_dat->cmd); slow_level = gpio_get_value(led_dat->slow); @@ -95,7 +95,7 @@ static int ns2_led_get_mode(struct ns2_led_data *led_dat, } } - read_unlock(&led_dat->rw_lock); + read_unlock_irq(&led_dat->rw_lock); return ret; } @@ -104,8 +104,9 @@ static void ns2_led_set_mode(struct ns2_led_data *led_dat, enum ns2_led_modes mode) { int i; + unsigned long flags; - write_lock(&led_dat->rw_lock); + write_lock_irqsave(&led_dat->rw_lock, flags); for (i = 0; i < ARRAY_SIZE(ns2_led_modval); i++) { if (mode == ns2_led_modval[i].mode) { @@ -116,7 +117,7 @@ static void ns2_led_set_mode(struct ns2_led_data *led_dat, } } - write_unlock(&led_dat->rw_lock); + write_unlock_irqrestore(&led_dat->rw_lock, flags); } static void ns2_led_set(struct led_classdev *led_cdev, diff --git a/drivers/mfd/max8925-core.c b/drivers/mfd/max8925-core.c index 04028a9..428377a 100644 --- a/drivers/mfd/max8925-core.c +++ b/drivers/mfd/max8925-core.c @@ -429,24 +429,25 @@ static void max8925_irq_sync_unlock(unsigned int irq) irq_tsc = cache_tsc; for (i = 0; i < ARRAY_SIZE(max8925_irqs); i++) { irq_data = &max8925_irqs[i]; + /* 1 -- disable, 0 -- enable */ switch (irq_data->mask_reg) { case MAX8925_CHG_IRQ1_MASK: - irq_chg[0] &= irq_data->enable; + irq_chg[0] &= ~irq_data->enable; break; case MAX8925_CHG_IRQ2_MASK: - irq_chg[1] &= irq_data->enable; + irq_chg[1] &= ~irq_data->enable; break; case MAX8925_ON_OFF_IRQ1_MASK: - irq_on[0] &= irq_data->enable; + irq_on[0] &= ~irq_data->enable; break; case MAX8925_ON_OFF_IRQ2_MASK: - irq_on[1] &= irq_data->enable; + irq_on[1] &= ~irq_data->enable; break; case MAX8925_RTC_IRQ_MASK: - irq_rtc &= irq_data->enable; + irq_rtc &= ~irq_data->enable; break; case MAX8925_TSC_IRQ_MASK: - irq_tsc &= irq_data->enable; + irq_tsc &= ~irq_data->enable; break; default: 
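/*
 * Sketch of the IRQ-safe locking the leds-ns2 change above moves to:
 * plain read_lock()/write_lock() are no longer enough once one of the
 * paths can run in, or be interrupted by, atomic context, so the reader
 * blocks IRQs and the writer saves and restores them.  The names below
 * are illustrative.
 */
static DEFINE_RWLOCK(mode_lock);
static int current_mode;

static int get_mode(void)
{
	int mode;

	read_lock_irq(&mode_lock);	/* only valid where IRQs are known on */
	mode = current_mode;
	read_unlock_irq(&mode_lock);
	return mode;
}

static void set_mode(int mode)
{
	unsigned long flags;

	write_lock_irqsave(&mode_lock, flags);	/* safe from any context */
	current_mode = mode;
	write_unlock_irqrestore(&mode_lock, flags);
}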
dev_err(chip->dev, "wrong IRQ\n"); diff --git a/drivers/mfd/wm831x-irq.c b/drivers/mfd/wm831x-irq.c index 7dabe4d..294183b 100644 --- a/drivers/mfd/wm831x-irq.c +++ b/drivers/mfd/wm831x-irq.c @@ -394,8 +394,13 @@ static int wm831x_irq_set_type(unsigned int irq, unsigned int type) irq = irq - wm831x->irq_base; - if (irq < WM831X_IRQ_GPIO_1 || irq > WM831X_IRQ_GPIO_11) - return -EINVAL; + if (irq < WM831X_IRQ_GPIO_1 || irq > WM831X_IRQ_GPIO_11) { + /* Ignore internal-only IRQs */ + if (irq >= 0 && irq < WM831X_NUM_IRQS) + return 0; + else + return -EINVAL; + } switch (type) { case IRQ_TYPE_EDGE_BOTH: diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 0b591b6..b743312 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -368,7 +368,7 @@ config VMWARE_BALLOON If unsure, say N. To compile this driver as a module, choose M here: the - module will be called vmware_balloon. + module will be called vmw_balloon. config ARM_CHARLCD bool "ARM Ltd. Character LCD Driver" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 255a80d..42eab95 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -33,5 +33,5 @@ obj-$(CONFIG_IWMC3200TOP) += iwmc3200top/ obj-$(CONFIG_HMC6352) += hmc6352.o obj-y += eeprom/ obj-y += cb710/ -obj-$(CONFIG_VMWARE_BALLOON) += vmware_balloon.o +obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o diff --git a/drivers/misc/vmware_balloon.c b/drivers/misc/vmw_balloon.c index 2a1e804..2a1e804 100644 --- a/drivers/misc/vmware_balloon.c +++ b/drivers/misc/vmw_balloon.c diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 71ad416..aacb862 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -241,8 +241,10 @@ static struct sdhci_ops sdhci_s3c_ops = { static void sdhci_s3c_notify_change(struct platform_device *dev, int state) { struct sdhci_host *host = platform_get_drvdata(dev); + unsigned long flags; + if (host) { - spin_lock(&host->lock); + spin_lock_irqsave(&host->lock, flags); if (state) { dev_dbg(&dev->dev, "card inserted.\n"); host->flags &= ~SDHCI_DEVICE_DEAD; @@ -253,7 +255,7 @@ static void sdhci_s3c_notify_change(struct platform_device *dev, int state) host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; } tasklet_schedule(&host->card_tasklet); - spin_unlock(&host->lock); + spin_unlock_irqrestore(&host->lock, flags); } } @@ -481,8 +483,10 @@ static int __devexit sdhci_s3c_remove(struct platform_device *pdev) sdhci_remove_host(host, 1); for (ptr = 0; ptr < 3; ptr++) { - clk_disable(sc->clk_bus[ptr]); - clk_put(sc->clk_bus[ptr]); + if (sc->clk_bus[ptr]) { + clk_disable(sc->clk_bus[ptr]); + clk_put(sc->clk_bus[ptr]); + } } clk_disable(sc->clk_io); clk_put(sc->clk_io); diff --git a/drivers/mtd/nand/omap2.c b/drivers/mtd/nand/omap2.c index 133d515..513e0a7 100644 --- a/drivers/mtd/nand/omap2.c +++ b/drivers/mtd/nand/omap2.c @@ -413,7 +413,7 @@ static inline int omap_nand_dma_transfer(struct mtd_info *mtd, void *addr, prefetch_status = gpmc_read_status(GPMC_PREFETCH_COUNT); } while (prefetch_status); /* disable and stop the PFPW engine */ - gpmc_prefetch_reset(); + gpmc_prefetch_reset(info->gpmc_cs); dma_unmap_single(&info->pdev->dev, dma_addr, len, dir); return 0; diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c index fa42103..179871d 100644 --- a/drivers/net/3c59x.c +++ b/drivers/net/3c59x.c @@ -2942,6 +2942,9 @@ static void vortex_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct vortex_private *vp = netdev_priv(dev); + if 
(!VORTEX_PCI(vp)) + return; + wol->supported = WAKE_MAGIC; wol->wolopts = 0; @@ -2952,6 +2955,10 @@ static void vortex_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static int vortex_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct vortex_private *vp = netdev_priv(dev); + + if (!VORTEX_PCI(vp)) + return -EOPNOTSUPP; + if (wol->wolopts & ~WAKE_MAGIC) return -EINVAL; @@ -3201,6 +3208,9 @@ static void acpi_set_WOL(struct net_device *dev) return; } + if (VORTEX_PCI(vp)->current_state < PCI_D3hot) + return; + /* Change the power state to D3; RxEnable doesn't take effect. */ pci_set_power_state(VORTEX_PCI(vp), PCI_D3hot); } diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 63b9ba0..c73be28 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -1251,6 +1251,12 @@ static void atl1_free_ring_resources(struct atl1_adapter *adapter) rrd_ring->desc = NULL; rrd_ring->dma = 0; + + adapter->cmb.dma = 0; + adapter->cmb.cmb = NULL; + + adapter->smb.dma = 0; + adapter->smb.smb = NULL; } static void atl1_setup_mac_ctrl(struct atl1_adapter *adapter) @@ -2847,10 +2853,11 @@ static int atl1_resume(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3cold, 0); atl1_reset_hw(&adapter->hw); - adapter->cmb.cmb->int_stats = 0; - if (netif_running(netdev)) + if (netif_running(netdev)) { + adapter->cmb.cmb->int_stats = 0; atl1_up(adapter); + } netif_device_attach(netdev); return 0; diff --git a/drivers/net/e1000e/hw.h b/drivers/net/e1000e/hw.h index 66ed08f..ba302a5 100644 --- a/drivers/net/e1000e/hw.h +++ b/drivers/net/e1000e/hw.h @@ -57,6 +57,7 @@ enum e1e_registers { E1000_SCTL = 0x00024, /* SerDes Control - RW */ E1000_FCAL = 0x00028, /* Flow Control Address Low - RW */ E1000_FCAH = 0x0002C, /* Flow Control Address High -RW */ + E1000_FEXTNVM4 = 0x00024, /* Future Extended NVM 4 - RW */ E1000_FEXTNVM = 0x00028, /* Future Extended NVM - RW */ E1000_FCT = 0x00030, /* Flow Control Type - RW */ E1000_VET = 0x00038, /* VLAN Ether Type - RW */ diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 63930d1..57b5435 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -105,6 +105,10 @@ #define E1000_FEXTNVM_SW_CONFIG 1 #define E1000_FEXTNVM_SW_CONFIG_ICH8M (1 << 27) /* Bit redefined for ICH8M :/ */ +#define E1000_FEXTNVM4_BEACON_DURATION_MASK 0x7 +#define E1000_FEXTNVM4_BEACON_DURATION_8USEC 0x7 +#define E1000_FEXTNVM4_BEACON_DURATION_16USEC 0x3 + #define PCIE_ICH8_SNOOP_ALL PCIE_NO_SNOOP_ALL #define E1000_ICH_RAR_ENTRIES 7 @@ -125,6 +129,7 @@ /* SMBus Address Phy Register */ #define HV_SMB_ADDR PHY_REG(768, 26) +#define HV_SMB_ADDR_MASK 0x007F #define HV_SMB_ADDR_PEC_EN 0x0200 #define HV_SMB_ADDR_VALID 0x0080 @@ -237,6 +242,8 @@ static s32 e1000_k1_gig_workaround_hv(struct e1000_hw *hw, bool link); static s32 e1000_set_mdio_slow_mode_hv(struct e1000_hw *hw); static bool e1000_check_mng_mode_ich8lan(struct e1000_hw *hw); static bool e1000_check_mng_mode_pchlan(struct e1000_hw *hw); +static s32 e1000_k1_workaround_lv(struct e1000_hw *hw); +static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate); static inline u16 __er16flash(struct e1000_hw *hw, unsigned long reg) { @@ -272,7 +279,7 @@ static inline void __ew32flash(struct e1000_hw *hw, unsigned long reg, u32 val) static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) { struct e1000_phy_info *phy = &hw->phy; - u32 ctrl; + u32 ctrl, fwsm; s32 ret_val = 0; phy->addr = 1; @@ -294,7 +301,8 @@ static s32 
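/*
 * Sketch of the pointer hygiene behind the atl1 changes above: once the
 * coherent blocks are freed, the cached pointers and DMA handles are
 * cleared, and the resume path only touches them while the interface is
 * actually running.  Structure and field names are illustrative.
 */
static void my_free_rings(struct my_adapter *adapter)
{
	dma_free_coherent(&adapter->pdev->dev, adapter->ring_size,
			  adapter->ring, adapter->ring_dma);
	adapter->ring = NULL;	/* later paths can test for this ... */
	adapter->ring_dma = 0;	/* ... instead of using a stale mapping */
}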
e1000_init_phy_params_pchlan(struct e1000_hw *hw) * disabled, then toggle the LANPHYPC Value bit to force * the interconnect to PCIe mode. */ - if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { + fwsm = er32(FWSM); + if (!(fwsm & E1000_ICH_FWSM_FW_VALID)) { ctrl = er32(CTRL); ctrl |= E1000_CTRL_LANPHYPC_OVERRIDE; ctrl &= ~E1000_CTRL_LANPHYPC_VALUE; @@ -303,6 +311,13 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) ctrl &= ~E1000_CTRL_LANPHYPC_OVERRIDE; ew32(CTRL, ctrl); msleep(50); + + /* + * Gate automatic PHY configuration by hardware on + * non-managed 82579 + */ + if (hw->mac.type == e1000_pch2lan) + e1000_gate_hw_phy_config_ich8lan(hw, true); } /* @@ -315,6 +330,13 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw) if (ret_val) goto out; + /* Ungate automatic PHY configuration on non-managed 82579 */ + if ((hw->mac.type == e1000_pch2lan) && + !(fwsm & E1000_ICH_FWSM_FW_VALID)) { + msleep(10); + e1000_gate_hw_phy_config_ich8lan(hw, false); + } + phy->id = e1000_phy_unknown; ret_val = e1000e_get_phy_id(hw); if (ret_val) @@ -561,13 +583,10 @@ static s32 e1000_init_mac_params_ich8lan(struct e1000_adapter *adapter) if (mac->type == e1000_ich8lan) e1000e_set_kmrn_lock_loss_workaround_ich8lan(hw, true); - /* Disable PHY configuration by hardware, config by software */ - if (mac->type == e1000_pch2lan) { - u32 extcnf_ctrl = er32(EXTCNF_CTRL); - - extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; - ew32(EXTCNF_CTRL, extcnf_ctrl); - } + /* Gate automatic PHY configuration by hardware on managed 82579 */ + if ((mac->type == e1000_pch2lan) && + (er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) + e1000_gate_hw_phy_config_ich8lan(hw, true); return 0; } @@ -652,6 +671,12 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw) goto out; } + if (hw->mac.type == e1000_pch2lan) { + ret_val = e1000_k1_workaround_lv(hw); + if (ret_val) + goto out; + } + /* * Check if there was DownShift, must be checked * immediately after link-up @@ -895,6 +920,34 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) } /** + * e1000_write_smbus_addr - Write SMBus address to PHY needed during Sx states + * @hw: pointer to the HW structure + * + * Assumes semaphore already acquired. 
+ * + **/ +static s32 e1000_write_smbus_addr(struct e1000_hw *hw) +{ + u16 phy_data; + u32 strap = er32(STRAP); + s32 ret_val = 0; + + strap &= E1000_STRAP_SMBUS_ADDRESS_MASK; + + ret_val = e1000_read_phy_reg_hv_locked(hw, HV_SMB_ADDR, &phy_data); + if (ret_val) + goto out; + + phy_data &= ~HV_SMB_ADDR_MASK; + phy_data |= (strap >> E1000_STRAP_SMBUS_ADDRESS_SHIFT); + phy_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID; + ret_val = e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, phy_data); + +out: + return ret_val; +} + +/** * e1000_sw_lcd_config_ich8lan - SW-based LCD Configuration * @hw: pointer to the HW structure * @@ -903,7 +956,6 @@ static s32 e1000_check_reset_block_ich8lan(struct e1000_hw *hw) **/ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) { - struct e1000_adapter *adapter = hw->adapter; struct e1000_phy_info *phy = &hw->phy; u32 i, data, cnf_size, cnf_base_addr, sw_cfg_mask; s32 ret_val = 0; @@ -921,7 +973,8 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) if (phy->type != e1000_phy_igp_3) return ret_val; - if (adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) { + if ((hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_AMT) || + (hw->adapter->pdev->device == E1000_DEV_ID_ICH8_IGP_C)) { sw_cfg_mask = E1000_FEXTNVM_SW_CONFIG; break; } @@ -961,21 +1014,16 @@ static s32 e1000_sw_lcd_config_ich8lan(struct e1000_hw *hw) cnf_base_addr = data & E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK; cnf_base_addr >>= E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT; - if (!(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) && - ((hw->mac.type == e1000_pchlan) || - (hw->mac.type == e1000_pch2lan))) { + if ((!(data & E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE) && + (hw->mac.type == e1000_pchlan)) || + (hw->mac.type == e1000_pch2lan)) { /* * HW configures the SMBus address and LEDs when the * OEM and LCD Write Enable bits are set in the NVM. * When both NVM bits are cleared, SW will configure * them instead. 
*/ - data = er32(STRAP); - data &= E1000_STRAP_SMBUS_ADDRESS_MASK; - reg_data = data >> E1000_STRAP_SMBUS_ADDRESS_SHIFT; - reg_data |= HV_SMB_ADDR_PEC_EN | HV_SMB_ADDR_VALID; - ret_val = e1000_write_phy_reg_hv_locked(hw, HV_SMB_ADDR, - reg_data); + ret_val = e1000_write_smbus_addr(hw); if (ret_val) goto out; @@ -1440,10 +1488,6 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) goto out; /* Enable jumbo frame workaround in the PHY */ - e1e_rphy(hw, PHY_REG(769, 20), &data); - ret_val = e1e_wphy(hw, PHY_REG(769, 20), data & ~(1 << 14)); - if (ret_val) - goto out; e1e_rphy(hw, PHY_REG(769, 23), &data); data &= ~(0x7F << 5); data |= (0x37 << 5); @@ -1452,7 +1496,6 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) goto out; e1e_rphy(hw, PHY_REG(769, 16), &data); data &= ~(1 << 13); - data |= (1 << 12); ret_val = e1e_wphy(hw, PHY_REG(769, 16), data); if (ret_val) goto out; @@ -1477,7 +1520,7 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) mac_reg = er32(RCTL); mac_reg &= ~E1000_RCTL_SECRC; - ew32(FFLT_DBG, mac_reg); + ew32(RCTL, mac_reg); ret_val = e1000e_read_kmrn_reg(hw, E1000_KMRNCTRLSTA_CTRL_OFFSET, @@ -1503,17 +1546,12 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) goto out; /* Write PHY register values back to h/w defaults */ - e1e_rphy(hw, PHY_REG(769, 20), &data); - ret_val = e1e_wphy(hw, PHY_REG(769, 20), data & ~(1 << 14)); - if (ret_val) - goto out; e1e_rphy(hw, PHY_REG(769, 23), &data); data &= ~(0x7F << 5); ret_val = e1e_wphy(hw, PHY_REG(769, 23), data); if (ret_val) goto out; e1e_rphy(hw, PHY_REG(769, 16), &data); - data &= ~(1 << 12); data |= (1 << 13); ret_val = e1e_wphy(hw, PHY_REG(769, 16), data); if (ret_val) @@ -1559,6 +1597,69 @@ out: } /** + * e1000_k1_gig_workaround_lv - K1 Si workaround + * @hw: pointer to the HW structure + * + * Workaround to set the K1 beacon duration for 82579 parts + **/ +static s32 e1000_k1_workaround_lv(struct e1000_hw *hw) +{ + s32 ret_val = 0; + u16 status_reg = 0; + u32 mac_reg; + + if (hw->mac.type != e1000_pch2lan) + goto out; + + /* Set K1 beacon duration based on 1Gbps speed or otherwise */ + ret_val = e1e_rphy(hw, HV_M_STATUS, &status_reg); + if (ret_val) + goto out; + + if ((status_reg & (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) + == (HV_M_STATUS_LINK_UP | HV_M_STATUS_AUTONEG_COMPLETE)) { + mac_reg = er32(FEXTNVM4); + mac_reg &= ~E1000_FEXTNVM4_BEACON_DURATION_MASK; + + if (status_reg & HV_M_STATUS_SPEED_1000) + mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_8USEC; + else + mac_reg |= E1000_FEXTNVM4_BEACON_DURATION_16USEC; + + ew32(FEXTNVM4, mac_reg); + } + +out: + return ret_val; +} + +/** + * e1000_gate_hw_phy_config_ich8lan - disable PHY config via hardware + * @hw: pointer to the HW structure + * @gate: boolean set to true to gate, false to ungate + * + * Gate/ungate the automatic PHY configuration via hardware; perform + * the configuration via software instead. 
+ **/ +static void e1000_gate_hw_phy_config_ich8lan(struct e1000_hw *hw, bool gate) +{ + u32 extcnf_ctrl; + + if (hw->mac.type != e1000_pch2lan) + return; + + extcnf_ctrl = er32(EXTCNF_CTRL); + + if (gate) + extcnf_ctrl |= E1000_EXTCNF_CTRL_GATE_PHY_CFG; + else + extcnf_ctrl &= ~E1000_EXTCNF_CTRL_GATE_PHY_CFG; + + ew32(EXTCNF_CTRL, extcnf_ctrl); + return; +} + +/** * e1000_lan_init_done_ich8lan - Check for PHY config completion * @hw: pointer to the HW structure * @@ -1602,6 +1703,9 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) if (e1000_check_reset_block(hw)) goto out; + /* Allow time for h/w to get to quiescent state after reset */ + msleep(10); + /* Perform any necessary post-reset workarounds */ switch (hw->mac.type) { case e1000_pchlan: @@ -1630,6 +1734,13 @@ static s32 e1000_post_phy_reset_ich8lan(struct e1000_hw *hw) /* Configure the LCD with the OEM bits in NVM */ ret_val = e1000_oem_bits_config_ich8lan(hw, true); + /* Ungate automatic PHY configuration on non-managed 82579 */ + if ((hw->mac.type == e1000_pch2lan) && + !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) { + msleep(10); + e1000_gate_hw_phy_config_ich8lan(hw, false); + } + out: return ret_val; } @@ -1646,6 +1757,11 @@ static s32 e1000_phy_hw_reset_ich8lan(struct e1000_hw *hw) { s32 ret_val = 0; + /* Gate automatic PHY configuration by hardware on non-managed 82579 */ + if ((hw->mac.type == e1000_pch2lan) && + !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) + e1000_gate_hw_phy_config_ich8lan(hw, true); + ret_val = e1000e_phy_hw_reset_generic(hw); if (ret_val) goto out; @@ -2910,6 +3026,14 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) * external PHY is reset. */ ctrl |= E1000_CTRL_PHY_RST; + + /* + * Gate automatic PHY configuration by hardware on + * non-managed 82579 + */ + if ((hw->mac.type == e1000_pch2lan) && + !(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) + e1000_gate_hw_phy_config_ich8lan(hw, true); } ret_val = e1000_acquire_swflag_ich8lan(hw); e_dbg("Issuing a global reset to ich8lan\n"); @@ -3460,13 +3584,20 @@ void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw) void e1000e_disable_gig_wol_ich8lan(struct e1000_hw *hw) { u32 phy_ctrl; + s32 ret_val; phy_ctrl = er32(PHY_CTRL); phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU | E1000_PHY_CTRL_GBE_DISABLE; ew32(PHY_CTRL, phy_ctrl); - if (hw->mac.type >= e1000_pchlan) - e1000_phy_hw_reset_ich8lan(hw); + if (hw->mac.type >= e1000_pchlan) { + e1000_oem_bits_config_ich8lan(hw, true); + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return; + e1000_write_smbus_addr(hw); + hw->phy.ops.release(hw); + } } /** diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 2b8ef44..e561d15 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -2704,6 +2704,16 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) u32 psrctl = 0; u32 pages = 0; + /* Workaround Si errata on 82579 - configure jumbo frame flow */ + if (hw->mac.type == e1000_pch2lan) { + s32 ret_val; + + if (adapter->netdev->mtu > ETH_DATA_LEN) + ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, true); + else + ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false); + } + /* Program MC offset vector base */ rctl = er32(RCTL); rctl &= ~(3 << E1000_RCTL_MO_SHIFT); @@ -2744,16 +2754,6 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) e1e_wphy(hw, 22, phy_data); } - /* Workaround Si errata on 82579 - configure jumbo frame flow */ - if (hw->mac.type == e1000_pch2lan) { - s32 ret_val; - - if (rctl & E1000_RCTL_LPE) - ret_val = 
e1000_lv_jumbo_workaround_ich8lan(hw, true); - else - ret_val = e1000_lv_jumbo_workaround_ich8lan(hw, false); - } - /* Setup buffer sizes */ rctl &= ~E1000_RCTL_SZ_4096; rctl |= E1000_RCTL_BSEX; @@ -4833,6 +4833,15 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu) return -EINVAL; } + /* Jumbo frame workaround on 82579 requires CRC be stripped */ + if ((adapter->hw.mac.type == e1000_pch2lan) && + !(adapter->flags2 & FLAG2_CRC_STRIPPING) && + (new_mtu > ETH_DATA_LEN)) { + e_err("Jumbo Frames not supported on 82579 when CRC " + "stripping is disabled.\n"); + return -EINVAL; + } + /* 82573 Errata 17 */ if (((adapter->hw.mac.type == e1000_82573) || (adapter->hw.mac.type == e1000_82574)) && diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c index 3506fd6..519e19e 100644 --- a/drivers/net/ibm_newemac/core.c +++ b/drivers/net/ibm_newemac/core.c @@ -2928,7 +2928,7 @@ static int __devinit emac_probe(struct platform_device *ofdev, if (dev->emac_irq != NO_IRQ) irq_dispose_mapping(dev->emac_irq); err_free: - kfree(ndev); + free_netdev(ndev); err_gone: /* if we were on the bootlist, remove us as we won't show up and * wake up all waiters to notify them in case they were waiting @@ -2971,7 +2971,7 @@ static int __devexit emac_remove(struct platform_device *ofdev) if (dev->emac_irq != NO_IRQ) irq_dispose_mapping(dev->emac_irq); - kfree(dev->ndev); + free_netdev(dev->ndev); return 0; } diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index cabae7b..b075a35 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -1540,7 +1540,6 @@ netxen_process_rcv(struct netxen_adapter *adapter, if (pkt_offset) skb_pull(skb, pkt_offset); - skb->truesize = skb->len + sizeof(struct sk_buff); skb->protocol = eth_type_trans(skb, netdev); napi_gro_receive(&sds_ring->napi, skb); @@ -1602,8 +1601,6 @@ netxen_process_lro(struct netxen_adapter *adapter, skb_put(skb, lro_length + data_offset); - skb->truesize = skb->len + sizeof(struct sk_buff) + skb_headroom(skb); - skb_pull(skb, l2_hdr_offset); skb->protocol = eth_type_trans(skb, netdev); diff --git a/drivers/net/qlcnic/qlcnic_init.c b/drivers/net/qlcnic/qlcnic_init.c index 75ba744..2c7cf0b6 100644 --- a/drivers/net/qlcnic/qlcnic_init.c +++ b/drivers/net/qlcnic/qlcnic_init.c @@ -1316,7 +1316,7 @@ qlcnic_alloc_rx_skb(struct qlcnic_adapter *adapter, return -ENOMEM; } - skb_reserve(skb, 2); + skb_reserve(skb, NET_IP_ALIGN); dma = pci_map_single(pdev, skb->data, rds_ring->dma_size, PCI_DMA_FROMDEVICE); @@ -1404,7 +1404,6 @@ qlcnic_process_rcv(struct qlcnic_adapter *adapter, if (pkt_offset) skb_pull(skb, pkt_offset); - skb->truesize = skb->len + sizeof(struct sk_buff); skb->protocol = eth_type_trans(skb, netdev); napi_gro_receive(&sds_ring->napi, skb); @@ -1466,8 +1465,6 @@ qlcnic_process_lro(struct qlcnic_adapter *adapter, skb_put(skb, lro_length + data_offset); - skb->truesize = skb->len + sizeof(struct sk_buff) + skb_headroom(skb); - skb_pull(skb, l2_hdr_offset); skb->protocol = eth_type_trans(skb, netdev); @@ -1700,8 +1697,6 @@ qlcnic_process_rcv_diag(struct qlcnic_adapter *adapter, if (pkt_offset) skb_pull(skb, pkt_offset); - skb->truesize = skb->len + sizeof(struct sk_buff); - if (!qlcnic_check_loopback_buff(skb->data)) adapter->diag_cnt++; diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c index 07eb884..44150f2 100644 --- a/drivers/net/rionet.c +++ b/drivers/net/rionet.c @@ -384,7 +384,7 @@ static void rionet_remove(struct rio_dev *rdev) 
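/*
 * Sketch of why the qlcnic change above reserves NET_IP_ALIGN rather
 * than a bare "2": the constant shifts the 14-byte Ethernet header so
 * the IP header behind it lands on a 4-byte boundary, and it collapses
 * to 0 on platforms that need naturally aligned DMA buffers.
 */
static struct sk_buff *rx_alloc_skb(struct net_device *dev, unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);

	if (skb)
		skb_reserve(skb, NET_IP_ALIGN);	/* align the IP header */
	return skb;
}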
free_pages((unsigned long)rionet_active, rdev->net->hport->sys_size ? __ilog2(sizeof(void *)) + 4 : 0); unregister_netdev(ndev); - kfree(ndev); + free_netdev(ndev); list_for_each_entry_safe(peer, tmp, &rionet_peers, node) { list_del(&peer->node); diff --git a/drivers/net/sgiseeq.c b/drivers/net/sgiseeq.c index cc4bd8c..9265315 100644 --- a/drivers/net/sgiseeq.c +++ b/drivers/net/sgiseeq.c @@ -804,7 +804,7 @@ static int __devinit sgiseeq_probe(struct platform_device *pdev) err_out_free_page: free_page((unsigned long) sp->srings); err_out_free_dev: - kfree(dev); + free_netdev(dev); err_out: return err; diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c index 0909ae9..8150ba1 100644 --- a/drivers/net/smsc911x.c +++ b/drivers/net/smsc911x.c @@ -58,6 +58,7 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(SMSC_DRV_VERSION); +MODULE_ALIAS("platform:smsc911x"); #if USE_DEBUG > 0 static int debug = 16; diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c index 5efa577..6888e3d 100644 --- a/drivers/net/tulip/de2104x.c +++ b/drivers/net/tulip/de2104x.c @@ -243,6 +243,7 @@ enum { NWayState = (1 << 14) | (1 << 13) | (1 << 12), NWayRestart = (1 << 12), NonselPortActive = (1 << 9), + SelPortActive = (1 << 8), LinkFailStatus = (1 << 2), NetCxnErr = (1 << 1), }; @@ -363,7 +364,9 @@ static u16 t21040_csr15[] = { 0, 0, 0x0006, 0x0000, 0x0000, }; /* 21041 transceiver register settings: TP AUTO, BNC, AUI, TP, TP FD*/ static u16 t21041_csr13[] = { 0xEF01, 0xEF09, 0xEF09, 0xEF01, 0xEF09, }; -static u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x6F3F, 0x6F3D, }; +static u16 t21041_csr14[] = { 0xFFFF, 0xF7FD, 0xF7FD, 0x7F3F, 0x7F3D, }; +/* If on-chip autonegotiation is broken, use half-duplex (FF3F) instead */ +static u16 t21041_csr14_brk[] = { 0xFF3F, 0xF7FD, 0xF7FD, 0x7F3F, 0x7F3D, }; static u16 t21041_csr15[] = { 0x0008, 0x0006, 0x000E, 0x0008, 0x0008, }; @@ -1064,6 +1067,9 @@ static void de21041_media_timer (unsigned long data) unsigned int carrier; unsigned long flags; + /* clear port active bits */ + dw32(SIAStatus, NonselPortActive | SelPortActive); + carrier = (status & NetCxnErr) ? 
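/*
 * Sketch of the teardown rule the free_netdev() conversions above
 * (newemac, rionet, sgiseeq; ctcm later in the patch) follow: a struct
 * net_device that came from alloc_etherdev()/alloc_netdev() must be
 * released with free_netdev(), never a bare kfree(), so its private
 * area and per-device state go away correctly.  struct my_priv is an
 * illustrative placeholder.
 */
static int my_probe(struct platform_device *pdev)
{
	struct net_device *ndev = alloc_etherdev(sizeof(struct my_priv));
	int err;

	if (!ndev)
		return -ENOMEM;

	err = register_netdev(ndev);
	if (err) {
		free_netdev(ndev);	/* not kfree(ndev) */
		return err;
	}
	return 0;
}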
0 : 1; if (carrier) { @@ -1158,14 +1164,29 @@ no_link_yet: static void de_media_interrupt (struct de_private *de, u32 status) { if (status & LinkPass) { + /* Ignore if current media is AUI or BNC and we can't use TP */ + if ((de->media_type == DE_MEDIA_AUI || + de->media_type == DE_MEDIA_BNC) && + (de->media_lock || + !de_ok_to_advertise(de, DE_MEDIA_TP_AUTO))) + return; + /* If current media is not TP, change it to TP */ + if ((de->media_type == DE_MEDIA_AUI || + de->media_type == DE_MEDIA_BNC)) { + de->media_type = DE_MEDIA_TP_AUTO; + de_stop_rxtx(de); + de_set_media(de); + de_start_rxtx(de); + } de_link_up(de); mod_timer(&de->media_timer, jiffies + DE_TIMER_LINK); return; } BUG_ON(!(status & LinkFail)); - - if (netif_carrier_ok(de->dev)) { + /* Mark the link as down only if current media is TP */ + if (netif_carrier_ok(de->dev) && de->media_type != DE_MEDIA_AUI && + de->media_type != DE_MEDIA_BNC) { de_link_down(de); mod_timer(&de->media_timer, jiffies + DE_TIMER_NO_LINK); } @@ -1229,6 +1250,7 @@ static void de_adapter_sleep (struct de_private *de) if (de->de21040) return; + dw32(CSR13, 0); /* Reset phy */ pci_read_config_dword(de->pdev, PCIPM, &pmctl); pmctl |= PM_Sleep; pci_write_config_dword(de->pdev, PCIPM, pmctl); @@ -1574,12 +1596,15 @@ static int __de_set_settings(struct de_private *de, struct ethtool_cmd *ecmd) return 0; /* nothing to change */ de_link_down(de); + mod_timer(&de->media_timer, jiffies + DE_TIMER_NO_LINK); de_stop_rxtx(de); de->media_type = new_media; de->media_lock = media_lock; de->media_advertise = ecmd->advertising; de_set_media(de); + if (netif_running(de->dev)) + de_start_rxtx(de); return 0; } @@ -1911,8 +1936,14 @@ fill_defaults: for (i = 0; i < DE_MAX_MEDIA; i++) { if (de->media[i].csr13 == 0xffff) de->media[i].csr13 = t21041_csr13[i]; - if (de->media[i].csr14 == 0xffff) - de->media[i].csr14 = t21041_csr14[i]; + if (de->media[i].csr14 == 0xffff) { + /* autonegotiation is broken at least on some chip + revisions - rev. 
0x21 works, 0x11 does not */ + if (de->pdev->revision < 0x20) + de->media[i].csr14 = t21041_csr14_brk[i]; + else + de->media[i].csr14 = t21041_csr14[i]; + } if (de->media[i].csr15 == 0xffff) de->media[i].csr15 = t21041_csr15[i]; } @@ -2158,6 +2189,8 @@ static int de_resume (struct pci_dev *pdev) dev_err(&dev->dev, "pci_enable_device failed in resume\n"); goto out; } + pci_set_master(pdev); + de_init_rings(de); de_init_hw(de); out_attach: netif_device_attach(dev); diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-lib.c b/drivers/net/wireless/iwlwifi/iwl-agn-lib.c index 9dd9e64..8fd00a6 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-lib.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-lib.c @@ -1411,7 +1411,7 @@ void iwlagn_request_scan(struct iwl_priv *priv, struct ieee80211_vif *vif) clear_bit(STATUS_SCAN_HW, &priv->status); clear_bit(STATUS_SCANNING, &priv->status); /* inform mac80211 scan aborted */ - queue_work(priv->workqueue, &priv->scan_completed); + queue_work(priv->workqueue, &priv->abort_scan); } int iwlagn_manage_ibss_station(struct iwl_priv *priv, diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 07dbc27..e23c406 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -2613,6 +2613,11 @@ int iwl_force_reset(struct iwl_priv *priv, int mode, bool external) if (test_bit(STATUS_EXIT_PENDING, &priv->status)) return -EINVAL; + if (test_bit(STATUS_SCANNING, &priv->status)) { + IWL_DEBUG_INFO(priv, "scan in progress.\n"); + return -EINVAL; + } + if (mode >= IWL_MAX_FORCE_RESET) { IWL_DEBUG_INFO(priv, "invalid reset request.\n"); return -EINVAL; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 59a308b..d31661c 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -3018,7 +3018,7 @@ void iwl3945_request_scan(struct iwl_priv *priv, struct ieee80211_vif *vif) clear_bit(STATUS_SCANNING, &priv->status); /* inform mac80211 scan aborted */ - queue_work(priv->workqueue, &priv->scan_completed); + queue_work(priv->workqueue, &priv->abort_scan); } static void iwl3945_bg_restart(struct work_struct *data) diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c index b17235a..79c0005 100644 --- a/drivers/oprofile/oprofile_perf.c +++ b/drivers/oprofile/oprofile_perf.c @@ -77,7 +77,7 @@ static int op_create_counter(int cpu, int event) return 0; pevent = perf_event_create_kernel_counter(&counter_config[event].attr, - cpu, -1, + cpu, NULL, op_overflow_handler); if (IS_ERR(pevent)) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index c3ceebb..4789f8e 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -71,6 +71,49 @@ #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) +/* page table handling */ +#define LEVEL_STRIDE (9) +#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) + +static inline int agaw_to_level(int agaw) +{ + return agaw + 2; +} + +static inline int agaw_to_width(int agaw) +{ + return 30 + agaw * LEVEL_STRIDE; +} + +static inline int width_to_agaw(int width) +{ + return (width - 30) / LEVEL_STRIDE; +} + +static inline unsigned int level_to_offset_bits(int level) +{ + return (level - 1) * LEVEL_STRIDE; +} + +static inline int pfn_level_offset(unsigned long pfn, int level) +{ + return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; +} + +static inline unsigned long 
level_mask(int level) +{ + return -1UL << level_to_offset_bits(level); +} + +static inline unsigned long level_size(int level) +{ + return 1UL << level_to_offset_bits(level); +} + +static inline unsigned long align_to_level(unsigned long pfn, int level) +{ + return (pfn + level_size(level) - 1) & level_mask(level); +} /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. */ @@ -434,8 +477,6 @@ void free_iova_mem(struct iova *iova) } -static inline int width_to_agaw(int width); - static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw) { unsigned long sagaw; @@ -646,51 +687,6 @@ out: spin_unlock_irqrestore(&iommu->lock, flags); } -/* page table handling */ -#define LEVEL_STRIDE (9) -#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1) - -static inline int agaw_to_level(int agaw) -{ - return agaw + 2; -} - -static inline int agaw_to_width(int agaw) -{ - return 30 + agaw * LEVEL_STRIDE; - -} - -static inline int width_to_agaw(int width) -{ - return (width - 30) / LEVEL_STRIDE; -} - -static inline unsigned int level_to_offset_bits(int level) -{ - return (level - 1) * LEVEL_STRIDE; -} - -static inline int pfn_level_offset(unsigned long pfn, int level) -{ - return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK; -} - -static inline unsigned long level_mask(int level) -{ - return -1UL << level_to_offset_bits(level); -} - -static inline unsigned long level_size(int level) -{ - return 1UL << level_to_offset_bits(level); -} - -static inline unsigned long align_to_level(unsigned long pfn, int level) -{ - return (pfn + level_size(level) - 1) & level_mask(level); -} - static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, unsigned long pfn) { @@ -3761,6 +3757,33 @@ static void __devinit quirk_iommu_rwbf(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf); +#define GGC 0x52 +#define GGC_MEMORY_SIZE_MASK (0xf << 8) +#define GGC_MEMORY_SIZE_NONE (0x0 << 8) +#define GGC_MEMORY_SIZE_1M (0x1 << 8) +#define GGC_MEMORY_SIZE_2M (0x3 << 8) +#define GGC_MEMORY_VT_ENABLED (0x8 << 8) +#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8) +#define GGC_MEMORY_SIZE_3M_VT (0xa << 8) +#define GGC_MEMORY_SIZE_4M_VT (0xb << 8) + +static void __devinit quirk_calpella_no_shadow_gtt(struct pci_dev *dev) +{ + unsigned short ggc; + + if (pci_read_config_word(dev, GGC, &ggc)) + return; + + if (!(ggc & GGC_MEMORY_VT_ENABLED)) { + printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); + dmar_map_gfx = 0; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt); + /* On Tylersburg chipsets, some BIOSes have been known to enable the ISOCH DMAR unit for the Azalia sound device, but not give it any TLB entries, which causes it to deadlock. Check for that. We do diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c index ce6a366..553d8ee 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -608,7 +608,7 @@ int pci_iov_resource_bar(struct pci_dev *dev, int resno, * the VF BAR size multiplied by the number of VFs. The alignment * is just the VF BAR size. 
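/*
 * Worked example for the page-table helpers consolidated above, using
 * only the definitions shown: a 48-bit address width gives
 * width_to_agaw(48) = (48 - 30) / 9 = 2 and agaw_to_level(2) = 4, i.e.
 * a four-level table.  level_size() counts 4 KiB pages, so
 * level_size(2) = 1 << 9 = 512 pages (2 MiB of IOVA per entry) and
 * level_size(3) = 1 << 18 pages (1 GiB per entry).  VTD_PAGE_SHIFT is
 * assumed to be the driver's 4 KiB page shift.
 */
static u64 bytes_per_entry(int level)
{
	/* IOVA bytes covered by a single entry at the given level */
	return (u64)level_size(level) << VTD_PAGE_SHIFT;
}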
*/ -int pci_sriov_resource_alignment(struct pci_dev *dev, int resno) +resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) { struct resource tmp; enum pci_bar_type type; diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 7754a67..6beb11b 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -264,7 +264,8 @@ extern int pci_iov_init(struct pci_dev *dev); extern void pci_iov_release(struct pci_dev *dev); extern int pci_iov_resource_bar(struct pci_dev *dev, int resno, enum pci_bar_type *type); -extern int pci_sriov_resource_alignment(struct pci_dev *dev, int resno); +extern resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, + int resno); extern void pci_restore_iov_state(struct pci_dev *dev); extern int pci_iov_bus_range(struct pci_bus *bus); @@ -320,7 +321,7 @@ static inline int pci_ats_enabled(struct pci_dev *dev) } #endif /* CONFIG_PCI_IOV */ -static inline int pci_resource_alignment(struct pci_dev *dev, +static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, struct resource *res) { #ifdef CONFIG_PCI_IOV diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 89ed181..857ae01 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -163,6 +163,26 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_2, quirk_isa_d DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_CBUS_3, quirk_isa_dma_hangs); /* + * Intel NM10 "TigerPoint" LPC PM1a_STS.BM_STS must be clear + * for some HT machines to use C4 w/o hanging. + */ +static void __devinit quirk_tigerpoint_bm_sts(struct pci_dev *dev) +{ + u32 pmbase; + u16 pm1a; + + pci_read_config_dword(dev, 0x40, &pmbase); + pmbase = pmbase & 0xff80; + pm1a = inw(pmbase); + + if (pm1a & 0x10) { + dev_info(&dev->dev, FW_BUG "TigerPoint LPC.BM_STS cleared\n"); + outw(0x10, pmbase); + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_TGP_LPC, quirk_tigerpoint_bm_sts); + +/* * Chipsets where PCI->PCI transfers vanish or hang */ static void __devinit quirk_nopcipci(struct pci_dev *dev) diff --git a/drivers/pcmcia/pcmcia_resource.c b/drivers/pcmcia/pcmcia_resource.c index a5c1765..9ba4dad 100644 --- a/drivers/pcmcia/pcmcia_resource.c +++ b/drivers/pcmcia/pcmcia_resource.c @@ -595,7 +595,13 @@ int pcmcia_request_io(struct pcmcia_device *p_dev) if (c->io[1].end) { ret = alloc_io_space(s, &c->io[1], p_dev->io_lines); if (ret) { + struct resource tmp = c->io[0]; + /* release the previously allocated resource */ release_io_space(s, &c->io[0]); + /* but preserve the settings, for they worked... 
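/*
 * Sketch of what the resource_size_t conversions above guard against:
 * a resource (for example the combined size of all VF BARs) can exceed
 * what fits in an int, so alignment and size values have to travel as
 * resource_size_t end to end; returning int would silently truncate a
 * 4 GiB range to 0.
 */
static resource_size_t my_alignment(const struct resource *res)
{
	return resource_size(res);	/* resource_size_t, not int */
}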
*/ + c->io[0].end = resource_size(&tmp); + c->io[0].start = tmp.start; + c->io[0].flags = tmp.flags; goto out; } } else diff --git a/drivers/pcmcia/pd6729.c b/drivers/pcmcia/pd6729.c index b8a869a..deef665 100644 --- a/drivers/pcmcia/pd6729.c +++ b/drivers/pcmcia/pd6729.c @@ -646,7 +646,7 @@ static int __devinit pd6729_pci_probe(struct pci_dev *dev, if (!pci_resource_start(dev, 0)) { dev_warn(&dev->dev, "refusing to load the driver as the " "io_base is NULL.\n"); - goto err_out_free_mem; + goto err_out_disable; } dev_info(&dev->dev, "Cirrus PD6729 PCI to PCMCIA Bridge at 0x%llx " diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e35ed12..2d61186 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -3093,7 +3093,8 @@ static const struct tpacpi_quirk tpacpi_hotkey_qtable[] __initconst = { TPACPI_Q_IBM('1', 'D', TPACPI_HK_Q_INIMASK), /* X22, X23, X24 */ }; -typedef u16 tpacpi_keymap_t[TPACPI_HOTKEY_MAP_LEN]; +typedef u16 tpacpi_keymap_entry_t; +typedef tpacpi_keymap_entry_t tpacpi_keymap_t[TPACPI_HOTKEY_MAP_LEN]; static int __init hotkey_init(struct ibm_init_struct *iibm) { @@ -3230,7 +3231,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) }; #define TPACPI_HOTKEY_MAP_SIZE sizeof(tpacpi_keymap_t) -#define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(tpacpi_keymap_t[0]) +#define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(tpacpi_keymap_entry_t) int res, i; int status; diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 422a709..cc8b337 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -700,7 +700,7 @@ static void print_constraints(struct regulator_dev *rdev) constraints->min_uA != constraints->max_uA) { ret = _regulator_get_current_limit(rdev); if (ret > 0) - count += sprintf(buf + count, "at %d uA ", ret / 1000); + count += sprintf(buf + count, "at %d mA ", ret / 1000); } if (constraints->valid_modes_mask & REGULATOR_MODE_FAST) @@ -2302,8 +2302,10 @@ struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc, dev_set_name(&rdev->dev, "regulator.%d", atomic_inc_return(®ulator_no) - 1); ret = device_register(&rdev->dev); - if (ret != 0) + if (ret != 0) { + put_device(&rdev->dev); goto clean; + } dev_set_drvdata(&rdev->dev, rdev); diff --git a/drivers/regulator/max8649.c b/drivers/regulator/max8649.c index 4520ace..6b60a9c 100644 --- a/drivers/regulator/max8649.c +++ b/drivers/regulator/max8649.c @@ -330,7 +330,7 @@ static int __devinit max8649_regulator_probe(struct i2c_client *client, /* set external clock frequency */ info->extclk_freq = pdata->extclk_freq; max8649_set_bits(info->i2c, MAX8649_SYNC, MAX8649_EXT_MASK, - info->extclk_freq); + info->extclk_freq << 6); } if (pdata->ramp_timing) { diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c index d26780e..261a07e 100644 --- a/drivers/rtc/rtc-ab3100.c +++ b/drivers/rtc/rtc-ab3100.c @@ -235,6 +235,7 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev) err = PTR_ERR(rtc); return err; } + platform_set_drvdata(pdev, rtc); return 0; } @@ -244,6 +245,7 @@ static int __exit ab3100_rtc_remove(struct platform_device *pdev) struct rtc_device *rtc = platform_get_drvdata(pdev); rtc_device_unregister(rtc); + platform_set_drvdata(pdev, NULL); return 0; } diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c index a0d3ec8..f57a87f 100644 --- a/drivers/rtc/rtc-s3c.c +++ b/drivers/rtc/rtc-s3c.c @@ -310,11 +310,6 @@ static int s3c_rtc_setalarm(struct device *dev, struct rtc_wkalrm 
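/*
 * Sketch of the refcount rule behind the regulator_register() fix
 * above: once device_register() (or device_add()) has run, the struct
 * device is live even on failure, so the error path must drop it with
 * put_device(), letting the release callback free the containing
 * object, rather than calling kfree() directly.  Names are illustrative.
 */
static int my_register(struct my_obj *obj)
{
	int ret;

	device_initialize(&obj->dev);
	obj->dev.release = my_release;	/* kfree(obj) lives in here */

	ret = device_add(&obj->dev);
	if (ret) {
		put_device(&obj->dev);	/* not kfree(obj) */
		return ret;
	}
	return 0;
}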
*alrm) s3c_rtc_setaie(alrm->enabled); - if (alrm->enabled) - enable_irq_wake(s3c_rtc_alarmno); - else - disable_irq_wake(s3c_rtc_alarmno); - return 0; } @@ -587,6 +582,10 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state) ticnt_en_save &= S3C64XX_RTCCON_TICEN; } s3c_rtc_enable(pdev, 0); + + if (device_may_wakeup(&pdev->dev)) + enable_irq_wake(s3c_rtc_alarmno); + return 0; } @@ -600,6 +599,10 @@ static int s3c_rtc_resume(struct platform_device *pdev) tmp = readb(s3c_rtc_base + S3C2410_RTCCON); writeb(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON); } + + if (device_may_wakeup(&pdev->dev)) + disable_irq_wake(s3c_rtc_alarmno); + return 0; } #else diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c index 6edf20b..2c7d2d9 100644 --- a/drivers/s390/net/ctcm_main.c +++ b/drivers/s390/net/ctcm_main.c @@ -1154,7 +1154,7 @@ static struct net_device *ctcm_init_netdevice(struct ctcm_priv *priv) dev_fsm, dev_fsm_len, GFP_KERNEL); if (priv->fsm == NULL) { CTCMY_DBF_DEV(SETUP, dev, "init_fsm error"); - kfree(dev); + free_netdev(dev); return NULL; } fsm_newstate(priv->fsm, DEV_STATE_STOPPED); @@ -1165,7 +1165,7 @@ static struct net_device *ctcm_init_netdevice(struct ctcm_priv *priv) grp = ctcmpc_init_mpc_group(priv); if (grp == NULL) { MPC_DBF_DEV(SETUP, dev, "init_mpc_group error"); - kfree(dev); + free_netdev(dev); return NULL; } tasklet_init(&grp->mpc_tasklet2, diff --git a/drivers/serial/mfd.c b/drivers/serial/mfd.c index 324c385..5dff45c 100644 --- a/drivers/serial/mfd.c +++ b/drivers/serial/mfd.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/console.h> #include <linux/sysrq.h> +#include <linux/slab.h> #include <linux/serial_reg.h> #include <linux/circ_buf.h> #include <linux/delay.h> diff --git a/drivers/serial/mrst_max3110.c b/drivers/serial/mrst_max3110.c index f6ad1ec..51c15f5 100644 --- a/drivers/serial/mrst_max3110.c +++ b/drivers/serial/mrst_max3110.c @@ -29,6 +29,7 @@ #include <linux/module.h> #include <linux/ioport.h> +#include <linux/irq.h> #include <linux/init.h> #include <linux/console.h> #include <linux/sysrq.h> diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 0bcf4c1..b5a78a1 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -23,6 +23,7 @@ #include <linux/init.h> #include <linux/cache.h> #include <linux/mutex.h> +#include <linux/of_device.h> #include <linux/slab.h> #include <linux/mod_devicetable.h> #include <linux/spi/spi.h> @@ -86,6 +87,10 @@ static int spi_match_device(struct device *dev, struct device_driver *drv) const struct spi_device *spi = to_spi_device(dev); const struct spi_driver *sdrv = to_spi_driver(drv); + /* Attempt an OF style match */ + if (of_driver_match_device(dev, drv)) + return 1; + if (sdrv->id_table) return !!spi_match_id(sdrv->id_table, spi); diff --git a/drivers/spi/spi_gpio.c b/drivers/spi/spi_gpio.c index e24a634..63e51b0 100644 --- a/drivers/spi/spi_gpio.c +++ b/drivers/spi/spi_gpio.c @@ -350,7 +350,7 @@ static int __init spi_gpio_probe(struct platform_device *pdev) spi_gpio->bitbang.master = spi_master_get(master); spi_gpio->bitbang.chipselect = spi_gpio_chipselect; - if ((master_flags & (SPI_MASTER_NO_RX | SPI_MASTER_NO_RX)) == 0) { + if ((master_flags & (SPI_MASTER_NO_TX | SPI_MASTER_NO_RX)) == 0) { spi_gpio->bitbang.txrx_word[SPI_MODE_0] = spi_gpio_txrx_word_mode0; spi_gpio->bitbang.txrx_word[SPI_MODE_1] = spi_gpio_txrx_word_mode1; spi_gpio->bitbang.txrx_word[SPI_MODE_2] = spi_gpio_txrx_word_mode2; diff --git a/drivers/spi/spi_mpc8xxx.c b/drivers/spi/spi_mpc8xxx.c index 
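/*
 * Sketch of the wakeup handling the rtc-s3c change above converges on:
 * arm the alarm IRQ as a wake source only across suspend, and only when
 * userspace has left the device wakeup-capable, instead of toggling
 * irq_wake from the set-alarm path.  my_alarm_irq is illustrative.
 */
static int my_suspend(struct platform_device *pdev, pm_message_t state)
{
	if (device_may_wakeup(&pdev->dev))
		enable_irq_wake(my_alarm_irq);
	return 0;
}

static int my_resume(struct platform_device *pdev)
{
	if (device_may_wakeup(&pdev->dev))
		disable_irq_wake(my_alarm_irq);
	return 0;
}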
d31b57f..1dd86b8 100644 --- a/drivers/spi/spi_mpc8xxx.c +++ b/drivers/spi/spi_mpc8xxx.c @@ -408,11 +408,17 @@ static void mpc8xxx_spi_cpm_bufs_start(struct mpc8xxx_spi *mspi) xfer_ofs = mspi->xfer_in_progress->len - mspi->count; - out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma + xfer_ofs); + if (mspi->rx_dma == mspi->dma_dummy_rx) + out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma); + else + out_be32(&rx_bd->cbd_bufaddr, mspi->rx_dma + xfer_ofs); out_be16(&rx_bd->cbd_datlen, 0); out_be16(&rx_bd->cbd_sc, BD_SC_EMPTY | BD_SC_INTRPT | BD_SC_WRAP); - out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma + xfer_ofs); + if (mspi->tx_dma == mspi->dma_dummy_tx) + out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma); + else + out_be32(&tx_bd->cbd_bufaddr, mspi->tx_dma + xfer_ofs); out_be16(&tx_bd->cbd_datlen, xfer_len); out_be16(&tx_bd->cbd_sc, BD_SC_READY | BD_SC_INTRPT | BD_SC_WRAP | BD_SC_LAST); diff --git a/drivers/staging/ti-st/st.h b/drivers/staging/ti-st/st.h index 9952579..1b3060e 100644 --- a/drivers/staging/ti-st/st.h +++ b/drivers/staging/ti-st/st.h @@ -80,5 +80,4 @@ struct st_proto_s { extern long st_register(struct st_proto_s *); extern long st_unregister(enum proto_type); -extern struct platform_device *st_get_plat_device(void); #endif /* ST_H */ diff --git a/drivers/staging/ti-st/st_core.c b/drivers/staging/ti-st/st_core.c index 063c9b1..b85d8bf 100644 --- a/drivers/staging/ti-st/st_core.c +++ b/drivers/staging/ti-st/st_core.c @@ -38,7 +38,6 @@ #include "st_ll.h" #include "st.h" -#define VERBOSE /* strings to be used for rfkill entries and by * ST Core to be used for sysfs debug entry */ @@ -581,7 +580,7 @@ long st_register(struct st_proto_s *new_proto) long err = 0; unsigned long flags = 0; - st_kim_ref(&st_gdata); + st_kim_ref(&st_gdata, 0); pr_info("%s(%d) ", __func__, new_proto->type); if (st_gdata == NULL || new_proto == NULL || new_proto->recv == NULL || new_proto->reg_complete_cb == NULL) { @@ -713,7 +712,7 @@ long st_unregister(enum proto_type type) pr_debug("%s: %d ", __func__, type); - st_kim_ref(&st_gdata); + st_kim_ref(&st_gdata, 0); if (type < ST_BT || type >= ST_MAX) { pr_err(" protocol %d not supported", type); return -EPROTONOSUPPORT; @@ -767,7 +766,7 @@ long st_write(struct sk_buff *skb) #endif long len; - st_kim_ref(&st_gdata); + st_kim_ref(&st_gdata, 0); if (unlikely(skb == NULL || st_gdata == NULL || st_gdata->tty == NULL)) { pr_err("data/tty unavailable to perform write"); @@ -818,7 +817,7 @@ static int st_tty_open(struct tty_struct *tty) struct st_data_s *st_gdata; pr_info("%s ", __func__); - st_kim_ref(&st_gdata); + st_kim_ref(&st_gdata, 0); st_gdata->tty = tty; tty->disc_data = st_gdata; diff --git a/drivers/staging/ti-st/st_core.h b/drivers/staging/ti-st/st_core.h index e0c32d1..8601320 100644 --- a/drivers/staging/ti-st/st_core.h +++ b/drivers/staging/ti-st/st_core.h @@ -117,7 +117,7 @@ int st_core_init(struct st_data_s **); void st_core_exit(struct st_data_s *); /* ask for reference from KIM */ -void st_kim_ref(struct st_data_s **); +void st_kim_ref(struct st_data_s **, int); #define GPS_STUB_TEST #ifdef GPS_STUB_TEST diff --git a/drivers/staging/ti-st/st_kim.c b/drivers/staging/ti-st/st_kim.c index b4a6c7f..9e99463 100644 --- a/drivers/staging/ti-st/st_kim.c +++ b/drivers/staging/ti-st/st_kim.c @@ -72,11 +72,26 @@ const unsigned char *protocol_names[] = { PROTO_ENTRY(ST_GPS, "GPS"), }; +#define MAX_ST_DEVICES 3 /* Imagine 1 on each UART for now */ +struct platform_device *st_kim_devices[MAX_ST_DEVICES]; /**********************************************************************/ /* 
internal functions */ /** + * st_get_plat_device - + * function which returns the reference to the platform device + * requested by id. As of now only 1 such device exists (id=0) + * the context requesting for reference can get the id to be + * requested by a. The protocol driver which is registering or + * b. the tty device which is opened. + */ +static struct platform_device *st_get_plat_device(int id) +{ + return st_kim_devices[id]; +} + +/** * validate_firmware_response - * function to return whether the firmware response was proper * in case of error don't complete so that waiting for proper @@ -353,7 +368,7 @@ void st_kim_chip_toggle(enum proto_type type, enum kim_gpio_state state) struct kim_data_s *kim_gdata; pr_info(" %s ", __func__); - kim_pdev = st_get_plat_device(); + kim_pdev = st_get_plat_device(0); kim_gdata = dev_get_drvdata(&kim_pdev->dev); if (kim_gdata->gpios[type] == -1) { @@ -574,12 +589,12 @@ static int kim_toggle_radio(void *data, bool blocked) * This would enable multiple such platform devices to exist * on a given platform */ -void st_kim_ref(struct st_data_s **core_data) +void st_kim_ref(struct st_data_s **core_data, int id) { struct platform_device *pdev; struct kim_data_s *kim_gdata; /* get kim_gdata reference from platform device */ - pdev = st_get_plat_device(); + pdev = st_get_plat_device(id); kim_gdata = dev_get_drvdata(&pdev->dev); *core_data = kim_gdata->core_data; } @@ -623,6 +638,7 @@ static int kim_probe(struct platform_device *pdev) long *gpios = pdev->dev.platform_data; struct kim_data_s *kim_gdata; + st_kim_devices[pdev->id] = pdev; kim_gdata = kzalloc(sizeof(struct kim_data_s), GFP_ATOMIC); if (!kim_gdata) { pr_err("no mem to allocate"); diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig index 7e594449..9eed5b5 100644 --- a/drivers/usb/core/Kconfig +++ b/drivers/usb/core/Kconfig @@ -91,12 +91,12 @@ config USB_DYNAMIC_MINORS If you are unsure about this, say N here. config USB_SUSPEND - bool "USB runtime power management (suspend/resume and wakeup)" + bool "USB runtime power management (autosuspend) and wakeup" depends on USB && PM_RUNTIME help If you say Y here, you can use driver calls or the sysfs - "power/level" file to suspend or resume individual USB - peripherals and to enable or disable autosuspend (see + "power/control" file to enable or disable autosuspend for + individual USB peripherals (see Documentation/usb/power-management.txt for more details). 
Also, USB "remote wakeup" signaling is supported, whereby some diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c index f06f5db..1e6ccef 100644 --- a/drivers/usb/core/file.c +++ b/drivers/usb/core/file.c @@ -159,9 +159,9 @@ void usb_major_cleanup(void) int usb_register_dev(struct usb_interface *intf, struct usb_class_driver *class_driver) { - int retval = -EINVAL; + int retval; int minor_base = class_driver->minor_base; - int minor = 0; + int minor; char name[20]; char *temp; @@ -173,12 +173,17 @@ int usb_register_dev(struct usb_interface *intf, */ minor_base = 0; #endif - intf->minor = -1; - - dbg ("looking for a minor, starting at %d", minor_base); if (class_driver->fops == NULL) - goto exit; + return -EINVAL; + if (intf->minor >= 0) + return -EADDRINUSE; + + retval = init_usb_class(); + if (retval) + return retval; + + dev_dbg(&intf->dev, "looking for a minor, starting at %d", minor_base); down_write(&minor_rwsem); for (minor = minor_base; minor < MAX_USB_MINORS; ++minor) { @@ -186,20 +191,12 @@ int usb_register_dev(struct usb_interface *intf, continue; usb_minors[minor] = class_driver->fops; - - retval = 0; + intf->minor = minor; break; } up_write(&minor_rwsem); - - if (retval) - goto exit; - - retval = init_usb_class(); - if (retval) - goto exit; - - intf->minor = minor; + if (intf->minor < 0) + return -EXFULL; /* create a usb class device for this usb interface */ snprintf(name, sizeof(name), class_driver->name, minor - minor_base); @@ -213,11 +210,11 @@ int usb_register_dev(struct usb_interface *intf, "%s", temp); if (IS_ERR(intf->usb_dev)) { down_write(&minor_rwsem); - usb_minors[intf->minor] = NULL; + usb_minors[minor] = NULL; + intf->minor = -1; up_write(&minor_rwsem); retval = PTR_ERR(intf->usb_dev); } -exit: return retval; } EXPORT_SYMBOL_GPL(usb_register_dev); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 844683e..9f0ce7d 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1802,6 +1802,7 @@ free_interfaces: intf->dev.groups = usb_interface_groups; intf->dev.dma_mask = dev->dev.dma_mask; INIT_WORK(&intf->reset_ws, __usb_queue_reset_device); + intf->minor = -1; device_initialize(&intf->dev); dev_set_name(&intf->dev, "%d-%s:%d.%d", dev->bus->busnum, dev->devpath, diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index 59dc3d3..5ab5bb8 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -322,6 +322,7 @@ cppi_channel_allocate(struct dma_controller *c, index, transmit ? 'T' : 'R', cppi_ch); cppi_ch->hw_ep = ep; cppi_ch->channel.status = MUSB_DMA_STATUS_FREE; + cppi_ch->channel.max_len = 0x7fffffff; DBG(4, "Allocate CPPI%d %cX\n", index, transmit ? 
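/*
 * Sketch of the minor-number handling usb_register_dev() above settles
 * on: the interface starts with minor = -1 (see the message.c hunk), a
 * free slot is claimed under the writer side of the semaphore, and any
 * later failure clears both again under the same lock.  MAX_MINORS and
 * claim_minor() are illustrative stand-ins.
 */
static DECLARE_RWSEM(minor_rwsem);
static const struct file_operations *minors[MAX_MINORS];

static int claim_minor(struct usb_interface *intf,
		       const struct file_operations *fops, int base)
{
	int minor;

	down_write(&minor_rwsem);
	for (minor = base; minor < MAX_MINORS; minor++) {
		if (minors[minor])
			continue;
		minors[minor] = fops;
		intf->minor = minor;
		break;
	}
	up_write(&minor_rwsem);

	return intf->minor < 0 ? -EXFULL : 0;
}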
'T' : 'R'); return &cppi_ch->channel; diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 6fca870..d065e23 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -300,6 +300,11 @@ static void txstate(struct musb *musb, struct musb_request *req) #ifndef CONFIG_MUSB_PIO_ONLY if (is_dma_capable() && musb_ep->dma) { struct dma_controller *c = musb->dma_controller; + size_t request_size; + + /* setup DMA, then program endpoint CSR */ + request_size = min_t(size_t, request->length - request->actual, + musb_ep->dma->max_len); use_dma = (request->dma != DMA_ADDR_INVALID); @@ -307,11 +312,6 @@ static void txstate(struct musb *musb, struct musb_request *req) #ifdef CONFIG_USB_INVENTRA_DMA { - size_t request_size; - - /* setup DMA, then program endpoint CSR */ - request_size = min_t(size_t, request->length, - musb_ep->dma->max_len); if (request_size < musb_ep->packet_sz) musb_ep->dma->desired_mode = 0; else @@ -373,8 +373,8 @@ static void txstate(struct musb *musb, struct musb_request *req) use_dma = use_dma && c->channel_program( musb_ep->dma, musb_ep->packet_sz, 0, - request->dma, - request->length); + request->dma + request->actual, + request_size); if (!use_dma) { c->channel_release(musb_ep->dma); musb_ep->dma = NULL; @@ -386,8 +386,8 @@ static void txstate(struct musb *musb, struct musb_request *req) use_dma = use_dma && c->channel_program( musb_ep->dma, musb_ep->packet_sz, request->zero, - request->dma, - request->length); + request->dma + request->actual, + request_size); #endif } #endif @@ -501,26 +501,14 @@ void musb_g_tx(struct musb *musb, u8 epnum) request->zero = 0; } - /* ... or if not, then complete it. */ - musb_g_giveback(musb_ep, request, 0); - - /* - * Kickstart next transfer if appropriate; - * the packet that just completed might not - * be transmitted for hours or days. - * REVISIT for double buffering... - * FIXME revisit for stalls too... - */ - musb_ep_select(mbase, epnum); - csr = musb_readw(epio, MUSB_TXCSR); - if (csr & MUSB_TXCSR_FIFONOTEMPTY) - return; - - request = musb_ep->desc ? next_request(musb_ep) : NULL; - if (!request) { - DBG(4, "%s idle now\n", - musb_ep->end_point.name); - return; + if (request->actual == request->length) { + musb_g_giveback(musb_ep, request, 0); + request = musb_ep->desc ? next_request(musb_ep) : NULL; + if (!request) { + DBG(4, "%s idle now\n", + musb_ep->end_point.name); + return; + } } } @@ -568,11 +556,19 @@ static void rxstate(struct musb *musb, struct musb_request *req) { const u8 epnum = req->epnum; struct usb_request *request = &req->request; - struct musb_ep *musb_ep = &musb->endpoints[epnum].ep_out; + struct musb_ep *musb_ep; void __iomem *epio = musb->endpoints[epnum].regs; unsigned fifo_count = 0; - u16 len = musb_ep->packet_sz; + u16 len; u16 csr = musb_readw(epio, MUSB_RXCSR); + struct musb_hw_ep *hw_ep = &musb->endpoints[epnum]; + + if (hw_ep->is_shared_fifo) + musb_ep = &hw_ep->ep_in; + else + musb_ep = &hw_ep->ep_out; + + len = musb_ep->packet_sz; /* We shouldn't get here while DMA is active, but we do... 
*/ if (dma_channel_status(musb_ep->dma) == MUSB_DMA_STATUS_BUSY) { @@ -647,8 +643,8 @@ static void rxstate(struct musb *musb, struct musb_request *req) */ csr |= MUSB_RXCSR_DMAENAB; -#ifdef USE_MODE1 csr |= MUSB_RXCSR_AUTOCLEAR; +#ifdef USE_MODE1 /* csr |= MUSB_RXCSR_DMAMODE; */ /* this special sequence (enabling and then @@ -663,10 +659,11 @@ static void rxstate(struct musb *musb, struct musb_request *req) if (request->actual < request->length) { int transfer_size = 0; #ifdef USE_MODE1 - transfer_size = min(request->length, + transfer_size = min(request->length - request->actual, channel->max_len); #else - transfer_size = len; + transfer_size = min(request->length - request->actual, + (unsigned)len); #endif if (transfer_size <= musb_ep->packet_sz) musb_ep->dma->desired_mode = 0; @@ -740,9 +737,15 @@ void musb_g_rx(struct musb *musb, u8 epnum) u16 csr; struct usb_request *request; void __iomem *mbase = musb->mregs; - struct musb_ep *musb_ep = &musb->endpoints[epnum].ep_out; + struct musb_ep *musb_ep; void __iomem *epio = musb->endpoints[epnum].regs; struct dma_channel *dma; + struct musb_hw_ep *hw_ep = &musb->endpoints[epnum]; + + if (hw_ep->is_shared_fifo) + musb_ep = &hw_ep->ep_in; + else + musb_ep = &hw_ep->ep_out; musb_ep_select(mbase, epnum); @@ -1081,7 +1084,7 @@ struct free_record { /* * Context: controller locked, IRQs blocked. */ -static void musb_ep_restart(struct musb *musb, struct musb_request *req) +void musb_ep_restart(struct musb *musb, struct musb_request *req) { DBG(3, "<== %s request %p len %u on hw_ep%d\n", req->tx ? "TX/IN" : "RX/OUT", diff --git a/drivers/usb/musb/musb_gadget.h b/drivers/usb/musb/musb_gadget.h index c8b1403..572b1da 100644 --- a/drivers/usb/musb/musb_gadget.h +++ b/drivers/usb/musb/musb_gadget.h @@ -105,4 +105,6 @@ extern void musb_gadget_cleanup(struct musb *); extern void musb_g_giveback(struct musb_ep *, struct usb_request *, int); +extern void musb_ep_restart(struct musb *, struct musb_request *); + #endif /* __MUSB_GADGET_H */ diff --git a/drivers/usb/musb/musb_gadget_ep0.c b/drivers/usb/musb/musb_gadget_ep0.c index 59bef8f..6dd03f4 100644 --- a/drivers/usb/musb/musb_gadget_ep0.c +++ b/drivers/usb/musb/musb_gadget_ep0.c @@ -261,6 +261,7 @@ __acquires(musb->lock) ctrlrequest->wIndex & 0x0f; struct musb_ep *musb_ep; struct musb_hw_ep *ep; + struct musb_request *request; void __iomem *regs; int is_in; u16 csr; @@ -302,6 +303,14 @@ __acquires(musb->lock) musb_writew(regs, MUSB_RXCSR, csr); } + /* Maybe start the first request in the queue */ + request = to_musb_request( + next_request(musb_ep)); + if (!musb_ep->busy && request) { + DBG(3, "restarting the request\n"); + musb_ep_restart(musb, request); + } + /* select ep0 again */ musb_ep_select(mbase, 0); } break; diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index 877d20b..9e65c47 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -660,6 +660,12 @@ static bool musb_tx_dma_program(struct dma_controller *dma, qh->segsize = length; + /* + * Ensure the data reaches to main memory before starting + * DMA transfer + */ + wmb(); + if (!dma->channel_program(channel, pkt_size, mode, urb->transfer_dma + offset, length)) { dma->channel_release(channel); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 29e850a..7c80082 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -243,7 +243,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq, int r, nlogs = 0; while (datalen > 0) { - if (unlikely(headcount >= VHOST_NET_MAX_SG)) { + if 
(unlikely(seg >= VHOST_NET_MAX_SG)) { r = -ENOBUFS; goto err; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index c579dcc..dd3d6f7 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -858,11 +858,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, if (r < 0) return r; len -= l; - if (!len) + if (!len) { + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); return 0; + } } - if (vq->log_ctx) - eventfd_signal(vq->log_ctx, 1); /* Length written exceeds what we have stored. This is a bug. */ BUG(); return 0; diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 84f8423..7ccc967 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -3508,7 +3508,7 @@ static void fbcon_exit(void) softback_buf = 0UL; for (i = 0; i < FB_MAX; i++) { - int pending; + int pending = 0; mapped = 0; info = registered_fb[i]; @@ -3516,7 +3516,8 @@ static void fbcon_exit(void) if (info == NULL) continue; - pending = cancel_work_sync(&info->queue); + if (info->queue.func) + pending = cancel_work_sync(&info->queue); DPRINTK("fbcon: %s pending work\n", (pending ? "canceled" : "no")); diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c index 815f84b..70477c2 100644 --- a/drivers/video/efifb.c +++ b/drivers/video/efifb.c @@ -13,7 +13,7 @@ #include <linux/platform_device.h> #include <linux/screen_info.h> #include <linux/dmi.h> - +#include <linux/pci.h> #include <video/vga.h> static struct fb_var_screeninfo efifb_defined __devinitdata = { @@ -39,17 +39,31 @@ enum { M_I20, /* 20-Inch iMac */ M_I20_SR, /* 20-Inch iMac (Santa Rosa) */ M_I24, /* 24-Inch iMac */ + M_I24_8_1, /* 24-Inch iMac, 8,1th gen */ + M_I24_10_1, /* 24-Inch iMac, 10,1th gen */ + M_I27_11_1, /* 27-Inch iMac, 11,1th gen */ M_MINI, /* Mac Mini */ + M_MINI_3_1, /* Mac Mini, 3,1th gen */ + M_MINI_4_1, /* Mac Mini, 4,1th gen */ M_MB, /* MacBook */ M_MB_2, /* MacBook, 2nd rev. */ M_MB_3, /* MacBook, 3rd rev. */ + M_MB_5_1, /* MacBook, 5th rev. */ + M_MB_6_1, /* MacBook, 6th rev. */ + M_MB_7_1, /* MacBook, 7th rev. 
*/ M_MB_SR, /* MacBook, 2nd gen, (Santa Rosa) */ M_MBA, /* MacBook Air */ M_MBP, /* MacBook Pro */ M_MBP_2, /* MacBook Pro 2nd gen */ + M_MBP_2_2, /* MacBook Pro 2,2nd gen */ M_MBP_SR, /* MacBook Pro (Santa Rosa) */ M_MBP_4, /* MacBook Pro, 4th gen */ M_MBP_5_1, /* MacBook Pro, 5,1th gen */ + M_MBP_5_2, /* MacBook Pro, 5,2th gen */ + M_MBP_5_3, /* MacBook Pro, 5,3rd gen */ + M_MBP_6_1, /* MacBook Pro, 6,1th gen */ + M_MBP_6_2, /* MacBook Pro, 6,2th gen */ + M_MBP_7_1, /* MacBook Pro, 7,1th gen */ M_UNKNOWN /* placeholder */ }; @@ -64,14 +78,28 @@ static struct efifb_dmi_info { [M_I20] = { "i20", 0x80010000, 1728 * 4, 1680, 1050 }, /* guess */ [M_I20_SR] = { "imac7", 0x40010000, 1728 * 4, 1680, 1050 }, [M_I24] = { "i24", 0x80010000, 2048 * 4, 1920, 1200 }, /* guess */ + [M_I24_8_1] = { "imac8", 0xc0060000, 2048 * 4, 1920, 1200 }, + [M_I24_10_1] = { "imac10", 0xc0010000, 2048 * 4, 1920, 1080 }, + [M_I27_11_1] = { "imac11", 0xc0010000, 2560 * 4, 2560, 1440 }, [M_MINI]= { "mini", 0x80000000, 2048 * 4, 1024, 768 }, + [M_MINI_3_1] = { "mini31", 0x40010000, 1024 * 4, 1024, 768 }, + [M_MINI_4_1] = { "mini41", 0xc0010000, 2048 * 4, 1920, 1200 }, [M_MB] = { "macbook", 0x80000000, 2048 * 4, 1280, 800 }, + [M_MB_5_1] = { "macbook51", 0x80010000, 2048 * 4, 1280, 800 }, + [M_MB_6_1] = { "macbook61", 0x80010000, 2048 * 4, 1280, 800 }, + [M_MB_7_1] = { "macbook71", 0x80010000, 2048 * 4, 1280, 800 }, [M_MBA] = { "mba", 0x80000000, 2048 * 4, 1280, 800 }, [M_MBP] = { "mbp", 0x80010000, 1472 * 4, 1440, 900 }, [M_MBP_2] = { "mbp2", 0, 0, 0, 0 }, /* placeholder */ + [M_MBP_2_2] = { "mbp22", 0x80010000, 1472 * 4, 1440, 900 }, [M_MBP_SR] = { "mbp3", 0x80030000, 2048 * 4, 1440, 900 }, [M_MBP_4] = { "mbp4", 0xc0060000, 2048 * 4, 1920, 1200 }, [M_MBP_5_1] = { "mbp51", 0xc0010000, 2048 * 4, 1440, 900 }, + [M_MBP_5_2] = { "mbp52", 0xc0010000, 2048 * 4, 1920, 1200 }, + [M_MBP_5_3] = { "mbp53", 0xd0010000, 2048 * 4, 1440, 900 }, + [M_MBP_6_1] = { "mbp61", 0x90030000, 2048 * 4, 1920, 1200 }, + [M_MBP_6_2] = { "mbp62", 0x90030000, 2048 * 4, 1680, 1050 }, + [M_MBP_7_1] = { "mbp71", 0xc0010000, 2048 * 4, 1280, 800 }, [M_UNKNOWN] = { NULL, 0, 0, 0, 0 } }; @@ -92,7 +120,12 @@ static const struct dmi_system_id dmi_system_table[] __initconst = { EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "iMac6,1", M_I24), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac6,1", M_I24), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac7,1", M_I20_SR), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac8,1", M_I24_8_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac10,1", M_I24_10_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "iMac11,1", M_I27_11_1), EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "Macmini1,1", M_MINI), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini3,1", M_MINI_3_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "Macmini4,1", M_MINI_4_1), EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook1,1", M_MB), /* At least one of these two will be right; maybe both? 
*/ EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook2,1", M_MB), @@ -101,14 +134,23 @@ static const struct dmi_system_id dmi_system_table[] __initconst = { EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBook3,1", M_MB), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook3,1", M_MB), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook4,1", M_MB), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook5,1", M_MB_5_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook6,1", M_MB_6_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBook7,1", M_MB_7_1), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookAir1,1", M_MBA), EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro1,1", M_MBP), EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,1", M_MBP_2), + EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro2,2", M_MBP_2_2), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro2,1", M_MBP_2), EFIFB_DMI_SYSTEM_ID("Apple Computer, Inc.", "MacBookPro3,1", M_MBP_SR), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro3,1", M_MBP_SR), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro4,1", M_MBP_4), EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,1", M_MBP_5_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,2", M_MBP_5_2), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro5,3", M_MBP_5_3), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,1", M_MBP_6_1), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro6,2", M_MBP_6_2), + EFIFB_DMI_SYSTEM_ID("Apple Inc.", "MacBookPro7,1", M_MBP_7_1), {}, }; @@ -116,7 +158,7 @@ static int set_system(const struct dmi_system_id *id) { struct efifb_dmi_info *info = id->driver_data; if (info->base == 0) - return -ENODEV; + return 0; printk(KERN_INFO "efifb: dmi detected %s - framebuffer at %p " "(%dx%d, stride %d)\n", id->ident, @@ -124,18 +166,55 @@ static int set_system(const struct dmi_system_id *id) info->stride); /* Trust the bootloader over the DMI tables */ - if (screen_info.lfb_base == 0) + if (screen_info.lfb_base == 0) { +#if defined(CONFIG_PCI) + struct pci_dev *dev = NULL; + int found_bar = 0; +#endif screen_info.lfb_base = info->base; - if (screen_info.lfb_linelength == 0) - screen_info.lfb_linelength = info->stride; - if (screen_info.lfb_width == 0) - screen_info.lfb_width = info->width; - if (screen_info.lfb_height == 0) - screen_info.lfb_height = info->height; - if (screen_info.orig_video_isVGA == 0) - screen_info.orig_video_isVGA = VIDEO_TYPE_EFI; - return 0; +#if defined(CONFIG_PCI) + /* make sure that the address in the table is actually on a + * VGA device's PCI BAR */ + + for_each_pci_dev(dev) { + int i; + if ((dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) + continue; + for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { + resource_size_t start, end; + + start = pci_resource_start(dev, i); + if (start == 0) + break; + end = pci_resource_end(dev, i); + if (screen_info.lfb_base >= start && + screen_info.lfb_base < end) { + found_bar = 1; + } + } + } + if (!found_bar) + screen_info.lfb_base = 0; +#endif + } + if (screen_info.lfb_base) { + if (screen_info.lfb_linelength == 0) + screen_info.lfb_linelength = info->stride; + if (screen_info.lfb_width == 0) + screen_info.lfb_width = info->width; + if (screen_info.lfb_height == 0) + screen_info.lfb_height = info->height; + if (screen_info.orig_video_isVGA == 0) + screen_info.orig_video_isVGA = VIDEO_TYPE_EFI; + } else { + screen_info.lfb_linelength = 0; + screen_info.lfb_width = 0; + screen_info.lfb_height = 0; + screen_info.orig_video_isVGA = 0; + return 0; + } + return 1; } static int efifb_setcolreg(unsigned regno, unsigned red, unsigned green, diff --git 
a/drivers/video/pxa168fb.c b/drivers/video/pxa168fb.c index 5d786bd..a31a77f 100644 --- a/drivers/video/pxa168fb.c +++ b/drivers/video/pxa168fb.c @@ -298,8 +298,8 @@ static void set_dma_control0(struct pxa168fb_info *fbi) * Set bit to enable graphics DMA. */ x = readl(fbi->reg_base + LCD_SPU_DMA_CTRL0); - x |= fbi->active ? 0x00000100 : 0; - fbi->active = 0; + x &= ~CFG_GRA_ENA_MASK; + x |= fbi->active ? CFG_GRA_ENA(1) : CFG_GRA_ENA(0); /* * If we are in a pseudo-color mode, we need to enable diff --git a/drivers/video/sis/sis_main.c b/drivers/video/sis/sis_main.c index 559bf17..b52f8e4 100644 --- a/drivers/video/sis/sis_main.c +++ b/drivers/video/sis/sis_main.c @@ -1701,6 +1701,9 @@ static int sisfb_ioctl(struct fb_info *info, unsigned int cmd, break; case FBIOGET_VBLANK: + + memset(&sisvbblank, 0, sizeof(struct fb_vblank)); + sisvbblank.count = 0; sisvbblank.flags = sisfb_setupvbblankflags(ivideo, &sisvbblank.vcount, &sisvbblank.hcount); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 29bac51..d409495 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -755,7 +755,10 @@ int register_xenstore_notifier(struct notifier_block *nb) { int ret = 0; - blocking_notifier_chain_register(&xenstore_chain, nb); + if (xenstored_ready > 0) + ret = nb->notifier_call(nb, 0, NULL); + else + blocking_notifier_chain_register(&xenstore_chain, nb); return ret; } @@ -769,7 +772,7 @@ EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); void xenbus_probe(struct work_struct *unused) { - BUG_ON((xenstored_ready <= 0)); + xenstored_ready = 1; /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); @@ -835,8 +838,8 @@ static int __init xenbus_init(void) xen_store_evtchn = xen_start_info->store_evtchn; xen_store_mfn = xen_start_info->store_mfn; xen_store_interface = mfn_to_virt(xen_store_mfn); + xenstored_ready = 1; } - xenstored_ready = 1; } /* Initialize the interface to xenstore. */ @@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) */ ret = retry(iocb); - if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) + if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { + /* + * There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. 
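The aio hunk above folds the restart error codes into -EINTR because the system call cannot be restarted once other AIOs may already have run. A minimal sketch of that translation as a standalone helper (the helper name is hypothetical, not part of the patch):

#include <linux/errno.h>

/* Hypothetical helper mirroring the check added to aio_run_iocb(). */
static inline long aio_map_restart_to_eintr(long ret)
{
	if (ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
	    ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)
		return -EINTR;	/* caller cannot safely restart the syscall */
	return ret;
}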
+ */ + if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || + ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)) + ret = -EINTR; aio_complete(iocb, ret, 0); + } out: spin_lock_irq(&ctx->ctx_lock); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c65c341..7e83b35 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -232,7 +232,7 @@ static int small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, void **request_buf) { - int rc = 0; + int rc; rc = cifs_reconnect_tcon(tcon, smb_command); if (rc) @@ -250,7 +250,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, if (tcon != NULL) cifs_stats_inc(&tcon->num_smbs_sent); - return rc; + return 0; } int @@ -281,16 +281,9 @@ small_smb_init_no_tc(const int smb_command, const int wct, /* If the return code is zero, this function must fill in request_buf pointer */ static int -smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, - void **request_buf /* returned */ , - void **response_buf /* returned */ ) +__smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, + void **request_buf, void **response_buf) { - int rc = 0; - - rc = cifs_reconnect_tcon(tcon, smb_command); - if (rc) - return rc; - *request_buf = cifs_buf_get(); if (*request_buf == NULL) { /* BB should we add a retry in here if not a writepage? */ @@ -309,7 +302,31 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, if (tcon != NULL) cifs_stats_inc(&tcon->num_smbs_sent); - return rc; + return 0; +} + +/* If the return code is zero, this function must fill in request_buf pointer */ +static int +smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, + void **request_buf, void **response_buf) +{ + int rc; + + rc = cifs_reconnect_tcon(tcon, smb_command); + if (rc) + return rc; + + return __smb_init(smb_command, wct, tcon, request_buf, response_buf); +} + +static int +smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon, + void **request_buf, void **response_buf) +{ + if (tcon->ses->need_reconnect || tcon->need_reconnect) + return -EHOSTDOWN; + + return __smb_init(smb_command, wct, tcon, request_buf, response_buf); } static int validate_t2(struct smb_t2_rsp *pSMB) @@ -4534,8 +4551,8 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon) cFYI(1, "In QFSUnixInfo"); QFSUnixRetry: - rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, - (void **) &pSMBr); + rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, + (void **) &pSMB, (void **) &pSMBr); if (rc) return rc; @@ -4604,8 +4621,8 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap) cFYI(1, "In SETFSUnixInfo"); SETFSUnixRetry: /* BB switch to small buf init to save memory */ - rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, - (void **) &pSMBr); + rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, + (void **) &pSMB, (void **) &pSMBr); if (rc) return rc; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 93f77d4..53cce8c 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -801,6 +801,8 @@ retry_iget5_locked: inode->i_flags |= S_NOATIME | S_NOCMTIME; if (inode->i_state & I_NEW) { inode->i_ino = hash; + if (S_ISREG(inode->i_mode)) + inode->i_data.backing_dev_info = sb->s_bdi; #ifdef CONFIG_CIFS_FSCACHE /* initialize per-inode cache cookie pointer */ CIFS_I(inode)->fscache = NULL; diff --git a/fs/compat.c b/fs/compat.c index 718c706..0644a15 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct 
file *file, { compat_ssize_t tot_len; struct iovec iovstack[UIO_FASTIOV]; - struct iovec *iov; + struct iovec *iov = iovstack; ssize_t ret; io_fn_t fn; iov_fn_t fnv; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 5581122..ab38fef 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -72,22 +72,11 @@ int writeback_in_progress(struct backing_dev_info *bdi) static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) { struct super_block *sb = inode->i_sb; - struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; - /* - * For inodes on standard filesystems, we use superblock's bdi. For - * inodes on virtual filesystems, we want to use inode mapping's bdi - * because they can possibly point to something useful (think about - * block_dev filesystem). - */ - if (sb->s_bdi && sb->s_bdi != &noop_backing_dev_info) { - /* Some device inodes could play dirty tricks. Catch them... */ - WARN(bdi != sb->s_bdi && bdi_cap_writeback_dirty(bdi), - "Dirtiable inode bdi %s != sb bdi %s\n", - bdi->name, sb->s_bdi->name); - return sb->s_bdi; - } - return bdi; + if (strcmp(sb->s_type->name, "bdev") == 0) + return inode->i_mapping->backing_dev_info; + + return sb->s_bdi; } static void bdi_queue_work(struct backing_dev_info *bdi, diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index d367af1..cde755c 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1354,7 +1354,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, loff_t file_size; unsigned int num; unsigned int offset; - size_t total_len; + size_t total_len = 0; req = fuse_get_req(fc); if (IS_ERR(req)) diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index a76e0aa..3919150 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, } inode->i_mode = new_mode; + inode->i_ctime = CURRENT_TIME; di->i_mode = cpu_to_le16(inode->i_mode); + di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); ocfs2_journal_dirty(handle, di_bh); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1361997..cbe2f05 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, size_t caller_veclen, u8 target_node, int *status) { - int ret; + int ret = 0; struct o2net_msg *msg = NULL; size_t veclen, caller_bytes = 0; struct kvec *vec = NULL; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f04ebcf..c49f6de 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, goto out_commit; } + cpos = split_hash; + ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, + data_ac, meta_ac, new_dx_leaves, + num_dx_leaves); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + for (i = 0; i < num_dx_leaves; i++) { ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), orig_dx_leaves[i], @@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, mlog_errno(ret); goto out_commit; } - } - cpos = split_hash; - ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, - data_ac, meta_ac, new_dx_leaves, - num_dx_leaves); - if (ret) { - mlog_errno(ret); - goto out_commit; + ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), + new_dx_leaves[i], + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto 
out_commit; + } } ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4b6ae2c..7652989 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node); +void dlm_force_free_mles(struct dlm_ctxt *dlm); int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); int __dlm_lockres_has_locks(struct dlm_lock_resource *res); int __dlm_lockres_unused(struct dlm_lock_resource *res); diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 5efdd37..901ca52 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) spin_lock(&dlm->track_lock); if (oldres) track_list = &oldres->tracking; - else + else { track_list = &dlm->tracking_list; + if (list_empty(track_list)) { + dl = NULL; + spin_unlock(&dlm->track_lock); + goto bail; + } + } list_for_each_entry(res, track_list, tracking) { if (&res->tracking == &dlm->tracking_list) @@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) } else dl = NULL; +bail: /* passed to seq_show */ return dl; } diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 153abb5..11a5c87 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) dlm_mark_domain_leaving(dlm); dlm_leave_domain(dlm); + dlm_force_free_mles(dlm); dlm_complete_dlm_shutdown(dlm); } dlm_put(dlm); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ffb4c68..f564b0e 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm, wake_up(&res->wq); wake_up(&dlm->migration_wq); } + +void dlm_force_free_mles(struct dlm_ctxt *dlm) +{ + int i; + struct hlist_head *bucket; + struct dlm_master_list_entry *mle; + struct hlist_node *tmp, *list; + + /* + * We notified all other nodes that we are exiting the domain and + * marked the dlm state to DLM_CTXT_LEAVING. 
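The new dlm_force_free_mles() above walks each master hash bucket with the _safe list iterator so entries can be unlinked while iterating. A generic sketch of that pattern follows; the names (struct item, drain_bucket) are illustrative and not part of ocfs2.

#include <linux/list.h>
#include <linux/slab.h>

struct item {
	struct hlist_node node;
};

static void drain_bucket(struct hlist_head *bucket)
{
	struct hlist_node *pos, *tmp;
	struct item *it;

	/* _safe variant: 'tmp' caches the next node before 'pos' is freed */
	hlist_for_each_safe(pos, tmp, bucket) {
		it = hlist_entry(pos, struct item, node);
		hlist_del_init(&it->node);
		kfree(it);
	}
}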
If any mles are still + * around we force free them and wake any processes that are waiting + * on the mles + */ + spin_lock(&dlm->spinlock); + spin_lock(&dlm->master_lock); + + BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); + BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); + + for (i = 0; i < DLM_HASH_BUCKETS; i++) { + bucket = dlm_master_hash(dlm, i); + hlist_for_each_safe(list, tmp, bucket) { + mle = hlist_entry(list, struct dlm_master_list_entry, + master_hash_node); + if (mle->type != DLM_MLE_BLOCK) { + mlog(ML_ERROR, "bad mle: %p\n", mle); + dlm_print_one_mle(mle); + } + atomic_set(&mle->woken, 1); + wake_up(&mle->wq); + + __dlm_unlink_mle(dlm, mle); + __dlm_mle_detach_hb_events(dlm, mle); + __dlm_put_mle(mle); + } + } + spin_unlock(&dlm->master_lock); + spin_unlock(&dlm->spinlock); +} diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d1ce48e..1d596d8 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -84,6 +84,7 @@ enum { OI_LS_PARENT, OI_LS_RENAME1, OI_LS_RENAME2, + OI_LS_REFLINK_TARGET, }; int ocfs2_dlm_init(struct ocfs2_super *osb); diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 33f1c9a..fa31d05 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -235,18 +235,31 @@ #define OCFS2_HAS_REFCOUNT_FL (0x0010) /* Inode attributes, keep in sync with EXT2 */ -#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ -#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ -#define OCFS2_COMPR_FL (0x00000004) /* Compress file */ -#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ -#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ -#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ -#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ -#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ -#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ - -#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ -#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ +#define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */ +#define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */ +#define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */ +#define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ +#define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ +#define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ +#define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ +#define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ +/* Reserved for compression usage... 
*/ +#define OCFS2_DIRTY_FL FS_DIRTY_FL +#define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ +#define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ +#define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ +/* End compression flags --- maybe not all used */ +#define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */ +#define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ +#define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ +#define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ +#define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ +#define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ +#define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ +#define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + +#define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ +#define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ /* * Extent record flags (e_node.leaf.flags) diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420a..5d24150 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -23,10 +23,10 @@ /* * ioctl commands */ -#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) -#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) -#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) -#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) +#define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS +#define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS +#define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS /* * Space reservation / allocation / free ioctls and argument structure diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 0afeda831..efdd756 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4201,8 +4201,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, goto out; } - mutex_lock(&new_inode->i_mutex); - ret = ocfs2_inode_lock(new_inode, &new_bh, 1); + mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, + OI_LS_REFLINK_TARGET); if (ret) { mlog_errno(ret); goto out_unlock; diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index d8b6e42..3e78db3 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, struct ocfs2_alloc_reservation *resv, int *cstart, int *clen) { - unsigned int wanted = *clen; - if (resv == NULL || ocfs2_resmap_disabled(resmap)) return -ENOSPC; spin_lock(&resv_lock); - /* - * We don't want to over-allocate for temporary - * windows. Otherwise, we run the risk of fragmenting the - * allocation space. - */ - wanted = ocfs2_resv_window_bits(resmap, resv); - if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) - wanted = *clen; - if (ocfs2_resv_empty(resv)) { - mlog(0, "empty reservation, find new window\n"); + /* + * We don't want to over-allocate for temporary + * windows. Otherwise, we run the risk of fragmenting the + * allocation space. + */ + unsigned int wanted = ocfs2_resv_window_bits(resmap, resv); + if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) + wanted = *clen; + + mlog(0, "empty reservation, find new window\n"); /* * Try to get a window here. If it works, we must fall * through and test the bitmap . 
This avoids some diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8a286f5..849c2f0 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -357,7 +357,7 @@ out: static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, struct ocfs2_group_desc *bg, struct ocfs2_chain_list *cl, - u64 p_blkno, u32 clusters) + u64 p_blkno, unsigned int clusters) { struct ocfs2_extent_list *el = &bg->bg_list; struct ocfs2_extent_rec *rec; @@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, rec->e_blkno = cpu_to_le64(p_blkno); rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc)); - rec->e_leaf_clusters = cpu_to_le32(clusters); + rec->e_leaf_clusters = cpu_to_le16(clusters); le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); le16_add_cpu(&bg->bg_free_bits_count, clusters * le16_to_cpu(cl->cl_bpc)); diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 32499d21..9975457 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry, } /* Fast symlinks can't be large */ - len = strlen(target); + len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); link = kzalloc(len + 1, GFP_NOFS); if (!link) { status = -ENOMEM; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d03469f..06fa5e7 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode, xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; - down_read(&oi->ip_xattr_sem); ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, buffer_size, &xis); if (ret == -ENODATA && di->i_xattr_loc) ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, buffer_size, &xbs); - up_read(&oi->ip_xattr_sem); return ret; } @@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode, mlog_errno(ret); return ret; } + down_read(&OCFS2_I(inode)->ip_xattr_sem); ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, name, buffer, buffer_size); + up_read(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock(inode, 0); diff --git a/fs/proc/base.c b/fs/proc/base.c index a1c43e7..8e4adda 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2675,7 +2675,7 @@ static const struct pid_entry tgid_base_stuff[] = { INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), - INF("limits", S_IRUSR, proc_pid_limits), + INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif @@ -3011,7 +3011,7 @@ static const struct pid_entry tid_base_stuff[] = { INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), - INF("limits", S_IRUSR, proc_pid_limits), + INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 271afc4..1dbca4e8 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -363,13 +363,13 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, mss->referenced += PAGE_SIZE; mapcount = page_mapcount(page); if (mapcount >= 2) { - if (pte_dirty(ptent)) + if (pte_dirty(ptent) || PageDirty(page)) mss->shared_dirty += PAGE_SIZE; else mss->shared_clean += PAGE_SIZE; mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; } else { - if 
(pte_dirty(ptent)) + if (pte_dirty(ptent) || PageDirty(page)) mss->private_dirty += PAGE_SIZE; else mss->private_clean += PAGE_SIZE; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 91c817f..2367fb3 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, static const struct file_operations proc_vmcore_operations = { .read = read_vmcore, - .llseek = generic_file_llseek, + .llseek = default_llseek, }; static struct vmcore* __init get_new_element(void) diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index f53505d..5cbb81e 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -170,6 +170,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page, int reiserfs_unpack(struct inode *inode, struct file *filp) { int retval = 0; + int depth; int index; struct page *page; struct address_space *mapping; @@ -188,8 +189,8 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) /* we need to make sure nobody is changing the file size beneath ** us */ - mutex_lock(&inode->i_mutex); - reiserfs_write_lock(inode->i_sb); + reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); + depth = reiserfs_write_lock_once(inode->i_sb); write_from = inode->i_size & (blocksize - 1); /* if we are on a block boundary, we are already unpacked. */ @@ -224,6 +225,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) out: mutex_unlock(&inode->i_mutex); - reiserfs_write_unlock(inode->i_sb); + reiserfs_write_unlock_once(inode->i_sb, depth); return retval; } diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ed575fb..7e206fc 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -405,9 +405,15 @@ xlog_cil_push( new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); new_ctx->ticket = xlog_cil_ticket_alloc(log); - /* lock out transaction commit, but don't block on background push */ + /* + * Lock out transaction commit, but don't block for background pushes + * unless we are well over the CIL space limit. See the definition of + * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic + * used here. + */ if (!down_write_trylock(&cil->xc_ctx_lock)) { - if (!push_seq) + if (!push_seq && + cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log)) goto out_free_ticket; down_write(&cil->xc_ctx_lock); } @@ -422,7 +428,7 @@ xlog_cil_push( goto out_skip; /* check for a previously pushed seqeunce */ - if (push_seq < cil->xc_ctx->sequence) + if (push_seq && push_seq < cil->xc_ctx->sequence) goto out_skip; /* diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index ced52b9..edcdfe0 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -426,13 +426,13 @@ struct xfs_cil { }; /* - * The amount of log space we should the CIL to aggregate is difficult to size. - * Whatever we chose we have to make we can get a reservation for the log space - * effectively, that it is large enough to capture sufficient relogging to - * reduce log buffer IO significantly, but it is not too large for the log or - * induces too much latency when writing out through the iclogs. We track both - * space consumed and the number of vectors in the checkpoint context, so we - * need to decide which to use for limiting. + * The amount of log space we allow the CIL to aggregate is difficult to size. 
+ * Whatever we choose, we have to make sure we can get a reservation for the + * log space effectively, that it is large enough to capture sufficient + * relogging to reduce log buffer IO significantly, but it is not too large for + * the log or induces too much latency when writing out through the iclogs. We + * track both space consumed and the number of vectors in the checkpoint + * context, so we need to decide which to use for limiting. * * Every log buffer we write out during a push needs a header reserved, which * is at least one sector and more for v2 logs. Hence we need a reservation of @@ -459,16 +459,21 @@ struct xfs_cil { * checkpoint transaction ticket is specific to the checkpoint context, rather * than the CIL itself. * - * With dynamic reservations, we can basically make up arbitrary limits for the - * checkpoint size so long as they don't violate any other size rules. Hence - * the initial maximum size for the checkpoint transaction will be set to a - * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit - * right now based on the latency of writing out a large amount of data through - * the circular iclog buffers. + * With dynamic reservations, we can effectively make up arbitrary limits for + * the checkpoint size so long as they don't violate any other size rules. + * Recovery imposes a rule that no transaction exceed half the log, so we are + * limited by that. Furthermore, the log transaction reservation subsystem + * tries to keep 25% of the log free, so we need to keep below that limit or we + * risk running out of free log space to start any new transactions. + * + * In order to keep background CIL push efficient, we will set a lower + * threshold at which background pushing is attempted without blocking current + * transaction commits. A separate, higher bound defines when CIL pushes are + * enforced to ensure we stay within our maximum checkpoint size bounds. + * threshold, yet give us plenty of space for aggregation on large logs. */ - -#define XLOG_CIL_SPACE_LIMIT(log) \ - (min((log->l_logsize >> 2), (8 * 1024 * 1024))) +#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) +#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) /* * The reservation head lsn is not made up of a cycle number and block number. 
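As a quick sanity check of the two new thresholds, for an illustrative 128MB log the background push kicks in at 1/8th of the log and the blocking (hard) limit at 3/16ths:

#include <stdio.h>

int main(void)
{
	unsigned long l_logsize = 128UL << 20;		/* example: 128MB log */
	unsigned long soft = l_logsize >> 3;		/* XLOG_CIL_SPACE_LIMIT */
	unsigned long hard = 3 * (l_logsize >> 4);	/* XLOG_CIL_HARD_SPACE_LIMIT */

	printf("background push at %luMB, blocking push at %luMB\n",
	       soft >> 20, hard >> 20);			/* prints 16 and 24 */
	return 0;
}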
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c0786d4..984cdc6 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -55,7 +55,7 @@ extern u8 acpi_gbl_permanent_mmap; /* - * Globals that are publically available, allowing for + * Globals that are publicly available, allowing for * run time configuration */ extern u32 acpi_dbg_level; diff --git a/include/asm-generic/hardirq.h b/include/asm-generic/hardirq.h index 62f5908..04d0a97 100644 --- a/include/asm-generic/hardirq.h +++ b/include/asm-generic/hardirq.h @@ -3,13 +3,13 @@ #include <linux/cache.h> #include <linux/threads.h> -#include <linux/irq.h> typedef struct { unsigned int __softirq_pending; } ____cacheline_aligned irq_cpustat_t; #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ +#include <linux/irq.h> #ifndef ack_bad_irq static inline void ack_bad_irq(unsigned int irq) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 8a92a17..ef2af99 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -220,6 +220,8 @@ \ BUG_TABLE \ \ + JUMP_TABLE \ + \ /* PCI quirks */ \ .pci_fixup : AT(ADDR(.pci_fixup) - LOAD_OFFSET) { \ VMLINUX_SYMBOL(__start_pci_fixups_early) = .; \ @@ -563,6 +565,14 @@ #define BUG_TABLE #endif +#define JUMP_TABLE \ + . = ALIGN(8); \ + __jump_table : AT(ADDR(__jump_table) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___jump_table) = .; \ + *(__jump_table) \ + VMLINUX_SYMBOL(__stop___jump_table) = .; \ + } + #ifdef CONFIG_PM_TRACE #define TRACEDATA \ . = ALIGN(4); \ diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7809d230..4c9461a 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -612,7 +612,7 @@ struct drm_gem_object { struct kref refcount; /** Handle count of this object. 
Each handle also holds a reference */ - struct kref handlecount; + atomic_t handle_count; /* number of handles on this object */ /** Related drm device */ struct drm_device *dev; @@ -808,7 +808,6 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); - void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); @@ -1175,6 +1174,7 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); +extern void drm_vm_close_locked(struct vm_area_struct *vma); extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); @@ -1455,12 +1455,11 @@ int drm_gem_init(struct drm_device *dev); void drm_gem_destroy(struct drm_device *dev); void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); -void drm_gem_object_free_unlocked(struct kref *kref); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); -void drm_gem_object_handle_free(struct kref *kref); +void drm_gem_object_handle_free(struct drm_gem_object *obj); void drm_gem_vm_open(struct vm_area_struct *vma); void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); @@ -1483,8 +1482,12 @@ drm_gem_object_unreference(struct drm_gem_object *obj) static inline void drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) { - if (obj != NULL) - kref_put(&obj->refcount, drm_gem_object_free_unlocked); + if (obj != NULL) { + struct drm_device *dev = obj->dev; + mutex_lock(&dev->struct_mutex); + kref_put(&obj->refcount, drm_gem_object_free); + mutex_unlock(&dev->struct_mutex); + } } int drm_gem_handle_create(struct drm_file *file_priv, @@ -1495,7 +1498,7 @@ static inline void drm_gem_object_handle_reference(struct drm_gem_object *obj) { drm_gem_object_reference(obj); - kref_get(&obj->handlecount); + atomic_inc(&obj->handle_count); } static inline void @@ -1504,12 +1507,15 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference(obj); } @@ -1519,12 +1525,17 @@ drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; + /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference_unlocked(obj); } diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 3a9940e..883c1d4 100644 --- 
a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -85,7 +85,6 @@ {0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5464, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ - {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5548, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5549, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x554A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ @@ -103,6 +102,7 @@ {0x1002, 0x564F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \ {0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \ {0x1002, 0x5954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS480|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \ diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 36ca972..1be416b 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -53,6 +53,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ #define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ #define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ +#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index ce29b81..ba8319a 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -102,6 +102,9 @@ static inline u64 dma_get_mask(struct device *dev) return DMA_BIT_MASK(32); } +#ifdef ARCH_HAS_DMA_SET_COHERENT_MASK +int dma_set_coherent_mask(struct device *dev, u64 mask); +#else static inline int dma_set_coherent_mask(struct device *dev, u64 mask) { if (!dma_supported(dev, mask)) @@ -109,6 +112,7 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask) dev->coherent_dma_mask = mask; return 0; } +#endif extern u64 dma_get_required_mask(struct device *dev); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c61d4ca..e210649 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -548,7 +548,7 @@ static inline bool dma_dev_has_pq_continue(struct dma_device *dma) return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; } -static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma) { return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; } diff --git a/include/linux/dynamic_debug.h b/include/linux/dynamic_debug.h index 52c0da4..bef3cda 100644 --- a/include/linux/dynamic_debug.h +++ b/include/linux/dynamic_debug.h @@ -1,6 +1,8 @@ #ifndef _DYNAMIC_DEBUG_H #define _DYNAMIC_DEBUG_H +#include <linux/jump_label.h> + /* dynamic_printk_enabled, and dynamic_printk_enabled2 are bitmasks in which * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They * use independent hash functions, to reduce the chance of false positives. 
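With the dma-mapping hunk above, an architecture that defines ARCH_HAS_DMA_SET_COHERENT_MASK supplies its own out-of-line dma_set_coherent_mask(); callers keep using the same interface. A sketch of the usual driver-side idiom, with hypothetical foo_* naming:

#include <linux/dma-mapping.h>
#include <linux/errno.h>

static int foo_setup_dma(struct device *dev)
{
	/* Prefer 64-bit coherent allocations, fall back to 32-bit. */
	if (dma_set_coherent_mask(dev, DMA_BIT_MASK(64)) &&
	    dma_set_coherent_mask(dev, DMA_BIT_MASK(32)))
		return -EIO;
	return 0;
}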
@@ -22,8 +24,6 @@ struct _ddebug { const char *function; const char *filename; const char *format; - char primary_hash; - char secondary_hash; unsigned int lineno:24; /* * The flags field controls the behaviour at the callsite. @@ -33,6 +33,7 @@ struct _ddebug { #define _DPRINTK_FLAGS_PRINT (1<<0) /* printk() a message using the format */ #define _DPRINTK_FLAGS_DEFAULT 0 unsigned int flags:8; + char enabled; } __attribute__((aligned(8))); @@ -42,33 +43,35 @@ int ddebug_add_module(struct _ddebug *tab, unsigned int n, #if defined(CONFIG_DYNAMIC_DEBUG) extern int ddebug_remove_module(const char *mod_name); -#define __dynamic_dbg_enabled(dd) ({ \ - int __ret = 0; \ - if (unlikely((dynamic_debug_enabled & (1LL << DEBUG_HASH)) && \ - (dynamic_debug_enabled2 & (1LL << DEBUG_HASH2)))) \ - if (unlikely(dd.flags)) \ - __ret = 1; \ - __ret; }) - #define dynamic_pr_debug(fmt, ...) do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ - { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ - DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ - if (__dynamic_dbg_enabled(descriptor)) \ - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ + _DPRINTK_FLAGS_DEFAULT }; \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ +out: ; \ } while (0) #define dynamic_dev_dbg(dev, fmt, ...) do { \ + __label__ do_printk; \ + __label__ out; \ static struct _ddebug descriptor \ __used \ __attribute__((section("__verbose"), aligned(8))) = \ - { KBUILD_MODNAME, __func__, __FILE__, fmt, DEBUG_HASH, \ - DEBUG_HASH2, __LINE__, _DPRINTK_FLAGS_DEFAULT }; \ - if (__dynamic_dbg_enabled(descriptor)) \ - dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ + { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ + _DPRINTK_FLAGS_DEFAULT }; \ + JUMP_LABEL(&descriptor.enabled, do_printk); \ + goto out; \ +do_printk: \ + dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ +out: ; \ } while (0) #else diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 02b8b24..8beabb9 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -191,8 +191,8 @@ struct ftrace_event_call { unsigned int flags; #ifdef CONFIG_PERF_EVENTS - int perf_refcount; - struct hlist_head *perf_events; + int perf_refcount; + struct hlist_head __percpu *perf_events; #endif }; @@ -252,8 +252,8 @@ DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -extern int perf_trace_enable(struct perf_event *event); -extern void perf_trace_disable(struct perf_event *event); +extern int perf_trace_add(struct perf_event *event, int flags); +extern void perf_trace_del(struct perf_event *event, int flags); extern int ftrace_profile_set_filter(struct perf_event *event, int event_id, char *filter_str); extern void ftrace_profile_free_filter(struct perf_event *event); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index a0384a4..531495d 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -18,6 +18,7 @@ #include <asm/atomic.h> #include <asm/ptrace.h> #include <asm/system.h> +#include <trace/events/irq.h> /* * These correspond to the IORESOURCE_IRQ_* defines in @@ -407,7 +408,12 @@ asmlinkage void do_softirq(void); asmlinkage void __do_softirq(void); extern void 
open_softirq(int nr, void (*action)(struct softirq_action *)); extern void softirq_init(void); -#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) +static inline void __raise_softirq_irqoff(unsigned int nr) +{ + trace_softirq_raise((struct softirq_action *)(unsigned long)nr, NULL); + or_softirq_pending(1UL << nr); +} + extern void raise_softirq_irqoff(unsigned int nr); extern void raise_softirq(unsigned int nr); extern void wakeup_softirqd(void); diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h new file mode 100644 index 0000000..b72cd9f --- /dev/null +++ b/include/linux/jump_label.h @@ -0,0 +1,64 @@ +#ifndef _LINUX_JUMP_LABEL_H +#define _LINUX_JUMP_LABEL_H + +#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_HAVE_ARCH_JUMP_LABEL) +# include <asm/jump_label.h> +# define HAVE_JUMP_LABEL +#endif + +enum jump_label_type { + JUMP_LABEL_ENABLE, + JUMP_LABEL_DISABLE +}; + +struct module; + +#ifdef HAVE_JUMP_LABEL + +extern struct jump_entry __start___jump_table[]; +extern struct jump_entry __stop___jump_table[]; + +extern void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type); +extern void arch_jump_label_text_poke_early(jump_label_t addr); +extern void jump_label_update(unsigned long key, enum jump_label_type type); +extern void jump_label_apply_nops(struct module *mod); +extern int jump_label_text_reserved(void *start, void *end); + +#define enable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_ENABLE); + +#define disable_jump_label(key) \ + jump_label_update((unsigned long)key, JUMP_LABEL_DISABLE); + +#else + +#define JUMP_LABEL(key, label) \ +do { \ + if (unlikely(*key)) \ + goto label; \ +} while (0) + +#define enable_jump_label(cond_var) \ +do { \ + *(cond_var) = 1; \ +} while (0) + +#define disable_jump_label(cond_var) \ +do { \ + *(cond_var) = 0; \ +} while (0) + +static inline int jump_label_apply_nops(struct module *mod) +{ + return 0; +} + +static inline int jump_label_text_reserved(void *start, void *end) +{ + return 0; +} + +#endif + +#endif diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fd..b29e745 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -350,7 +350,10 @@ struct module struct tracepoint *tracepoints; unsigned int num_tracepoints; #endif - +#ifdef HAVE_JUMP_LABEL + struct jump_entry *jump_entries; + unsigned int num_jump_entries; +#endif #ifdef CONFIG_TRACING const char **trace_bprintk_fmt_start; unsigned int num_trace_bprintk_fmt; @@ -686,17 +689,16 @@ extern int module_sysfs_initialized; #ifdef CONFIG_GENERIC_BUG -int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, +void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ -static inline int module_bug_finalize(const Elf_Ehdr *hdr, +static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - return 0; } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 59d0669..1235669 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -27,8 +27,6 @@ #define MAX_LINKS 32 -struct net; - struct sockaddr_nl { sa_family_t nl_family; /* AF_NETLINK */ unsigned short nl_pad; /* zero */ @@ -151,6 +149,8 @@ struct nlattr { #include <linux/capability.h> #include <linux/skbuff.h> +struct net; + static inline struct nlmsghdr 
*nlmsg_hdr(const struct sk_buff *skb) { return (struct nlmsghdr *)skb->data; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 10d3330..570fdde 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -393,6 +393,9 @@ #define PCI_DEVICE_ID_VLSI_82C147 0x0105 #define PCI_DEVICE_ID_VLSI_VAS96011 0x0702 +/* AMD RD890 Chipset */ +#define PCI_DEVICE_ID_RD890_IOMMU 0x5a23 + #define PCI_VENDOR_ID_ADL 0x1005 #define PCI_DEVICE_ID_ADL_2301 0x2301 diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 49466b1..0eb5083 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -39,6 +39,15 @@ preempt_enable(); \ } while (0) +#define get_cpu_ptr(var) ({ \ + preempt_disable(); \ + this_cpu_ptr(var); }) + +#define put_cpu_ptr(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) + #ifdef CONFIG_SMP /* minimum unit size, also is the maximum supported allocation size */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 33f08da..a9227e9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -529,7 +529,6 @@ struct hw_perf_event { int last_cpu; }; struct { /* software */ - s64 remaining; struct hrtimer hrtimer; }; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -539,6 +538,7 @@ struct hw_perf_event { }; #endif }; + int state; local64_t prev_count; u64 sample_period; u64 last_period; @@ -550,6 +550,13 @@ struct hw_perf_event { #endif }; +/* + * hw_perf_event::state flags + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + struct perf_event; /* @@ -561,36 +568,70 @@ struct perf_event; * struct pmu - generic performance monitoring unit */ struct pmu { - int (*enable) (struct perf_event *event); - void (*disable) (struct perf_event *event); - int (*start) (struct perf_event *event); - void (*stop) (struct perf_event *event); - void (*read) (struct perf_event *event); - void (*unthrottle) (struct perf_event *event); + struct list_head entry; + + int * __percpu pmu_disable_count; + struct perf_cpu_context * __percpu pmu_cpu_context; + int task_ctx_nr; /* - * Group events scheduling is treated as a transaction, add group - * events as a whole and perform one schedulability test. If the test - * fails, roll back the whole group + * Fully disable/enable this PMU, can be used to protect from the PMI + * as well as for lazy/batch writing of the MSRs. */ + void (*pmu_enable) (struct pmu *pmu); /* optional */ + void (*pmu_disable) (struct pmu *pmu); /* optional */ /* - * Start the transaction, after this ->enable() doesn't need - * to do schedulability tests. + * Try and initialize the event for this PMU. + * Should return -ENOENT when the @event doesn't match this PMU. */ - void (*start_txn) (const struct pmu *pmu); + int (*event_init) (struct perf_event *event); + +#define PERF_EF_START 0x01 /* start the counter when adding */ +#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ +#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ + /* - * If ->start_txn() disabled the ->enable() schedulability test + * Adds/Removes a counter to/from the PMU, can be done inside + * a transaction, see the ->*_txn() methods. + */ + int (*add) (struct perf_event *event, int flags); + void (*del) (struct perf_event *event, int flags); + + /* + * Starts/Stops a counter present on the PMU. The PMI handler + * should stop the counter when perf_event_overflow() returns + * !0. ->start() will be used to continue. 
+ */ + void (*start) (struct perf_event *event, int flags); + void (*stop) (struct perf_event *event, int flags); + + /* + * Updates the counter value of the event. + */ + void (*read) (struct perf_event *event); + + /* + * Group events scheduling is treated as a transaction, add + * group events as a whole and perform one schedulability test. + * If the test fails, roll back the whole group + * + * Start the transaction, after this ->add() doesn't need to + * do schedulability tests. + */ + void (*start_txn) (struct pmu *pmu); /* optional */ + /* + * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. */ - int (*commit_txn) (const struct pmu *pmu); + int (*commit_txn) (struct pmu *pmu); /* optional */ /* - * Will cancel the transaction, assumes ->disable() is called for - * each successfull ->enable() during the transaction. + * Will cancel the transaction, assumes ->del() is called + * for each successfull ->add() during the transaction. */ - void (*cancel_txn) (const struct pmu *pmu); + void (*cancel_txn) (struct pmu *pmu); /* optional */ }; /** @@ -669,7 +710,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - const struct pmu *pmu; + struct pmu *pmu; enum perf_event_active_state state; unsigned int attach_state; @@ -763,12 +804,19 @@ struct perf_event { #endif /* CONFIG_PERF_EVENTS */ }; +enum perf_event_context_type { + task_context, + cpu_context, +}; + /** * struct perf_event_context - event context structure * * Used as a container for task events and CPU events as well: */ struct perf_event_context { + enum perf_event_context_type type; + struct pmu *pmu; /* * Protect the states of the events in the list, * nr_active, and the list: @@ -808,6 +856,12 @@ struct perf_event_context { struct rcu_head rcu_head; }; +/* + * Number of contexts where an event can trigger: + * task, softirq, hardirq, nmi. 
+ */ +#define PERF_NR_CONTEXTS 4 + /** * struct perf_event_cpu_context - per cpu event context structure */ @@ -815,18 +869,9 @@ struct perf_cpu_context { struct perf_event_context ctx; struct perf_event_context *task_ctx; int active_oncpu; - int max_pertask; int exclusive; - struct swevent_hlist *swevent_hlist; - struct mutex hlist_mutex; - int hlist_refcount; - - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; + struct list_head rotation_list; + int jiffies_interval; }; struct perf_output_handle { @@ -842,28 +887,22 @@ struct perf_output_handle { #ifdef CONFIG_PERF_EVENTS -/* - * Set by architecture code: - */ -extern int perf_max_events; - -extern const struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_pmu_register(struct pmu *pmu); +extern void perf_pmu_unregister(struct pmu *pmu); extern int perf_num_counters(void); extern const char *perf_pmu_name(void); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); -extern void perf_event_task_tick(struct task_struct *task); extern int perf_event_init_task(struct task_struct *child); extern void perf_event_exit_task(struct task_struct *child); extern void perf_event_free_task(struct task_struct *task); +extern void perf_event_delayed_put(struct task_struct *task); extern void set_perf_event_pending(void); extern void perf_event_do_pending(void); extern void perf_event_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); +extern void perf_pmu_disable(struct pmu *pmu); +extern void perf_pmu_enable(struct pmu *pmu); extern int perf_event_task_disable(void); extern int perf_event_task_enable(void); extern void perf_event_update_userpage(struct perf_event *event); @@ -871,7 +910,7 @@ extern int perf_event_release_kernel(struct perf_event *event); extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid, + struct task_struct *task, perf_overflow_handler_t callback); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -922,14 +961,7 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - switch (event->attr.type) { - case PERF_TYPE_SOFTWARE: - case PERF_TYPE_TRACEPOINT: - /* for now the breakpoint stuff also works as software event */ - case PERF_TYPE_BREAKPOINT: - return 1; - } - return 0; + return event->pmu->task_ctx_nr == perf_sw_context; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; @@ -978,7 +1010,21 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_comm(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); +/* Callchains */ +DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry); + +extern void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs); +extern void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs); + + +static inline void +perf_callchain_store(struct perf_callchain_entry *entry, u64 ip) +{ + if (entry->nr < PERF_MAX_STACK_DEPTH) + entry->ip[entry->nr++] = ip; +} extern int sysctl_perf_event_paranoid; extern int sysctl_perf_event_mlock; @@ -1021,21 
+1067,19 @@ extern int perf_swevent_get_recursion_context(void); extern void perf_swevent_put_recursion_context(int rctx); extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); +extern void perf_event_task_tick(void); #else static inline void perf_event_task_sched_in(struct task_struct *task) { } static inline void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_event_task_tick(struct task_struct *task) { } static inline int perf_event_init_task(struct task_struct *child) { return 0; } static inline void perf_event_exit_task(struct task_struct *child) { } static inline void perf_event_free_task(struct task_struct *task) { } +static inline void perf_event_delayed_put(struct task_struct *task) { } static inline void perf_event_do_pending(void) { } static inline void perf_event_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } static inline int perf_event_task_disable(void) { return -EINVAL; } static inline int perf_event_task_enable(void) { return -EINVAL; } @@ -1058,6 +1102,7 @@ static inline int perf_swevent_get_recursion_context(void) { return -1; } static inline void perf_swevent_put_recursion_context(int rctx) { } static inline void perf_event_enable(struct perf_event *event) { } static inline void perf_event_disable(struct perf_event *event) { } +static inline void perf_event_task_tick(void) { } #endif #define perf_output_put(handle, x) \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a..83af1f8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -454,7 +454,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * Makes rcu_dereference_check() do the dirty work. 
*/ #define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) + rcu_dereference_check(p, rcu_read_lock_bh_held() || irqs_disabled()) /** * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched diff --git a/include/linux/sched.h b/include/linux/sched.h index 1e2a6db..eb3c1ce 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1160,6 +1160,13 @@ struct sched_rt_entity { struct rcu_node; +enum perf_event_task_context { + perf_invalid_context = -1, + perf_hw_context = 0, + perf_sw_context, + perf_nr_task_contexts, +}; + struct task_struct { volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */ void *stack; @@ -1431,7 +1438,7 @@ struct task_struct { struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts]; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif diff --git a/include/linux/socket.h b/include/linux/socket.h index a2fada9..a8f56e1 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -322,7 +322,7 @@ extern int csum_partial_copy_fromiovecend(unsigned char *kdata, int offset, unsigned int len, __wsum *csump); -extern int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); +extern long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode); extern int memcpy_toiovec(struct iovec *v, unsigned char *kdata, int len); extern int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, int offset, int len); diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 6b524a0..1808960 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -126,8 +126,8 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ -static inline int stop_machine(int (*fn)(void *), void *data, - const struct cpumask *cpus) +static inline int __stop_machine(int (*fn)(void *), void *data, + const struct cpumask *cpus) { int ret; local_irq_disable(); @@ -136,5 +136,11 @@ static inline int stop_machine(int (*fn)(void *), void *data, return ret; } +static inline int stop_machine(int (*fn)(void *), void *data, + const struct cpumask *cpus) +{ + return __stop_machine(fn, data, cpus); +} + #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ #endif /* _LINUX_STOP_MACHINE */ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 103d1b6..a4a90b67 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/types.h> #include <linux/rcupdate.h> +#include <linux/jump_label.h> struct module; struct tracepoint; @@ -145,7 +146,9 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin, extern struct tracepoint __tracepoint_##name; \ static inline void trace_##name(proto) \ { \ - if (unlikely(__tracepoint_##name.state)) \ + JUMP_LABEL(&__tracepoint_##name.state, do_trace); \ + return; \ +do_trace: \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args)); \ diff --git a/include/linux/wait.h b/include/linux/wait.h index 0836ccc..3efc9f3 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -614,6 +614,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ 
INIT_LIST_HEAD(&(wait)->task_list); \ + (wait)->flags = 0; \ } while (0) /** diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 45375b4..4d40c4d 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -121,6 +121,7 @@ static inline int addrconf_finite_timeout(unsigned long timeout) * IPv6 Address Label subsystem (addrlabel.c) */ extern int ipv6_addr_label_init(void); +extern void ipv6_addr_label_cleanup(void); extern void ipv6_addr_label_rtnl_register(void); extern u32 ipv6_addr_label(struct net *net, const struct in6_addr *addr, diff --git a/include/net/dst.h b/include/net/dst.h index 81d1413..0238650 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -242,6 +242,7 @@ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); skb_dst_drop(skb); nf_reset(skb); } diff --git a/include/net/route.h b/include/net/route.h index bd732d6..7e5e73b 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -199,6 +199,8 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.fl_ip_sport = sport; fl.fl_ip_dport = dport; fl.proto = protocol; + if (inet_sk(sk)->transparent) + fl.flags |= FLOWI_FLAG_ANYSRC; ip_rt_put(*rp); *rp = NULL; security_sk_classify_flow(sk, &fl); diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fc8f36d..4f53532 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -298,8 +298,8 @@ struct xfrm_state_afinfo { const struct xfrm_type *type_map[IPPROTO_MAX]; struct xfrm_mode *mode_map[XFRM_MODE_MAX]; int (*init_flags)(struct xfrm_state *x); - void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, + void (*init_tempsel)(struct xfrm_selector *sel, struct flowi *fl); + void (*init_temprop)(struct xfrm_state *x, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 0e4cfb6..6fa7cbab7 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -5,7 +5,9 @@ #define _TRACE_IRQ_H #include <linux/tracepoint.h> -#include <linux/interrupt.h> + +struct irqaction; +struct softirq_action; #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } #define show_softirq_name(val) \ @@ -93,7 +95,10 @@ DECLARE_EVENT_CLASS(softirq, ), TP_fast_assign( - __entry->vec = (int)(h - vec); + if (vec) + __entry->vec = (int)(h - vec); + else + __entry->vec = (int)(long)h; ), TP_printk("vec=%d [action=%s]", __entry->vec, @@ -136,6 +141,23 @@ DEFINE_EVENT(softirq, softirq_exit, TP_ARGS(h, vec) ); +/** + * softirq_raise - called immediately when a softirq is raised + * @h: pointer to struct softirq_action + * @vec: pointer to first struct softirq_action in softirq_vec array + * + * The @h parameter contains a pointer to the softirq vector number which is + * raised. @vec is NULL and it means @h includes vector number not + * softirq_action. When used in combination with the softirq_entry tracepoint + * we can determine the softirq raise latency. 
+ */ +DEFINE_EVENT(softirq, softirq_raise, + + TP_PROTO(struct softirq_action *h, struct softirq_action *vec), + + TP_ARGS(h, vec) +); + #endif /* _TRACE_IRQ_H */ /* This part must be outside protection */ diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h index 188deca..8fe1e93 100644 --- a/include/trace/events/napi.h +++ b/include/trace/events/napi.h @@ -6,10 +6,31 @@ #include <linux/netdevice.h> #include <linux/tracepoint.h> +#include <linux/ftrace.h> + +#define NO_DEV "(no_device)" + +TRACE_EVENT(napi_poll, -DECLARE_TRACE(napi_poll, TP_PROTO(struct napi_struct *napi), - TP_ARGS(napi)); + + TP_ARGS(napi), + + TP_STRUCT__entry( + __field( struct napi_struct *, napi) + __string( dev_name, napi->dev ? napi->dev->name : NO_DEV) + ), + + TP_fast_assign( + __entry->napi = napi; + __assign_str(dev_name, napi->dev ? napi->dev->name : NO_DEV); + ), + + TP_printk("napi poll on napi struct %p for device %s", + __entry->napi, __get_str(dev_name)) +); + +#undef NO_DEV #endif /* _TRACE_NAPI_H_ */ diff --git a/include/trace/events/net.h b/include/trace/events/net.h new file mode 100644 index 0000000..5f247f5 --- /dev/null +++ b/include/trace/events/net.h @@ -0,0 +1,82 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM net + +#if !defined(_TRACE_NET_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_NET_H + +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <linux/tracepoint.h> + +TRACE_EVENT(net_dev_xmit, + + TP_PROTO(struct sk_buff *skb, + int rc), + + TP_ARGS(skb, rc), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __field( int, rc ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __entry->rc = rc; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u rc=%d", + __get_str(name), __entry->skbaddr, __entry->len, __entry->rc) +); + +DECLARE_EVENT_CLASS(net_dev_template, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + __field( unsigned int, len ) + __string( name, skb->dev->name ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = skb->len; + __assign_str(name, skb->dev->name); + ), + + TP_printk("dev=%s skbaddr=%p len=%u", + __get_str(name), __entry->skbaddr, __entry->len) +) + +DEFINE_EVENT(net_dev_template, net_dev_queue, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_receive_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); + +DEFINE_EVENT(net_dev_template, netif_rx, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb) +); +#endif /* _TRACE_NET_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/power.h b/include/trace/events/power.h index 35a2a6e..286784d 100644 --- a/include/trace/events/power.h +++ b/include/trace/events/power.h @@ -10,12 +10,17 @@ #ifndef _TRACE_POWER_ENUM_ #define _TRACE_POWER_ENUM_ enum { - POWER_NONE = 0, - POWER_CSTATE = 1, - POWER_PSTATE = 2, + POWER_NONE = 0, + POWER_CSTATE = 1, /* C-State */ + POWER_PSTATE = 2, /* Fequency change or DVFS */ + POWER_SSTATE = 3, /* Suspend */ }; #endif +/* + * The power events are used for cpuidle & suspend (power_start, power_end) + * and for cpufreq (power_frequency) + */ DECLARE_EVENT_CLASS(power, TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id), @@ -70,6 +75,85 @@ TRACE_EVENT(power_end, ); +/* + * The clock events are used for clock 
enable/disable and for + * clock rate change + */ +DECLARE_EVENT_CLASS(clock, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + __field( u64, cpu_id ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + __entry->cpu_id = cpu_id; + ), + + TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(clock, clock_enable, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +DEFINE_EVENT(clock, clock_disable, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +DEFINE_EVENT(clock, clock_set_rate, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + +/* + * The power domain events are used for power domains transitions + */ +DECLARE_EVENT_CLASS(power_domain, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id), + + TP_STRUCT__entry( + __string( name, name ) + __field( u64, state ) + __field( u64, cpu_id ) + ), + + TP_fast_assign( + __assign_str(name, name); + __entry->state = state; + __entry->cpu_id = cpu_id; +), + + TP_printk("%s state=%lu cpu_id=%lu", __get_str(name), + (unsigned long)__entry->state, (unsigned long)__entry->cpu_id) +); + +DEFINE_EVENT(power_domain, power_domain_target, + + TP_PROTO(const char *name, unsigned int state, unsigned int cpu_id), + + TP_ARGS(name, state, cpu_id) +); + #endif /* _TRACE_POWER_H */ /* This part must be outside protection */ diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 4b2be6d..75ce9d5 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -35,6 +35,23 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(consume_skb, + + TP_PROTO(struct sk_buff *skb), + + TP_ARGS(skb), + + TP_STRUCT__entry( + __field( void *, skbaddr ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + ), + + TP_printk("skbaddr=%p", __entry->skbaddr) +); + TRACE_EVENT(skb_copy_datagram_iovec, TP_PROTO(const struct sk_buff *skb, int len), @@ -743,6 +743,8 @@ static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, { struct semid_ds out; + memset(&out, 0, sizeof(out)); + ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm); out.sem_otime = in->sem_otime; diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a..d52b473 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o range.o + async.o range.o jump_label.o obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o diff --git a/kernel/exit.c b/kernel/exit.c index 0312022..e2bdf37 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_EVENTS - WARN_ON_ONCE(tsk->perf_event_ctxp); -#endif + perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); } diff --git a/kernel/fork.c b/kernel/fork.c index b7e9d60..c445f8c 100644 
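(For reference on the clock and power_domain events added to include/trace/events/power.h above: the DEFINE_EVENT()s generate trace_clock_enable(), trace_clock_disable(), trace_clock_set_rate() and trace_power_domain_target() callables. A minimal sketch of how a driver might emit them follows; the "uart0" name and the helper function are hypothetical, and reading 'state' as an on/off value or as the new rate is inferred from the event-class comment, not stated by the patch.)

#include <linux/smp.h>
#include <trace/events/power.h>

/* hypothetical notification helper inside a clock driver */
static void example_clk_event(int enable, unsigned long new_rate)
{
	unsigned int cpu = raw_smp_processor_id();

	if (enable)
		trace_clock_enable("uart0", 1, cpu);	/* state = 1: clock on */
	else
		trace_clock_disable("uart0", 0, cpu);	/* state = 0: clock off */

	if (new_rate)
		trace_clock_set_rate("uart0", new_rate, cpu);	/* state = new rate */
}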
--- a/kernel/fork.c +++ b/kernel/fork.c @@ -356,10 +356,10 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) if (IS_ERR(pol)) goto fail_nomem_policy; vma_set_policy(tmp, pol); + tmp->vm_mm = mm; if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; tmp->vm_flags &= ~VM_LOCKED; - tmp->vm_mm = mm; tmp->vm_next = tmp->vm_prev = NULL; file = tmp->vm_file; if (file) { diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c7c2aed..3b714e8 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, struct task_struct *tsk) { - return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk), - triggered); + return perf_event_create_kernel_counter(attr, -1, tsk, triggered); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); @@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, get_online_cpus(); for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); + bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered); *pevent = bp; @@ -566,6 +565,61 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { .priority = 0x7fffffff }; +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static int hw_breakpoint_event_init(struct perf_event *bp) +{ + int err; + + if (bp->attr.type != PERF_TYPE_BREAKPOINT) + return -ENOENT; + + err = register_perf_hw_breakpoint(bp); + if (err) + return err; + + bp->destroy = bp_perf_event_destroy; + + return 0; +} + +static int hw_breakpoint_add(struct perf_event *bp, int flags) +{ + if (!(flags & PERF_EF_START)) + bp->hw.state = PERF_HES_STOPPED; + + return arch_install_hw_breakpoint(bp); +} + +static void hw_breakpoint_del(struct perf_event *bp, int flags) +{ + arch_uninstall_hw_breakpoint(bp); +} + +static void hw_breakpoint_start(struct perf_event *bp, int flags) +{ + bp->hw.state = 0; +} + +static void hw_breakpoint_stop(struct perf_event *bp, int flags) +{ + bp->hw.state = PERF_HES_STOPPED; +} + +static struct pmu perf_breakpoint = { + .task_ctx_nr = perf_sw_context, /* could eventually get its own */ + + .event_init = hw_breakpoint_event_init, + .add = hw_breakpoint_add, + .del = hw_breakpoint_del, + .start = hw_breakpoint_start, + .stop = hw_breakpoint_stop, + .read = hw_breakpoint_pmu_read, +}; + static int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; @@ -587,6 +641,8 @@ static int __init init_hw_breakpoint(void) constraints_initialized = 1; + perf_pmu_register(&perf_breakpoint); + return register_die_notifier(&hw_breakpoint_exceptions_nb); err_alloc: @@ -602,8 +658,3 @@ static int __init init_hw_breakpoint(void) core_initcall(init_hw_breakpoint); -struct pmu perf_ops_bp = { - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, - .read = hw_breakpoint_pmu_read, -}; diff --git a/kernel/jump_label.c b/kernel/jump_label.c new file mode 100644 index 0000000..7be868b --- /dev/null +++ b/kernel/jump_label.c @@ -0,0 +1,429 @@ +/* + * jump label support + * + * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> + * + */ +#include <linux/jump_label.h> +#include <linux/memory.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/sort.h> +#include <linux/err.h> + +#ifdef HAVE_JUMP_LABEL + +#define JUMP_LABEL_HASH_BITS 6 
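(As a reading aid for the jump_label core that follows: a call site combines the JUMP_LABEL() macro from <linux/jump_label.h> shown earlier with enable_jump_label()/disable_jump_label(), the same pattern the dynamic_debug and tracepoint conversions in this series use. A minimal sketch with a hypothetical key and slow path, assuming the jump site starts out disabled as done by the early nop poking:)

#include <linux/kernel.h>
#include <linux/jump_label.h>

static char example_key;		/* key byte referenced by the call site */

static void example_hot_path(void)
{
	JUMP_LABEL(&example_key, do_slow);	/* nop (or plain test) while disabled */
	return;
do_slow:
	printk(KERN_DEBUG "example: slow path taken\n");
}

static void example_set_slow_path(int on)
{
	if (on)
		enable_jump_label(&example_key);	/* patch every site keyed on &example_key */
	else
		disable_jump_label(&example_key);
}

(The hash table set up just below maps key addresses such as &example_key to their recorded jump sites, so that enabling or disabling a key can patch every affected location in the core kernel and in modules.)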
+#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS) +static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE]; + +/* mutex to protect coming/going of the the jump_label table */ +static DEFINE_MUTEX(jump_label_mutex); + +struct jump_label_entry { + struct hlist_node hlist; + struct jump_entry *table; + int nr_entries; + /* hang modules off here */ + struct hlist_head modules; + unsigned long key; +}; + +struct jump_label_module_entry { + struct hlist_node hlist; + struct jump_entry *table; + int nr_entries; + struct module *mod; +}; + +static int jump_label_cmp(const void *a, const void *b) +{ + const struct jump_entry *jea = a; + const struct jump_entry *jeb = b; + + if (jea->key < jeb->key) + return -1; + + if (jea->key > jeb->key) + return 1; + + return 0; +} + +static void +sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) +{ + unsigned long size; + + size = (((unsigned long)stop - (unsigned long)start) + / sizeof(struct jump_entry)); + sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); +} + +static struct jump_label_entry *get_jump_label_entry(jump_label_t key) +{ + struct hlist_head *head; + struct hlist_node *node; + struct jump_label_entry *e; + u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0); + + head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (key == e->key) + return e; + } + return NULL; +} + +static struct jump_label_entry * +add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table) +{ + struct hlist_head *head; + struct jump_label_entry *e; + u32 hash; + + e = get_jump_label_entry(key); + if (e) + return ERR_PTR(-EEXIST); + + e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + hash = jhash((void *)&key, sizeof(jump_label_t), 0); + head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; + e->key = key; + e->table = table; + e->nr_entries = nr_entries; + INIT_HLIST_HEAD(&(e->modules)); + hlist_add_head(&e->hlist, head); + return e; +} + +static int +build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop) +{ + struct jump_entry *iter, *iter_begin; + struct jump_label_entry *entry; + int count; + + sort_jump_label_entries(start, stop); + iter = start; + while (iter < stop) { + entry = get_jump_label_entry(iter->key); + if (!entry) { + iter_begin = iter; + count = 0; + while ((iter < stop) && + (iter->key == iter_begin->key)) { + iter++; + count++; + } + entry = add_jump_label_entry(iter_begin->key, + count, iter_begin); + if (IS_ERR(entry)) + return PTR_ERR(entry); + } else { + WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n"); + return -1; + } + } + return 0; +} + +/*** + * jump_label_update - update jump label text + * @key - key value associated with a a jump label + * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE + * + * Will enable/disable the jump for jump label @key, depending on the + * value of @type. 
+ * + */ + +void jump_label_update(unsigned long key, enum jump_label_type type) +{ + struct jump_entry *iter; + struct jump_label_entry *entry; + struct hlist_node *module_node; + struct jump_label_module_entry *e_module; + int count; + + mutex_lock(&jump_label_mutex); + entry = get_jump_label_entry((jump_label_t)key); + if (entry) { + count = entry->nr_entries; + iter = entry->table; + while (count--) { + if (kernel_text_address(iter->code)) + arch_jump_label_transform(iter, type); + iter++; + } + /* eanble/disable jump labels in modules */ + hlist_for_each_entry(e_module, module_node, &(entry->modules), + hlist) { + count = e_module->nr_entries; + iter = e_module->table; + while (count--) { + if (kernel_text_address(iter->code)) + arch_jump_label_transform(iter, type); + iter++; + } + } + } + mutex_unlock(&jump_label_mutex); +} + +static int addr_conflict(struct jump_entry *entry, void *start, void *end) +{ + if (entry->code <= (unsigned long)end && + entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start) + return 1; + + return 0; +} + +#ifdef CONFIG_MODULES + +static int module_conflict(void *start, void *end) +{ + struct hlist_head *head; + struct hlist_node *node, *node_next, *module_node, *module_node_next; + struct jump_label_entry *e; + struct jump_label_module_entry *e_module; + struct jump_entry *iter; + int i, count; + int conflict = 0; + + for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { + head = &jump_label_table[i]; + hlist_for_each_entry_safe(e, node, node_next, head, hlist) { + hlist_for_each_entry_safe(e_module, module_node, + module_node_next, + &(e->modules), hlist) { + count = e_module->nr_entries; + iter = e_module->table; + while (count--) { + if (addr_conflict(iter, start, end)) { + conflict = 1; + goto out; + } + iter++; + } + } + } + } +out: + return conflict; +} + +#endif + +/*** + * jump_label_text_reserved - check if addr range is reserved + * @start: start text addr + * @end: end text addr + * + * checks if the text addr located between @start and @end + * overlaps with any of the jump label patch addresses. Code + * that wants to modify kernel text should first verify that + * it does not overlap with any of the jump label addresses. 
+ * + * returns 1 if there is an overlap, 0 otherwise + */ +int jump_label_text_reserved(void *start, void *end) +{ + struct jump_entry *iter; + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __start___jump_table; + int conflict = 0; + + mutex_lock(&jump_label_mutex); + iter = iter_start; + while (iter < iter_stop) { + if (addr_conflict(iter, start, end)) { + conflict = 1; + goto out; + } + iter++; + } + + /* now check modules */ +#ifdef CONFIG_MODULES + conflict = module_conflict(start, end); +#endif +out: + mutex_unlock(&jump_label_mutex); + return conflict; +} + +static __init int init_jump_label(void) +{ + int ret; + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __stop___jump_table; + struct jump_entry *iter; + + mutex_lock(&jump_label_mutex); + ret = build_jump_label_hashtable(__start___jump_table, + __stop___jump_table); + iter = iter_start; + while (iter < iter_stop) { + arch_jump_label_text_poke_early(iter->code); + iter++; + } + mutex_unlock(&jump_label_mutex); + return ret; +} +early_initcall(init_jump_label); + +#ifdef CONFIG_MODULES + +static struct jump_label_module_entry * +add_jump_label_module_entry(struct jump_label_entry *entry, + struct jump_entry *iter_begin, + int count, struct module *mod) +{ + struct jump_label_module_entry *e; + + e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + e->mod = mod; + e->nr_entries = count; + e->table = iter_begin; + hlist_add_head(&e->hlist, &entry->modules); + return e; +} + +static int add_jump_label_module(struct module *mod) +{ + struct jump_entry *iter, *iter_begin; + struct jump_label_entry *entry; + struct jump_label_module_entry *module_entry; + int count; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return 0; + + sort_jump_label_entries(mod->jump_entries, + mod->jump_entries + mod->num_jump_entries); + iter = mod->jump_entries; + while (iter < mod->jump_entries + mod->num_jump_entries) { + entry = get_jump_label_entry(iter->key); + iter_begin = iter; + count = 0; + while ((iter < mod->jump_entries + mod->num_jump_entries) && + (iter->key == iter_begin->key)) { + iter++; + count++; + } + if (!entry) { + entry = add_jump_label_entry(iter_begin->key, 0, NULL); + if (IS_ERR(entry)) + return PTR_ERR(entry); + } + module_entry = add_jump_label_module_entry(entry, iter_begin, + count, mod); + if (IS_ERR(module_entry)) + return PTR_ERR(module_entry); + } + return 0; +} + +static void remove_jump_label_module(struct module *mod) +{ + struct hlist_head *head; + struct hlist_node *node, *node_next, *module_node, *module_node_next; + struct jump_label_entry *e; + struct jump_label_module_entry *e_module; + int i; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { + head = &jump_label_table[i]; + hlist_for_each_entry_safe(e, node, node_next, head, hlist) { + hlist_for_each_entry_safe(e_module, module_node, + module_node_next, + &(e->modules), hlist) { + if (e_module->mod == mod) { + hlist_del(&e_module->hlist); + kfree(e_module); + } + } + if (hlist_empty(&e->modules) && (e->nr_entries == 0)) { + hlist_del(&e->hlist); + kfree(e); + } + } + } +} + +static int +jump_label_module_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct module *mod = data; + int ret = 0; + + switch (val) { + case MODULE_STATE_COMING: + 
mutex_lock(&jump_label_mutex); + ret = add_jump_label_module(mod); + if (ret) + remove_jump_label_module(mod); + mutex_unlock(&jump_label_mutex); + break; + case MODULE_STATE_GOING: + mutex_lock(&jump_label_mutex); + remove_jump_label_module(mod); + mutex_unlock(&jump_label_mutex); + break; + } + return ret; +} + +/*** + * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() + * @mod: module to patch + * + * Allow for run-time selection of the optimal nops. Before the module + * loads patch these with arch_get_jump_label_nop(), which is specified by + * the arch specific jump label code. + */ +void jump_label_apply_nops(struct module *mod) +{ + struct jump_entry *iter; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + iter = mod->jump_entries; + while (iter < mod->jump_entries + mod->num_jump_entries) { + arch_jump_label_text_poke_early(iter->code); + iter++; + } +} + +struct notifier_block jump_label_module_nb = { + .notifier_call = jump_label_module_notify, + .priority = 0, +}; + +static __init int init_jump_label_module(void) +{ + return register_module_notifier(&jump_label_module_nb); +} +early_initcall(init_jump_label_module); + +#endif /* CONFIG_MODULES */ + +#endif diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 6b5580c..01a0700 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -365,8 +365,6 @@ static unsigned int setup_sgl(struct __kfifo *fifo, struct scatterlist *sgl, n = setup_sgl_buf(sgl, fifo->data + off, nents, l); n += setup_sgl_buf(sgl + n, fifo->data, nents - n, len - l); - if (n) - sg_mark_end(sgl + n - 1); return n; } diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 282035f..ec4210c 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -47,6 +47,7 @@ #include <linux/memory.h> #include <linux/ftrace.h> #include <linux/cpu.h> +#include <linux/jump_label.h> #include <asm-generic/sections.h> #include <asm/cacheflush.h> @@ -399,7 +400,7 @@ static inline int kprobe_optready(struct kprobe *p) * Return an optimized kprobe whose optimizing code replaces * instructions including addr (exclude breakpoint). 
*/ -struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) +static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) { int i; struct kprobe *p = NULL; @@ -831,6 +832,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, void __kprobes kretprobe_hash_lock(struct task_struct *tsk, struct hlist_head **head, unsigned long *flags) +__acquires(hlist_lock) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); spinlock_t *hlist_lock; @@ -842,6 +844,7 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk, static void __kprobes kretprobe_table_lock(unsigned long hash, unsigned long *flags) +__acquires(hlist_lock) { spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); spin_lock_irqsave(hlist_lock, *flags); @@ -849,6 +852,7 @@ static void __kprobes kretprobe_table_lock(unsigned long hash, void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) +__releases(hlist_lock) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); spinlock_t *hlist_lock; @@ -857,7 +861,9 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, spin_unlock_irqrestore(hlist_lock, *flags); } -void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) +static void __kprobes kretprobe_table_unlock(unsigned long hash, + unsigned long *flags) +__releases(hlist_lock) { spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); spin_unlock_irqrestore(hlist_lock, *flags); @@ -1141,7 +1147,8 @@ int __kprobes register_kprobe(struct kprobe *p) preempt_disable(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || - ftrace_text_reserved(p->addr, p->addr)) { + ftrace_text_reserved(p->addr, p->addr) || + jump_label_text_reserved(p->addr, p->addr)) { preempt_enable(); return -EINVAL; } @@ -1339,18 +1346,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num) if (num <= 0) return -EINVAL; for (i = 0; i < num; i++) { - unsigned long addr; + unsigned long addr, offset; jp = jps[i]; addr = arch_deref_entry_point(jp->entry); - if (!kernel_text_address(addr)) - ret = -EINVAL; - else { - /* Todo: Verify probepoint is a function entry point */ + /* Verify probepoint is a function entry point */ + if (kallsyms_lookup_size_offset(addr, NULL, &offset) && + offset == 0) { jp->kp.pre_handler = setjmp_pre_handler; jp->kp.break_handler = longjmp_break_handler; ret = register_kprobe(&jp->kp); - } + } else + ret = -EINVAL; + if (ret < 0) { if (i > 0) unregister_jprobes(jps, i); diff --git a/kernel/module.c b/kernel/module.c index d0b5f8d..2df4630 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -55,6 +55,7 @@ #include <linux/async.h> #include <linux/percpu.h> #include <linux/kmemleak.h> +#include <linux/jump_label.h> #define CREATE_TRACE_POINTS #include <trace/events/module.h> @@ -1537,6 +1538,7 @@ static int __unlink_module(void *_mod) { struct module *mod = _mod; list_del(&mod->list); + module_bug_cleanup(mod); return 0; } @@ -2308,6 +2310,11 @@ static void find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif +#ifdef HAVE_JUMP_LABEL + mod->jump_entries = section_objs(info, "__jump_table", + sizeof(*mod->jump_entries), + &mod->num_jump_entries); +#endif #ifdef CONFIG_EVENT_TRACING mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), @@ -2625,6 +2632,7 @@ static struct module *load_module(void __user *umod, if (err < 0) goto ddebug; + module_bug_finalize(info.hdr, info.sechdrs, mod); 
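(Before the kernel/perf_event.c rework below, a reference sketch of the new struct pmu contract from include/linux/perf_event.h: event_init() claims matching events and returns -ENOENT otherwise, add()/del() install and remove a counter, start()/stop() run and halt it, and hw.state carries the PERF_HES_* flags. Everything here is a hypothetical skeleton, not code from this patch; the optional callbacks such as start_txn()/commit_txn() are left unset, as the breakpoint PMU conversion above also does.)

#include <linux/init.h>
#include <linux/perf_event.h>

static int nullpmu_event_init(struct perf_event *event)
{
	/* a real PMU claims only events of its own type */
	if (event->attr.type != PERF_TYPE_RAW)
		return -ENOENT;
	return 0;
}

static int nullpmu_add(struct perf_event *event, int flags)
{
	event->hw.state = (flags & PERF_EF_START) ? 0 : PERF_HES_STOPPED;
	return 0;				/* counter installed */
}

static void nullpmu_del(struct perf_event *event, int flags)
{
	event->hw.state = PERF_HES_STOPPED;
}

static void nullpmu_start(struct perf_event *event, int flags)
{
	event->hw.state = 0;			/* counting again */
}

static void nullpmu_stop(struct perf_event *event, int flags)
{
	event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
}

static void nullpmu_read(struct perf_event *event)
{
	/* nothing to fold back into event->count for this dummy */
}

static struct pmu nullpmu = {
	.task_ctx_nr	= perf_sw_context,
	.event_init	= nullpmu_event_init,
	.add		= nullpmu_add,
	.del		= nullpmu_del,
	.start		= nullpmu_start,
	.stop		= nullpmu_stop,
	.read		= nullpmu_read,
};

static int __init nullpmu_init(void)
{
	return perf_pmu_register(&nullpmu);
}
early_initcall(nullpmu_init);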
list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2650,6 +2658,8 @@ static struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + module_bug_cleanup(mod); + ddebug: if (!mod->taints) dynamic_debug_remove(info.debug); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fc51268..1ec3916 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,24 +31,18 @@ #include <linux/kernel_stat.h> #include <linux/perf_event.h> #include <linux/ftrace_event.h> -#include <linux/hw_breakpoint.h> #include <asm/irq_regs.h> -/* - * Each CPU has a list of per CPU events: - */ -static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); - -int perf_max_events __read_mostly = 1; -static int perf_reserved_percpu __read_mostly; -static int perf_overcommit __read_mostly = 1; - static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + /* * perf event paranoia level: * -1 - not paranoid at all @@ -67,22 +61,6 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; static atomic64_t perf_event_id; -/* - * Lock for (sysadmin-configurable) event reservations: - */ -static DEFINE_SPINLOCK(perf_resource_lock); - -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) -{ - return NULL; -} - -void __weak hw_perf_disable(void) { barrier(); } -void __weak hw_perf_enable(void) { barrier(); } - void __weak perf_event_print_debug(void) { } extern __weak const char *perf_pmu_name(void) @@ -90,18 +68,36 @@ extern __weak const char *perf_pmu_name(void) return "pmu"; } -static DEFINE_PER_CPU(int, perf_disable_count); +void perf_pmu_disable(struct pmu *pmu) +{ + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!(*count)++) + pmu->pmu_disable(pmu); +} -void perf_disable(void) +void perf_pmu_enable(struct pmu *pmu) { - if (!__get_cpu_var(perf_disable_count)++) - hw_perf_disable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!--(*count)) + pmu->pmu_enable(pmu); } -void perf_enable(void) +static DEFINE_PER_CPU(struct list_head, rotation_list); + +/* + * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized + * because they're strictly cpu affine and rotate_start is called with IRQs + * disabled, while rotate_context is called from IRQ context. + */ +static void perf_pmu_rotate_start(struct pmu *pmu) { - if (!--__get_cpu_var(perf_disable_count)) - hw_perf_enable(); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + struct list_head *head = &__get_cpu_var(rotation_list); + + WARN_ON(!irqs_disabled()); + + if (list_empty(&cpuctx->rotation_list)) + list_add(&cpuctx->rotation_list, head); } static void get_ctx(struct perf_event_context *ctx) @@ -156,13 +152,13 @@ static u64 primary_event_id(struct perf_event *event) * the context could get moved to another task. 
*/ static struct perf_event_context * -perf_lock_task_context(struct task_struct *task, unsigned long *flags) +perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; rcu_read_lock(); - retry: - ctx = rcu_dereference(task->perf_event_ctxp); +retry: + ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* * If this context is a clone of another, it might @@ -175,7 +171,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) * can't get swapped on us any more. */ raw_spin_lock_irqsave(&ctx->lock, *flags); - if (ctx != rcu_dereference(task->perf_event_ctxp)) { + if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); goto retry; } @@ -194,12 +190,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) * can't get swapped to another task. This also increments its * reference count so that the context can't get freed. */ -static struct perf_event_context *perf_pin_task_context(struct task_struct *task) +static struct perf_event_context * +perf_pin_task_context(struct task_struct *task, int ctxn) { struct perf_event_context *ctx; unsigned long flags; - ctx = perf_lock_task_context(task, &flags); + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -307,6 +304,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) } list_add_rcu(&event->event_entry, &ctx->event_list); + if (!ctx->nr_events) + perf_pmu_rotate_start(ctx->pmu); ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -441,7 +440,7 @@ event_sched_out(struct perf_event *event, event->state = PERF_EVENT_STATE_OFF; } event->tstamp_stopped = ctx->time; - event->pmu->disable(event); + event->pmu->del(event, 0); event->oncpu = -1; if (!is_software_event(event)) @@ -471,6 +470,12 @@ group_sched_out(struct perf_event *group_event, cpuctx->exclusive = 0; } +static inline struct perf_cpu_context * +__get_cpu_context(struct perf_event_context *ctx) +{ + return this_cpu_ptr(ctx->pmu->pmu_cpu_context); +} + /* * Cross CPU call to remove a performance event * @@ -479,9 +484,9 @@ group_sched_out(struct perf_event *group_event, */ static void __perf_event_remove_from_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -492,27 +497,11 @@ static void __perf_event_remove_from_context(void *info) return; raw_spin_lock(&ctx->lock); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. 
- */ - perf_disable(); event_sched_out(event, cpuctx, ctx); list_del_event(event, ctx); - if (!ctx->task) { - /* - * Allow more per task events with respect to the - * reservation: - */ - cpuctx->max_pertask = - min(perf_max_events - ctx->nr_events, - perf_max_events - perf_reserved_percpu); - } - - perf_enable(); raw_spin_unlock(&ctx->lock); } @@ -577,8 +566,8 @@ retry: static void __perf_event_disable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a per-task event, need to check whether this @@ -633,7 +622,7 @@ void perf_event_disable(struct perf_event *event) return; } - retry: +retry: task_oncpu_function_call(task, __perf_event_disable, event); raw_spin_lock_irq(&ctx->lock); @@ -672,7 +661,7 @@ event_sched_in(struct perf_event *event, */ smp_wmb(); - if (event->pmu->enable(event)) { + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; return -EAGAIN; @@ -696,22 +685,15 @@ group_sched_in(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event, *partial_group = NULL; - const struct pmu *pmu = group_event->pmu; - bool txn = false; + struct pmu *pmu = group_event->pmu; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - /* Check if group transaction availabe */ - if (pmu->start_txn) - txn = true; - - if (txn) - pmu->start_txn(pmu); + pmu->start_txn(pmu); if (event_sched_in(group_event, cpuctx, ctx)) { - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -725,7 +707,7 @@ group_sched_in(struct perf_event *group_event, } } - if (!txn || !pmu->commit_txn(pmu)) + if (!pmu->commit_txn(pmu)) return 0; group_error: @@ -740,8 +722,7 @@ group_error: } event_sched_out(group_event, cpuctx, ctx); - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -794,10 +775,10 @@ static void add_event_to_ctx(struct perf_event *event, */ static void __perf_install_in_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -817,12 +798,6 @@ static void __perf_install_in_context(void *info) ctx->is_active = 1; update_context_time(ctx); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. NOP for non NMI based events. 
- */ - perf_disable(); - add_event_to_ctx(event, ctx); if (event->cpu != -1 && event->cpu != smp_processor_id()) @@ -860,12 +835,7 @@ static void __perf_install_in_context(void *info) } } - if (!err && !ctx->task && cpuctx->max_pertask) - cpuctx->max_pertask--; - - unlock: - perf_enable(); - +unlock: raw_spin_unlock(&ctx->lock); } @@ -888,6 +858,8 @@ perf_install_in_context(struct perf_event_context *ctx, { struct task_struct *task = ctx->task; + event->ctx = ctx; + if (!task) { /* * Per cpu events are installed via an smp call and @@ -936,10 +908,12 @@ static void __perf_event_mark_enabled(struct perf_event *event, event->state = PERF_EVENT_STATE_INACTIVE; event->tstamp_enabled = ctx->time - event->total_time_enabled; - list_for_each_entry(sub, &event->sibling_list, group_entry) - if (sub->state >= PERF_EVENT_STATE_INACTIVE) + list_for_each_entry(sub, &event->sibling_list, group_entry) { + if (sub->state >= PERF_EVENT_STATE_INACTIVE) { sub->tstamp_enabled = ctx->time - sub->total_time_enabled; + } + } } /* @@ -948,9 +922,9 @@ static void __perf_event_mark_enabled(struct perf_event *event, static void __perf_event_enable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -984,12 +958,10 @@ static void __perf_event_enable(void *info) if (!group_can_go_on(event, cpuctx, 1)) { err = -EEXIST; } else { - perf_disable(); if (event == leader) err = group_sched_in(event, cpuctx, ctx); else err = event_sched_in(event, cpuctx, ctx); - perf_enable(); } if (err) { @@ -1005,7 +977,7 @@ static void __perf_event_enable(void *info) } } - unlock: +unlock: raw_spin_unlock(&ctx->lock); } @@ -1046,7 +1018,7 @@ void perf_event_enable(struct perf_event *event) if (event->state == PERF_EVENT_STATE_ERROR) event->state = PERF_EVENT_STATE_OFF; - retry: +retry: raw_spin_unlock_irq(&ctx->lock); task_oncpu_function_call(task, __perf_event_enable, event); @@ -1066,7 +1038,7 @@ void perf_event_enable(struct perf_event *event) if (event->state == PERF_EVENT_STATE_OFF) __perf_event_mark_enabled(event, ctx); - out: +out: raw_spin_unlock_irq(&ctx->lock); } @@ -1097,26 +1069,26 @@ static void ctx_sched_out(struct perf_event_context *ctx, struct perf_event *event; raw_spin_lock(&ctx->lock); + perf_pmu_disable(ctx->pmu); ctx->is_active = 0; if (likely(!ctx->nr_events)) goto out; update_context_time(ctx); - perf_disable(); if (!ctx->nr_active) - goto out_enable; + goto out; - if (event_type & EVENT_PINNED) + if (event_type & EVENT_PINNED) { list_for_each_entry(event, &ctx->pinned_groups, group_entry) group_sched_out(event, cpuctx, ctx); + } - if (event_type & EVENT_FLEXIBLE) + if (event_type & EVENT_FLEXIBLE) { list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); - - out_enable: - perf_enable(); - out: + } +out: + perf_pmu_enable(ctx->pmu); raw_spin_unlock(&ctx->lock); } @@ -1214,34 +1186,25 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, } } -/* - * Called from scheduler to remove the events of the current task, - * with interrupts disabled. - * - * We stop each event and update the event value in event->count. - * - * This does not protect us against NMI, but disable() - * sets the disabled bit in the control field of event _before_ - * accessing the event control register. If a NMI hits, then it will - * not restart the event. 
- */ -void perf_event_task_sched_out(struct task_struct *task, - struct task_struct *next) +void perf_event_context_sched_out(struct task_struct *task, int ctxn, + struct task_struct *next) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *ctx = task->perf_event_ctxp[ctxn]; struct perf_event_context *next_ctx; struct perf_event_context *parent; + struct perf_cpu_context *cpuctx; int do_switch = 1; - perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); + if (likely(!ctx)) + return; - if (likely(!ctx || !cpuctx->task_ctx)) + cpuctx = __get_cpu_context(ctx); + if (!cpuctx->task_ctx) return; rcu_read_lock(); parent = rcu_dereference(ctx->parent_ctx); - next_ctx = next->perf_event_ctxp; + next_ctx = next->perf_event_ctxp[ctxn]; if (parent && next_ctx && rcu_dereference(next_ctx->parent_ctx) == parent) { /* @@ -1260,8 +1223,8 @@ void perf_event_task_sched_out(struct task_struct *task, * XXX do we need a memory barrier of sorts * wrt to rcu_dereference() of perf_event_ctxp */ - task->perf_event_ctxp = next_ctx; - next->perf_event_ctxp = ctx; + task->perf_event_ctxp[ctxn] = next_ctx; + next->perf_event_ctxp[ctxn] = ctx; ctx->task = next; next_ctx->task = task; do_switch = 0; @@ -1279,10 +1242,35 @@ void perf_event_task_sched_out(struct task_struct *task, } } +#define for_each_task_context_nr(ctxn) \ + for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++) + +/* + * Called from scheduler to remove the events of the current task, + * with interrupts disabled. + * + * We stop each event and update the event value in event->count. + * + * This does not protect us against NMI, but disable() + * sets the disabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * not restart the event. + */ +void perf_event_task_sched_out(struct task_struct *task, + struct task_struct *next) +{ + int ctxn; + + perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0); + + for_each_task_context_nr(ctxn) + perf_event_context_sched_out(task, ctxn, next); +} + static void task_ctx_sched_out(struct perf_event_context *ctx, enum event_type_t event_type) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); if (!cpuctx->task_ctx) return; @@ -1355,9 +1343,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx, if (event->cpu != -1 && event->cpu != smp_processor_id()) continue; - if (group_can_go_on(event, cpuctx, can_add_hw)) + if (group_can_go_on(event, cpuctx, can_add_hw)) { if (group_sched_in(event, cpuctx, ctx)) can_add_hw = 0; + } } } @@ -1373,8 +1362,6 @@ ctx_sched_in(struct perf_event_context *ctx, ctx->timestamp = perf_clock(); - perf_disable(); - /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. 
@@ -1386,8 +1373,7 @@ ctx_sched_in(struct perf_event_context *ctx, if (event_type & EVENT_FLEXIBLE) ctx_flexible_sched_in(ctx, cpuctx); - perf_enable(); - out: +out: raw_spin_unlock(&ctx->lock); } @@ -1399,43 +1385,28 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, ctx_sched_in(ctx, cpuctx, event_type); } -static void task_ctx_sched_in(struct task_struct *task, +static void task_ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_cpu_context *cpuctx; - if (likely(!ctx)) - return; + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; + ctx_sched_in(ctx, cpuctx, event_type); cpuctx->task_ctx = ctx; } -/* - * Called from scheduler to add the events of the current task - * with interrupts disabled. - * - * We restore the event value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of event _before_ - * accessing the event control register. If a NMI hits, then it will - * keep the event running. - */ -void perf_event_task_sched_in(struct task_struct *task) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_event_context *ctx = task->perf_event_ctxp; - if (likely(!ctx)) - return; +void perf_event_context_sched_in(struct perf_event_context *ctx) +{ + struct perf_cpu_context *cpuctx; + cpuctx = __get_cpu_context(ctx); if (cpuctx->task_ctx == ctx) return; - perf_disable(); - + perf_pmu_disable(ctx->pmu); /* * We want to keep the following priority order: * cpu pinned (that don't need to move), task pinned, @@ -1449,7 +1420,37 @@ void perf_event_task_sched_in(struct task_struct *task) cpuctx->task_ctx = ctx; - perf_enable(); + /* + * Since these rotations are per-cpu, we need to ensure the + * cpu-context we got scheduled on is actually rotating. + */ + perf_pmu_rotate_start(ctx->pmu); + perf_pmu_enable(ctx->pmu); +} + +/* + * Called from scheduler to add the events of the current task + * with interrupts disabled. + * + * We restore the event value and then enable it. + * + * This does not protect us against NMI, but enable() + * sets the enabled bit in the control field of event _before_ + * accessing the event control register. If a NMI hits, then it will + * keep the event running. 
+ */ +void perf_event_task_sched_in(struct task_struct *task) +{ + struct perf_event_context *ctx; + int ctxn; + + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (likely(!ctx)) + continue; + + perf_event_context_sched_in(ctx); + } } #define MAX_INTERRUPTS (~0ULL) @@ -1529,22 +1530,6 @@ do { \ return div64_u64(dividend, divisor); } -static void perf_event_stop(struct perf_event *event) -{ - if (!event->pmu->stop) - return event->pmu->disable(event); - - return event->pmu->stop(event); -} - -static int perf_event_start(struct perf_event *event) -{ - if (!event->pmu->start) - return event->pmu->enable(event); - - return event->pmu->start(event); -} - static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) { struct hw_perf_event *hwc = &event->hw; @@ -1564,15 +1549,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count) hwc->sample_period = sample_period; if (local64_read(&hwc->period_left) > 8*sample_period) { - perf_disable(); - perf_event_stop(event); + event->pmu->stop(event, PERF_EF_UPDATE); local64_set(&hwc->period_left, 0); - perf_event_start(event); - perf_enable(); + event->pmu->start(event, PERF_EF_RELOAD); } } -static void perf_ctx_adjust_freq(struct perf_event_context *ctx) +static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) { struct perf_event *event; struct hw_perf_event *hwc; @@ -1597,23 +1580,19 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx) */ if (interrupts == MAX_INTERRUPTS) { perf_log_throttle(event, 1); - perf_disable(); - event->pmu->unthrottle(event); - perf_enable(); + event->pmu->start(event, 0); } if (!event->attr.freq || !event->attr.sample_freq) continue; - perf_disable(); event->pmu->read(event); now = local64_read(&event->count); delta = now - hwc->freq_count_stamp; hwc->freq_count_stamp = now; if (delta > 0) - perf_adjust_period(event, TICK_NSEC, delta); - perf_enable(); + perf_adjust_period(event, period, delta); } raw_spin_unlock(&ctx->lock); } @@ -1631,32 +1610,38 @@ static void rotate_ctx(struct perf_event_context *ctx) raw_spin_unlock(&ctx->lock); } -void perf_event_task_tick(struct task_struct *curr) +/* + * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized + * because they're strictly cpu affine and rotate_start is called with IRQs + * disabled, while rotate_context is called from IRQ context. 
+ */ +static void perf_rotate_context(struct perf_cpu_context *cpuctx) { - struct perf_cpu_context *cpuctx; - struct perf_event_context *ctx; - int rotate = 0; - - if (!atomic_read(&nr_events)) - return; + u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC; + struct perf_event_context *ctx = NULL; + int rotate = 0, remove = 1; - cpuctx = &__get_cpu_var(perf_cpu_context); - if (cpuctx->ctx.nr_events && - cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) - rotate = 1; + if (cpuctx->ctx.nr_events) { + remove = 0; + if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) + rotate = 1; + } - ctx = curr->perf_event_ctxp; - if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active) - rotate = 1; + ctx = cpuctx->task_ctx; + if (ctx && ctx->nr_events) { + remove = 0; + if (ctx->nr_events != ctx->nr_active) + rotate = 1; + } - perf_ctx_adjust_freq(&cpuctx->ctx); + perf_pmu_disable(cpuctx->ctx.pmu); + perf_ctx_adjust_freq(&cpuctx->ctx, interval); if (ctx) - perf_ctx_adjust_freq(ctx); + perf_ctx_adjust_freq(ctx, interval); if (!rotate) - return; + goto done; - perf_disable(); cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) task_ctx_sched_out(ctx, EVENT_FLEXIBLE); @@ -1667,8 +1652,27 @@ void perf_event_task_tick(struct task_struct *curr) cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_in(curr, EVENT_FLEXIBLE); - perf_enable(); + task_ctx_sched_in(ctx, EVENT_FLEXIBLE); + +done: + if (remove) + list_del_init(&cpuctx->rotation_list); + + perf_pmu_enable(cpuctx->ctx.pmu); +} + +void perf_event_task_tick(void) +{ + struct list_head *head = &__get_cpu_var(rotation_list); + struct perf_cpu_context *cpuctx, *tmp; + + WARN_ON(!irqs_disabled()); + + list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { + if (cpuctx->jiffies_interval == 1 || + !(jiffies % cpuctx->jiffies_interval)) + perf_rotate_context(cpuctx); + } } static int event_enable_on_exec(struct perf_event *event, @@ -1690,20 +1694,18 @@ static int event_enable_on_exec(struct perf_event *event, * Enable all of a task's events that have been marked enable-on-exec. * This expects task == current. 
*/ -static void perf_event_enable_on_exec(struct task_struct *task) +static void perf_event_enable_on_exec(struct perf_event_context *ctx) { - struct perf_event_context *ctx; struct perf_event *event; unsigned long flags; int enabled = 0; int ret; local_irq_save(flags); - ctx = task->perf_event_ctxp; if (!ctx || !ctx->nr_events) goto out; - __perf_event_task_sched_out(ctx); + task_ctx_sched_out(ctx, EVENT_ALL); raw_spin_lock(&ctx->lock); @@ -1727,8 +1729,8 @@ static void perf_event_enable_on_exec(struct task_struct *task) raw_spin_unlock(&ctx->lock); - perf_event_task_sched_in(task); - out: + perf_event_context_sched_in(ctx); +out: local_irq_restore(flags); } @@ -1737,9 +1739,9 @@ static void perf_event_enable_on_exec(struct task_struct *task) */ static void __perf_event_read(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -1787,11 +1789,219 @@ static u64 perf_event_read(struct perf_event *event) } /* - * Initialize the perf_event context in a task_struct: + * Callchain support */ + +struct callchain_cpus_entries { + struct rcu_head rcu_head; + struct perf_callchain_entry *cpu_entries[0]; +}; + +static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]); +static atomic_t nr_callchain_events; +static DEFINE_MUTEX(callchain_mutex); +struct callchain_cpus_entries *callchain_cpus_entries; + + +__weak void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +__weak void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ +} + +static void release_callchain_buffers_rcu(struct rcu_head *head) +{ + struct callchain_cpus_entries *entries; + int cpu; + + entries = container_of(head, struct callchain_cpus_entries, rcu_head); + + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + + kfree(entries); +} + +static void release_callchain_buffers(void) +{ + struct callchain_cpus_entries *entries; + + entries = callchain_cpus_entries; + rcu_assign_pointer(callchain_cpus_entries, NULL); + call_rcu(&entries->rcu_head, release_callchain_buffers_rcu); +} + +static int alloc_callchain_buffers(void) +{ + int cpu; + int size; + struct callchain_cpus_entries *entries; + + /* + * We can't use the percpu allocation API for data that can be + * accessed from NMI. Use a temporary manual per cpu allocation + * until that gets sorted out. 
+ */ + size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) * + num_possible_cpus(); + + entries = kzalloc(size, GFP_KERNEL); + if (!entries) + return -ENOMEM; + + size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS; + + for_each_possible_cpu(cpu) { + entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL, + cpu_to_node(cpu)); + if (!entries->cpu_entries[cpu]) + goto fail; + } + + rcu_assign_pointer(callchain_cpus_entries, entries); + + return 0; + +fail: + for_each_possible_cpu(cpu) + kfree(entries->cpu_entries[cpu]); + kfree(entries); + + return -ENOMEM; +} + +static int get_callchain_buffers(void) +{ + int err = 0; + int count; + + mutex_lock(&callchain_mutex); + + count = atomic_inc_return(&nr_callchain_events); + if (WARN_ON_ONCE(count < 1)) { + err = -EINVAL; + goto exit; + } + + if (count > 1) { + /* If the allocation failed, give up */ + if (!callchain_cpus_entries) + err = -ENOMEM; + goto exit; + } + + err = alloc_callchain_buffers(); + if (err) + release_callchain_buffers(); +exit: + mutex_unlock(&callchain_mutex); + + return err; +} + +static void put_callchain_buffers(void) +{ + if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) { + release_callchain_buffers(); + mutex_unlock(&callchain_mutex); + } +} + +static int get_recursion_context(int *recursion) +{ + int rctx; + + if (in_nmi()) + rctx = 3; + else if (in_irq()) + rctx = 2; + else if (in_softirq()) + rctx = 1; + else + rctx = 0; + + if (recursion[rctx]) + return -1; + + recursion[rctx]++; + barrier(); + + return rctx; +} + +static inline void put_recursion_context(int *recursion, int rctx) +{ + barrier(); + recursion[rctx]--; +} + +static struct perf_callchain_entry *get_callchain_entry(int *rctx) +{ + int cpu; + struct callchain_cpus_entries *entries; + + *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + if (*rctx == -1) + return NULL; + + entries = rcu_dereference(callchain_cpus_entries); + if (!entries) + return NULL; + + cpu = smp_processor_id(); + + return &entries->cpu_entries[cpu][*rctx]; +} + static void -__perf_event_init_context(struct perf_event_context *ctx, - struct task_struct *task) +put_callchain_entry(int rctx) +{ + put_recursion_context(__get_cpu_var(callchain_recursion), rctx); +} + +static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) +{ + int rctx; + struct perf_callchain_entry *entry; + + + entry = get_callchain_entry(&rctx); + if (rctx == -1) + return NULL; + + if (!entry) + goto exit_put; + + entry->nr = 0; + + if (!user_mode(regs)) { + perf_callchain_store(entry, PERF_CONTEXT_KERNEL); + perf_callchain_kernel(entry, regs); + if (current->mm) + regs = task_pt_regs(current); + else + regs = NULL; + } + + if (regs) { + perf_callchain_store(entry, PERF_CONTEXT_USER); + perf_callchain_user(entry, regs); + } + +exit_put: + put_callchain_entry(rctx); + + return entry; +} + +/* + * Initialize the perf_event context in a task_struct: + */ +static void __perf_event_init_context(struct perf_event_context *ctx) { raw_spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); @@ -1799,45 +2009,38 @@ __perf_event_init_context(struct perf_event_context *ctx, INIT_LIST_HEAD(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); - ctx->task = task; } -static struct perf_event_context *find_get_context(pid_t pid, int cpu) +static struct perf_event_context * +alloc_perf_context(struct pmu *pmu, struct task_struct *task) { struct perf_event_context *ctx; - struct perf_cpu_context *cpuctx; - struct task_struct 
*task; - unsigned long flags; - int err; - - if (pid == -1 && cpu != -1) { - /* Must be root to operate on a CPU event: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); - if (cpu < 0 || cpu >= nr_cpumask_bits) - return ERR_PTR(-EINVAL); + ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); + if (!ctx) + return NULL; - /* - * We could be clever and allow to attach a event to an - * offline CPU and activate it when the CPU comes up, but - * that's for later. - */ - if (!cpu_online(cpu)) - return ERR_PTR(-ENODEV); + __perf_event_init_context(ctx); + if (task) { + ctx->task = task; + get_task_struct(task); + } + ctx->pmu = pmu; - cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = &cpuctx->ctx; - get_ctx(ctx); + return ctx; +} - return ctx; - } +static struct task_struct * +find_lively_task_by_vpid(pid_t vpid) +{ + struct task_struct *task; + int err; rcu_read_lock(); - if (!pid) + if (!vpid) task = current; else - task = find_task_by_vpid(pid); + task = find_task_by_vpid(vpid); if (task) get_task_struct(task); rcu_read_unlock(); @@ -1857,35 +2060,79 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto errout; - retry: - ctx = perf_lock_task_context(task, &flags); + return task; +errout: + put_task_struct(task); + return ERR_PTR(err); + +} + +static struct perf_event_context * +find_get_context(struct pmu *pmu, struct task_struct *task, int cpu) +{ + struct perf_event_context *ctx; + struct perf_cpu_context *cpuctx; + unsigned long flags; + int ctxn, err; + + if (!task && cpu != -1) { + /* Must be root to operate on a CPU event: */ + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EACCES); + + if (cpu < 0 || cpu >= nr_cpumask_bits) + return ERR_PTR(-EINVAL); + + /* + * We could be clever and allow to attach a event to an + * offline CPU and activate it when the CPU comes up, but + * that's for later. + */ + if (!cpu_online(cpu)) + return ERR_PTR(-ENODEV); + + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + ctx = &cpuctx->ctx; + get_ctx(ctx); + + return ctx; + } + + err = -EINVAL; + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + goto errout; + +retry: + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { unclone_ctx(ctx); raw_spin_unlock_irqrestore(&ctx->lock, flags); } if (!ctx) { - ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); + ctx = alloc_perf_context(pmu, task); err = -ENOMEM; if (!ctx) goto errout; - __perf_event_init_context(ctx, task); + get_ctx(ctx); - if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) { + + if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) { /* * We raced with some other task; use * the context they set. 
*/ + put_task_struct(task); kfree(ctx); goto retry; } - get_task_struct(task); } put_task_struct(task); return ctx; - errout: +errout: put_task_struct(task); return ERR_PTR(err); } @@ -1918,6 +2165,8 @@ static void free_event(struct perf_event *event) atomic_dec(&nr_comm_events); if (event->attr.task) atomic_dec(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) + put_callchain_buffers(); } if (event->buffer) { @@ -1928,7 +2177,9 @@ static void free_event(struct perf_event *event) if (event->destroy) event->destroy(event); - put_ctx(event->ctx); + if (event->ctx) + put_ctx(event->ctx); + call_rcu(&event->rcu_head, free_event_rcu); } @@ -2349,6 +2600,9 @@ int perf_event_task_disable(void) static int perf_event_index(struct perf_event *event) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; + if (event->state != PERF_EVENT_STATE_ACTIVE) return 0; @@ -2961,16 +3215,6 @@ void perf_event_do_pending(void) } /* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - return NULL; -} - - -/* * We assume there is only KVM supporting the callbacks. * Later on, we might change it to a list if there is * another virtualization implementation supporting the callbacks. @@ -3076,7 +3320,7 @@ again: if (handle->wakeup != local_read(&buffer->wakeup)) perf_output_wakeup(handle); - out: +out: preempt_enable(); } @@ -3464,14 +3708,20 @@ static void perf_event_output(struct perf_event *event, int nmi, struct perf_output_handle handle; struct perf_event_header header; + /* protect the callchain buffers */ + rcu_read_lock(); + perf_prepare_sample(&header, data, event, regs); if (perf_output_begin(&handle, event, header.size, nmi, 1)) - return; + goto exit; perf_output_sample(&handle, &header, data, event); perf_output_end(&handle); + +exit: + rcu_read_unlock(); } /* @@ -3585,16 +3835,27 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, static void perf_event_task_event(struct perf_task_event *task_event) { struct perf_cpu_context *cpuctx; - struct perf_event_context *ctx = task_event->task_ctx; + struct perf_event_context *ctx; + struct pmu *pmu; + int ctxn; rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_task_ctx(&cpuctx->ctx, task_event); - if (!ctx) - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_task_ctx(ctx, task_event); - put_cpu_var(perf_cpu_context); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + perf_event_task_ctx(&cpuctx->ctx, task_event); + + ctx = task_event->task_ctx; + if (!ctx) { + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + goto next; + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + } + if (ctx) + perf_event_task_ctx(ctx, task_event); +next: + put_cpu_ptr(pmu->pmu_cpu_context); + } rcu_read_unlock(); } @@ -3699,8 +3960,10 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) { struct perf_cpu_context *cpuctx; struct perf_event_context *ctx; - unsigned int size; char comm[TASK_COMM_LEN]; + unsigned int size; + struct pmu *pmu; + int ctxn; memset(comm, 0, sizeof(comm)); strlcpy(comm, comm_event->task->comm, sizeof(comm)); @@ -3712,21 +3975,36 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_comm_ctx(&cpuctx->ctx, comm_event); - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - 
perf_event_comm_ctx(ctx, comm_event); - put_cpu_var(perf_cpu_context); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + perf_event_comm_ctx(&cpuctx->ctx, comm_event); + + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + goto next; + + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) + perf_event_comm_ctx(ctx, comm_event); +next: + put_cpu_ptr(pmu->pmu_cpu_context); + } rcu_read_unlock(); } void perf_event_comm(struct task_struct *task) { struct perf_comm_event comm_event; + struct perf_event_context *ctx; + int ctxn; + + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (!ctx) + continue; - if (task->perf_event_ctxp) - perf_event_enable_on_exec(task); + perf_event_enable_on_exec(ctx); + } if (!atomic_read(&nr_comm_events)) return; @@ -3828,6 +4106,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) char tmp[16]; char *buf = NULL; const char *name; + struct pmu *pmu; + int ctxn; memset(tmp, 0, sizeof(tmp)); @@ -3880,12 +4160,23 @@ got_name: mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; rcu_read_lock(); - cpuctx = &get_cpu_var(perf_cpu_context); - perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC); - ctx = rcu_dereference(current->perf_event_ctxp); - if (ctx) - perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC); - put_cpu_var(perf_cpu_context); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, + vma->vm_flags & VM_EXEC); + + ctxn = pmu->task_ctx_nr; + if (ctxn < 0) + goto next; + + ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); + if (ctx) { + perf_event_mmap_ctx(ctx, mmap_event, + vma->vm_flags & VM_EXEC); + } +next: + put_cpu_ptr(pmu->pmu_cpu_context); + } rcu_read_unlock(); kfree(buf); @@ -3967,8 +4258,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, struct hw_perf_event *hwc = &event->hw; int ret = 0; - throttle = (throttle && event->pmu->unthrottle != NULL); - if (!throttle) { hwc->interrupts++; } else { @@ -4036,6 +4325,17 @@ int perf_event_overflow(struct perf_event *event, int nmi, * Generic software event infrastructure */ +struct swevent_htable { + struct swevent_hlist *swevent_hlist; + struct mutex hlist_mutex; + int hlist_refcount; + + /* Recursion avoidance in each contexts */ + int recursion[PERF_NR_CONTEXTS]; +}; + +static DEFINE_PER_CPU(struct swevent_htable, swevent_htable); + /* * We directly increment event->count and keep a second value in * event->hw.period_left to count intervals. 
This period event @@ -4093,7 +4393,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow, } } -static void perf_swevent_add(struct perf_event *event, u64 nr, +static void perf_swevent_event(struct perf_event *event, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { @@ -4119,6 +4419,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { + if (event->hw.state & PERF_HES_STOPPED) + return 0; + if (regs) { if (event->attr.exclude_user && user_mode(regs)) return 1; @@ -4165,11 +4468,11 @@ __find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id) /* For the read side: events when they trigger */ static inline struct hlist_head * -find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) +find_swevent_head_rcu(struct swevent_htable *swhash, u64 type, u32 event_id) { struct swevent_hlist *hlist; - hlist = rcu_dereference(ctx->swevent_hlist); + hlist = rcu_dereference(swhash->swevent_hlist); if (!hlist) return NULL; @@ -4178,7 +4481,7 @@ find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id) /* For the event head insertion and removal in the hlist */ static inline struct hlist_head * -find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) +find_swevent_head(struct swevent_htable *swhash, struct perf_event *event) { struct swevent_hlist *hlist; u32 event_id = event->attr.config; @@ -4189,7 +4492,7 @@ find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event) * and release. Which makes the protected version suitable here. * The context lock guarantees that. */ - hlist = rcu_dereference_protected(ctx->swevent_hlist, + hlist = rcu_dereference_protected(swhash->swevent_hlist, lockdep_is_held(&event->ctx->lock)); if (!hlist) return NULL; @@ -4202,23 +4505,19 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { - struct perf_cpu_context *cpuctx; + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct perf_event *event; struct hlist_node *node; struct hlist_head *head; - cpuctx = &__get_cpu_var(perf_cpu_context); - rcu_read_lock(); - - head = find_swevent_head_rcu(cpuctx, type, event_id); - + head = find_swevent_head_rcu(swhash, type, event_id); if (!head) goto end; hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) - perf_swevent_add(event, nr, nmi, data, regs); + perf_swevent_event(event, nr, nmi, data, regs); } end: rcu_read_unlock(); @@ -4226,33 +4525,17 @@ end: int perf_swevent_get_recursion_context(void) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - int rctx; + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); - if (in_nmi()) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; - - if (cpuctx->recursion[rctx]) - return -1; - - cpuctx->recursion[rctx]++; - barrier(); - - return rctx; + return get_recursion_context(swhash->recursion); } EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); void inline perf_swevent_put_recursion_context(int rctx) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - barrier(); - cpuctx->recursion[rctx]--; + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + + put_recursion_context(swhash->recursion, rctx); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, @@ -4278,20 
+4561,20 @@ static void perf_swevent_read(struct perf_event *event) { } -static int perf_swevent_enable(struct perf_event *event) +static int perf_swevent_add(struct perf_event *event, int flags) { + struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); struct hw_perf_event *hwc = &event->hw; - struct perf_cpu_context *cpuctx; struct hlist_head *head; - cpuctx = &__get_cpu_var(perf_cpu_context); - if (hwc->sample_period) { hwc->last_period = hwc->sample_period; perf_swevent_set_period(event); } - head = find_swevent_head(cpuctx, event); + hwc->state = !(flags & PERF_EF_START); + + head = find_swevent_head(swhash, event); if (WARN_ON_ONCE(!head)) return -EINVAL; @@ -4300,202 +4583,27 @@ static int perf_swevent_enable(struct perf_event *event) return 0; } -static void perf_swevent_disable(struct perf_event *event) +static void perf_swevent_del(struct perf_event *event, int flags) { hlist_del_rcu(&event->hlist_entry); } -static void perf_swevent_void(struct perf_event *event) +static void perf_swevent_start(struct perf_event *event, int flags) { + event->hw.state = 0; } -static int perf_swevent_int(struct perf_event *event) +static void perf_swevent_stop(struct perf_event *event, int flags) { - return 0; -} - -static const struct pmu perf_ops_generic = { - .enable = perf_swevent_enable, - .disable = perf_swevent_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, - .read = perf_swevent_read, - .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ -}; - -/* - * hrtimer based swevent callback - */ - -static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) -{ - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct pt_regs *regs; - struct perf_event *event; - u64 period; - - event = container_of(hrtimer, struct perf_event, hw.hrtimer); - event->pmu->read(event); - - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; - regs = get_irq_regs(); - - if (regs && !perf_exclude_event(event, regs)) { - if (!(event->attr.exclude_idle && current->pid == 0)) - if (perf_event_overflow(event, 0, &data, regs)) - ret = HRTIMER_NORESTART; - } - - period = max_t(u64, 10000, event->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - - return ret; + event->hw.state = PERF_HES_STOPPED; } -static void perf_swevent_start_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { - u64 period; - - if (hwc->remaining) { - if (hwc->remaining < 0) - period = 10000; - else - period = hwc->remaining; - hwc->remaining = 0; - } else { - period = max_t(u64, 10000, hwc->sample_period); - } - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } -} - -static void perf_swevent_cancel_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->sample_period) { - ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); - hwc->remaining = ktime_to_ns(remaining); - - hrtimer_cancel(&hwc->hrtimer); - } -} - -/* - * Software event: cpu wall time clock - */ - -static void cpu_clock_perf_event_update(struct perf_event *event) -{ - int cpu = raw_smp_processor_id(); - s64 prev; - u64 now; - - now = cpu_clock(cpu); - prev = local64_xchg(&event->hw.prev_count, now); - local64_add(now - prev, &event->count); -} - -static int cpu_clock_perf_event_enable(struct perf_event *event) -{ 
- struct hw_perf_event *hwc = &event->hw; - int cpu = raw_smp_processor_id(); - - local64_set(&hwc->prev_count, cpu_clock(cpu)); - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void cpu_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - cpu_clock_perf_event_update(event); -} - -static void cpu_clock_perf_event_read(struct perf_event *event) -{ - cpu_clock_perf_event_update(event); -} - -static const struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_event_enable, - .disable = cpu_clock_perf_event_disable, - .read = cpu_clock_perf_event_read, -}; - -/* - * Software event: task time clock - */ - -static void task_clock_perf_event_update(struct perf_event *event, u64 now) -{ - u64 prev; - s64 delta; - - prev = local64_xchg(&event->hw.prev_count, now); - delta = now - prev; - local64_add(delta, &event->count); -} - -static int task_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 now; - - now = event->ctx->time; - - local64_set(&hwc->prev_count, now); - - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void task_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - task_clock_perf_event_update(event, event->ctx->time); - -} - -static void task_clock_perf_event_read(struct perf_event *event) -{ - u64 time; - - if (!in_nmi()) { - update_context_time(event->ctx); - time = event->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - event->ctx->timestamp; - time = event->ctx->time + delta; - } - - task_clock_perf_event_update(event, time); -} - -static const struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_event_enable, - .disable = task_clock_perf_event_disable, - .read = task_clock_perf_event_read, -}; - /* Deref the hlist from the update side */ static inline struct swevent_hlist * -swevent_hlist_deref(struct perf_cpu_context *cpuctx) +swevent_hlist_deref(struct swevent_htable *swhash) { - return rcu_dereference_protected(cpuctx->swevent_hlist, - lockdep_is_held(&cpuctx->hlist_mutex)); + return rcu_dereference_protected(swhash->swevent_hlist, + lockdep_is_held(&swhash->hlist_mutex)); } static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) @@ -4506,27 +4614,27 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head) kfree(hlist); } -static void swevent_hlist_release(struct perf_cpu_context *cpuctx) +static void swevent_hlist_release(struct swevent_htable *swhash) { - struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx); + struct swevent_hlist *hlist = swevent_hlist_deref(swhash); if (!hlist) return; - rcu_assign_pointer(cpuctx->swevent_hlist, NULL); + rcu_assign_pointer(swhash->swevent_hlist, NULL); call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu); } static void swevent_hlist_put_cpu(struct perf_event *event, int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - mutex_lock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); - if (!--cpuctx->hlist_refcount) - swevent_hlist_release(cpuctx); + if (!--swhash->hlist_refcount) + swevent_hlist_release(swhash); - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); } static void swevent_hlist_put(struct perf_event *event) @@ -4544,12 +4652,12 @@ static void swevent_hlist_put(struct perf_event *event) static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) { - struct perf_cpu_context *cpuctx = 
&per_cpu(perf_cpu_context, cpu); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); int err = 0; - mutex_lock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); - if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) { + if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) { struct swevent_hlist *hlist; hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); @@ -4557,11 +4665,11 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu) err = -ENOMEM; goto exit; } - rcu_assign_pointer(cpuctx->swevent_hlist, hlist); + rcu_assign_pointer(swhash->swevent_hlist, hlist); } - cpuctx->hlist_refcount++; - exit: - mutex_unlock(&cpuctx->hlist_mutex); + swhash->hlist_refcount++; +exit: + mutex_unlock(&swhash->hlist_mutex); return err; } @@ -4585,7 +4693,7 @@ static int swevent_hlist_get(struct perf_event *event) put_online_cpus(); return 0; - fail: +fail: for_each_possible_cpu(cpu) { if (cpu == failed_cpu) break; @@ -4596,17 +4704,64 @@ static int swevent_hlist_get(struct perf_event *event) return err; } -#ifdef CONFIG_EVENT_TRACING +atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; + +static void sw_perf_event_destroy(struct perf_event *event) +{ + u64 event_id = event->attr.config; + + WARN_ON(event->parent); + + atomic_dec(&perf_swevent_enabled[event_id]); + swevent_hlist_put(event); +} + +static int perf_swevent_init(struct perf_event *event) +{ + int event_id = event->attr.config; + + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + switch (event_id) { + case PERF_COUNT_SW_CPU_CLOCK: + case PERF_COUNT_SW_TASK_CLOCK: + return -ENOENT; + + default: + break; + } + + if (event_id > PERF_COUNT_SW_MAX) + return -ENOENT; + + if (!event->parent) { + int err; + + err = swevent_hlist_get(event); + if (err) + return err; + + atomic_inc(&perf_swevent_enabled[event_id]); + event->destroy = sw_perf_event_destroy; + } + + return 0; +} + +static struct pmu perf_swevent = { + .task_ctx_nr = perf_sw_context, -static const struct pmu perf_ops_tracepoint = { - .enable = perf_trace_enable, - .disable = perf_trace_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, + .event_init = perf_swevent_init, + .add = perf_swevent_add, + .del = perf_swevent_del, + .start = perf_swevent_start, + .stop = perf_swevent_stop, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, }; +#ifdef CONFIG_EVENT_TRACING + static int perf_tp_filter_match(struct perf_event *event, struct perf_sample_data *data) { @@ -4650,7 +4805,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size, hlist_for_each_entry_rcu(event, node, head, hlist_entry) { if (perf_tp_event_match(event, &data, regs)) - perf_swevent_add(event, count, 1, &data, regs); + perf_swevent_event(event, count, 1, &data, regs); } perf_swevent_put_recursion_context(rctx); @@ -4662,10 +4817,13 @@ static void tp_perf_event_destroy(struct perf_event *event) perf_trace_destroy(event); } -static const struct pmu *tp_perf_event_init(struct perf_event *event) +static int perf_tp_event_init(struct perf_event *event) { int err; + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -ENOENT; + /* * Raw tracepoint data is a severe data leak, only allow root to * have these. 
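The perf_swevent definition above is the first user of the reworked struct pmu interface: event_init() claims or declines an event (returning -ENOENT lets perf_init_event(), shown further down, keep walking the pmus list), and add/del/start/stop/read replace the old enable/disable/unthrottle callbacks. A minimal sketch of how another PMU would sit on the same interface and register via perf_pmu_register(); every my_* name is hypothetical, and perf_hw_context is assumed to be the hardware task-context slot (only perf_sw_context appears in this excerpt):

        /* Sketch only, not part of this diff; all my_* names are hypothetical. */
        static void my_pmu_nop(struct perf_event *event, int flags)
        {
        }

        static void my_pmu_read(struct perf_event *event)
        {
        }

        static int my_pmu_add(struct perf_event *event, int flags)
        {
                /* ->add() attaches the event; PERF_EF_START means start it now */
                event->hw.state = (flags & PERF_EF_START) ? 0 : PERF_HES_STOPPED;
                return 0;
        }

        static int my_pmu_event_init(struct perf_event *event)
        {
                /* decline foreign events so the core tries the next pmu */
                if (event->attr.type != PERF_TYPE_RAW)
                        return -ENOENT;
                return 0;
        }

        static struct pmu my_pmu = {
                .task_ctx_nr    = perf_hw_context,
                .event_init     = my_pmu_event_init,
                .add            = my_pmu_add,
                .del            = my_pmu_nop,   /* undo ->add                       */
                .start          = my_pmu_nop,   /* PERF_EF_RELOAD: reprogram count  */
                .stop           = my_pmu_nop,   /* PERF_EF_UPDATE: fold count back  */
                .read           = my_pmu_read,
        };

        /* registered once, e.g. from an initcall: perf_pmu_register(&my_pmu); */
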
@@ -4673,15 +4831,31 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event) if ((event->attr.sample_type & PERF_SAMPLE_RAW) && perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); + return -EPERM; err = perf_trace_init(event); if (err) - return NULL; + return err; event->destroy = tp_perf_event_destroy; - return &perf_ops_tracepoint; + return 0; +} + +static struct pmu perf_tracepoint = { + .task_ctx_nr = perf_sw_context, + + .event_init = perf_tp_event_init, + .add = perf_trace_add, + .del = perf_trace_del, + .start = perf_swevent_start, + .stop = perf_swevent_stop, + .read = perf_swevent_read, +}; + +static inline void perf_tp_register(void) +{ + perf_pmu_register(&perf_tracepoint); } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4709,9 +4883,8 @@ static void perf_event_free_filter(struct perf_event *event) #else -static const struct pmu *tp_perf_event_init(struct perf_event *event) +static inline void perf_tp_register(void) { - return NULL; } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4726,105 +4899,389 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT -static void bp_perf_event_destroy(struct perf_event *event) +void perf_bp_event(struct perf_event *bp, void *data) { - release_bp_slot(event); + struct perf_sample_data sample; + struct pt_regs *regs = data; + + perf_sample_data_init(&sample, bp->attr.bp_addr); + + if (!bp->hw.state && !perf_exclude_event(bp, regs)) + perf_swevent_event(bp, 1, 1, &sample, regs); } +#endif -static const struct pmu *bp_perf_event_init(struct perf_event *bp) +/* + * hrtimer based swevent callback + */ + +static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) { - int err; + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct pt_regs *regs; + struct perf_event *event; + u64 period; - err = register_perf_hw_breakpoint(bp); - if (err) - return ERR_PTR(err); + event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event->pmu->read(event); - bp->destroy = bp_perf_event_destroy; + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + regs = get_irq_regs(); + + if (regs && !perf_exclude_event(event, regs)) { + if (!(event->attr.exclude_idle && current->pid == 0)) + if (perf_event_overflow(event, 0, &data, regs)) + ret = HRTIMER_NORESTART; + } - return &perf_ops_bp; + period = max_t(u64, 10000, event->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); + + return ret; } -void perf_bp_event(struct perf_event *bp, void *data) +static void perf_swevent_start_hrtimer(struct perf_event *event) { - struct perf_sample_data sample; - struct pt_regs *regs = data; + struct hw_perf_event *hwc = &event->hw; - perf_sample_data_init(&sample, bp->attr.bp_addr); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + s64 period = local64_read(&hwc->period_left); + + if (period) { + if (period < 0) + period = 10000; - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); + local64_set(&hwc->period_left, 0); + } else { + period = max_t(u64, 10000, hwc->sample_period); + } + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL_PINNED, 0); + } } -#else -static const struct pmu *bp_perf_event_init(struct perf_event *bp) + +static void 
perf_swevent_cancel_hrtimer(struct perf_event *event) { - return NULL; + struct hw_perf_event *hwc = &event->hw; + + if (hwc->sample_period) { + ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); + local64_set(&hwc->period_left, ktime_to_ns(remaining)); + + hrtimer_cancel(&hwc->hrtimer); + } } -void perf_bp_event(struct perf_event *bp, void *regs) +/* + * Software event: cpu wall time clock + */ + +static void cpu_clock_event_update(struct perf_event *event) { + s64 prev; + u64 now; + + now = local_clock(); + prev = local64_xchg(&event->hw.prev_count, now); + local64_add(now - prev, &event->count); } -#endif -atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +static void cpu_clock_event_start(struct perf_event *event, int flags) +{ + local64_set(&event->hw.prev_count, local_clock()); + perf_swevent_start_hrtimer(event); +} -static void sw_perf_event_destroy(struct perf_event *event) +static void cpu_clock_event_stop(struct perf_event *event, int flags) { - u64 event_id = event->attr.config; + perf_swevent_cancel_hrtimer(event); + cpu_clock_event_update(event); +} - WARN_ON(event->parent); +static int cpu_clock_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + cpu_clock_event_start(event, flags); - atomic_dec(&perf_swevent_enabled[event_id]); - swevent_hlist_put(event); + return 0; } -static const struct pmu *sw_perf_event_init(struct perf_event *event) +static void cpu_clock_event_del(struct perf_event *event, int flags) { - const struct pmu *pmu = NULL; - u64 event_id = event->attr.config; + cpu_clock_event_stop(event, flags); +} + +static void cpu_clock_event_read(struct perf_event *event) +{ + cpu_clock_event_update(event); +} + +static int cpu_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) + return -ENOENT; + return 0; +} + +static struct pmu perf_cpu_clock = { + .task_ctx_nr = perf_sw_context, + + .event_init = cpu_clock_event_init, + .add = cpu_clock_event_add, + .del = cpu_clock_event_del, + .start = cpu_clock_event_start, + .stop = cpu_clock_event_stop, + .read = cpu_clock_event_read, +}; + +/* + * Software event: task time clock + */ + +static void task_clock_event_update(struct perf_event *event, u64 now) +{ + u64 prev; + s64 delta; + + prev = local64_xchg(&event->hw.prev_count, now); + delta = now - prev; + local64_add(delta, &event->count); +} + +static void task_clock_event_start(struct perf_event *event, int flags) +{ + local64_set(&event->hw.prev_count, event->ctx->time); + perf_swevent_start_hrtimer(event); +} + +static void task_clock_event_stop(struct perf_event *event, int flags) +{ + perf_swevent_cancel_hrtimer(event); + task_clock_event_update(event, event->ctx->time); +} + +static int task_clock_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + task_clock_event_start(event, flags); + + return 0; +} + +static void task_clock_event_del(struct perf_event *event, int flags) +{ + task_clock_event_stop(event, PERF_EF_UPDATE); +} + +static void task_clock_event_read(struct perf_event *event) +{ + u64 time; + + if (!in_nmi()) { + update_context_time(event->ctx); + time = event->ctx->time; + } else { + u64 now = perf_clock(); + u64 delta = now - event->ctx->timestamp; + time = event->ctx->time + delta; + } + + task_clock_event_update(event, time); +} + +static int task_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if 
(event->attr.config != PERF_COUNT_SW_TASK_CLOCK) + return -ENOENT; + + return 0; +} + +static struct pmu perf_task_clock = { + .task_ctx_nr = perf_sw_context, + + .event_init = task_clock_event_init, + .add = task_clock_event_add, + .del = task_clock_event_del, + .start = task_clock_event_start, + .stop = task_clock_event_stop, + .read = task_clock_event_read, +}; + +static void perf_pmu_nop_void(struct pmu *pmu) +{ +} + +static int perf_pmu_nop_int(struct pmu *pmu) +{ + return 0; +} + +static void perf_pmu_start_txn(struct pmu *pmu) +{ + perf_pmu_disable(pmu); +} + +static int perf_pmu_commit_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); + return 0; +} + +static void perf_pmu_cancel_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); +} + +/* + * Ensures all contexts with the same task_ctx_nr have the same + * pmu_cpu_context too. + */ +static void *find_pmu_context(int ctxn) +{ + struct pmu *pmu; + + if (ctxn < 0) + return NULL; + + list_for_each_entry(pmu, &pmus, entry) { + if (pmu->task_ctx_nr == ctxn) + return pmu->pmu_cpu_context; + } + + return NULL; +} + +static void free_pmu_context(void * __percpu cpu_context) +{ + struct pmu *pmu; + + mutex_lock(&pmus_lock); /* - * Software events (currently) can't in general distinguish - * between user, kernel and hypervisor events. - * However, context switches and cpu migrations are considered - * to be kernel events, and page faults are never hypervisor - * events. + * Like a real lame refcount. */ - switch (event_id) { - case PERF_COUNT_SW_CPU_CLOCK: - pmu = &perf_ops_cpu_clock; + list_for_each_entry(pmu, &pmus, entry) { + if (pmu->pmu_cpu_context == cpu_context) + goto out; + } - break; - case PERF_COUNT_SW_TASK_CLOCK: - /* - * If the user instantiates this as a per-cpu event, - * use the cpu_clock event instead. - */ - if (event->ctx->task) - pmu = &perf_ops_task_clock; - else - pmu = &perf_ops_cpu_clock; + free_percpu(cpu_context); +out: + mutex_unlock(&pmus_lock); +} - break; - case PERF_COUNT_SW_PAGE_FAULTS: - case PERF_COUNT_SW_PAGE_FAULTS_MIN: - case PERF_COUNT_SW_PAGE_FAULTS_MAJ: - case PERF_COUNT_SW_CONTEXT_SWITCHES: - case PERF_COUNT_SW_CPU_MIGRATIONS: - case PERF_COUNT_SW_ALIGNMENT_FAULTS: - case PERF_COUNT_SW_EMULATION_FAULTS: - if (!event->parent) { - int err; - - err = swevent_hlist_get(event); - if (err) - return ERR_PTR(err); +int perf_pmu_register(struct pmu *pmu) +{ + int cpu, ret; - atomic_inc(&perf_swevent_enabled[event_id]); - event->destroy = sw_perf_event_destroy; + mutex_lock(&pmus_lock); + ret = -ENOMEM; + pmu->pmu_disable_count = alloc_percpu(int); + if (!pmu->pmu_disable_count) + goto unlock; + + pmu->pmu_cpu_context = find_pmu_context(pmu->task_ctx_nr); + if (pmu->pmu_cpu_context) + goto got_cpu_context; + + pmu->pmu_cpu_context = alloc_percpu(struct perf_cpu_context); + if (!pmu->pmu_cpu_context) + goto free_pdc; + + for_each_possible_cpu(cpu) { + struct perf_cpu_context *cpuctx; + + cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); + __perf_event_init_context(&cpuctx->ctx); + cpuctx->ctx.type = cpu_context; + cpuctx->ctx.pmu = pmu; + cpuctx->jiffies_interval = 1; + INIT_LIST_HEAD(&cpuctx->rotation_list); + } + +got_cpu_context: + if (!pmu->start_txn) { + if (pmu->pmu_enable) { + /* + * If we have pmu_enable/pmu_disable calls, install + * transaction stubs that use that to try and batch + * hardware accesses. 
+ */ + pmu->start_txn = perf_pmu_start_txn; + pmu->commit_txn = perf_pmu_commit_txn; + pmu->cancel_txn = perf_pmu_cancel_txn; + } else { + pmu->start_txn = perf_pmu_nop_void; + pmu->commit_txn = perf_pmu_nop_int; + pmu->cancel_txn = perf_pmu_nop_void; + } + } + + if (!pmu->pmu_enable) { + pmu->pmu_enable = perf_pmu_nop_void; + pmu->pmu_disable = perf_pmu_nop_void; + } + + list_add_rcu(&pmu->entry, &pmus); + ret = 0; +unlock: + mutex_unlock(&pmus_lock); + + return ret; + +free_pdc: + free_percpu(pmu->pmu_disable_count); + goto unlock; +} + +void perf_pmu_unregister(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_del_rcu(&pmu->entry); + mutex_unlock(&pmus_lock); + + /* + * We dereference the pmu list under both SRCU and regular RCU, so + * synchronize against both of those. + */ + synchronize_srcu(&pmus_srcu); + synchronize_rcu(); + + free_percpu(pmu->pmu_disable_count); + free_pmu_context(pmu->pmu_cpu_context); +} + +struct pmu *perf_init_event(struct perf_event *event) +{ + struct pmu *pmu = NULL; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + int ret = pmu->event_init(event); + if (!ret) + goto unlock; + + if (ret != -ENOENT) { + pmu = ERR_PTR(ret); + goto unlock; } - pmu = &perf_ops_generic; - break; } + pmu = ERR_PTR(-ENOENT); +unlock: + srcu_read_unlock(&pmus_srcu, idx); return pmu; } @@ -4833,20 +5290,17 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event) * Allocate and initialize a event structure */ static struct perf_event * -perf_event_alloc(struct perf_event_attr *attr, - int cpu, - struct perf_event_context *ctx, +perf_event_alloc(struct perf_event_attr *attr, int cpu, struct perf_event *group_leader, struct perf_event *parent_event, - perf_overflow_handler_t overflow_handler, - gfp_t gfpflags) + perf_overflow_handler_t overflow_handler) { - const struct pmu *pmu; + struct pmu *pmu; struct perf_event *event; struct hw_perf_event *hwc; long err; - event = kzalloc(sizeof(*event), gfpflags); + event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) return ERR_PTR(-ENOMEM); @@ -4871,7 +5325,6 @@ perf_event_alloc(struct perf_event_attr *attr, event->attr = *attr; event->group_leader = group_leader; event->pmu = NULL; - event->ctx = ctx; event->oncpu = -1; event->parent = parent_event; @@ -4905,29 +5358,8 @@ perf_event_alloc(struct perf_event_attr *attr, if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; - switch (attr->type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - pmu = hw_perf_event_init(event); - break; + pmu = perf_init_event(event); - case PERF_TYPE_SOFTWARE: - pmu = sw_perf_event_init(event); - break; - - case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_event_init(event); - break; - - case PERF_TYPE_BREAKPOINT: - pmu = bp_perf_event_init(event); - break; - - - default: - break; - } done: err = 0; if (!pmu) @@ -4952,6 +5384,13 @@ done: atomic_inc(&nr_comm_events); if (event->attr.task) atomic_inc(&nr_task_events); + if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { + err = get_callchain_buffers(); + if (err) { + free_event(event); + return ERR_PTR(err); + } + } } return event; @@ -5099,12 +5538,16 @@ SYSCALL_DEFINE5(perf_event_open, struct perf_event_attr __user *, attr_uptr, pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) { - struct perf_event *event, *group_leader = NULL, *output_event = NULL; + struct perf_event *group_leader = NULL, *output_event = NULL; + struct perf_event *event, *sibling; struct perf_event_attr attr; struct 
perf_event_context *ctx; struct file *event_file = NULL; struct file *group_file = NULL; + struct task_struct *task = NULL; + struct pmu *pmu; int event_fd; + int move_group = 0; int fput_needed = 0; int err; @@ -5130,20 +5573,11 @@ SYSCALL_DEFINE5(perf_event_open, if (event_fd < 0) return event_fd; - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(pid, cpu); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto err_fd; - } - if (group_fd != -1) { group_leader = perf_fget_light(group_fd, &fput_needed); if (IS_ERR(group_leader)) { err = PTR_ERR(group_leader); - goto err_put_context; + goto err_fd; } group_file = group_leader->filp; if (flags & PERF_FLAG_FD_OUTPUT) @@ -5152,6 +5586,58 @@ SYSCALL_DEFINE5(perf_event_open, group_leader = NULL; } + event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL); + if (IS_ERR(event)) { + err = PTR_ERR(event); + goto err_fd; + } + + /* + * Special case software events and allow them to be part of + * any hardware group. + */ + pmu = event->pmu; + + if (group_leader && + (is_software_event(event) != is_software_event(group_leader))) { + if (is_software_event(event)) { + /* + * If event and group_leader are not both a software + * event, and event is, then group leader is not. + * + * Allow the addition of software events to !software + * groups, this is safe because software events never + * fail to schedule. + */ + pmu = group_leader->pmu; + } else if (is_software_event(group_leader) && + (group_leader->group_flags & PERF_GROUP_SOFTWARE)) { + /* + * In case the group is a pure software group, and we + * try to add a hardware event, move the whole group to + * the hardware context. + */ + move_group = 1; + } + } + + if (pid != -1) { + task = find_lively_task_by_vpid(pid); + if (IS_ERR(task)) { + err = PTR_ERR(task); + goto err_group_fd; + } + } + + /* + * Get the target context (task or percpu): + */ + ctx = find_get_context(pmu, task, cpu); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_group_fd; + } + /* * Look up the group leader (we will attach this event to it): */ @@ -5163,42 +5649,66 @@ SYSCALL_DEFINE5(perf_event_open, * becoming part of another group-sibling): */ if (group_leader->group_leader != group_leader) - goto err_put_context; + goto err_context; /* * Do not allow to attach to a group in a different * task or CPU context: */ - if (group_leader->ctx != ctx) - goto err_put_context; + if (move_group) { + if (group_leader->ctx->type != ctx->type) + goto err_context; + } else { + if (group_leader->ctx != ctx) + goto err_context; + } + /* * Only a group leader can be exclusive or pinned */ if (attr.exclusive || attr.pinned) - goto err_put_context; - } - - event = perf_event_alloc(&attr, cpu, ctx, group_leader, - NULL, NULL, GFP_KERNEL); - if (IS_ERR(event)) { - err = PTR_ERR(event); - goto err_put_context; + goto err_context; } if (output_event) { err = perf_event_set_output(event, output_event); if (err) - goto err_free_put_context; + goto err_context; } event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); if (IS_ERR(event_file)) { err = PTR_ERR(event_file); - goto err_free_put_context; + goto err_context; + } + + if (move_group) { + struct perf_event_context *gctx = group_leader->ctx; + + mutex_lock(&gctx->mutex); + perf_event_remove_from_context(group_leader); + list_for_each_entry(sibling, &group_leader->sibling_list, + group_entry) { + perf_event_remove_from_context(sibling); + put_ctx(gctx); + } + mutex_unlock(&gctx->mutex); + put_ctx(gctx); } event->filp = event_file; 
WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); + + if (move_group) { + perf_install_in_context(ctx, group_leader, cpu); + get_ctx(ctx); + list_for_each_entry(sibling, &group_leader->sibling_list, + group_entry) { + perf_install_in_context(ctx, sibling, cpu); + get_ctx(ctx); + } + } + perf_install_in_context(ctx, event, cpu); ++ctx->generation; mutex_unlock(&ctx->mutex); @@ -5219,11 +5729,11 @@ SYSCALL_DEFINE5(perf_event_open, fd_install(event_fd, event_file); return event_fd; -err_free_put_context: - free_event(event); -err_put_context: - fput_light(group_file, fput_needed); +err_context: put_ctx(ctx); +err_group_fd: + fput_light(group_file, fput_needed); + free_event(event); err_fd: put_unused_fd(event_fd); return err; @@ -5234,32 +5744,31 @@ err_fd: * * @attr: attributes of the counter to create * @cpu: cpu in which the counter is bound - * @pid: task to profile + * @task: task to profile (NULL for percpu) */ struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid, + struct task_struct *task, perf_overflow_handler_t overflow_handler) { - struct perf_event *event; struct perf_event_context *ctx; + struct perf_event *event; int err; /* * Get the target context (task or percpu): */ - ctx = find_get_context(pid, cpu); - if (IS_ERR(ctx)) { - err = PTR_ERR(ctx); - goto err_exit; - } - - event = perf_event_alloc(attr, cpu, ctx, NULL, - NULL, overflow_handler, GFP_KERNEL); + event = perf_event_alloc(attr, cpu, NULL, NULL, overflow_handler); if (IS_ERR(event)) { err = PTR_ERR(event); - goto err_put_context; + goto err; + } + + ctx = find_get_context(event->pmu, task, cpu); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto err_free; } event->filp = NULL; @@ -5277,112 +5786,13 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, return event; - err_put_context: - put_ctx(ctx); - err_exit: +err_free: + free_event(event); +err: return ERR_PTR(err); } EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); -/* - * inherit a event from parent task to child task: - */ -static struct perf_event * -inherit_event(struct perf_event *parent_event, - struct task_struct *parent, - struct perf_event_context *parent_ctx, - struct task_struct *child, - struct perf_event *group_leader, - struct perf_event_context *child_ctx) -{ - struct perf_event *child_event; - - /* - * Instead of creating recursive hierarchies of events, - * we link inherited events back to the original parent, - * which has a filp for sure, which we use as the reference - * count: - */ - if (parent_event->parent) - parent_event = parent_event->parent; - - child_event = perf_event_alloc(&parent_event->attr, - parent_event->cpu, child_ctx, - group_leader, parent_event, - NULL, GFP_KERNEL); - if (IS_ERR(child_event)) - return child_event; - get_ctx(child_ctx); - - /* - * Make the child state follow the state of the parent event, - * not its attr.disabled bit. We hold the parent's mutex, - * so we won't race with perf_event_{en, dis}able_family. 
- */ - if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) - child_event->state = PERF_EVENT_STATE_INACTIVE; - else - child_event->state = PERF_EVENT_STATE_OFF; - - if (parent_event->attr.freq) { - u64 sample_period = parent_event->hw.sample_period; - struct hw_perf_event *hwc = &child_event->hw; - - hwc->sample_period = sample_period; - hwc->last_period = sample_period; - - local64_set(&hwc->period_left, sample_period); - } - - child_event->overflow_handler = parent_event->overflow_handler; - - /* - * Link it up in the child's context: - */ - add_event_to_ctx(child_event, child_ctx); - - /* - * Get a reference to the parent filp - we will fput it - * when the child event exits. This is safe to do because - * we are in the parent and we know that the filp still - * exists and has a nonzero count: - */ - atomic_long_inc(&parent_event->filp->f_count); - - /* - * Link this into the parent event's child list - */ - WARN_ON_ONCE(parent_event->ctx->parent_ctx); - mutex_lock(&parent_event->child_mutex); - list_add_tail(&child_event->child_list, &parent_event->child_list); - mutex_unlock(&parent_event->child_mutex); - - return child_event; -} - -static int inherit_group(struct perf_event *parent_event, - struct task_struct *parent, - struct perf_event_context *parent_ctx, - struct task_struct *child, - struct perf_event_context *child_ctx) -{ - struct perf_event *leader; - struct perf_event *sub; - struct perf_event *child_ctr; - - leader = inherit_event(parent_event, parent, parent_ctx, - child, NULL, child_ctx); - if (IS_ERR(leader)) - return PTR_ERR(leader); - list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { - child_ctr = inherit_event(sub, parent, parent_ctx, - child, leader, child_ctx); - if (IS_ERR(child_ctr)) - return PTR_ERR(child_ctr); - } - return 0; -} - static void sync_child_event(struct perf_event *child_event, struct task_struct *child) { @@ -5439,16 +5849,13 @@ __perf_event_exit_task(struct perf_event *child_event, } } -/* - * When a child task exits, feed back event values to parent events. - */ -void perf_event_exit_task(struct task_struct *child) +static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { struct perf_event *child_event, *tmp; struct perf_event_context *child_ctx; unsigned long flags; - if (likely(!child->perf_event_ctxp)) { + if (likely(!child->perf_event_ctxp[ctxn])) { perf_event_task(child, NULL, 0); return; } @@ -5460,7 +5867,7 @@ void perf_event_exit_task(struct task_struct *child) * scheduled, so we are now safe from rescheduling changing * our context. */ - child_ctx = child->perf_event_ctxp; + child_ctx = child->perf_event_ctxp[ctxn]; __perf_event_task_sched_out(child_ctx); /* @@ -5469,7 +5876,7 @@ void perf_event_exit_task(struct task_struct *child) * incremented the context's refcount before we do put_ctx below. */ raw_spin_lock(&child_ctx->lock); - child->perf_event_ctxp = NULL; + child->perf_event_ctxp[ctxn] = NULL; /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all @@ -5522,6 +5929,17 @@ again: put_ctx(child_ctx); } +/* + * When a child task exits, feed back event values to parent events. 
+ */ +void perf_event_exit_task(struct task_struct *child) +{ + int ctxn; + + for_each_task_context_nr(ctxn) + perf_event_exit_task_context(child, ctxn); +} + static void perf_free_event(struct perf_event *event, struct perf_event_context *ctx) { @@ -5543,48 +5961,165 @@ static void perf_free_event(struct perf_event *event, /* * free an unexposed, unused context as created by inheritance by - * init_task below, used by fork() in case of fail. + * perf_event_init_task below, used by fork() in case of fail. */ void perf_event_free_task(struct task_struct *task) { - struct perf_event_context *ctx = task->perf_event_ctxp; + struct perf_event_context *ctx; struct perf_event *event, *tmp; + int ctxn; - if (!ctx) - return; + for_each_task_context_nr(ctxn) { + ctx = task->perf_event_ctxp[ctxn]; + if (!ctx) + continue; - mutex_lock(&ctx->mutex); + mutex_lock(&ctx->mutex); again: - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) - perf_free_event(event, ctx); + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, + group_entry) + perf_free_event(event, ctx); - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, - group_entry) - perf_free_event(event, ctx); + list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, + group_entry) + perf_free_event(event, ctx); - if (!list_empty(&ctx->pinned_groups) || - !list_empty(&ctx->flexible_groups)) - goto again; + if (!list_empty(&ctx->pinned_groups) || + !list_empty(&ctx->flexible_groups)) + goto again; - mutex_unlock(&ctx->mutex); + mutex_unlock(&ctx->mutex); - put_ctx(ctx); + put_ctx(ctx); + } +} + +void perf_event_delayed_put(struct task_struct *task) +{ + int ctxn; + + for_each_task_context_nr(ctxn) + WARN_ON_ONCE(task->perf_event_ctxp[ctxn]); +} + +/* + * inherit a event from parent task to child task: + */ +static struct perf_event * +inherit_event(struct perf_event *parent_event, + struct task_struct *parent, + struct perf_event_context *parent_ctx, + struct task_struct *child, + struct perf_event *group_leader, + struct perf_event_context *child_ctx) +{ + struct perf_event *child_event; + unsigned long flags; + + /* + * Instead of creating recursive hierarchies of events, + * we link inherited events back to the original parent, + * which has a filp for sure, which we use as the reference + * count: + */ + if (parent_event->parent) + parent_event = parent_event->parent; + + child_event = perf_event_alloc(&parent_event->attr, + parent_event->cpu, + group_leader, parent_event, + NULL); + if (IS_ERR(child_event)) + return child_event; + get_ctx(child_ctx); + + /* + * Make the child state follow the state of the parent event, + * not its attr.disabled bit. We hold the parent's mutex, + * so we won't race with perf_event_{en, dis}able_family. 
+ */ + if (parent_event->state >= PERF_EVENT_STATE_INACTIVE) + child_event->state = PERF_EVENT_STATE_INACTIVE; + else + child_event->state = PERF_EVENT_STATE_OFF; + + if (parent_event->attr.freq) { + u64 sample_period = parent_event->hw.sample_period; + struct hw_perf_event *hwc = &child_event->hw; + + hwc->sample_period = sample_period; + hwc->last_period = sample_period; + + local64_set(&hwc->period_left, sample_period); + } + + child_event->ctx = child_ctx; + child_event->overflow_handler = parent_event->overflow_handler; + + /* + * Link it up in the child's context: + */ + raw_spin_lock_irqsave(&child_ctx->lock, flags); + add_event_to_ctx(child_event, child_ctx); + raw_spin_unlock_irqrestore(&child_ctx->lock, flags); + + /* + * Get a reference to the parent filp - we will fput it + * when the child event exits. This is safe to do because + * we are in the parent and we know that the filp still + * exists and has a nonzero count: + */ + atomic_long_inc(&parent_event->filp->f_count); + + /* + * Link this into the parent event's child list + */ + WARN_ON_ONCE(parent_event->ctx->parent_ctx); + mutex_lock(&parent_event->child_mutex); + list_add_tail(&child_event->child_list, &parent_event->child_list); + mutex_unlock(&parent_event->child_mutex); + + return child_event; +} + +static int inherit_group(struct perf_event *parent_event, + struct task_struct *parent, + struct perf_event_context *parent_ctx, + struct task_struct *child, + struct perf_event_context *child_ctx) +{ + struct perf_event *leader; + struct perf_event *sub; + struct perf_event *child_ctr; + + leader = inherit_event(parent_event, parent, parent_ctx, + child, NULL, child_ctx); + if (IS_ERR(leader)) + return PTR_ERR(leader); + list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { + child_ctr = inherit_event(sub, parent, parent_ctx, + child, leader, child_ctx); + if (IS_ERR(child_ctr)) + return PTR_ERR(child_ctr); + } + return 0; } static int inherit_task_group(struct perf_event *event, struct task_struct *parent, struct perf_event_context *parent_ctx, - struct task_struct *child, + struct task_struct *child, int ctxn, int *inherited_all) { int ret; - struct perf_event_context *child_ctx = child->perf_event_ctxp; + struct perf_event_context *child_ctx; if (!event->attr.inherit) { *inherited_all = 0; return 0; } + child_ctx = child->perf_event_ctxp[ctxn]; if (!child_ctx) { /* * This is executed from the parent task context, so @@ -5593,14 +6128,11 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, * child. 
*/ - child_ctx = kzalloc(sizeof(struct perf_event_context), - GFP_KERNEL); + child_ctx = alloc_perf_context(event->pmu, child); if (!child_ctx) return -ENOMEM; - __perf_event_init_context(child_ctx, child); - child->perf_event_ctxp = child_ctx; - get_task_struct(child); + child->perf_event_ctxp[ctxn] = child_ctx; } ret = inherit_group(event, parent, parent_ctx, @@ -5612,11 +6144,10 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent, return ret; } - /* * Initialize the perf_event context in task_struct */ -int perf_event_init_task(struct task_struct *child) +int perf_event_init_context(struct task_struct *child, int ctxn) { struct perf_event_context *child_ctx, *parent_ctx; struct perf_event_context *cloned_ctx; @@ -5625,19 +6156,19 @@ int perf_event_init_task(struct task_struct *child) int inherited_all = 1; int ret = 0; - child->perf_event_ctxp = NULL; + child->perf_event_ctxp[ctxn] = NULL; mutex_init(&child->perf_event_mutex); INIT_LIST_HEAD(&child->perf_event_list); - if (likely(!parent->perf_event_ctxp)) + if (likely(!parent->perf_event_ctxp[ctxn])) return 0; /* * If the parent's context is a clone, pin it so it won't get * swapped under us. */ - parent_ctx = perf_pin_task_context(parent); + parent_ctx = perf_pin_task_context(parent, ctxn); /* * No need to check if parent_ctx != NULL here; since we saw @@ -5657,20 +6188,20 @@ int perf_event_init_task(struct task_struct *child) * the list, not manipulating it: */ list_for_each_entry(event, &parent_ctx->pinned_groups, group_entry) { - ret = inherit_task_group(event, parent, parent_ctx, child, - &inherited_all); + ret = inherit_task_group(event, parent, parent_ctx, + child, ctxn, &inherited_all); if (ret) break; } list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) { - ret = inherit_task_group(event, parent, parent_ctx, child, - &inherited_all); + ret = inherit_task_group(event, parent, parent_ctx, + child, ctxn, &inherited_all); if (ret) break; } - child_ctx = child->perf_event_ctxp; + child_ctx = child->perf_event_ctxp[ctxn]; if (child_ctx && inherited_all) { /* @@ -5699,63 +6230,98 @@ int perf_event_init_task(struct task_struct *child) return ret; } +/* + * Initialize the perf_event context in task_struct + */ +int perf_event_init_task(struct task_struct *child) +{ + int ctxn, ret; + + for_each_task_context_nr(ctxn) { + ret = perf_event_init_context(child, ctxn); + if (ret) + return ret; + } + + return 0; +} + static void __init perf_event_init_all_cpus(void) { + struct swevent_htable *swhash; int cpu; - struct perf_cpu_context *cpuctx; for_each_possible_cpu(cpu) { - cpuctx = &per_cpu(perf_cpu_context, cpu); - mutex_init(&cpuctx->hlist_mutex); - __perf_event_init_context(&cpuctx->ctx, NULL); + swhash = &per_cpu(swevent_htable, cpu); + mutex_init(&swhash->hlist_mutex); + INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); } } static void __cpuinit perf_event_init_cpu(int cpu) { - struct perf_cpu_context *cpuctx; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - - spin_lock(&perf_resource_lock); - cpuctx->max_pertask = perf_max_events - perf_reserved_percpu; - spin_unlock(&perf_resource_lock); + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - mutex_lock(&cpuctx->hlist_mutex); - if (cpuctx->hlist_refcount > 0) { + mutex_lock(&swhash->hlist_mutex); + if (swhash->hlist_refcount > 0) { struct swevent_hlist *hlist; - hlist = kzalloc(sizeof(*hlist), GFP_KERNEL); - WARN_ON_ONCE(!hlist); - rcu_assign_pointer(cpuctx->swevent_hlist, hlist); + hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, 
cpu_to_node(cpu)); + WARN_ON(!hlist); + rcu_assign_pointer(swhash->swevent_hlist, hlist); } - mutex_unlock(&cpuctx->hlist_mutex); + mutex_unlock(&swhash->hlist_mutex); } #ifdef CONFIG_HOTPLUG_CPU -static void __perf_event_exit_cpu(void *info) +static void perf_pmu_rotate_stop(struct pmu *pmu) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_event_context *ctx = &cpuctx->ctx; + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + + WARN_ON(!irqs_disabled()); + + list_del_init(&cpuctx->rotation_list); +} + +static void __perf_event_exit_context(void *__info) +{ + struct perf_event_context *ctx = __info; struct perf_event *event, *tmp; + perf_pmu_rotate_stop(ctx->pmu); + list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) __perf_event_remove_from_context(event); list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) __perf_event_remove_from_context(event); } + +static void perf_event_exit_cpu_context(int cpu) +{ + struct perf_event_context *ctx; + struct pmu *pmu; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx; + + mutex_lock(&ctx->mutex); + smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1); + mutex_unlock(&ctx->mutex); + } + srcu_read_unlock(&pmus_srcu, idx); +} + static void perf_event_exit_cpu(int cpu) { - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_event_context *ctx = &cpuctx->ctx; + struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); - mutex_lock(&cpuctx->hlist_mutex); - swevent_hlist_release(cpuctx); - mutex_unlock(&cpuctx->hlist_mutex); + mutex_lock(&swhash->hlist_mutex); + swevent_hlist_release(swhash); + mutex_unlock(&swhash->hlist_mutex); - mutex_lock(&ctx->mutex); - smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1); - mutex_unlock(&ctx->mutex); + perf_event_exit_cpu_context(cpu); } #else static inline void perf_event_exit_cpu(int cpu) { } @@ -5785,118 +6351,13 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -/* - * This has to have a higher priority than migration_notifier in sched.c. 
- */ -static struct notifier_block __cpuinitdata perf_cpu_nb = { - .notifier_call = perf_cpu_notify, - .priority = 20, -}; - void __init perf_event_init(void) { perf_event_init_all_cpus(); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&perf_cpu_nb); -} - -static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", perf_reserved_percpu); -} - -static ssize_t -perf_set_reserve_percpu(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - const char *buf, - size_t count) -{ - struct perf_cpu_context *cpuctx; - unsigned long val; - int err, cpu, mpt; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > perf_max_events) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_reserved_percpu = val; - for_each_online_cpu(cpu) { - cpuctx = &per_cpu(perf_cpu_context, cpu); - raw_spin_lock_irq(&cpuctx->ctx.lock); - mpt = min(perf_max_events - cpuctx->ctx.nr_events, - perf_max_events - perf_reserved_percpu); - cpuctx->max_pertask = mpt; - raw_spin_unlock_irq(&cpuctx->ctx.lock); - } - spin_unlock(&perf_resource_lock); - - return count; -} - -static ssize_t perf_show_overcommit(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - char *buf) -{ - return sprintf(buf, "%d\n", perf_overcommit); -} - -static ssize_t -perf_set_overcommit(struct sysdev_class *class, - struct sysdev_class_attribute *attr, - const char *buf, size_t count) -{ - unsigned long val; - int err; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > 1) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_overcommit = val; - spin_unlock(&perf_resource_lock); - - return count; -} - -static SYSDEV_CLASS_ATTR( - reserve_percpu, - 0644, - perf_show_reserve_percpu, - perf_set_reserve_percpu - ); - -static SYSDEV_CLASS_ATTR( - overcommit, - 0644, - perf_show_overcommit, - perf_set_overcommit - ); - -static struct attribute *perfclass_attrs[] = { - &attr_reserve_percpu.attr, - &attr_overcommit.attr, - NULL -}; - -static struct attribute_group perfclass_attr_group = { - .attrs = perfclass_attrs, - .name = "perf_events", -}; - -static int __init perf_event_sysfs_init(void) -{ - return sysfs_create_group(&cpu_sysdev_class.kset.kobj, - &perfclass_attr_group); + init_srcu_struct(&pmus_srcu); + perf_pmu_register(&perf_swevent); + perf_pmu_register(&perf_cpu_clock); + perf_pmu_register(&perf_task_clock); + perf_tp_register(); + perf_cpu_notifier(perf_cpu_notify); } -device_initcall(perf_event_sysfs_init); diff --git a/kernel/sched.c b/kernel/sched.c index dc85ceb..c0d2067 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3584,7 +3584,7 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); raw_spin_unlock(&rq->lock); - perf_event_task_tick(curr); + perf_event_task_tick(); #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); diff --git a/kernel/smp.c b/kernel/smp.c index 75c970c..ed6aacf 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -365,9 +365,10 @@ call: EXPORT_SYMBOL_GPL(smp_call_function_any); /** - * __smp_call_function_single(): Run a function on another CPU + * __smp_call_function_single(): Run a function on a specific CPU * @cpu: The CPU to run on. * @data: Pre-allocated and setup data structure + * @wait: If true, wait until function has completed on specified CPU. 
* * Like smp_call_function_single(), but allow caller to pass in a * pre-allocated data structure. Useful for embedding @data inside * other structures, for instance. */ @@ -376,8 +377,10 @@ EXPORT_SYMBOL_GPL(smp_call_function_any); void __smp_call_function_single(int cpu, struct call_single_data *data, int wait) { - csd_lock(data); + unsigned int this_cpu; + unsigned long flags; + this_cpu = get_cpu(); /* * Can deadlock when called with interrupts disabled. * We allow cpu's that are not yet online though, as no one else can @@ -387,7 +390,15 @@ void __smp_call_function_single(int cpu, struct call_single_data *data, WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled() && !oops_in_progress); - generic_exec_single(cpu, data, wait); + if (cpu == this_cpu) { + local_irq_save(flags); + data->func(data->info); + local_irq_restore(flags); + } else { + csd_lock(data); + generic_exec_single(cpu, data, wait); + } + put_cpu(); } /** diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index 4f10451..f8b11a2 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c @@ -115,7 +115,9 @@ static int test_kprobes(void) int ret; struct kprobe *kps[2] = {&kp, &kp2}; - kp.addr = 0; /* addr should be cleard for reusing kprobe. */ + /* addr and flags should be cleared for reusing kprobe. */ + kp.addr = NULL; + kp.flags = 0; ret = register_kprobes(kps, 2); if (ret < 0) { printk(KERN_ERR "Kprobe smoke test failed: " @@ -210,7 +212,9 @@ static int test_jprobes(void) int ret; struct jprobe *jps[2] = {&jp, &jp2}; - jp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */ + /* addr and flags should be cleared for reusing kprobe. */ + jp.kp.addr = NULL; + jp.kp.flags = 0; ret = register_jprobes(jps, 2); if (ret < 0) { printk(KERN_ERR "Kprobe smoke test failed: " @@ -323,7 +327,9 @@ static int test_kretprobes(void) int ret; struct kretprobe *rps[2] = {&rp, &rp2}; - rp.kp.addr = 0; /* addr should be cleard for reusing kprobe. */ + /* addr and flags should be cleared for reusing kprobe. */ + rp.kp.addr = NULL; + rp.kp.flags = 0; ret = register_kretprobes(rps, 2); if (ret < 0) { printk(KERN_ERR "Kprobe smoke test failed: " diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 538501c..e550d2ed 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -49,6 +49,11 @@ config HAVE_SYSCALL_TRACEPOINTS help See Documentation/trace/ftrace-design.txt +config HAVE_C_RECORDMCOUNT + bool + help + C version of recordmcount available?
+ config TRACER_MAX_TRACE bool diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fa7ece6..65fb077 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -884,10 +884,8 @@ enum { FTRACE_ENABLE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), FTRACE_UPDATE_TRACE_FUNC = (1 << 2), - FTRACE_ENABLE_MCOUNT = (1 << 3), - FTRACE_DISABLE_MCOUNT = (1 << 4), - FTRACE_START_FUNC_RET = (1 << 5), - FTRACE_STOP_FUNC_RET = (1 << 6), + FTRACE_START_FUNC_RET = (1 << 3), + FTRACE_STOP_FUNC_RET = (1 << 4), }; static int ftrace_filtered; @@ -1226,8 +1224,6 @@ static void ftrace_shutdown(int command) static void ftrace_startup_sysctl(void) { - int command = FTRACE_ENABLE_MCOUNT; - if (unlikely(ftrace_disabled)) return; @@ -1235,23 +1231,17 @@ static void ftrace_startup_sysctl(void) saved_ftrace_func = NULL; /* ftrace_start_up is true if we want ftrace running */ if (ftrace_start_up) - command |= FTRACE_ENABLE_CALLS; - - ftrace_run_update_code(command); + ftrace_run_update_code(FTRACE_ENABLE_CALLS); } static void ftrace_shutdown_sysctl(void) { - int command = FTRACE_DISABLE_MCOUNT; - if (unlikely(ftrace_disabled)) return; /* ftrace_start_up is true if ftrace is running */ if (ftrace_start_up) - command |= FTRACE_DISABLE_CALLS; - - ftrace_run_update_code(command); + ftrace_run_update_code(FTRACE_DISABLE_CALLS); } static cycle_t ftrace_update_time; @@ -1368,24 +1358,29 @@ enum { #define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ struct ftrace_iterator { - struct ftrace_page *pg; - int hidx; - int idx; - unsigned flags; - struct trace_parser parser; + loff_t pos; + loff_t func_pos; + struct ftrace_page *pg; + struct dyn_ftrace *func; + struct ftrace_func_probe *probe; + struct trace_parser parser; + int hidx; + int idx; + unsigned flags; }; static void * -t_hash_next(struct seq_file *m, void *v, loff_t *pos) +t_hash_next(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; - struct hlist_node *hnd = v; + struct hlist_node *hnd = NULL; struct hlist_head *hhd; - WARN_ON(!(iter->flags & FTRACE_ITER_HASH)); - (*pos)++; + iter->pos = *pos; + if (iter->probe) + hnd = &iter->probe->node; retry: if (iter->hidx >= FTRACE_FUNC_HASHSIZE) return NULL; @@ -1408,7 +1403,12 @@ t_hash_next(struct seq_file *m, void *v, loff_t *pos) } } - return hnd; + if (WARN_ON_ONCE(!hnd)) + return NULL; + + iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node); + + return iter; } static void *t_hash_start(struct seq_file *m, loff_t *pos) @@ -1417,26 +1417,32 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos) void *p = NULL; loff_t l; - if (!(iter->flags & FTRACE_ITER_HASH)) - *pos = 0; - - iter->flags |= FTRACE_ITER_HASH; + if (iter->func_pos > *pos) + return NULL; iter->hidx = 0; - for (l = 0; l <= *pos; ) { - p = t_hash_next(m, p, &l); + for (l = 0; l <= (*pos - iter->func_pos); ) { + p = t_hash_next(m, &l); if (!p) break; } - return p; + if (!p) + return NULL; + + /* Only set this if we have an item */ + iter->flags |= FTRACE_ITER_HASH; + + return iter; } -static int t_hash_show(struct seq_file *m, void *v) +static int +t_hash_show(struct seq_file *m, struct ftrace_iterator *iter) { struct ftrace_func_probe *rec; - struct hlist_node *hnd = v; - rec = hlist_entry(hnd, struct ftrace_func_probe, node); + rec = iter->probe; + if (WARN_ON_ONCE(!rec)) + return -EIO; if (rec->ops->print) return rec->ops->print(m, rec->ip, rec->ops, rec->data); @@ -1457,12 +1463,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) struct dyn_ftrace *rec = NULL; if (iter->flags 
& FTRACE_ITER_HASH) - return t_hash_next(m, v, pos); + return t_hash_next(m, pos); (*pos)++; + iter->pos = *pos; if (iter->flags & FTRACE_ITER_PRINTALL) - return NULL; + return t_hash_start(m, pos); retry: if (iter->idx >= iter->pg->index) { @@ -1491,7 +1498,20 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } } - return rec; + if (!rec) + return t_hash_start(m, pos); + + iter->func_pos = *pos; + iter->func = rec; + + return iter; +} + +static void reset_iter_read(struct ftrace_iterator *iter) +{ + iter->pos = 0; + iter->func_pos = 0; + iter->flags &= ~(FTRACE_ITER_PRINTALL & FTRACE_ITER_HASH); } static void *t_start(struct seq_file *m, loff_t *pos) @@ -1502,6 +1522,12 @@ static void *t_start(struct seq_file *m, loff_t *pos) mutex_lock(&ftrace_lock); /* + * If an lseek was done, then reset and start from beginning. + */ + if (*pos < iter->pos) + reset_iter_read(iter); + + /* * For set_ftrace_filter reading, if we have the filter * off, we can short cut and just print out that all * functions are enabled. @@ -1518,6 +1544,11 @@ static void *t_start(struct seq_file *m, loff_t *pos) if (iter->flags & FTRACE_ITER_HASH) return t_hash_start(m, pos); + /* + * Unfortunately, we need to restart at ftrace_pages_start + * every time we let go of the ftrace_mutex. This is because + * those pointers can change without the lock. + */ iter->pg = ftrace_pages_start; iter->idx = 0; for (l = 0; l <= *pos; ) { @@ -1526,10 +1557,14 @@ static void *t_start(struct seq_file *m, loff_t *pos) break; } - if (!p && iter->flags & FTRACE_ITER_FILTER) - return t_hash_start(m, pos); + if (!p) { + if (iter->flags & FTRACE_ITER_FILTER) + return t_hash_start(m, pos); - return p; + return NULL; + } + + return iter; } static void t_stop(struct seq_file *m, void *p) @@ -1540,16 +1575,18 @@ static void t_stop(struct seq_file *m, void *p) static int t_show(struct seq_file *m, void *v) { struct ftrace_iterator *iter = m->private; - struct dyn_ftrace *rec = v; + struct dyn_ftrace *rec; if (iter->flags & FTRACE_ITER_HASH) - return t_hash_show(m, v); + return t_hash_show(m, iter); if (iter->flags & FTRACE_ITER_PRINTALL) { seq_printf(m, "#### all functions enabled ####\n"); return 0; } + rec = iter->func; + if (!rec) return 0; @@ -2418,7 +2455,7 @@ static const struct file_operations ftrace_filter_fops = { .open = ftrace_filter_open, .read = seq_read, .write = ftrace_filter_write, - .llseek = no_llseek, + .llseek = ftrace_regex_lseek, .release = ftrace_filter_release, }; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 492197e..4e2f034 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2606,6 +2606,19 @@ void ring_buffer_record_enable_cpu(struct ring_buffer *buffer, int cpu) } EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); +/* + * The total entries in the ring buffer is the running counter + * of entries entered into the ring buffer, minus the sum of + * the entries read from the ring buffer and the number of + * entries that were overwritten. 
+ */ +static inline unsigned long +rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) +{ + return local_read(&cpu_buffer->entries) - + (local_read(&cpu_buffer->overrun) + cpu_buffer->read); +} + /** * ring_buffer_entries_cpu - get the number of entries in a cpu buffer * @buffer: The ring buffer @@ -2614,16 +2627,13 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_enable_cpu); unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) { struct ring_buffer_per_cpu *cpu_buffer; - unsigned long ret; if (!cpumask_test_cpu(cpu, buffer->cpumask)) return 0; cpu_buffer = buffer->buffers[cpu]; - ret = (local_read(&cpu_buffer->entries) - local_read(&cpu_buffer->overrun)) - - cpu_buffer->read; - return ret; + return rb_num_of_entries(cpu_buffer); } EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); @@ -2684,8 +2694,7 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer) /* if you care about this being correct, lock the buffer */ for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; - entries += (local_read(&cpu_buffer->entries) - - local_read(&cpu_buffer->overrun)) - cpu_buffer->read; + entries += rb_num_of_entries(cpu_buffer); } return entries; diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 31cc4cb..39c059c 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,7 +9,7 @@ #include <linux/kprobes.h> #include "trace.h" -static char *perf_trace_buf[4]; +static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -24,7 +24,7 @@ static int total_ref_count; static int perf_trace_event_init(struct ftrace_event_call *tp_event, struct perf_event *p_event) { - struct hlist_head *list; + struct hlist_head __percpu *list; int ret = -ENOMEM; int cpu; @@ -42,11 +42,11 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event, tp_event->perf_events = list; if (!total_ref_count) { - char *buf; + char __percpu *buf; int i; - for (i = 0; i < 4; i++) { - buf = (char *)alloc_percpu(perf_trace_t); + for (i = 0; i < PERF_NR_CONTEXTS; i++) { + buf = (char __percpu *)alloc_percpu(perf_trace_t); if (!buf) goto fail; @@ -65,7 +65,7 @@ fail: if (!total_ref_count) { int i; - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } @@ -101,22 +101,26 @@ int perf_trace_init(struct perf_event *p_event) return ret; } -int perf_trace_enable(struct perf_event *p_event) +int perf_trace_add(struct perf_event *p_event, int flags) { struct ftrace_event_call *tp_event = p_event->tp_event; + struct hlist_head __percpu *pcpu_list; struct hlist_head *list; - list = tp_event->perf_events; - if (WARN_ON_ONCE(!list)) + pcpu_list = tp_event->perf_events; + if (WARN_ON_ONCE(!pcpu_list)) return -EINVAL; - list = this_cpu_ptr(list); + if (!(flags & PERF_EF_START)) + p_event->hw.state = PERF_HES_STOPPED; + + list = this_cpu_ptr(pcpu_list); hlist_add_head_rcu(&p_event->hlist_entry, list); return 0; } -void perf_trace_disable(struct perf_event *p_event) +void perf_trace_del(struct perf_event *p_event, int flags) { hlist_del_rcu(&p_event->hlist_entry); } @@ -142,7 +146,7 @@ void perf_trace_destroy(struct perf_event *p_event) tp_event->perf_events = NULL; if (!--total_ref_count) { - for (i = 0; i < 4; i++) { + for (i = 0; i < PERF_NR_CONTEXTS; i++) { free_percpu(perf_trace_buf[i]); perf_trace_buf[i] = NULL; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 
4c758f1..398c0e8 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -600,21 +600,29 @@ out: enum { FORMAT_HEADER = 1, - FORMAT_PRINTFMT = 2, + FORMAT_FIELD_SEPERATOR = 2, + FORMAT_PRINTFMT = 3, }; static void *f_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_event_call *call = m->private; struct ftrace_event_field *field; - struct list_head *head; + struct list_head *common_head = &ftrace_common_fields; + struct list_head *head = trace_get_fields(call); (*pos)++; switch ((unsigned long)v) { case FORMAT_HEADER: - head = &ftrace_common_fields; + if (unlikely(list_empty(common_head))) + return NULL; + + field = list_entry(common_head->prev, + struct ftrace_event_field, link); + return field; + case FORMAT_FIELD_SEPERATOR: if (unlikely(list_empty(head))) return NULL; @@ -626,31 +634,10 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) return NULL; } - head = trace_get_fields(call); - - /* - * To separate common fields from event fields, the - * LSB is set on the first event field. Clear it in case. - */ - v = (void *)((unsigned long)v & ~1L); - field = v; - /* - * If this is a common field, and at the end of the list, then - * continue with main list. - */ - if (field->link.prev == &ftrace_common_fields) { - if (unlikely(list_empty(head))) - return NULL; - field = list_entry(head->prev, struct ftrace_event_field, link); - /* Set the LSB to notify f_show to print an extra newline */ - field = (struct ftrace_event_field *) - ((unsigned long)field | 1); - return field; - } - - /* If we are done tell f_show to print the format */ - if (field->link.prev == head) + if (field->link.prev == common_head) + return (void *)FORMAT_FIELD_SEPERATOR; + else if (field->link.prev == head) return (void *)FORMAT_PRINTFMT; field = list_entry(field->link.prev, struct ftrace_event_field, link); @@ -688,22 +675,16 @@ static int f_show(struct seq_file *m, void *v) seq_printf(m, "format:\n"); return 0; + case FORMAT_FIELD_SEPERATOR: + seq_putc(m, '\n'); + return 0; + case FORMAT_PRINTFMT: seq_printf(m, "\nprint fmt: %s\n", call->print_fmt); return 0; } - /* - * To separate common fields from event fields, the - * LSB is set on the first event field. Clear it and - * print a newline if it is set. - */ - if ((unsigned long)v & 1) { - seq_putc(m, '\n'); - v = (void *)((unsigned long)v & ~1L); - } - field = v; /* diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6f23369..ef49e93 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -15,15 +15,19 @@ #include "trace.h" #include "trace_output.h" +/* When set, irq functions will be ignored */ +static int ftrace_graph_skip_irqs; + struct fgraph_cpu_data { pid_t last_pid; int depth; + int depth_irq; int ignore; unsigned long enter_funcs[FTRACE_RETFUNC_DEPTH]; }; struct fgraph_data { - struct fgraph_cpu_data *cpu_data; + struct fgraph_cpu_data __percpu *cpu_data; /* Place to preserve last processed entry. */ struct ftrace_graph_ent_entry ent; @@ -41,6 +45,7 @@ struct fgraph_data { #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 #define TRACE_GRAPH_PRINT_ABS_TIME 0x20 +#define TRACE_GRAPH_PRINT_IRQS 0x40 static struct tracer_opt trace_opts[] = { /* Display overruns? 
(for self-debug purpose) */ @@ -55,13 +60,15 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, /* Display absolute time of an entry */ { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, + /* Display interrupts */ + { TRACER_OPT(funcgraph-irqs, TRACE_GRAPH_PRINT_IRQS) }, { } /* Empty entry */ }; static struct tracer_flags tracer_flags = { /* Don't display overruns and proc by default */ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | - TRACE_GRAPH_PRINT_DURATION, + TRACE_GRAPH_PRINT_DURATION | TRACE_GRAPH_PRINT_IRQS, .opts = trace_opts }; @@ -204,6 +211,14 @@ int __trace_graph_entry(struct trace_array *tr, return 1; } +static inline int ftrace_graph_ignore_irqs(void) +{ + if (!ftrace_graph_skip_irqs) + return 0; + + return in_irq(); +} + int trace_graph_entry(struct ftrace_graph_ent *trace) { struct trace_array *tr = graph_array; @@ -218,7 +233,8 @@ int trace_graph_entry(struct ftrace_graph_ent *trace) return 0; /* trace it when it is-nested-in or is a function enabled. */ - if (!(trace->depth || ftrace_graph_addr(trace->func))) + if (!(trace->depth || ftrace_graph_addr(trace->func)) || + ftrace_graph_ignore_irqs()) return 0; local_irq_save(flags); @@ -649,8 +665,9 @@ trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) /* Print nsecs (we don't want to exceed 7 numbers) */ if (len < 7) { - snprintf(nsecs_str, min(sizeof(nsecs_str), 8UL - len), "%03lu", - nsecs_rem); + size_t slen = min_t(size_t, sizeof(nsecs_str), 8UL - len); + + snprintf(nsecs_str, slen, "%03lu", nsecs_rem); ret = trace_seq_printf(s, ".%s", nsecs_str); if (!ret) return TRACE_TYPE_PARTIAL_LINE; @@ -855,6 +872,92 @@ print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, return 0; } +/* + * Entry check for irq code + * + * returns 1 if + * - we are inside irq code + * - we just entered irq code + * + * returns 0 if + * - funcgraph-interrupts option is set + * - we are not inside irq code + */ +static int +check_irq_entry(struct trace_iterator *iter, u32 flags, + unsigned long addr, int depth) +{ + int cpu = iter->cpu; + struct fgraph_data *data = iter->private; + int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); + + if (flags & TRACE_GRAPH_PRINT_IRQS) + return 0; + + /* + * We are inside the irq code + */ + if (*depth_irq >= 0) + return 1; + + if ((addr < (unsigned long)__irqentry_text_start) || + (addr >= (unsigned long)__irqentry_text_end)) + return 0; + + /* + * We are entering irq code. + */ + *depth_irq = depth; + return 1; +} + +/* + * Return check for irq code + * + * returns 1 if + * - we are inside irq code + * - we just left irq code + * + * returns 0 if + * - funcgraph-interrupts option is set + * - we are not inside irq code + */ +static int +check_irq_return(struct trace_iterator *iter, u32 flags, int depth) +{ + int cpu = iter->cpu; + struct fgraph_data *data = iter->private; + int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); + + if (flags & TRACE_GRAPH_PRINT_IRQS) + return 0; + + /* + * We are not inside the irq code. + */ + if (*depth_irq == -1) + return 0; + + /* + * We are inside the irq code, and this is returning entry. + * Let's not trace it and clear the entry depth, since + * we are out of irq code. + * + * This condition ensures that we 'leave the irq code' once + * we are out of the entry depth. Thus protecting us from + * the RETURN entry loss.
+ */ + if (*depth_irq >= depth) { + *depth_irq = -1; + return 1; + } + + /* + * We are inside the irq code, and this is not the entry. + */ + return 1; +} + static enum print_line_t print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, struct trace_iterator *iter, u32 flags) @@ -865,6 +968,9 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, static enum print_line_t ret; int cpu = iter->cpu; + if (check_irq_entry(iter, flags, call->func, call->depth)) + return TRACE_TYPE_HANDLED; + if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func, flags)) return TRACE_TYPE_PARTIAL_LINE; @@ -902,6 +1008,9 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, int ret; int i; + if (check_irq_return(iter, flags, trace->depth)) + return TRACE_TYPE_HANDLED; + if (data) { struct fgraph_cpu_data *cpu_data; int cpu = iter->cpu; @@ -1210,9 +1319,12 @@ void graph_trace_open(struct trace_iterator *iter) pid_t *pid = &(per_cpu_ptr(data->cpu_data, cpu)->last_pid); int *depth = &(per_cpu_ptr(data->cpu_data, cpu)->depth); int *ignore = &(per_cpu_ptr(data->cpu_data, cpu)->ignore); + int *depth_irq = &(per_cpu_ptr(data->cpu_data, cpu)->depth_irq); + *pid = -1; *depth = 0; *ignore = 0; + *depth_irq = -1; } iter->private = data; @@ -1235,6 +1347,14 @@ void graph_trace_close(struct trace_iterator *iter) } } +static int func_graph_set_flag(u32 old_flags, u32 bit, int set) +{ + if (bit == TRACE_GRAPH_PRINT_IRQS) + ftrace_graph_skip_irqs = !set; + + return 0; +} + static struct trace_event_functions graph_functions = { .trace = print_graph_function_event, }; @@ -1261,6 +1381,7 @@ static struct tracer graph_trace __read_mostly = { .print_line = print_graph_function, .print_header = print_graph_headers, .flags = &tracer_flags, + .set_flag = func_graph_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function_graph, #endif diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c index a7cc379..209b379 100644 --- a/kernel/trace/trace_workqueue.c +++ b/kernel/trace/trace_workqueue.c @@ -263,6 +263,11 @@ int __init trace_workqueue_early_init(void) { int ret, cpu; + for_each_possible_cpu(cpu) { + spin_lock_init(&workqueue_cpu_stat(cpu)->lock); + INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); + } + ret = register_trace_workqueue_insertion(probe_workqueue_insertion, NULL); if (ret) goto out; @@ -279,11 +284,6 @@ int __init trace_workqueue_early_init(void) if (ret) goto no_creation; - for_each_possible_cpu(cpu) { - spin_lock_init(&workqueue_cpu_stat(cpu)->lock); - INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); - } - return 0; no_creation: diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index c77f3ec..d6073a5 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -25,6 +25,7 @@ #include <linux/err.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/jump_label.h> extern struct tracepoint __start___tracepoints[]; extern struct tracepoint __stop___tracepoints[]; @@ -263,7 +264,13 @@ static void set_tracepoint(struct tracepoint_entry **entry, * is used. 
*/ rcu_assign_pointer(elem->funcs, (*entry)->funcs); - elem->state = active; + if (!elem->state && active) { + enable_jump_label(&elem->state); + elem->state = active; + } else if (elem->state && !active) { + disable_jump_label(&elem->state); + elem->state = active; + } } /* @@ -277,7 +284,10 @@ static void disable_tracepoint(struct tracepoint *elem) if (elem->unregfunc && elem->state) elem->unregfunc(); - elem->state = 0; + if (elem->state) { + disable_jump_label(&elem->state); + elem->state = 0; + } rcu_assign_pointer(elem->funcs, NULL); } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f9c3c5..dc8e168 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -43,7 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); #endif -static int __read_mostly did_panic; static int __initdata no_watchdog; @@ -187,18 +186,6 @@ static int is_softlockup(unsigned long touch_ts) return 0; } -static int -watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr) -{ - did_panic = 1; - - return NOTIFY_DONE; -} - -static struct notifier_block panic_block = { - .notifier_call = watchdog_panic, -}; - #ifdef CONFIG_HARDLOCKUP_DETECTOR static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, @@ -371,14 +358,14 @@ static int watchdog_nmi_enable(int cpu) /* Try to register using hardware perf events */ wd_attr = &wd_hw_attr; wd_attr->sample_period = hw_nmi_get_sample_period(); - event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback); + event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback); if (!IS_ERR(event)) { printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n"); goto out_save; } printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event); - return -1; + return PTR_ERR(event); /* success path */ out_save: @@ -422,17 +409,19 @@ static int watchdog_prepare_cpu(int cpu) static int watchdog_enable(int cpu) { struct task_struct *p = per_cpu(softlockup_watchdog, cpu); + int err; /* enable the perf event */ - if (watchdog_nmi_enable(cpu) != 0) - return -1; + err = watchdog_nmi_enable(cpu); + if (err) + return err; /* create the watchdog thread */ if (!p) { p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu); if (IS_ERR(p)) { printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu); - return -1; + return PTR_ERR(p); } kthread_bind(p, cpu); per_cpu(watchdog_touch_ts, cpu) = 0; @@ -484,6 +473,9 @@ static void watchdog_disable_all_cpus(void) { int cpu; + if (no_watchdog) + return; + for_each_online_cpu(cpu) watchdog_disable(cpu); @@ -526,17 +518,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; + int err = 0; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - if (watchdog_prepare_cpu(hotcpu)) - return NOTIFY_BAD; + err = watchdog_prepare_cpu(hotcpu); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: - if (watchdog_enable(hotcpu)) - return NOTIFY_BAD; + err = watchdog_enable(hotcpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: @@ -549,7 +540,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; #endif /* CONFIG_HOTPLUG_CPU */ } - return NOTIFY_OK; + return notifier_from_errno(err); } static struct notifier_block __cpuinitdata cpu_nfb = { @@ -565,13 +556,11 @@ static int __init spawn_watchdog_task(void) return 0; err = 
cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); - WARN_ON(err == NOTIFY_BAD); + WARN_ON(notifier_to_errno(err)); cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); register_cpu_notifier(&cpu_nfb); - atomic_notifier_chain_register(&panic_notifier_list, &panic_block); - return 0; } early_initcall(spawn_watchdog_task); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b4afd2..e85d549 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -482,6 +482,7 @@ config PROVE_LOCKING select DEBUG_SPINLOCK select DEBUG_MUTEXES select DEBUG_LOCK_ALLOC + select TRACE_IRQFLAGS default n help This feature enables the kernel to prove that all locking @@ -579,11 +580,10 @@ config DEBUG_LOCKDEP of more runtime overhead. config TRACE_IRQFLAGS - depends on DEBUG_KERNEL bool - default y - depends on TRACE_IRQFLAGS_SUPPORT - depends on PROVE_LOCKING + help + Enables hooks to interrupt enabling and disabling for + either tracing or lock debugging. config DEBUG_SPINLOCK_SLEEP bool "Spinlock debugging: sleep-inside-spinlock checking" @@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) return NULL; } -int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *mod) +void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { char *secstrings; unsigned int i; @@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * could potentially lead to deadlock and thus be counter-productive. */ list_add(&mod->bug_list, &module_bug_list); - - return 0; } void module_bug_cleanup(struct module *mod) diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 02afc25..e925c7b 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -26,19 +26,11 @@ #include <linux/dynamic_debug.h> #include <linux/debugfs.h> #include <linux/slab.h> +#include <linux/jump_label.h> extern struct _ddebug __start___verbose[]; extern struct _ddebug __stop___verbose[]; -/* dynamic_debug_enabled, and dynamic_debug_enabled2 are bitmasks in which - * bit n is set to 1 if any modname hashes into the bucket n, 0 otherwise. They - * use independent hash functions, to reduce the chance of false positives. - */ -long long dynamic_debug_enabled; -EXPORT_SYMBOL_GPL(dynamic_debug_enabled); -long long dynamic_debug_enabled2; -EXPORT_SYMBOL_GPL(dynamic_debug_enabled2); - struct ddebug_table { struct list_head link; char *mod_name; @@ -88,26 +80,6 @@ static char *ddebug_describe_flags(struct _ddebug *dp, char *buf, } /* - * must be called with ddebug_lock held - */ - -static int disabled_hash(char hash, bool first_table) -{ - struct ddebug_table *dt; - char table_hash_value; - - list_for_each_entry(dt, &ddebug_tables, link) { - if (first_table) - table_hash_value = dt->ddebugs->primary_hash; - else - table_hash_value = dt->ddebugs->secondary_hash; - if (dt->num_enabled && (hash == table_hash_value)) - return 0; - } - return 1; -} - -/* * Search the tables for _ddebug's which match the given * `query' and apply the `flags' and `mask' to them. 
Tells * the user which ddebug's were changed, or whether none @@ -170,17 +142,9 @@ static void ddebug_change(const struct ddebug_query *query, dt->num_enabled++; dp->flags = newflags; if (newflags) { - dynamic_debug_enabled |= - (1LL << dp->primary_hash); - dynamic_debug_enabled2 |= - (1LL << dp->secondary_hash); + enable_jump_label(&dp->enabled); } else { - if (disabled_hash(dp->primary_hash, true)) - dynamic_debug_enabled &= - ~(1LL << dp->primary_hash); - if (disabled_hash(dp->secondary_hash, false)) - dynamic_debug_enabled2 &= - ~(1LL << dp->secondary_hash); + disable_jump_label(&dp->enabled); } if (verbose) printk(KERN_INFO diff --git a/lib/list_sort.c b/lib/list_sort.c index 4b5cb79..a7616fa 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -70,7 +70,7 @@ static void merge_and_restore_back_links(void *priv, * element comparison is needed, so the client's cmp() * routine can invoke cond_resched() periodically. */ - (*cmp)(priv, tail, tail); + (*cmp)(priv, tail->next, tail->next); tail->next->prev = tail; tail = tail->next; diff --git a/mm/fremap.c b/mm/fremap.c index 46f5dac..ec520c7 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -125,7 +125,6 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, { struct mm_struct *mm = current->mm; struct address_space *mapping; - unsigned long end = start + size; struct vm_area_struct *vma; int err = -EINVAL; int has_write_lock = 0; @@ -142,6 +141,10 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (start + size <= start) return err; + /* Does pgoff wrap? */ + if (pgoff + (size >> PAGE_SHIFT) < pgoff) + return err; + /* Can we represent this offset inside this architecture's pte's? */ #if PTE_FILE_MAX_BITS < BITS_PER_LONG if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS)) @@ -168,7 +171,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, if (!(vma->vm_flags & VM_CAN_NONLINEAR)) goto out; - if (end <= start || start < vma->vm_start || end > vma->vm_end) + if (start < vma->vm_start || start + size > vma->vm_end) goto out; /* Must set VM_NONLINEAR before any pages are populated. */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cc5be78..c032738 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2324,11 +2324,8 @@ retry_avoidcopy: * and just make the page writable */ avoidcopy = (page_mapcount(old_page) == 1); if (avoidcopy) { - if (!trylock_page(old_page)) { - if (PageAnon(old_page)) - page_move_anon_rmap(old_page, vma, address); - } else - unlock_page(old_page); + if (PageAnon(old_page)) + page_move_anon_rmap(old_page, vma, address); set_huge_ptep_writable(vma, address, ptep); return 0; } @@ -2404,7 +2401,7 @@ retry_avoidcopy: set_huge_pte_at(mm, address, ptep, make_huge_pte(vma, new_page, 1)); page_remove_rmap(old_page); - hugepage_add_anon_rmap(new_page, vma, address); + hugepage_add_new_anon_rmap(new_page, vma, address); /* Make the old page be freed below */ new_page = old_page; mmu_notifier_invalidate_range_end(mm, @@ -2631,10 +2628,16 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, vma, address); } - if (!pagecache_page) { - page = pte_page(entry); + /* + * hugetlb_cow() requires page locks of pte_page(entry) and + * pagecache_page, so here we need take the former one + * when page != pagecache_page or !pagecache_page. + * Note that locking order is always pagecache_page -> page, + * so no worry about deadlock. 
+ */ + page = pte_page(entry); + if (page != pagecache_page) lock_page(page); - } spin_lock(&mm->page_table_lock); /* Check for a racing update before calling hugetlb_cow */ @@ -2661,9 +2664,8 @@ out_page_table_lock: if (pagecache_page) { unlock_page(pagecache_page); put_page(pagecache_page); - } else { - unlock_page(page); } + unlock_page(page); out_mutex: mutex_unlock(&hugetlb_instantiation_mutex); @@ -712,7 +712,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, if (!ptep) goto out; - if (pte_write(*ptep)) { + if (pte_write(*ptep) || pte_dirty(*ptep)) { pte_t entry; swapped = PageSwapCache(page); @@ -735,7 +735,9 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, set_pte_at(mm, addr, ptep, entry); goto out_unlock; } - entry = pte_wrprotect(entry); + if (pte_dirty(entry)) + set_page_dirty(page); + entry = pte_mkclean(pte_wrprotect(entry)); set_pte_at_notify(mm, addr, ptep, entry); } *orig_pte = *ptep; @@ -2009,6 +2009,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma, removed_exe_file_vma(mm); fput(new->vm_file); } + unlink_anon_vmas(new); out_free_mpol: mpol_put(pol); out_free_vma: diff --git a/mm/oom_kill.c b/mm/oom_kill.c index fc81cb2..4029583 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -121,8 +121,8 @@ struct task_struct *find_lock_task_mm(struct task_struct *p) } /* return true if the task is not adequate as candidate victim task. */ -static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *mem, - const nodemask_t *nodemask) +static bool oom_unkillable_task(struct task_struct *p, + const struct mem_cgroup *mem, const nodemask_t *nodemask) { if (is_global_init(p)) return true; @@ -208,8 +208,13 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem, */ points += p->signal->oom_score_adj; - if (points < 0) - return 0; + /* + * Never return 0 for an eligible task that may be killed since it's + * possible that no single user task uses more than 0.1% of memory and + * no single admin tasks uses more than 3.0%. + */ + if (points <= 0) + return 1; return (points < 1000) ? points : 1000; } @@ -339,26 +344,24 @@ static struct task_struct *select_bad_process(unsigned int *ppoints, /** * dump_tasks - dump current memory state of all system tasks * @mem: current's memory controller, if constrained + * @nodemask: nodemask passed to page allocator for mempolicy ooms * - * Dumps the current memory state of all system tasks, excluding kernel threads. + * Dumps the current memory state of all eligible tasks. Tasks not in the same + * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes + * are not shown. * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj * value, oom_score_adj value, and name. * - * If the actual is non-NULL, only tasks that are a member of the mem_cgroup are - * shown. - * * Call with tasklist_lock read-locked. 
*/ -static void dump_tasks(const struct mem_cgroup *mem) +static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask) { struct task_struct *p; struct task_struct *task; pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n"); for_each_process(p) { - if (p->flags & PF_KTHREAD) - continue; - if (mem && !task_in_mem_cgroup(p, mem)) + if (oom_unkillable_task(p, mem, nodemask)) continue; task = find_lock_task_mm(p); @@ -381,7 +384,7 @@ static void dump_tasks(const struct mem_cgroup *mem) } static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, - struct mem_cgroup *mem) + struct mem_cgroup *mem, const nodemask_t *nodemask) { task_lock(current); pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, " @@ -394,7 +397,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, mem_cgroup_print_oom_info(mem, p); show_mem(); if (sysctl_oom_dump_tasks) - dump_tasks(mem); + dump_tasks(mem, nodemask); } #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -436,7 +439,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, unsigned int victim_points = 0; if (printk_ratelimit()) - dump_header(p, gfp_mask, order, mem); + dump_header(p, gfp_mask, order, mem, nodemask); /* * If the task is already exiting, don't alarm the sysadmin or kill @@ -482,7 +485,7 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, - int order) + int order, const nodemask_t *nodemask) { if (likely(!sysctl_panic_on_oom)) return; @@ -496,7 +499,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, return; } read_lock(&tasklist_lock); - dump_header(NULL, gfp_mask, order, NULL); + dump_header(NULL, gfp_mask, order, NULL, nodemask); read_unlock(&tasklist_lock); panic("Out of memory: %s panic_on_oom is enabled\n", sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); @@ -509,7 +512,7 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) unsigned int points = 0; struct task_struct *p; - check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0); + check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL); limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT; read_lock(&tasklist_lock); retry: @@ -641,6 +644,7 @@ static void clear_system_oom(void) void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order, nodemask_t *nodemask) { + const nodemask_t *mpol_mask; struct task_struct *p; unsigned long totalpages; unsigned long freed = 0; @@ -670,7 +674,8 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, */ constraint = constrained_alloc(zonelist, gfp_mask, nodemask, &totalpages); - check_panic_on_oom(constraint, gfp_mask, order); + mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL; + check_panic_on_oom(constraint, gfp_mask, order, mpol_mask); read_lock(&tasklist_lock); if (sysctl_oom_kill_allocating_task && @@ -688,15 +693,13 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, } retry: - p = select_bad_process(&points, totalpages, NULL, - constraint == CONSTRAINT_MEMORY_POLICY ? nodemask : - NULL); + p = select_bad_process(&points, totalpages, NULL, mpol_mask); if (PTR_ERR(p) == -1UL) goto out; /* Found nothing?!?! Either we hang forever, or we panic. 
*/ if (!p) { - dump_header(NULL, gfp_mask, order, NULL); + dump_header(NULL, gfp_mask, order, NULL, mpol_mask); read_unlock(&tasklist_lock); panic("Out of memory and no killable processes...\n"); } diff --git a/mm/percpu.c b/mm/percpu.c index 58c572b..c76ef38 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -1401,9 +1401,9 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, if (pcpu_first_unit_cpu == NR_CPUS) pcpu_first_unit_cpu = cpu; + pcpu_last_unit_cpu = cpu; } } - pcpu_last_unit_cpu = cpu; pcpu_nr_units = unit; for_each_possible_cpu(cpu) @@ -381,7 +381,13 @@ vma_address(struct page *page, struct vm_area_struct *vma) unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { if (PageAnon(page)) { - if (vma->anon_vma->root != page_anon_vma(page)->root) + struct anon_vma *page__anon_vma = page_anon_vma(page); + /* + * Note: swapoff's unuse_vma() is more efficient with this + * check, and needs it to match anon_vma when KSM is active. + */ + if (!vma->anon_vma || !page__anon_vma || + vma->anon_vma->root != page__anon_vma->root) return -EFAULT; } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { if (!vma->vm_file || @@ -1564,13 +1570,14 @@ static void __hugepage_set_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address, int exclusive) { struct anon_vma *anon_vma = vma->anon_vma; + BUG_ON(!anon_vma); - if (!exclusive) { - struct anon_vma_chain *avc; - avc = list_entry(vma->anon_vma_chain.prev, - struct anon_vma_chain, same_vma); - anon_vma = avc->anon_vma; - } + + if (PageAnon(page)) + return; + if (!exclusive) + anon_vma = anon_vma->root; + anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; page->mapping = (struct address_space *) anon_vma; page->index = linear_page_index(vma, address); @@ -1581,6 +1588,8 @@ void hugepage_add_anon_rmap(struct page *page, { struct anon_vma *anon_vma = vma->anon_vma; int first; + + BUG_ON(!PageLocked(page)); BUG_ON(!anon_vma); BUG_ON(address < vma->vm_start || address >= vma->vm_end); first = atomic_inc_and_test(&page->_mapcount); diff --git a/mm/vmscan.c b/mm/vmscan.c index c391c32..c5dfabf 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1804,12 +1804,11 @@ static void shrink_zone(int priority, struct zone *zone, * If a zone is deemed to be full of pinned pages then just give it a light * scan then give up on it. */ -static bool shrink_zones(int priority, struct zonelist *zonelist, +static void shrink_zones(int priority, struct zonelist *zonelist, struct scan_control *sc) { struct zoneref *z; struct zone *zone; - bool all_unreclaimable = true; for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(sc->gfp_mask), sc->nodemask) { @@ -1827,8 +1826,38 @@ static bool shrink_zones(int priority, struct zonelist *zonelist, } shrink_zone(priority, zone, sc); - all_unreclaimable = false; } +} + +static bool zone_reclaimable(struct zone *zone) +{ + return zone->pages_scanned < zone_reclaimable_pages(zone) * 6; +} + +/* + * As hibernation is going on, kswapd is freezed so that it can't mark + * the zone into all_unreclaimable. It can't handle OOM during hibernation. + * So let's check zone's unreclaimable in direct reclaim as well as kswapd. 
+ */ +static bool all_unreclaimable(struct zonelist *zonelist, + struct scan_control *sc) +{ + struct zoneref *z; + struct zone *zone; + bool all_unreclaimable = true; + + for_each_zone_zonelist_nodemask(zone, z, zonelist, + gfp_zone(sc->gfp_mask), sc->nodemask) { + if (!populated_zone(zone)) + continue; + if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) + continue; + if (zone_reclaimable(zone)) { + all_unreclaimable = false; + break; + } + } + return all_unreclaimable; } @@ -1852,7 +1881,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, struct scan_control *sc) { int priority; - bool all_unreclaimable; unsigned long total_scanned = 0; struct reclaim_state *reclaim_state = current->reclaim_state; struct zoneref *z; @@ -1869,7 +1897,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, sc->nr_scanned = 0; if (!priority) disable_swap_token(); - all_unreclaimable = shrink_zones(priority, zonelist, sc); + shrink_zones(priority, zonelist, sc); /* * Don't shrink slabs when reclaiming memory from * over limit cgroups @@ -1931,7 +1959,7 @@ out: return sc->nr_reclaimed; /* top priority shrink_zones still had more to do? don't OOM, then */ - if (scanning_global_lru(sc) && !all_unreclaimable) + if (scanning_global_lru(sc) && !all_unreclaimable(zonelist, sc)) return 1; return 0; @@ -2197,8 +2225,7 @@ loop_again: total_scanned += sc.nr_scanned; if (zone->all_unreclaimable) continue; - if (nr_slab == 0 && - zone->pages_scanned >= (zone_reclaimable_pages(zone) * 6)) + if (nr_slab == 0 && !zone_reclaimable(zone)) zone->all_unreclaimable = 1; /* * If we've done a decent amount of scanning and diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 01ddb04..0eb96f7 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -24,8 +24,11 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, if (vlan_dev) skb->dev = vlan_dev; - else if (vlan_id) - goto drop; + else if (vlan_id) { + if (!(skb->dev->flags & IFF_PROMISC)) + goto drop; + skb->pkt_type = PACKET_OTHERHOST; + } return (polling ? 
netif_receive_skb(skb) : netif_rx(skb)); @@ -102,8 +105,11 @@ vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, if (vlan_dev) skb->dev = vlan_dev; - else if (vlan_id) - goto drop; + else if (vlan_id) { + if (!(skb->dev->flags & IFF_PROMISC)) + goto drop; + skb->pkt_type = PACKET_OTHERHOST; + } for (p = napi->gro_list; p; p = p->next) { NAPI_GRO_CB(p)->same_flow = diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 0ea20c3..17c5ba7 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -426,8 +426,10 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) /* Allocate an fcall for the reply */ rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); - if (!rpl_context) + if (!rpl_context) { + err = -ENOMEM; goto err_close; + } /* * If the request has a buffer, steal it, otherwise @@ -445,8 +447,8 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) } rpl_context->rc = req->rc; if (!rpl_context->rc) { - kfree(rpl_context); - goto err_close; + err = -ENOMEM; + goto err_free2; } /* @@ -458,11 +460,8 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) */ if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { err = post_recv(client, rpl_context); - if (err) { - kfree(rpl_context->rc); - kfree(rpl_context); - goto err_close; - } + if (err) + goto err_free1; } else atomic_dec(&rdma->rq_count); @@ -471,8 +470,10 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) /* Post the request */ c = kmalloc(sizeof *c, GFP_KERNEL); - if (!c) - goto err_close; + if (!c) { + err = -ENOMEM; + goto err_free1; + } c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, @@ -499,9 +500,15 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) return ib_post_send(rdma->qp, &wr, &bad_wr); error: + kfree(c); + kfree(rpl_context->rc); + kfree(rpl_context); P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); return -EIO; - + err_free1: + kfree(rpl_context->rc); + err_free2: + kfree(rpl_context); err_close: spin_lock_irqsave(&rdma->req_lock, flags); if (rdma->state < P9_RDMA_CLOSING) { diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index dcfbe99..b885159 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -329,7 +329,8 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args) mutex_lock(&virtio_9p_lock); list_for_each_entry(chan, &virtio_chan_list, chan_list) { - if (!strncmp(devname, chan->tag, chan->tag_len)) { + if (!strncmp(devname, chan->tag, chan->tag_len) && + strlen(devname) == chan->tag_len) { if (!chan->inuse) { chan->inuse = true; found = 1; diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 651babd..ad2b232 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -399,12 +399,6 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) unregister_netdev(net_dev); free_netdev(net_dev); } - read_lock_irq(&devs_lock); - if (list_empty(&br2684_devs)) { - /* last br2684 device */ - unregister_atmdevice_notifier(&atm_dev_notifier); - } - read_unlock_irq(&devs_lock); return; } @@ -675,7 +669,6 @@ static int br2684_create(void __user *arg) if (list_empty(&br2684_devs)) { /* 1st br2684 device */ - register_atmdevice_notifier(&atm_dev_notifier); brdev->number = 1; } else brdev->number = BRPRIV(list_entry_brdev(br2684_devs.prev))->number + 1; @@ -815,6 +808,7 @@ static int __init br2684_init(void) return -ENOMEM; #endif register_atm_ioctl(&br2684_ioctl_ops); + register_atmdevice_notifier(&atm_dev_notifier); return 0; } @@ 
-830,9 +824,7 @@ static void __exit br2684_exit(void) #endif - /* if not already empty */ - if (!list_empty(&br2684_devs)) - unregister_atmdevice_notifier(&atm_dev_notifier); + unregister_atmdevice_notifier(&atm_dev_notifier); while (!list_empty(&br2684_devs)) { net_dev = list_entry_brdev(br2684_devs.next); diff --git a/net/core/datagram.c b/net/core/datagram.c index 251997a..282806b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -243,6 +243,7 @@ void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb) unlock_sock_fast(sk, slow); /* skb is now orphaned, can be freed outside of locked section */ + trace_kfree_skb(skb, skb_free_datagram_locked); __kfree_skb(skb); } EXPORT_SYMBOL(skb_free_datagram_locked); diff --git a/net/core/dev.c b/net/core/dev.c index 660dd41..7ec85e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -128,6 +128,8 @@ #include <linux/jhash.h> #include <linux/random.h> #include <trace/events/napi.h> +#include <trace/events/net.h> +#include <trace/events/skb.h> #include <linux/pci.h> #include "net-sysfs.h" @@ -1978,6 +1980,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, } rc = ops->ndo_start_xmit(skb, dev); + trace_net_dev_xmit(skb, rc); if (rc == NETDEV_TX_OK) txq_trans_update(txq); return rc; @@ -1998,6 +2001,7 @@ gso: skb_dst_drop(nskb); rc = ops->ndo_start_xmit(nskb, dev); + trace_net_dev_xmit(nskb, rc); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) goto out_kfree_gso_skb; @@ -2186,6 +2190,7 @@ int dev_queue_xmit(struct sk_buff *skb) #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS); #endif + trace_net_dev_queue(skb); if (q->enqueue) { rc = __dev_xmit_skb(skb, q, dev, txq); goto out; @@ -2512,6 +2517,7 @@ int netif_rx(struct sk_buff *skb) if (netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_rx(skb); #ifdef CONFIG_RPS { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -2571,6 +2577,7 @@ static void net_tx_action(struct softirq_action *h) clist = clist->next; WARN_ON(atomic_read(&skb->users)); + trace_kfree_skb(skb, net_tx_action); __kfree_skb(skb); } } @@ -2828,6 +2835,7 @@ static int __netif_receive_skb(struct sk_buff *skb) if (!netdev_tstamp_prequeue) net_timestamp_check(skb); + trace_netif_receive_skb(skb); if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb)) return NET_RX_SUCCESS; diff --git a/net/core/iovec.c b/net/core/iovec.c index 1cd98df..e6b133b 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -35,9 +35,10 @@ * in any case. 
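The verify_iovec() hunk that resumes just below widens the returned value from int to long; the routine hands back the total byte count of the vector, and summing many size_t segment lengths into an int risks wrapping once the total passes INT_MAX. A toy illustration of that arithmetic hazard with fabricated segment sizes; this is not the kernel routine, it only demonstrates why a wider accumulator is safer:

#include <limits.h>
#include <stdio.h>
#include <sys/uio.h>

int main(void)
{
	/* Three 1 GiB segments: each iov_len fits easily, the sum does not. */
	struct iovec iov[3] = {
		{ .iov_base = NULL, .iov_len = 1UL << 30 },
		{ .iov_base = NULL, .iov_len = 1UL << 30 },
		{ .iov_base = NULL, .iov_len = 1UL << 30 },
	};
	long long total = 0;

	for (int i = 0; i < 3; i++)
		total += (long long)iov[i].iov_len;

	if (total > INT_MAX)
		printf("total %lld bytes cannot be returned through an int\n",
		       total);
	return 0;
}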
*/ -int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) +long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address, int mode) { - int size, err, ct; + int size, ct; + long err; if (m->msg_namelen) { if (mode == VERIFY_READ) { diff --git a/net/core/net-traces.c b/net/core/net-traces.c index afa6380..7f1bb2a 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -26,6 +26,7 @@ #define CREATE_TRACE_POINTS #include <trace/events/skb.h> +#include <trace/events/net.h> #include <trace/events/napi.h> EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c83b421..56ba3c4 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -466,6 +466,7 @@ void consume_skb(struct sk_buff *skb) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; + trace_consume_skb(skb); __kfree_skb(skb); } EXPORT_SYMBOL(consume_skb); diff --git a/net/core/sock.c b/net/core/sock.c index b05b9b6..ef30e9d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1351,9 +1351,9 @@ int sock_i_uid(struct sock *sk) { int uid; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0; - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); return uid; } EXPORT_SYMBOL(sock_i_uid); @@ -1362,9 +1362,9 @@ unsigned long sock_i_ino(struct sock *sk) { unsigned long ino; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); return ino; } EXPORT_SYMBOL(sock_i_ino); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 571f895..72380a3 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -217,6 +217,7 @@ config NET_IPIP config NET_IPGRE tristate "IP: GRE tunnels over IP" + depends on IPV6 || IPV6=n help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 945b20a..35c93e8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -45,7 +45,7 @@ #include <net/netns/generic.h> #include <net/rtnetlink.h> -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> @@ -699,7 +699,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((dst = rt->rt_gateway) == 0) goto tx_error_icmp; } -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { struct in6_addr *addr6; int addr_type; @@ -774,7 +774,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev goto tx_error; } } -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) { struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); @@ -850,7 +850,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev if ((iph->ttl = tiph->ttl) == 0) { if (skb->protocol == htons(ETH_P_IP)) iph->ttl = old_iph->ttl; -#ifdef CONFIG_IPV6 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) else if (skb->protocol == htons(ETH_P_IPV6)) iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit; #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b6989..7649d77 100644 --- 
a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -488,9 +488,8 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) * we can switch to copy when see the first bad fragment. */ if (skb_has_frags(skb)) { - struct sk_buff *frag; + struct sk_buff *frag, *frag2; int first_len = skb_pagelen(skb); - int truesizes = 0; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -503,18 +502,18 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; } - truesizes += frag->truesize; + skb->truesize -= frag->truesize; } /* Everything is OK. Generate! */ @@ -524,7 +523,6 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; iph->tot_len = htons(first_len); iph->frag_off = htons(IP_MF); @@ -576,6 +574,15 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index b254daf..43eec80 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -112,6 +112,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); + nskb->protocol = htons(ETH_P_IP); if (ip_route_me_harder(nskb, addr_type)) goto free_nskb; diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index eab8de3..f3a9b42 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -66,9 +66,11 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(skb->sk); - if (inet && inet->nodefrag) + if (sk && (sk->sk_family == PF_INET) && + inet->nodefrag) return NF_ACCEPT; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 1679e2c0..ee5f419 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -893,13 +893,15 @@ static void fast_csum(__sum16 *csum, unsigned char s[4]; if (offset & 1) { - s[0] = s[2] = 0; + s[0] = ~0; s[1] = ~*optr; + s[2] = 0; s[3] = *nptr; } else { - s[1] = s[3] = 0; s[0] = ~*optr; + s[1] = ~0; s[2] = *nptr; + s[3] = 0; } *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6298f75..ac6559c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1231,7 +1231,7 @@ restart: } if (net_ratelimit()) - printk(KERN_WARNING "Neighbour table overflow.\n"); + printk(KERN_WARNING "ipv4: Neighbour table overflow.\n"); rt_drop(rt); return -ENOBUFS; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3fb1428..f115ea6 100644 --- a/net/ipv4/tcp.c +++ 
b/net/ipv4/tcp.c @@ -386,8 +386,6 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask = 0; - if (sk->sk_err) - mask = POLLERR; /* * POLLHUP is certainly not done right. But poll() doesn't @@ -457,6 +455,11 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; } + /* This barrier is coupled with smp_wmb() in tcp_reset() */ + smp_rmb(); + if (sk->sk_err) + mask |= POLLERR; + return mask; } EXPORT_SYMBOL(tcp_poll); @@ -940,7 +943,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = sk->sk_route_caps & NETIF_F_SG; while (--iovlen >= 0) { - int seglen = iov->iov_len; + size_t seglen = iov->iov_len; unsigned char __user *from = iov->iov_base; iov++; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e663b78..b55f60f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2545,7 +2545,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) cnt += tcp_skb_pcount(skb); if (cnt > packets) { - if (tcp_is_sack(tp) || (oldcnt >= packets)) + if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + (oldcnt >= packets)) break; mss = skb_shinfo(skb)->gso_size; @@ -4048,6 +4049,8 @@ static void tcp_reset(struct sock *sk) default: sk->sk_err = ECONNRESET; } + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_error_report(sk); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c35b469..74c54b3 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -135,13 +135,16 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) /* This function calculates a "timeout" which is equivalent to the timeout of a * TCP connection after "boundary" unsuccessful, exponentially backed-off - * retransmissions with an initial RTO of TCP_RTO_MIN. + * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if + * syn_set flag is set. */ static bool retransmits_timed_out(struct sock *sk, - unsigned int boundary) + unsigned int boundary, + bool syn_set) { unsigned int timeout, linear_backoff_thresh; unsigned int start_ts; + unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; if (!inet_csk(sk)->icsk_retransmits) return false; @@ -151,12 +154,12 @@ static bool retransmits_timed_out(struct sock *sk, else start_ts = tcp_sk(sk)->retrans_stamp; - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; + timeout = ((2 << boundary) - 1) * rto_base; else - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; return (tcp_time_stamp - start_ts) >= timeout; @@ -167,14 +170,15 @@ static int tcp_write_timeout(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; - bool do_reset; + bool do_reset, syn_set = 0; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { if (icsk->icsk_retransmits) dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? 
: sysctl_tcp_syn_retries; + syn_set = 1; } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { + if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -187,14 +191,14 @@ static int tcp_write_timeout(struct sock *sk) retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || - !retransmits_timed_out(sk, retry_until); + !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } } - if (retransmits_timed_out(sk, retry_until)) { + if (retransmits_timed_out(sk, retry_until, syn_set)) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -436,7 +440,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) __sk_dst_reset(sk); out:; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 869078d..a580349 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -61,7 +61,7 @@ static int xfrm4_get_saddr(struct net *net, static int xfrm4_get_tos(struct flowi *fl) { - return fl->fl4_tos; + return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ } static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1ef1366..4794762 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -21,21 +21,25 @@ static int xfrm4_init_flags(struct xfrm_state *x) } static void -__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm4_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) +{ + sel->daddr.a4 = fl->fl4_dst; + sel->saddr.a4 = fl->fl4_src; + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET; + sel->prefixlen_d = 32; + sel->prefixlen_s = 32; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm4_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) { - x->sel.daddr.a4 = fl->fl4_dst; - x->sel.saddr.a4 = fl->fl4_src; - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET; - x->sel.prefixlen_d = 32; - x->sel.prefixlen_s = 32; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; x->id = tmpl->id; if (x->id.daddr.a4 == 0) x->id.daddr.a4 = daddr->a4; @@ -70,6 +74,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .owner = THIS_MODULE, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ab70a3f..324fac3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4637,10 +4637,12 @@ int __init addrconf_init(void) if (err < 0) { printk(KERN_CRIT "IPv6 Addrconf:" " cannot initialize default policy table: %d.\n", err); - return err; + goto out; } - register_pernet_subsys(&addrconf_ops); + err = register_pernet_subsys(&addrconf_ops); + if (err < 0) + goto out_addrlabel; /* The addrconf netdev notifier requires that loopback_dev * has it's 
ipv6 private information allocated and setup @@ -4692,7 +4694,9 @@ errout: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: unregister_pernet_subsys(&addrconf_ops); - +out_addrlabel: + ipv6_addr_label_cleanup(); +out: return err; } @@ -4703,6 +4707,7 @@ void addrconf_cleanup(void) unregister_netdevice_notifier(&ipv6_dev_notf); unregister_pernet_subsys(&addrconf_ops); + ipv6_addr_label_cleanup(); rtnl_lock(); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f0e774c..8175f80 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -393,6 +393,11 @@ int __init ipv6_addr_label_init(void) return register_pernet_subsys(&ipv6_addr_label_ops); } +void ipv6_addr_label_cleanup(void) +{ + unregister_pernet_subsys(&ipv6_addr_label_ops); +} + static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), }, [IFAL_LABEL] = { .len = sizeof(u32), }, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d40b330..980912e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -639,7 +639,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (skb_has_frags(skb)) { int first_len = skb_pagelen(skb); - int truesizes = 0; + struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -651,18 +651,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + skb->truesize -= frag->truesize; } err = 0; @@ -693,7 +693,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); @@ -756,6 +755,15 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) IPSTATS_MIB_FRAGFAILS); dst_release(&rt->dst); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d126365..8323136 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -670,7 +670,7 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *dad if (net_ratelimit()) printk(KERN_WARNING - "Neighbour table overflow.\n"); + "ipv6: Neighbour table overflow.\n"); dst_free(&rt->dst); return NULL; } diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index f417b77..a67575d 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -20,23 +20,27 @@ #include <net/addrconf.h> static void -__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr) +__xfrm6_init_tempsel(struct xfrm_selector *sel, struct flowi *fl) { /* Initialize temporary selector matching only * to current session. 
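The addrconf_init() change near the top of this hunk is the usual error-unwind idiom: the return value of register_pernet_subsys() is now checked, and each failure branches to a label that undoes only what has already been set up, in reverse order. A stripped-down user-space sketch of the same shape; ctx_setup()/ctx_teardown() and the malloc'd buffers stand in for the real registrations:

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	void *a;
	void *b;
};

static int ctx_setup(struct ctx *c)
{
	c->a = malloc(64);
	if (!c->a)
		goto out;
	c->b = malloc(64);
	if (!c->b)
		goto out_free_a;	/* undo only what already succeeded */
	return 0;

out_free_a:
	free(c->a);
out:
	return -1;
}

static void ctx_teardown(struct ctx *c)
{
	/* Release in reverse order of acquisition, as cleanup paths do. */
	free(c->b);
	free(c->a);
}

int main(void)
{
	struct ctx c;

	if (ctx_setup(&c)) {
		fprintf(stderr, "setup failed\n");
		return 1;
	}
	puts("setup ok");
	ctx_teardown(&c);
	return 0;
}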
*/ - ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst); - ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src); - x->sel.dport = xfrm_flowi_dport(fl); - x->sel.dport_mask = htons(0xffff); - x->sel.sport = xfrm_flowi_sport(fl); - x->sel.sport_mask = htons(0xffff); - x->sel.family = AF_INET6; - x->sel.prefixlen_d = 128; - x->sel.prefixlen_s = 128; - x->sel.proto = fl->proto; - x->sel.ifindex = fl->oif; + ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl->fl6_dst); + ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl->fl6_src); + sel->dport = xfrm_flowi_dport(fl); + sel->dport_mask = htons(0xffff); + sel->sport = xfrm_flowi_sport(fl); + sel->sport_mask = htons(0xffff); + sel->family = AF_INET6; + sel->prefixlen_d = 128; + sel->prefixlen_s = 128; + sel->proto = fl->proto; + sel->ifindex = fl->oif; +} + +static void +xfrm6_init_temprop(struct xfrm_state *x, struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr) +{ x->id = tmpl->id; if (ipv6_addr_any((struct in6_addr*)&x->id.daddr)) memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr)); @@ -168,6 +172,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .eth_proto = htons(ETH_P_IPV6), .owner = THIS_MODULE, .init_tempsel = __xfrm6_init_tempsel, + .init_temprop = xfrm6_init_temprop, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fa0f37e..28624282 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2199,9 +2199,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, struct net_device *prev_dev = NULL; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - if (status->flag & RX_FLAG_INTERNAL_CMTR) - goto out_free_skb; - if (skb_headroom(skb) < sizeof(*rthdr) && pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) goto out_free_skb; @@ -2260,7 +2257,6 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, } else goto out_free_skb; - status->flag |= RX_FLAG_INTERNAL_CMTR; return; out_free_skb: diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 7dcf7a4..8d9e4c94 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -48,15 +48,17 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) { unsigned int off, len; struct nf_ct_ext_type *t; + size_t alloc_size; rcu_read_lock(); t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); off = ALIGN(sizeof(struct nf_ct_ext), t->align); len = off + t->len; + alloc_size = t->alloc_size; rcu_read_unlock(); - *ext = kzalloc(t->alloc_size, gfp); + *ext = kzalloc(alloc_size, gfp); if (!*ext) return NULL; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 53d8922..f64de954 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1376,7 +1376,7 @@ static int sip_help_tcp(struct sk_buff *skb, unsigned int protoff, unsigned int msglen, origlen; const char *dptr, *end; s16 diff, tdiff = 0; - int ret; + int ret = NF_ACCEPT; typeof(nf_nat_sip_seq_adjust_hook) nf_nat_sip_seq_adjust; if (ctinfo != IP_CT_ESTABLISHED && diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 5490fc3..daab8c4 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -70,7 +70,11 @@ nf_tproxy_destructor(struct sk_buff *skb) int nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { - if (inet_sk(sk)->transparent) { + bool 
transparent = (sk->sk_state == TCP_TIME_WAIT) ? + inet_twsk(sk)->tw_transparent : + inet_sk(sk)->transparent; + + if (transparent) { skb_orphan(skb); skb->sk = sk; skb->destructor = nf_tproxy_destructor; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index b2a3ae6..1500302 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -225,12 +225,13 @@ static void pipe_grant_credits(struct sock *sk) static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); - struct pnpipehdr *hdr = pnp_hdr(skb); + struct pnpipehdr *hdr; int wake = 0; if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; + hdr = pnp_hdr(skb); if (hdr->data[0] != PN_PEP_TYPE_COMMON) { LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n", (unsigned)hdr->data[0]); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index c397524..c519939 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -43,7 +43,7 @@ void rds_tcp_state_change(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { state_change = sk->sk_state_change; @@ -68,7 +68,7 @@ void rds_tcp_state_change(struct sock *sk) break; } out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); state_change(sk); } diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 975183f..27844f2 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -114,7 +114,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) rdsdebug("listen data ready sk %p\n", sk); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); ready = sk->sk_user_data; if (ready == NULL) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -131,7 +131,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes) queue_work(rds_wq, &rds_tcp_listen_work); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index 1aba687..e437974 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -324,7 +324,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) rdsdebug("data ready sk %p bytes %d\n", sk, bytes); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { /* check for teardown race */ ready = sk->sk_data_ready; @@ -338,7 +338,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes) if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM) queue_delayed_work(rds_wq, &conn->c_recv_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); ready(sk, bytes); } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index a28b895..2f012a0 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -224,7 +224,7 @@ void rds_tcp_write_space(struct sock *sk) struct rds_connection *conn; struct rds_tcp_connection *tc; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); conn = sk->sk_user_data; if (conn == NULL) { write_space = sk->sk_write_space; @@ -244,7 +244,7 @@ void rds_tcp_write_space(struct sock *sk) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); /* * write_space is only called when data leaves tcp's send queue if diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 8e45e76..d952e7e 100644 --- a/net/rose/af_rose.c +++ 
b/net/rose/af_rose.c @@ -679,7 +679,7 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; - if (addr->srose_ndigis > ROSE_MAX_DIGIS) + if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) { @@ -739,7 +739,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le if (addr_len == sizeof(struct sockaddr_rose) && addr->srose_ndigis > 1) return -EINVAL; - if (addr->srose_ndigis > ROSE_MAX_DIGIS) + if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; /* Source + Destination digis should not exceed ROSE_MAX_DIGIS */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b6309db..fe9306b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -800,7 +800,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) u32 _xid; __be32 *xp; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); dprintk("RPC: xs_udp_data_ready...\n"); if (!(xprt = xprt_from_sock(sk))) goto out; @@ -852,7 +852,7 @@ static void xs_udp_data_ready(struct sock *sk, int len) dropit: skb_free_datagram(sk, skb); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) @@ -1229,7 +1229,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) dprintk("RPC: xs_tcp_data_ready...\n"); - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; if (xprt->shutdown) @@ -1248,7 +1248,7 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes) read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); } while (read > 0); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /* @@ -1301,7 +1301,7 @@ static void xs_tcp_state_change(struct sock *sk) { struct rpc_xprt *xprt; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); @@ -1313,7 +1313,7 @@ static void xs_tcp_state_change(struct sock *sk) switch (sk->sk_state) { case TCP_ESTABLISHED: - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->transport_lock); if (!xprt_test_and_set_connected(xprt)) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1327,7 +1327,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt_wake_pending_tasks(xprt, -EAGAIN); } - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->transport_lock); break; case TCP_FIN_WAIT1: /* The client initiated a shutdown of the socket */ @@ -1365,7 +1365,7 @@ static void xs_tcp_state_change(struct sock *sk) xs_sock_mark_closed(xprt); } out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /** @@ -1376,7 +1376,7 @@ static void xs_error_report(struct sock *sk) { struct rpc_xprt *xprt; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: %s client %p...\n" @@ -1384,7 +1384,7 @@ static void xs_error_report(struct sock *sk) __func__, xprt, sk->sk_err); xprt_wake_pending_tasks(xprt, -EAGAIN); out: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void xs_write_space(struct sock *sk) @@ -1416,13 +1416,13 @@ static void 
xs_write_space(struct sock *sk) */ static void xs_udp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); /* from net/core/sock.c:sock_def_write_space */ if (sock_writeable(sk)) xs_write_space(sk); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } /** @@ -1437,13 +1437,13 @@ static void xs_udp_write_space(struct sock *sk) */ static void xs_tcp_write_space(struct sock *sk) { - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); /* from net/core/stream.c:sk_stream_write_space */ if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) xs_write_space(sk); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt) diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c index 3feb28e..674d426 100644 --- a/net/wireless/wext-priv.c +++ b/net/wireless/wext-priv.c @@ -152,7 +152,7 @@ static int ioctl_private_iw_point(struct iw_point *iwp, unsigned int cmd, } else if (!iwp->pointer) return -EFAULT; - extra = kmalloc(extra_size, GFP_KERNEL); + extra = kzalloc(extra_size, GFP_KERNEL); if (!extra) return -ENOMEM; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2b3ed7a..cbab6e1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1175,9 +1175,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; - family = tmpl->encap_family; - if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(net, &tmp, remote, family); + if (xfrm_addr_any(local, tmpl->encap_family)) { + error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); if (error) goto fail; local = &tmp; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5208b12f..eb96ce5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -656,15 +656,23 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) EXPORT_SYMBOL(xfrm_sad_getinfo); static int -xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, - struct xfrm_tmpl *tmpl, - xfrm_address_t *daddr, xfrm_address_t *saddr, - unsigned short family) +xfrm_init_tempstate(struct xfrm_state *x, struct flowi *fl, + struct xfrm_tmpl *tmpl, + xfrm_address_t *daddr, xfrm_address_t *saddr, + unsigned short family) { struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) return -1; - afinfo->init_tempsel(x, fl, tmpl, daddr, saddr); + afinfo->init_tempsel(&x->sel, fl); + + if (family != tmpl->encap_family) { + xfrm_state_put_afinfo(afinfo); + afinfo = xfrm_state_get_afinfo(tmpl->encap_family); + if (!afinfo) + return -1; + } + afinfo->init_temprop(x, tmpl, daddr, saddr); xfrm_state_put_afinfo(afinfo); return 0; } @@ -790,37 +798,38 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int error = 0; struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; + unsigned short encap_family = tmpl->encap_family; to_put = NULL; spin_lock_bh(&xfrm_state_lock); - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, family); + h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, 
daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, &best, &acquire_in_progress, &error); } if (best) goto found; - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { - if (x->props.family == family && + if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && - xfrm_state_addr_check(x, daddr, saddr, family) && + xfrm_state_addr_check(x, daddr, saddr, encap_family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) - xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + xfrm_state_look_at(pol, x, fl, encap_family, daddr, saddr, &best, &acquire_in_progress, &error); } @@ -829,7 +838,7 @@ found: if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, - tmpl->id.proto, family)) != NULL) { + tmpl->id.proto, encap_family)) != NULL) { to_put = x0; error = -EEXIST; goto out; @@ -839,9 +848,9 @@ found: error = -ENOMEM; goto out; } - /* Initialize temporary selector matching only + /* Initialize temporary state matching only * to current session. */ - xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family); memcpy(&x->mark, &pol->mark, sizeof(x->mark)); error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); @@ -856,10 +865,10 @@ found: x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); - h = xfrm_src_hash(net, daddr, saddr, family); + h = xfrm_src_hash(net, daddr, saddr, encap_family); hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); if (x->id.spi) { - h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, family); + h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index ee03a4f..0647379 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -24,6 +24,7 @@ static int __init example_init(void) { int i; unsigned int ret; + unsigned int nents; struct scatterlist sg[10]; printk(KERN_INFO "DMA fifo test start\n"); @@ -61,9 +62,9 @@ static int __init example_init(void) * byte at the beginning, after the kfifo_skip(). 
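The kfifo DMA sample fix above keeps the return value of kfifo_dma_in_prepare()/kfifo_dma_out_prepare() in nents and walks only that many scatterlist entries, because the prepare call may use fewer slots than the array offers. A small stand-alone model of the pattern; struct entry and fill_entries() are invented stand-ins, not scatterlist or kfifo APIs:

#include <stdio.h>

struct entry {
	unsigned long offset;
	unsigned long length;
};

/* Fill at most "max" slots and return how many were actually used. */
static unsigned int fill_entries(struct entry *e, unsigned int max,
				 unsigned long total)
{
	const unsigned long chunk = 4096;
	unsigned int n = 0;

	while (total && n < max) {
		unsigned long len = total < chunk ? total : chunk;

		e[n].offset = (unsigned long)n * chunk;
		e[n].length = len;
		total -= len;
		n++;
	}
	return n;
}

int main(void)
{
	struct entry sg[10];
	unsigned int nents = fill_entries(sg, 10, 3 * 4096 + 100);

	/* Iterate over nents, not over the full size of sg[]. */
	for (unsigned int i = 0; i < nents; i++)
		printf("sg[%u]: offset %lu length %lu\n",
		       i, sg[i].offset, sg[i].length);
	return 0;
}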
*/ sg_init_table(sg, ARRAY_SIZE(sg)); - ret = kfifo_dma_in_prepare(&fifo, sg, ARRAY_SIZE(sg), FIFO_SIZE); - printk(KERN_INFO "DMA sgl entries: %d\n", ret); - if (!ret) { + nents = kfifo_dma_in_prepare(&fifo, sg, ARRAY_SIZE(sg), FIFO_SIZE); + printk(KERN_INFO "DMA sgl entries: %d\n", nents); + if (!nents) { /* fifo is full and no sgl was created */ printk(KERN_WARNING "error kfifo_dma_in_prepare\n"); return -EIO; @@ -71,7 +72,7 @@ static int __init example_init(void) /* receive data */ printk(KERN_INFO "scatterlist for receive:\n"); - for (i = 0; i < ARRAY_SIZE(sg); i++) { + for (i = 0; i < nents; i++) { printk(KERN_INFO "sg[%d] -> " "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", @@ -91,16 +92,16 @@ static int __init example_init(void) kfifo_dma_in_finish(&fifo, ret); /* Prepare to transmit data, example: 8 bytes */ - ret = kfifo_dma_out_prepare(&fifo, sg, ARRAY_SIZE(sg), 8); - printk(KERN_INFO "DMA sgl entries: %d\n", ret); - if (!ret) { + nents = kfifo_dma_out_prepare(&fifo, sg, ARRAY_SIZE(sg), 8); + printk(KERN_INFO "DMA sgl entries: %d\n", nents); + if (!nents) { /* no data was available and no sgl was created */ printk(KERN_WARNING "error kfifo_dma_out_prepare\n"); return -EIO; } printk(KERN_INFO "scatterlist for transmit:\n"); - for (i = 0; i < ARRAY_SIZE(sg); i++) { + for (i = 0; i < nents; i++) { printk(KERN_INFO "sg[%d] -> " "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", diff --git a/scripts/Makefile b/scripts/Makefile index 842dbc2..2e08810 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -11,6 +11,7 @@ hostprogs-$(CONFIG_KALLSYMS) += kallsyms hostprogs-$(CONFIG_LOGO) += pnmtologo hostprogs-$(CONFIG_VT) += conmakehash hostprogs-$(CONFIG_IKCONFIG) += bin2c +hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount always := $(hostprogs-y) $(hostprogs-m) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a1a5cf9..4d03a7ef 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -209,12 +209,16 @@ cmd_modversions = \ endif ifdef CONFIG_FTRACE_MCOUNT_RECORD +ifdef BUILD_C_RECORDMCOUNT +cmd_record_mcount = $(srctree)/scripts/recordmcount "$(@)"; +else cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ "$(if $(CONFIG_64BIT),64,32)" \ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; endif +endif define rule_cc_o_c $(call echo-cmd,checksrc) $(cmd_checksrc) \ diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 54fd1b7..7bfcf1a 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -101,14 +101,6 @@ basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(basetarget)))" modname_flags = $(if $(filter 1,$(words $(modname))),\ -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))") -#hash values -ifdef CONFIG_DYNAMIC_DEBUG -debug_flags = -D"DEBUG_HASH=$(shell ./scripts/basic/hash djb2 $(@D)$(modname))"\ - -D"DEBUG_HASH2=$(shell ./scripts/basic/hash r5 $(@D)$(modname))" -else -debug_flags = -endif - orig_c_flags = $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(KBUILD_SUBDIR_CCFLAGS) \ $(ccflags-y) $(CFLAGS_$(basetarget).o) _c_flags = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(orig_c_flags)) @@ -152,8 +144,7 @@ endif c_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) \ $(__c_flags) $(modkern_cflags) \ - -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) \ - $(debug_flags) + -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags) a_flags = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) 
$(LINUXINCLUDE) \ $(__a_flags) $(modkern_aflags) diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile index 0955995..4c324a1 100644 --- a/scripts/basic/Makefile +++ b/scripts/basic/Makefile @@ -9,7 +9,7 @@ # fixdep: Used to generate dependency information during build process # docproc: Used in Documentation/DocBook -hostprogs-y := fixdep docproc hash +hostprogs-y := fixdep docproc always := $(hostprogs-y) # fixdep is needed to compile other host programs diff --git a/scripts/basic/hash.c b/scripts/basic/hash.c deleted file mode 100644 index 2ef5d3f..0000000 --- a/scripts/basic/hash.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (C) 2008 Red Hat, Inc., Jason Baron <jbaron@redhat.com> - * - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#define DYNAMIC_DEBUG_HASH_BITS 6 - -static const char *program; - -static void usage(void) -{ - printf("Usage: %s <djb2|r5> <modname>\n", program); - exit(1); -} - -/* djb2 hashing algorithm by Dan Bernstein. From: - * http://www.cse.yorku.ca/~oz/hash.html - */ - -static unsigned int djb2_hash(char *str) -{ - unsigned long hash = 5381; - int c; - - c = *str; - while (c) { - hash = ((hash << 5) + hash) + c; - c = *++str; - } - return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1)); -} - -static unsigned int r5_hash(char *str) -{ - unsigned long hash = 0; - int c; - - c = *str; - while (c) { - hash = (hash + (c << 4) + (c >> 4)) * 11; - c = *++str; - } - return (unsigned int)(hash & ((1 << DYNAMIC_DEBUG_HASH_BITS) - 1)); -} - -int main(int argc, char *argv[]) -{ - program = argv[0]; - - if (argc != 3) - usage(); - if (!strcmp(argv[1], "djb2")) - printf("%d\n", djb2_hash(argv[2])); - else if (!strcmp(argv[1], "r5")) - printf("%d\n", r5_hash(argv[2])); - else - usage(); - exit(0); -} - diff --git a/scripts/gcc-goto.sh b/scripts/gcc-goto.sh new file mode 100644 index 0000000..520d16b --- /dev/null +++ b/scripts/gcc-goto.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Test for gcc 'asm goto' suport +# Copyright (C) 2010, Jason Baron <jbaron@redhat.com> + +echo "int main(void) { entry: asm goto (\"\"::::entry); return 0; }" | $@ -x c - -c -o /dev/null >/dev/null 2>&1 && echo "y" diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c new file mode 100644 index 0000000..7f7f718 --- /dev/null +++ b/scripts/recordmcount.c @@ -0,0 +1,345 @@ +/* + * recordmcount.c: construct a table of the locations of calls to 'mcount' + * so that ftrace can find them quickly. + * Copyright 2009 John F. Reiser <jreiser@BitWagon.com>. All rights reserved. + * Licensed under the GNU General Public License, version 2 (GPLv2). + * + * Restructured to fit Linux format, as well as other updates: + * Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc. + */ + +/* + * Strategy: alter the .o file in-place. + * + * Append a new STRTAB that has the new section names, followed by a new array + * ElfXX_Shdr[] that has the new section headers, followed by the section + * contents for __mcount_loc and its relocations. The old shstrtab strings, + * and the old ElfXX_Shdr[] array, remain as "garbage" (commonly, a couple + * kilobytes.) Subsequent processing by /bin/ld (or the kernel module loader) + * will ignore the garbage regions, because they are not designated by the + * new .e_shoff nor the new ElfXX_Shdr[]. [In order to remove the garbage, + * then use "ld -r" to create a new file that omits the garbage.] 
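A few hunks above, the new scripts/gcc-goto.sh pipes a one-line program into the compiler to test whether it accepts "asm goto". The same probe expanded into a file that can be compiled directly, assuming a gcc (or clang) new enough to implement the extension; the asm body is empty exactly as in the script, so nothing ever branches and the puts() simply shows that execution falls through:

#include <stdio.h>

int main(void)
{
entry:
	/* Lists "entry" as a label the (empty) asm could branch to. */
	asm goto("" : : : : entry);
	puts("compiler accepts asm goto");
	return 0;
}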
+ */ + +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <elf.h> +#include <fcntl.h> +#include <setjmp.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +static int fd_map; /* File descriptor for file being modified. */ +static int mmap_failed; /* Boolean flag. */ +static void *ehdr_curr; /* current ElfXX_Ehdr * for resource cleanup */ +static char gpfx; /* prefix for global symbol name (sometimes '_') */ +static struct stat sb; /* Remember .st_size, etc. */ +static jmp_buf jmpenv; /* setjmp/longjmp per-file error escape */ + +/* setjmp() return values */ +enum { + SJ_SETJMP = 0, /* hardwired first return */ + SJ_FAIL, + SJ_SUCCEED +}; + +/* Per-file resource cleanup when multiple files. */ +static void +cleanup(void) +{ + if (!mmap_failed) + munmap(ehdr_curr, sb.st_size); + else + free(ehdr_curr); + close(fd_map); +} + +static void __attribute__((noreturn)) +fail_file(void) +{ + cleanup(); + longjmp(jmpenv, SJ_FAIL); +} + +static void __attribute__((noreturn)) +succeed_file(void) +{ + cleanup(); + longjmp(jmpenv, SJ_SUCCEED); +} + +/* ulseek, uread, ...: Check return value for errors. */ + +static off_t +ulseek(int const fd, off_t const offset, int const whence) +{ + off_t const w = lseek(fd, offset, whence); + if ((off_t)-1 == w) { + perror("lseek"); + fail_file(); + } + return w; +} + +static size_t +uread(int const fd, void *const buf, size_t const count) +{ + size_t const n = read(fd, buf, count); + if (n != count) { + perror("read"); + fail_file(); + } + return n; +} + +static size_t +uwrite(int const fd, void const *const buf, size_t const count) +{ + size_t const n = write(fd, buf, count); + if (n != count) { + perror("write"); + fail_file(); + } + return n; +} + +static void * +umalloc(size_t size) +{ + void *const addr = malloc(size); + if (0 == addr) { + fprintf(stderr, "malloc failed: %zu bytes\n", size); + fail_file(); + } + return addr; +} + +/* + * Get the whole file as a programming convenience in order to avoid + * malloc+lseek+read+free of many pieces. If successful, then mmap + * avoids copying unused pieces; else just read the whole file. + * Open for both read and write; new info will be appended to the file. + * Use MAP_PRIVATE so that a few changes to the in-memory ElfXX_Ehdr + * do not propagate to the file until an explicit overwrite at the last. + * This preserves most aspects of consistency (all except .st_size) + * for simultaneous readers of the file while we are appending to it. + * However, multiple writers still are bad. We choose not to use + * locking because it is expensive and the use case of kernel build + * makes multiple writers unlikely. + */ +static void *mmap_file(char const *fname) +{ + void *addr; + + fd_map = open(fname, O_RDWR); + if (0 > fd_map || 0 > fstat(fd_map, &sb)) { + perror(fname); + fail_file(); + } + if (!S_ISREG(sb.st_mode)) { + fprintf(stderr, "not a regular file: %s\n", fname); + fail_file(); + } + addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, + fd_map, 0); + mmap_failed = 0; + if (MAP_FAILED == addr) { + mmap_failed = 1; + addr = umalloc(sb.st_size); + uread(fd_map, addr, sb.st_size); + } + return addr; +} + +/* w8rev, w8nat, ...: Handle endianness. 
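mmap_file() above maps the whole object MAP_PRIVATE, so that in-memory edits do not reach the file until they are explicitly written back, and drops to malloc+read when mmap fails. A simplified read-only user-space version of that fallback; it reports failure by returning NULL rather than longjmp'ing the way recordmcount does, and load_file() is a made-up name:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

static void *load_file(const char *fname, size_t *sizep, int *used_mmap)
{
	struct stat sb;
	void *addr = NULL;
	int fd = open(fname, O_RDONLY);

	if (fd < 0 || fstat(fd, &sb) < 0 || !S_ISREG(sb.st_mode))
		goto out;

	*sizep = sb.st_size;
	addr = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (addr != MAP_FAILED) {
		*used_mmap = 1;
	} else {
		/* Fall back to reading the whole file into heap memory. */
		*used_mmap = 0;
		addr = malloc(sb.st_size);
		if (addr && read(fd, addr, sb.st_size) != (ssize_t)sb.st_size) {
			free(addr);
			addr = NULL;
		}
	}
out:
	if (fd >= 0)
		close(fd);
	return addr;
}

int main(int argc, char *argv[])
{
	size_t size;
	int used_mmap;
	void *buf;

	if (argc < 2) {
		fprintf(stderr, "usage: %s file\n", argv[0]);
		return 1;
	}
	buf = load_file(argv[1], &size, &used_mmap);
	if (!buf) {
		fprintf(stderr, "could not load %s\n", argv[1]);
		return 1;
	}
	printf("loaded %zu bytes via %s\n", size, used_mmap ? "mmap" : "read");
	return 0;
}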
*/ + +static uint64_t w8rev(uint64_t const x) +{ + return ((0xff & (x >> (0 * 8))) << (7 * 8)) + | ((0xff & (x >> (1 * 8))) << (6 * 8)) + | ((0xff & (x >> (2 * 8))) << (5 * 8)) + | ((0xff & (x >> (3 * 8))) << (4 * 8)) + | ((0xff & (x >> (4 * 8))) << (3 * 8)) + | ((0xff & (x >> (5 * 8))) << (2 * 8)) + | ((0xff & (x >> (6 * 8))) << (1 * 8)) + | ((0xff & (x >> (7 * 8))) << (0 * 8)); +} + +static uint32_t w4rev(uint32_t const x) +{ + return ((0xff & (x >> (0 * 8))) << (3 * 8)) + | ((0xff & (x >> (1 * 8))) << (2 * 8)) + | ((0xff & (x >> (2 * 8))) << (1 * 8)) + | ((0xff & (x >> (3 * 8))) << (0 * 8)); +} + +static uint32_t w2rev(uint16_t const x) +{ + return ((0xff & (x >> (0 * 8))) << (1 * 8)) + | ((0xff & (x >> (1 * 8))) << (0 * 8)); +} + +static uint64_t w8nat(uint64_t const x) +{ + return x; +} + +static uint32_t w4nat(uint32_t const x) +{ + return x; +} + +static uint32_t w2nat(uint16_t const x) +{ + return x; +} + +static uint64_t (*w8)(uint64_t); +static uint32_t (*w)(uint32_t); +static uint32_t (*w2)(uint16_t); + +/* Names of the sections that could contain calls to mcount. */ +static int +is_mcounted_section_name(char const *const txtname) +{ + return 0 == strcmp(".text", txtname) || + 0 == strcmp(".sched.text", txtname) || + 0 == strcmp(".spinlock.text", txtname) || + 0 == strcmp(".irqentry.text", txtname) || + 0 == strcmp(".text.unlikely", txtname); +} + +/* 32 bit and 64 bit are very similar */ +#include "recordmcount.h" +#define RECORD_MCOUNT_64 +#include "recordmcount.h" + +static void +do_file(char const *const fname) +{ + Elf32_Ehdr *const ehdr = mmap_file(fname); + unsigned int reltype = 0; + + ehdr_curr = ehdr; + w = w4nat; + w2 = w2nat; + w8 = w8nat; + switch (ehdr->e_ident[EI_DATA]) { + static unsigned int const endian = 1; + default: { + fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", + ehdr->e_ident[EI_DATA], fname); + fail_file(); + } break; + case ELFDATA2LSB: { + if (1 != *(unsigned char const *)&endian) { + /* main() is big endian, file.o is little endian. */ + w = w4rev; + w2 = w2rev; + w8 = w8rev; + } + } break; + case ELFDATA2MSB: { + if (0 != *(unsigned char const *)&endian) { + /* main() is little endian, file.o is big endian. 
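do_file() picks the w/w2/w8 accessors by comparing the object file's EI_DATA byte order against the host's, which it detects by peeking at the first byte of the constant 1. The same trick in isolation, keeping only the 32-bit pair and simply pretending the data being read is big endian; w4nat()/w4rev() mirror the helpers above, everything else is illustrative:

#include <stdint.h>
#include <stdio.h>

static uint32_t w4nat(uint32_t x)
{
	return x;		/* file and host agree, no swap needed */
}

static uint32_t w4rev(uint32_t x)
{
	return ((x & 0x000000ffu) << 24) |
	       ((x & 0x0000ff00u) << 8)  |
	       ((x & 0x00ff0000u) >> 8)  |
	       ((x & 0xff000000u) >> 24);
}

int main(void)
{
	static const unsigned int endian = 1;
	int host_is_le = *(const unsigned char *)&endian == 1;
	/* Assume the input is big endian (ELFDATA2MSB) for this demo. */
	uint32_t (*w)(uint32_t) = host_is_le ? w4rev : w4nat;

	printf("host is %s endian; raw 0x11223344 reads back as 0x%08x\n",
	       host_is_le ? "little" : "big",
	       (unsigned int)w(0x11223344u));
	return 0;
}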
*/ + w = w4rev; + w2 = w2rev; + w8 = w8rev; + } + } break; + } /* end switch */ + if (0 != memcmp(ELFMAG, ehdr->e_ident, SELFMAG) + || ET_REL != w2(ehdr->e_type) + || EV_CURRENT != ehdr->e_ident[EI_VERSION]) { + fprintf(stderr, "unrecognized ET_REL file %s\n", fname); + fail_file(); + } + + gpfx = 0; + switch (w2(ehdr->e_machine)) { + default: { + fprintf(stderr, "unrecognized e_machine %d %s\n", + w2(ehdr->e_machine), fname); + fail_file(); + } break; + case EM_386: reltype = R_386_32; break; + case EM_ARM: reltype = R_ARM_ABS32; break; + case EM_IA_64: reltype = R_IA64_IMM64; gpfx = '_'; break; + case EM_PPC: reltype = R_PPC_ADDR32; gpfx = '_'; break; + case EM_PPC64: reltype = R_PPC64_ADDR64; gpfx = '_'; break; + case EM_S390: /* reltype: e_class */ gpfx = '_'; break; + case EM_SH: reltype = R_SH_DIR32; break; + case EM_SPARCV9: reltype = R_SPARC_64; gpfx = '_'; break; + case EM_X86_64: reltype = R_X86_64_64; break; + } /* end switch */ + + switch (ehdr->e_ident[EI_CLASS]) { + default: { + fprintf(stderr, "unrecognized ELF class %d %s\n", + ehdr->e_ident[EI_CLASS], fname); + fail_file(); + } break; + case ELFCLASS32: { + if (sizeof(Elf32_Ehdr) != w2(ehdr->e_ehsize) + || sizeof(Elf32_Shdr) != w2(ehdr->e_shentsize)) { + fprintf(stderr, + "unrecognized ET_REL file: %s\n", fname); + fail_file(); + } + if (EM_S390 == w2(ehdr->e_machine)) + reltype = R_390_32; + do32(ehdr, fname, reltype); + } break; + case ELFCLASS64: { + Elf64_Ehdr *const ghdr = (Elf64_Ehdr *)ehdr; + if (sizeof(Elf64_Ehdr) != w2(ghdr->e_ehsize) + || sizeof(Elf64_Shdr) != w2(ghdr->e_shentsize)) { + fprintf(stderr, + "unrecognized ET_REL file: %s\n", fname); + fail_file(); + } + if (EM_S390 == w2(ghdr->e_machine)) + reltype = R_390_64; + do64(ghdr, fname, reltype); + } break; + } /* end switch */ + + cleanup(); +} + +int +main(int argc, char const *argv[]) +{ + int n_error = 0; /* gcc-4.3.0 false positive complaint */ + if (argc <= 1) + fprintf(stderr, "usage: recordmcount file.o...\n"); + else /* Process each file in turn, allowing deep failure. */ + for (--argc, ++argv; 0 < argc; --argc, ++argv) { + int const sjval = setjmp(jmpenv); + switch (sjval) { + default: { + fprintf(stderr, "internal error: %s\n", argv[0]); + exit(1); + } break; + case SJ_SETJMP: { /* normal sequence */ + /* Avoid problems if early cleanup() */ + fd_map = -1; + ehdr_curr = NULL; + mmap_failed = 1; + do_file(argv[0]); + } break; + case SJ_FAIL: { /* error in do_file or below */ + ++n_error; + } break; + case SJ_SUCCEED: { /* premature success */ + /* do nothing */ + } break; + } /* end switch */ + } + return !!n_error; +} + + diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h new file mode 100644 index 0000000..7f39d09 --- /dev/null +++ b/scripts/recordmcount.h @@ -0,0 +1,366 @@ +/* + * recordmcount.h + * + * This code was taken out of recordmcount.c written by + * Copyright 2009 John F. Reiser <jreiser@BitWagon.com>. All rights reserved. + * + * The original code had the same algorithms for both 32bit + * and 64bit ELF files, but the code was duplicated to support + * the difference in structures that were used. This + * file creates a macro of everything that is different between + * the 64 and 32 bit code, such that by including this header + * twice we can create both sets of functions by including this + * header once with RECORD_MCOUNT_64 undefined, and again with + * it defined. + * + * This conversion to macros was done by: + * Copyright 2010 Steven Rostedt <srostedt@redhat.com>, Red Hat Inc. 
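The main() shown above wraps every input file in its own setjmp() so that fail_file() and succeed_file(), called from deep inside the ELF parsing, can longjmp straight back and let the loop move on to the next object. A compact user-space model of that control flow; process() and its hard-coded failure case are fabricated purely for the demonstration:

#include <setjmp.h>
#include <stdio.h>

enum { SJ_SETJMP = 0, SJ_FAIL, SJ_SUCCEED };

static jmp_buf jmpenv;

static void fail_item(void)
{
	longjmp(jmpenv, SJ_FAIL);	/* unwind straight back to main() */
}

static void process(const char *name)
{
	if (name[0] == 'b')		/* stand-in for a real parse error */
		fail_item();
	printf("processed %s\n", name);
}

int main(void)
{
	const char *items[] = { "alpha", "bogus", "gamma" };
	int n_error = 0;

	for (int i = 0; i < 3; i++) {
		switch (setjmp(jmpenv)) {
		case SJ_SETJMP:		/* normal first return: do the work */
			process(items[i]);
			break;
		case SJ_FAIL:		/* a longjmp landed here */
			fprintf(stderr, "failed on %s\n", items[i]);
			n_error++;
			break;
		case SJ_SUCCEED:	/* early but harmless exit */
			break;
		}
	}
	return !!n_error;
}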
+ * + * Licensed under the GNU General Public License, version 2 (GPLv2). + */ +#undef append_func +#undef sift_rel_mcount +#undef find_secsym_ndx +#undef __has_rel_mcount +#undef has_rel_mcount +#undef tot_relsize +#undef do_func +#undef Elf_Ehdr +#undef Elf_Shdr +#undef Elf_Rel +#undef Elf_Rela +#undef Elf_Sym +#undef ELF_R_SYM +#undef ELF_R_INFO +#undef ELF_ST_BIND +#undef uint_t +#undef _w +#undef _align +#undef _size + +#ifdef RECORD_MCOUNT_64 +# define append_func append64 +# define sift_rel_mcount sift64_rel_mcount +# define find_secsym_ndx find64_secsym_ndx +# define __has_rel_mcount __has64_rel_mcount +# define has_rel_mcount has64_rel_mcount +# define tot_relsize tot64_relsize +# define do_func do64 +# define Elf_Ehdr Elf64_Ehdr +# define Elf_Shdr Elf64_Shdr +# define Elf_Rel Elf64_Rel +# define Elf_Rela Elf64_Rela +# define Elf_Sym Elf64_Sym +# define ELF_R_SYM ELF64_R_SYM +# define ELF_R_INFO ELF64_R_INFO +# define ELF_ST_BIND ELF64_ST_BIND +# define uint_t uint64_t +# define _w w8 +# define _align 7u +# define _size 8 +#else +# define append_func append32 +# define sift_rel_mcount sift32_rel_mcount +# define find_secsym_ndx find32_secsym_ndx +# define __has_rel_mcount __has32_rel_mcount +# define has_rel_mcount has32_rel_mcount +# define tot_relsize tot32_relsize +# define do_func do32 +# define Elf_Ehdr Elf32_Ehdr +# define Elf_Shdr Elf32_Shdr +# define Elf_Rel Elf32_Rel +# define Elf_Rela Elf32_Rela +# define Elf_Sym Elf32_Sym +# define ELF_R_SYM ELF32_R_SYM +# define ELF_R_INFO ELF32_R_INFO +# define ELF_ST_BIND ELF32_ST_BIND +# define uint_t uint32_t +# define _w w +# define _align 3u +# define _size 4 +#endif + +/* Append the new shstrtab, Elf_Shdr[], __mcount_loc and its relocations. */ +static void append_func(Elf_Ehdr *const ehdr, + Elf_Shdr *const shstr, + uint_t const *const mloc0, + uint_t const *const mlocp, + Elf_Rel const *const mrel0, + Elf_Rel const *const mrelp, + unsigned int const rel_entsize, + unsigned int const symsec_sh_link) +{ + /* Begin constructing output file */ + Elf_Shdr mcsec; + char const *mc_name = (sizeof(Elf_Rela) == rel_entsize) + ? 
".rela__mcount_loc" + : ".rel__mcount_loc"; + unsigned const old_shnum = w2(ehdr->e_shnum); + uint_t const old_shoff = _w(ehdr->e_shoff); + uint_t const old_shstr_sh_size = _w(shstr->sh_size); + uint_t const old_shstr_sh_offset = _w(shstr->sh_offset); + uint_t t = 1 + strlen(mc_name) + _w(shstr->sh_size); + uint_t new_e_shoff; + + shstr->sh_size = _w(t); + shstr->sh_offset = _w(sb.st_size); + t += sb.st_size; + t += (_align & -t); /* word-byte align */ + new_e_shoff = t; + + /* body for new shstrtab */ + ulseek(fd_map, sb.st_size, SEEK_SET); + uwrite(fd_map, old_shstr_sh_offset + (void *)ehdr, old_shstr_sh_size); + uwrite(fd_map, mc_name, 1 + strlen(mc_name)); + + /* old(modified) Elf_Shdr table, word-byte aligned */ + ulseek(fd_map, t, SEEK_SET); + t += sizeof(Elf_Shdr) * old_shnum; + uwrite(fd_map, old_shoff + (void *)ehdr, + sizeof(Elf_Shdr) * old_shnum); + + /* new sections __mcount_loc and .rel__mcount_loc */ + t += 2*sizeof(mcsec); + mcsec.sh_name = w((sizeof(Elf_Rela) == rel_entsize) + strlen(".rel") + + old_shstr_sh_size); + mcsec.sh_type = w(SHT_PROGBITS); + mcsec.sh_flags = _w(SHF_ALLOC); + mcsec.sh_addr = 0; + mcsec.sh_offset = _w(t); + mcsec.sh_size = _w((void *)mlocp - (void *)mloc0); + mcsec.sh_link = 0; + mcsec.sh_info = 0; + mcsec.sh_addralign = _w(_size); + mcsec.sh_entsize = _w(_size); + uwrite(fd_map, &mcsec, sizeof(mcsec)); + + mcsec.sh_name = w(old_shstr_sh_size); + mcsec.sh_type = (sizeof(Elf_Rela) == rel_entsize) + ? w(SHT_RELA) + : w(SHT_REL); + mcsec.sh_flags = 0; + mcsec.sh_addr = 0; + mcsec.sh_offset = _w((void *)mlocp - (void *)mloc0 + t); + mcsec.sh_size = _w((void *)mrelp - (void *)mrel0); + mcsec.sh_link = w(symsec_sh_link); + mcsec.sh_info = w(old_shnum); + mcsec.sh_addralign = _w(_size); + mcsec.sh_entsize = _w(rel_entsize); + uwrite(fd_map, &mcsec, sizeof(mcsec)); + + uwrite(fd_map, mloc0, (void *)mlocp - (void *)mloc0); + uwrite(fd_map, mrel0, (void *)mrelp - (void *)mrel0); + + ehdr->e_shoff = _w(new_e_shoff); + ehdr->e_shnum = w2(2 + w2(ehdr->e_shnum)); /* {.rel,}__mcount_loc */ + ulseek(fd_map, 0, SEEK_SET); + uwrite(fd_map, ehdr, sizeof(*ehdr)); +} + + +/* + * Look at the relocations in order to find the calls to mcount. + * Accumulate the section offsets that are found, and their relocation info, + * onto the end of the existing arrays. + */ +static uint_t *sift_rel_mcount(uint_t *mlocp, + unsigned const offbase, + Elf_Rel **const mrelpp, + Elf_Shdr const *const relhdr, + Elf_Ehdr const *const ehdr, + unsigned const recsym, + uint_t const recval, + unsigned const reltype) +{ + uint_t *const mloc0 = mlocp; + Elf_Rel *mrelp = *mrelpp; + Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + + (void *)ehdr); + unsigned const symsec_sh_link = w(relhdr->sh_link); + Elf_Shdr const *const symsec = &shdr0[symsec_sh_link]; + Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symsec->sh_offset) + + (void *)ehdr); + + Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)]; + char const *const str0 = (char const *)(_w(strsec->sh_offset) + + (void *)ehdr); + + Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset) + + (void *)ehdr); + unsigned rel_entsize = _w(relhdr->sh_entsize); + unsigned const nrel = _w(relhdr->sh_size) / rel_entsize; + Elf_Rel const *relp = rel0; + + unsigned mcountsym = 0; + unsigned t; + + for (t = nrel; t; --t) { + if (!mcountsym) { + Elf_Sym const *const symp = + &sym0[ELF_R_SYM(_w(relp->r_info))]; + char const *symname = &str0[w(symp->st_name)]; + + if ('.' 
== symname[0]) + ++symname; /* ppc64 hack */ + if (0 == strcmp((('_' == gpfx) ? "_mcount" : "mcount"), + symname)) + mcountsym = ELF_R_SYM(_w(relp->r_info)); + } + + if (mcountsym == ELF_R_SYM(_w(relp->r_info))) { + uint_t const addend = _w(_w(relp->r_offset) - recval); + + mrelp->r_offset = _w(offbase + + ((void *)mlocp - (void *)mloc0)); + mrelp->r_info = _w(ELF_R_INFO(recsym, reltype)); + if (sizeof(Elf_Rela) == rel_entsize) { + ((Elf_Rela *)mrelp)->r_addend = addend; + *mlocp++ = 0; + } else + *mlocp++ = addend; + + mrelp = (Elf_Rel *)(rel_entsize + (void *)mrelp); + } + relp = (Elf_Rel const *)(rel_entsize + (void *)relp); + } + *mrelpp = mrelp; + return mlocp; +} + + +/* + * Find a symbol in the given section, to be used as the base for relocating + * the table of offsets of calls to mcount. A local or global symbol suffices, + * but avoid a Weak symbol because it may be overridden; the change in value + * would invalidate the relocations of the offsets of the calls to mcount. + * Often the found symbol will be the unnamed local symbol generated by + * GNU 'as' for the start of each section. For example: + * Num: Value Size Type Bind Vis Ndx Name + * 2: 00000000 0 SECTION LOCAL DEFAULT 1 + */ +static unsigned find_secsym_ndx(unsigned const txtndx, + char const *const txtname, + uint_t *const recvalp, + Elf_Shdr const *const symhdr, + Elf_Ehdr const *const ehdr) +{ + Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symhdr->sh_offset) + + (void *)ehdr); + unsigned const nsym = _w(symhdr->sh_size) / _w(symhdr->sh_entsize); + Elf_Sym const *symp; + unsigned t; + + for (symp = sym0, t = nsym; t; --t, ++symp) { + unsigned int const st_bind = ELF_ST_BIND(symp->st_info); + + if (txtndx == w2(symp->st_shndx) + /* avoid STB_WEAK */ + && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) { + *recvalp = _w(symp->st_value); + return symp - sym0; + } + } + fprintf(stderr, "Cannot find symbol for section %d: %s.\n", + txtndx, txtname); + fail_file(); +} + + +/* Evade ISO C restriction: no declaration after statement in has_rel_mcount. */ +static char const * +__has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ + Elf_Shdr const *const shdr0, + char const *const shstrtab, + char const *const fname) +{ + /* .sh_info depends on .sh_type == SHT_REL[,A] */ + Elf_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)]; + char const *const txtname = &shstrtab[w(txthdr->sh_name)]; + + if (0 == strcmp("__mcount_loc", txtname)) { + fprintf(stderr, "warning: __mcount_loc already exists: %s\n", + fname); + succeed_file(); + } + if (SHT_PROGBITS != w(txthdr->sh_type) || + !is_mcounted_section_name(txtname)) + return NULL; + return txtname; +} + +static char const *has_rel_mcount(Elf_Shdr const *const relhdr, + Elf_Shdr const *const shdr0, + char const *const shstrtab, + char const *const fname) +{ + if (SHT_REL != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type)) + return NULL; + return __has_rel_mcount(relhdr, shdr0, shstrtab, fname); +} + + +static unsigned tot_relsize(Elf_Shdr const *const shdr0, + unsigned nhdr, + const char *const shstrtab, + const char *const fname) +{ + unsigned totrelsz = 0; + Elf_Shdr const *shdrp = shdr0; + + for (; nhdr; --nhdr, ++shdrp) { + if (has_rel_mcount(shdrp, shdr0, shstrtab, fname)) + totrelsz += _w(shdrp->sh_size); + } + return totrelsz; +} + + +/* Overall supervision for Elf32 ET_REL file. 
*/ +static void +do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) +{ + Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + + (void *)ehdr); + unsigned const nhdr = w2(ehdr->e_shnum); + Elf_Shdr *const shstr = &shdr0[w2(ehdr->e_shstrndx)]; + char const *const shstrtab = (char const *)(_w(shstr->sh_offset) + + (void *)ehdr); + + Elf_Shdr const *relhdr; + unsigned k; + + /* Upper bound on space: assume all relevant relocs are for mcount. */ + unsigned const totrelsz = tot_relsize(shdr0, nhdr, shstrtab, fname); + Elf_Rel *const mrel0 = umalloc(totrelsz); + Elf_Rel * mrelp = mrel0; + + /* 2*sizeof(address) <= sizeof(Elf_Rel) */ + uint_t *const mloc0 = umalloc(totrelsz>>1); + uint_t * mlocp = mloc0; + + unsigned rel_entsize = 0; + unsigned symsec_sh_link = 0; + + for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { + char const *const txtname = has_rel_mcount(relhdr, shdr0, + shstrtab, fname); + if (txtname) { + uint_t recval = 0; + unsigned const recsym = find_secsym_ndx( + w(relhdr->sh_info), txtname, &recval, + &shdr0[symsec_sh_link = w(relhdr->sh_link)], + ehdr); + + rel_entsize = _w(relhdr->sh_entsize); + mlocp = sift_rel_mcount(mlocp, + (void *)mlocp - (void *)mloc0, &mrelp, + relhdr, ehdr, recsym, recval, reltype); + } + } + if (mloc0 != mlocp) { + append_func(ehdr, shstr, mloc0, mlocp, mrel0, mrelp, + rel_entsize, symsec_sh_link); + } + free(mrel0); + free(mloc0); +} diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index ef43995..c668b44 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -1416,15 +1416,19 @@ static char *tomoyo_print_header(struct tomoyo_request_info *r) const pid_t gpid = task_pid_nr(current); static const int tomoyo_buffer_len = 4096; char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS); + pid_t ppid; if (!buffer) return NULL; do_gettimeofday(&tv); + rcu_read_lock(); + ppid = task_tgid_vnr(current->real_parent); + rcu_read_unlock(); snprintf(buffer, tomoyo_buffer_len - 1, "#timestamp=%lu profile=%u mode=%s (global-pid=%u)" " task={ pid=%u ppid=%u uid=%u gid=%u euid=%u" " egid=%u suid=%u sgid=%u fsuid=%u fsgid=%u }", tv.tv_sec, r->profile, tomoyo_mode[r->mode], gpid, - (pid_t) sys_getpid(), (pid_t) sys_getppid(), + task_tgid_vnr(current), ppid, current_uid(), current_gid(), current_euid(), current_egid(), current_suid(), current_sgid(), current_fsuid(), current_fsgid()); diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 04454cb7b..7c66bd8 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -689,9 +689,6 @@ struct tomoyo_profile { /********** Function prototypes. **********/ -extern asmlinkage long sys_getpid(void); -extern asmlinkage long sys_getppid(void); - /* Check whether the given string starts with the given keyword. */ bool tomoyo_str_starts(char **src, const char *find); /* Get tomoyo_realpath() of current process. 
*/ diff --git a/sound/core/control.c b/sound/core/control.c index 070aab4..45a8180 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -31,6 +31,7 @@ /* max number of user-defined controls */ #define MAX_USER_CONTROLS 32 +#define MAX_CONTROL_COUNT 1028 struct snd_kctl_ioctl { struct list_head list; /* list of all ioctls */ @@ -195,6 +196,10 @@ static struct snd_kcontrol *snd_ctl_new(struct snd_kcontrol *control, if (snd_BUG_ON(!control || !control->count)) return NULL; + + if (control->count > MAX_CONTROL_COUNT) + return NULL; + kctl = kzalloc(sizeof(*kctl) + sizeof(struct snd_kcontrol_volatile) * control->count, GFP_KERNEL); if (kctl == NULL) { snd_printk(KERN_ERR "Cannot allocate control instance\n"); diff --git a/sound/i2c/other/ak4xxx-adda.c b/sound/i2c/other/ak4xxx-adda.c index 1adb8a3..42d7844 100644 --- a/sound/i2c/other/ak4xxx-adda.c +++ b/sound/i2c/other/ak4xxx-adda.c @@ -900,7 +900,7 @@ static int proc_init(struct snd_akm4xxx *ak) return 0; } #else /* !CONFIG_PROC_FS */ -static int proc_init(struct snd_akm4xxx *ak) {} +static int proc_init(struct snd_akm4xxx *ak) { return 0; } #endif int snd_akm4xxx_build_controls(struct snd_akm4xxx *ak) diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index b697fd2..10bbbaf 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -3641,6 +3641,7 @@ static struct snd_pci_quirk ad1984_cfg_tbl[] = { /* Lenovo Thinkpad T61/X61 */ SND_PCI_QUIRK_VENDOR(0x17aa, "Lenovo Thinkpad", AD1984_THINKPAD), SND_PCI_QUIRK(0x1028, 0x0214, "Dell T3400", AD1984_DELL_DESKTOP), + SND_PCI_QUIRK(0x1028, 0x0233, "Dell Latitude E6400", AD1984_DELL_DESKTOP), {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a1312a6..a432e6e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1594,12 +1594,22 @@ static void alc_auto_parse_digital(struct hda_codec *codec) } if (spec->autocfg.dig_in_pin) { - hda_nid_t dig_nid; - err = snd_hda_get_connections(codec, - spec->autocfg.dig_in_pin, - &dig_nid, 1); - if (err > 0) - spec->dig_in_nid = dig_nid; + dig_nid = codec->start_nid; + for (i = 0; i < codec->num_nodes; i++, dig_nid++) { + unsigned int wcaps = get_wcaps(codec, dig_nid); + if (get_wcaps_type(wcaps) != AC_WID_AUD_IN) + continue; + if (!(wcaps & AC_WCAP_DIGITAL)) + continue; + if (!(wcaps & AC_WCAP_CONN_LIST)) + continue; + err = get_connection_index(codec, dig_nid, + spec->autocfg.dig_in_pin); + if (err >= 0) { + spec->dig_in_nid = dig_nid; + break; + } + } } } diff --git a/sound/pci/oxygen/oxygen.c b/sound/pci/oxygen/oxygen.c index 289cb4d..6c0a11a 100644 --- a/sound/pci/oxygen/oxygen.c +++ b/sound/pci/oxygen/oxygen.c @@ -543,6 +543,10 @@ static int __devinit get_oxygen_model(struct oxygen *chip, chip->model.suspend = claro_suspend; chip->model.resume = claro_resume; chip->model.set_adc_params = set_ak5385_params; + chip->model.device_config = PLAYBACK_0_TO_I2S | + PLAYBACK_1_TO_SPDIF | + CAPTURE_0_FROM_I2S_2 | + CAPTURE_1_FROM_SPDIF; break; } if (id->driver_data == MODEL_MERIDIAN || diff --git a/sound/pci/rme9652/hdsp.c b/sound/pci/rme9652/hdsp.c index b92adef..d6fa7bf 100644 --- a/sound/pci/rme9652/hdsp.c +++ b/sound/pci/rme9652/hdsp.c @@ -4609,6 +4609,7 @@ static int snd_hdsp_hwdep_ioctl(struct snd_hwdep *hw, struct file *file, unsigne if (err < 0) return err; + memset(&info, 0, sizeof(info)); spin_lock_irqsave(&hdsp->lock, flags); info.pref_sync_ref = (unsigned char)hdsp_pref_sync_ref(hdsp); info.wordclock_sync_check = (unsigned 
char)hdsp_wc_sync_check(hdsp); diff --git a/sound/pci/rme9652/hdspm.c b/sound/pci/rme9652/hdspm.c index 547b713..0c98ef9 100644 --- a/sound/pci/rme9652/hdspm.c +++ b/sound/pci/rme9652/hdspm.c @@ -4127,6 +4127,7 @@ static int snd_hdspm_hwdep_ioctl(struct snd_hwdep * hw, struct file *file, case SNDRV_HDSPM_IOCTL_GET_CONFIG_INFO: + memset(&info, 0, sizeof(info)); spin_lock_irq(&hdspm->lock); info.pref_sync_ref = hdspm_pref_sync_ref(hdspm); info.wordclock_sync_check = hdspm_wc_sync_check(hdspm); diff --git a/sound/soc/sh/migor.c b/sound/soc/sh/migor.c index b823a5c..87e2b7fc 100644 --- a/sound/soc/sh/migor.c +++ b/sound/soc/sh/migor.c @@ -12,6 +12,7 @@ #include <linux/firmware.h> #include <linux/module.h> +#include <asm/clkdev.h> #include <asm/clock.h> #include <cpu/sh7722.h> @@ -40,12 +41,12 @@ static struct clk_ops siumckb_clk_ops = { }; static struct clk siumckb_clk = { - .name = "siumckb_clk", - .id = -1, .ops = &siumckb_clk_ops, .rate = 0, /* initialised at run-time */ }; +static struct clk_lookup *siumckb_lookup; + static int migor_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -180,6 +181,13 @@ static int __init migor_init(void) if (ret < 0) return ret; + siumckb_lookup = clkdev_alloc(&siumckb_clk, "siumckb_clk", NULL); + if (!siumckb_lookup) { + ret = -ENOMEM; + goto eclkdevalloc; + } + clkdev_add(siumckb_lookup); + /* Port number used on this machine: port B */ migor_snd_device = platform_device_alloc("soc-audio", 1); if (!migor_snd_device) { @@ -200,12 +208,15 @@ static int __init migor_init(void) epdevadd: platform_device_put(migor_snd_device); epdevalloc: + clkdev_drop(siumckb_lookup); +eclkdevalloc: clk_unregister(&siumckb_clk); return ret; } static void __exit migor_exit(void) { + clkdev_drop(siumckb_lookup); clk_unregister(&siumckb_clk); platform_device_unregister(migor_snd_device); } diff --git a/sound/soc/soc-cache.c b/sound/soc/soc-cache.c index adbc68c..f6b0d28 100644 --- a/sound/soc/soc-cache.c +++ b/sound/soc/soc-cache.c @@ -203,8 +203,9 @@ static int snd_soc_8_16_write(struct snd_soc_codec *codec, unsigned int reg, data[1] = (value >> 8) & 0xff; data[2] = value & 0xff; - if (!snd_soc_codec_volatile_register(codec, reg)) - reg_cache[reg] = value; + if (!snd_soc_codec_volatile_register(codec, reg) + && reg < codec->reg_cache_size) + reg_cache[reg] = value; if (codec->cache_only) { codec->cache_sync = 1; diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 5164a65..b2c6330 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -8,7 +8,7 @@ perf-annotate - Read perf.data (created by perf record) and display annotated co SYNOPSIS -------- [verse] -'perf annotate' [-i <file> | --input=file] symbol_name +'perf annotate' [-i <file> | --input=file] [symbol_name] DESCRIPTION ----------- @@ -24,6 +24,13 @@ OPTIONS --input=:: Input file name. (default: perf.data) +--stdio:: Use the stdio interface. + +--tui:: Use the TUI interface Use of --tui requires a tty, if one is not + present, as when piping to other commands, the stdio interface is + used. This interfaces starts by centering on the line with more + samples, TAB/UNTAB cycles thru the lines with more samples. 
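(Editorial usage note, not part of the patch; the symbol and output file names below are placeholders.) With the two new options the interface choice can be made explicit instead of relying on tty detection, for example:

	perf annotate --tui noploop                  # interactive browser, requires a tty
	perf annotate --stdio noploop > ann.txt      # plain text, safe to redirect or pipe

As the SYNOPSIS above notes, the symbol name is now optional, so perf annotate can also be run without one.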
+ SEE ALSO -------- -linkperf:perf-record[1] +linkperf:perf-record[1], linkperf:perf-report[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index abfabe9..12052c9 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -65,6 +65,13 @@ OPTIONS the tree is considered as a new profiled object. + Default: fractal,0.5. +--stdio:: Use the stdio interface. + +--tui:: Use the TUI interface, that is integrated with annotate and allows + zooming into DSOs or threads, among other features. Use of --tui + requires a tty, if one is not present, as when piping to other + commands, the stdio interface is used. + SEE ALSO -------- linkperf:perf-stat[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 4f1fa77..d1db0f6 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -313,6 +313,9 @@ TEST_PROGRAMS = SCRIPT_SH += perf-archive.sh +grep-libs = $(filter -l%,$(1)) +strip-libs = $(filter-out -l%,$(1)) + # # No Perl scripts right now: # @@ -588,14 +591,17 @@ endif ifdef NO_LIBPERL BASIC_CFLAGS += -DNO_LIBPERL else - PERL_EMBED_LDOPTS = `perl -MExtUtils::Embed -e ldopts 2>/dev/null` + PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) + PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) + PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y) BASIC_CFLAGS += -DNO_LIBPERL else - ALL_LDFLAGS += $(PERL_EMBED_LDOPTS) + ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS) + EXTLIBS += $(PERL_EMBED_LIBADD) LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o endif @@ -604,13 +610,16 @@ endif ifdef NO_LIBPYTHON BASIC_CFLAGS += -DNO_LIBPYTHON else - PYTHON_EMBED_LDOPTS = `python-config --ldflags 2>/dev/null` + PYTHON_EMBED_LDOPTS = $(shell python-config --ldflags 2>/dev/null) + PYTHON_EMBED_LDFLAGS = $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) + PYTHON_EMBED_LIBADD = $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_CCOPTS = `python-config --cflags 2>/dev/null` FLAGS_PYTHON_EMBED=$(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y) BASIC_CFLAGS += -DNO_LIBPYTHON else - ALL_LDFLAGS += $(PYTHON_EMBED_LDOPTS) + ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS) + EXTLIBS += $(PYTHON_EMBED_LIBADD) LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o endif @@ -653,6 +662,15 @@ else endif endif + +ifdef NO_STRLCPY + BASIC_CFLAGS += -DNO_STRLCPY +else + ifneq ($(call try-cc,$(SOURCE_STRLCPY),),y) + BASIC_CFLAGS += -DNO_STRLCPY + endif +endif + ifndef CC_LD_DYNPATH ifdef NO_R_TO_GCC_LINKER # Some gcc does not accept and pass -R to the linker to specify @@ -910,8 +928,8 @@ $(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@ $(OUTPUT)perf$X: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(OUTPUT)perf.o \ - $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) + $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \ + $(BUILTIN_OBJS) $(LIBS) -o $@ $(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \ @@ -1017,7 +1035,7 @@ builtin-revert.o 
wt-status.o: wt-status.h # we compile into subdirectories. if the target directory is not the source directory, they might not exists. So # we depend the various files onto their directories. DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -$(DIRECTORY_DEPS): $(sort $(dir $(DIRECTORY_DEPS))) +$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS))) # In the second step, we make a rule to actually create these directories $(sort $(dir $(DIRECTORY_DEPS))): $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 1478dc6..6d5604d8d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -28,7 +28,7 @@ static char const *input_name = "perf.data"; -static bool force; +static bool force, use_tui, use_stdio; static bool full_paths; @@ -321,7 +321,7 @@ static int hist_entry__tty_annotate(struct hist_entry *he) static void hists__find_annotations(struct hists *self) { - struct rb_node *first = rb_first(&self->entries), *nd = first; + struct rb_node *nd = rb_first(&self->entries), *next; int key = KEY_RIGHT; while (nd) { @@ -343,20 +343,19 @@ find_next: if (use_browser > 0) { key = hist_entry__tui_annotate(he); - if (is_exit_key(key)) - break; switch (key) { case KEY_RIGHT: - case '\t': - nd = rb_next(nd); + next = rb_next(nd); break; case KEY_LEFT: - if (nd == first) - continue; - nd = rb_prev(nd); - default: + next = rb_prev(nd); break; + default: + return; } + + if (next != NULL) + nd = next; } else { hist_entry__tty_annotate(he); nd = rb_next(nd); @@ -428,6 +427,8 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), + OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, @@ -443,6 +444,11 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __used) { argc = parse_options(argc, argv, options, annotate_usage, 0); + if (use_stdio) + use_browser = 0; + else if (use_tui) + use_browser = 1; + setup_browser(); symbol_conf.priv_size = sizeof(struct sym_priv); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 55fc1f4..5de405d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -32,7 +32,7 @@ static char const *input_name = "perf.data"; -static bool force; +static bool force, use_tui, use_stdio; static bool hide_unresolved; static bool dont_use_callchains; @@ -107,7 +107,8 @@ static int perf_session__add_hist_entry(struct perf_session *self, goto out_free_syms; err = 0; if (symbol_conf.use_callchain) { - err = append_chain(he->callchain, data->callchain, syms, data->period); + err = callchain_append(he->callchain, data->callchain, syms, + data->period); if (err) goto out_free_syms; } @@ -450,6 +451,8 @@ static const struct option options[] = { "Show per-thread event counters"), OPT_STRING(0, "pretty", &pretty_printing_style, "key", "pretty printing style key: normal raw"), + OPT_BOOLEAN(0, "tui", &use_tui, "Use the TUI interface"), + OPT_BOOLEAN(0, "stdio", &use_stdio, "Use the stdio interface"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, @@ -482,8 +485,15 @@ 
int cmd_report(int argc, const char **argv, const char *prefix __used) { argc = parse_options(argc, argv, options, report_usage, 0); + if (use_stdio) + use_browser = 0; + else if (use_tui) + use_browser = 1; + if (strcmp(input_name, "-") != 0) setup_browser(); + else + use_browser = 0; /* * Only in the newt browser we are doing integrated annotation, * so don't allocate extra space that won't be used in the stdio diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index 7a7b608..b253db6 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak @@ -110,6 +110,17 @@ int main(void) } endef +define SOURCE_STRLCPY +#include <stdlib.h> +extern size_t strlcpy(char *dest, const char *src, size_t size); + +int main(void) +{ + strlcpy(NULL, NULL, 0); + return 0; +} +endef + # try-cc # Usage: option = $(call try-cc, source-to-build, cc-options) try-cc = $(shell sh -c \ diff --git a/tools/perf/scripts/python/bin/netdev-times-record b/tools/perf/scripts/python/bin/netdev-times-record new file mode 100644 index 0000000..d931a82 --- /dev/null +++ b/tools/perf/scripts/python/bin/netdev-times-record @@ -0,0 +1,8 @@ +#!/bin/bash +perf record -a -e net:net_dev_xmit -e net:net_dev_queue \ + -e net:netif_receive_skb -e net:netif_rx \ + -e skb:consume_skb -e skb:kfree_skb \ + -e skb:skb_copy_datagram_iovec -e napi:napi_poll \ + -e irq:irq_handler_entry -e irq:irq_handler_exit \ + -e irq:softirq_entry -e irq:softirq_exit \ + -e irq:softirq_raise $@ diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report new file mode 100644 index 0000000..c3d0a63 --- /dev/null +++ b/tools/perf/scripts/python/bin/netdev-times-report @@ -0,0 +1,5 @@ +#!/bin/bash +# description: display a process of packet and processing time +# args: [tx] [rx] [dev=] [debug] + +perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py $@ diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py new file mode 100644 index 0000000..9aa0a32 --- /dev/null +++ b/tools/perf/scripts/python/netdev-times.py @@ -0,0 +1,464 @@ +# Display a process of packets and processed time. +# It helps us to investigate networking or network device. +# +# options +# tx: show only tx chart +# rx: show only rx chart +# dev=: show only thing related to specified device +# debug: work with debug mode. It shows buffer status. 
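(Editorial usage note, not part of the patch; the device name is a placeholder.) The options listed above are passed straight through by the report helper, so a typical run is: record with the netdev-times-record helper added earlier, then replay the trace through this script, e.g.

	perf trace -s ~/libexec/perf-core/scripts/python/netdev-times.py rx dev=eth0 debug

which limits the output to the receive chart for eth0 and appends the buffer statistics printed in debug mode.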
+ +import os +import sys + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from Core import * +from Util import * + +all_event_list = []; # insert all tracepoint event related with this script +irq_dic = {}; # key is cpu and value is a list which stacks irqs + # which raise NET_RX softirq +net_rx_dic = {}; # key is cpu and value include time of NET_RX softirq-entry + # and a list which stacks receive +receive_hunk_list = []; # a list which include a sequence of receive events +rx_skb_list = []; # received packet list for matching + # skb_copy_datagram_iovec + +buffer_budget = 65536; # the budget of rx_skb_list, tx_queue_list and + # tx_xmit_list +of_count_rx_skb_list = 0; # overflow count + +tx_queue_list = []; # list of packets which pass through dev_queue_xmit +of_count_tx_queue_list = 0; # overflow count + +tx_xmit_list = []; # list of packets which pass through dev_hard_start_xmit +of_count_tx_xmit_list = 0; # overflow count + +tx_free_list = []; # list of packets which is freed + +# options +show_tx = 0; +show_rx = 0; +dev = 0; # store a name of device specified by option "dev=" +debug = 0; + +# indices of event_info tuple +EINFO_IDX_NAME= 0 +EINFO_IDX_CONTEXT=1 +EINFO_IDX_CPU= 2 +EINFO_IDX_TIME= 3 +EINFO_IDX_PID= 4 +EINFO_IDX_COMM= 5 + +# Calculate a time interval(msec) from src(nsec) to dst(nsec) +def diff_msec(src, dst): + return (dst - src) / 1000000.0 + +# Display a process of transmitting a packet +def print_transmit(hunk): + if dev != 0 and hunk['dev'].find(dev) < 0: + return + print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \ + (hunk['dev'], hunk['len'], + nsecs_secs(hunk['queue_t']), + nsecs_nsecs(hunk['queue_t'])/1000, + diff_msec(hunk['queue_t'], hunk['xmit_t']), + diff_msec(hunk['xmit_t'], hunk['free_t'])) + +# Format for displaying rx packet processing +PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)" +PF_SOFT_ENTRY=" softirq_entry(+%.3fmsec)" +PF_NAPI_POLL= " napi_poll_exit(+%.3fmsec %s)" +PF_JOINT= " |" +PF_WJOINT= " | |" +PF_NET_RECV= " |---netif_receive_skb(+%.3fmsec skb=%x len=%d)" +PF_NET_RX= " |---netif_rx(+%.3fmsec skb=%x)" +PF_CPY_DGRAM= " | skb_copy_datagram_iovec(+%.3fmsec %d:%s)" +PF_KFREE_SKB= " | kfree_skb(+%.3fmsec location=%x)" +PF_CONS_SKB= " | consume_skb(+%.3fmsec)" + +# Display a process of received packets and interrputs associated with +# a NET_RX softirq +def print_receive(hunk): + show_hunk = 0 + irq_list = hunk['irq_list'] + cpu = irq_list[0]['cpu'] + base_t = irq_list[0]['irq_ent_t'] + # check if this hunk should be showed + if dev != 0: + for i in range(len(irq_list)): + if irq_list[i]['name'].find(dev) >= 0: + show_hunk = 1 + break + else: + show_hunk = 1 + if show_hunk == 0: + return + + print "%d.%06dsec cpu=%d" % \ + (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu) + for i in range(len(irq_list)): + print PF_IRQ_ENTRY % \ + (diff_msec(base_t, irq_list[i]['irq_ent_t']), + irq_list[i]['irq'], irq_list[i]['name']) + print PF_JOINT + irq_event_list = irq_list[i]['event_list'] + for j in range(len(irq_event_list)): + irq_event = irq_event_list[j] + if irq_event['event'] == 'netif_rx': + print PF_NET_RX % \ + (diff_msec(base_t, irq_event['time']), + irq_event['skbaddr']) + print PF_JOINT + print PF_SOFT_ENTRY % \ + diff_msec(base_t, hunk['sirq_ent_t']) + print PF_JOINT + event_list = hunk['event_list'] + for i in range(len(event_list)): + event = event_list[i] + if event['event_name'] == 'napi_poll': + print PF_NAPI_POLL % \ + (diff_msec(base_t, 
event['event_t']), event['dev']) + if i == len(event_list) - 1: + print "" + else: + print PF_JOINT + else: + print PF_NET_RECV % \ + (diff_msec(base_t, event['event_t']), event['skbaddr'], + event['len']) + if 'comm' in event.keys(): + print PF_WJOINT + print PF_CPY_DGRAM % \ + (diff_msec(base_t, event['comm_t']), + event['pid'], event['comm']) + elif 'handle' in event.keys(): + print PF_WJOINT + if event['handle'] == "kfree_skb": + print PF_KFREE_SKB % \ + (diff_msec(base_t, + event['comm_t']), + event['location']) + elif event['handle'] == "consume_skb": + print PF_CONS_SKB % \ + diff_msec(base_t, + event['comm_t']) + print PF_JOINT + +def trace_begin(): + global show_tx + global show_rx + global dev + global debug + + for i in range(len(sys.argv)): + if i == 0: + continue + arg = sys.argv[i] + if arg == 'tx': + show_tx = 1 + elif arg =='rx': + show_rx = 1 + elif arg.find('dev=',0, 4) >= 0: + dev = arg[4:] + elif arg == 'debug': + debug = 1 + if show_tx == 0 and show_rx == 0: + show_tx = 1 + show_rx = 1 + +def trace_end(): + # order all events in time + all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME], + b[EINFO_IDX_TIME])) + # process all events + for i in range(len(all_event_list)): + event_info = all_event_list[i] + name = event_info[EINFO_IDX_NAME] + if name == 'irq__softirq_exit': + handle_irq_softirq_exit(event_info) + elif name == 'irq__softirq_entry': + handle_irq_softirq_entry(event_info) + elif name == 'irq__softirq_raise': + handle_irq_softirq_raise(event_info) + elif name == 'irq__irq_handler_entry': + handle_irq_handler_entry(event_info) + elif name == 'irq__irq_handler_exit': + handle_irq_handler_exit(event_info) + elif name == 'napi__napi_poll': + handle_napi_poll(event_info) + elif name == 'net__netif_receive_skb': + handle_netif_receive_skb(event_info) + elif name == 'net__netif_rx': + handle_netif_rx(event_info) + elif name == 'skb__skb_copy_datagram_iovec': + handle_skb_copy_datagram_iovec(event_info) + elif name == 'net__net_dev_queue': + handle_net_dev_queue(event_info) + elif name == 'net__net_dev_xmit': + handle_net_dev_xmit(event_info) + elif name == 'skb__kfree_skb': + handle_kfree_skb(event_info) + elif name == 'skb__consume_skb': + handle_consume_skb(event_info) + # display receive hunks + if show_rx: + for i in range(len(receive_hunk_list)): + print_receive(receive_hunk_list[i]) + # display transmit hunks + if show_tx: + print " dev len Qdisc " \ + " netdevice free" + for i in range(len(tx_free_list)): + print_transmit(tx_free_list[i]) + if debug: + print "debug buffer status" + print "----------------------------" + print "xmit Qdisc:remain:%d overflow:%d" % \ + (len(tx_queue_list), of_count_tx_queue_list) + print "xmit netdevice:remain:%d overflow:%d" % \ + (len(tx_xmit_list), of_count_tx_xmit_list) + print "receive:remain:%d overflow:%d" % \ + (len(rx_skb_list), of_count_rx_skb_list) + +# called from perf, when it finds a correspoinding event +def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, vec): + if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": + return + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) + all_event_list.append(event_info) + +def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, vec): + if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX": + return + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) + all_event_list.append(event_info) + +def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, vec): + if symbol_str("irq__softirq_entry", "vec", 
vec) != "NET_RX": + return + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec) + all_event_list.append(event_info) + +def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm, + irq, irq_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + irq, irq_name) + all_event_list.append(event_info) + +def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, irq, ret): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret) + all_event_list.append(event_info) + +def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, napi, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + napi, dev_name) + all_event_list.append(event_info) + +def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr, + skblen, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen, dev_name) + all_event_list.append(event_info) + +def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, skbaddr, + skblen, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen, dev_name) + all_event_list.append(event_info) + +def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm, + skbaddr, skblen, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen, dev_name) + all_event_list.append(event_info) + +def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm, + skbaddr, skblen, rc, dev_name): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen, rc ,dev_name) + all_event_list.append(event_info) + +def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, + skbaddr, protocol, location): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, protocol, location) + all_event_list.append(event_info) + +def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, skbaddr): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr) + all_event_list.append(event_info) + +def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm, + skbaddr, skblen): + event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, + skbaddr, skblen) + all_event_list.append(event_info) + +def handle_irq_handler_entry(event_info): + (name, context, cpu, time, pid, comm, irq, irq_name) = event_info + if cpu not in irq_dic.keys(): + irq_dic[cpu] = [] + irq_record = {'irq':irq, 'name':irq_name, 'cpu':cpu, 'irq_ent_t':time} + irq_dic[cpu].append(irq_record) + +def handle_irq_handler_exit(event_info): + (name, context, cpu, time, pid, comm, irq, ret) = event_info + if cpu not in irq_dic.keys(): + return + irq_record = irq_dic[cpu].pop() + if irq != irq_record['irq']: + return + irq_record.update({'irq_ext_t':time}) + # if an irq doesn't include NET_RX softirq, drop. 
+ if 'event_list' in irq_record.keys(): + irq_dic[cpu].append(irq_record) + +def handle_irq_softirq_raise(event_info): + (name, context, cpu, time, pid, comm, vec) = event_info + if cpu not in irq_dic.keys() \ + or len(irq_dic[cpu]) == 0: + return + irq_record = irq_dic[cpu].pop() + if 'event_list' in irq_record.keys(): + irq_event_list = irq_record['event_list'] + else: + irq_event_list = [] + irq_event_list.append({'time':time, 'event':'sirq_raise'}) + irq_record.update({'event_list':irq_event_list}) + irq_dic[cpu].append(irq_record) + +def handle_irq_softirq_entry(event_info): + (name, context, cpu, time, pid, comm, vec) = event_info + net_rx_dic[cpu] = {'sirq_ent_t':time, 'event_list':[]} + +def handle_irq_softirq_exit(event_info): + (name, context, cpu, time, pid, comm, vec) = event_info + irq_list = [] + event_list = 0 + if cpu in irq_dic.keys(): + irq_list = irq_dic[cpu] + del irq_dic[cpu] + if cpu in net_rx_dic.keys(): + sirq_ent_t = net_rx_dic[cpu]['sirq_ent_t'] + event_list = net_rx_dic[cpu]['event_list'] + del net_rx_dic[cpu] + if irq_list == [] or event_list == 0: + return + rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time, + 'irq_list':irq_list, 'event_list':event_list} + # merge information realted to a NET_RX softirq + receive_hunk_list.append(rec_data) + +def handle_napi_poll(event_info): + (name, context, cpu, time, pid, comm, napi, dev_name) = event_info + if cpu in net_rx_dic.keys(): + event_list = net_rx_dic[cpu]['event_list'] + rec_data = {'event_name':'napi_poll', + 'dev':dev_name, 'event_t':time} + event_list.append(rec_data) + +def handle_netif_rx(event_info): + (name, context, cpu, time, pid, comm, + skbaddr, skblen, dev_name) = event_info + if cpu not in irq_dic.keys() \ + or len(irq_dic[cpu]) == 0: + return + irq_record = irq_dic[cpu].pop() + if 'event_list' in irq_record.keys(): + irq_event_list = irq_record['event_list'] + else: + irq_event_list = [] + irq_event_list.append({'time':time, 'event':'netif_rx', + 'skbaddr':skbaddr, 'skblen':skblen, 'dev_name':dev_name}) + irq_record.update({'event_list':irq_event_list}) + irq_dic[cpu].append(irq_record) + +def handle_netif_receive_skb(event_info): + global of_count_rx_skb_list + + (name, context, cpu, time, pid, comm, + skbaddr, skblen, dev_name) = event_info + if cpu in net_rx_dic.keys(): + rec_data = {'event_name':'netif_receive_skb', + 'event_t':time, 'skbaddr':skbaddr, 'len':skblen} + event_list = net_rx_dic[cpu]['event_list'] + event_list.append(rec_data) + rx_skb_list.insert(0, rec_data) + if len(rx_skb_list) > buffer_budget: + rx_skb_list.pop() + of_count_rx_skb_list += 1 + +def handle_net_dev_queue(event_info): + global of_count_tx_queue_list + + (name, context, cpu, time, pid, comm, + skbaddr, skblen, dev_name) = event_info + skb = {'dev':dev_name, 'skbaddr':skbaddr, 'len':skblen, 'queue_t':time} + tx_queue_list.insert(0, skb) + if len(tx_queue_list) > buffer_budget: + tx_queue_list.pop() + of_count_tx_queue_list += 1 + +def handle_net_dev_xmit(event_info): + global of_count_tx_xmit_list + + (name, context, cpu, time, pid, comm, + skbaddr, skblen, rc, dev_name) = event_info + if rc == 0: # NETDEV_TX_OK + for i in range(len(tx_queue_list)): + skb = tx_queue_list[i] + if skb['skbaddr'] == skbaddr: + skb['xmit_t'] = time + tx_xmit_list.insert(0, skb) + del tx_queue_list[i] + if len(tx_xmit_list) > buffer_budget: + tx_xmit_list.pop() + of_count_tx_xmit_list += 1 + return + +def handle_kfree_skb(event_info): + (name, context, cpu, time, pid, comm, + skbaddr, protocol, location) = event_info + for i in 
range(len(tx_queue_list)): + skb = tx_queue_list[i] + if skb['skbaddr'] == skbaddr: + del tx_queue_list[i] + return + for i in range(len(tx_xmit_list)): + skb = tx_xmit_list[i] + if skb['skbaddr'] == skbaddr: + skb['free_t'] = time + tx_free_list.append(skb) + del tx_xmit_list[i] + return + for i in range(len(rx_skb_list)): + rec_data = rx_skb_list[i] + if rec_data['skbaddr'] == skbaddr: + rec_data.update({'handle':"kfree_skb", + 'comm':comm, 'pid':pid, 'comm_t':time}) + del rx_skb_list[i] + return + +def handle_consume_skb(event_info): + (name, context, cpu, time, pid, comm, skbaddr) = event_info + for i in range(len(tx_xmit_list)): + skb = tx_xmit_list[i] + if skb['skbaddr'] == skbaddr: + skb['free_t'] = time + tx_free_list.append(skb) + del tx_xmit_list[i] + return + +def handle_skb_copy_datagram_iovec(event_info): + (name, context, cpu, time, pid, comm, skbaddr, skblen) = event_info + for i in range(len(rx_skb_list)): + rec_data = rx_skb_list[i] + if skbaddr == rec_data['skbaddr']: + rec_data.update({'handle':"skb_copy_datagram_iovec", + 'comm':comm, 'pid':pid, 'comm_t':time}) + del rx_skb_list[i] + return diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index 27e9ebe..a772979 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -82,6 +82,8 @@ extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2 extern char *perf_pathdup(const char *fmt, ...) __attribute__((format (printf, 1, 2))); +#ifdef NO_STRLCPY extern size_t strlcpy(char *dest, const char *src, size_t size); +#endif #endif /* __PERF_CACHE_H */ diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index f231f43..e12d539 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -28,6 +28,9 @@ bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event) #define chain_for_each_child(child, parent) \ list_for_each_entry(child, &parent->children, brothers) +#define chain_for_each_child_safe(child, next, parent) \ + list_for_each_entry_safe(child, next, &parent->children, brothers) + static void rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, enum chain_mode mode) @@ -86,10 +89,10 @@ __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, * sort them by hit */ static void -sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, +sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, u64 min_hit, struct callchain_param *param __used) { - __sort_chain_flat(rb_root, node, min_hit); + __sort_chain_flat(rb_root, &root->node, min_hit); } static void __sort_chain_graph_abs(struct callchain_node *node, @@ -108,11 +111,11 @@ static void __sort_chain_graph_abs(struct callchain_node *node, } static void -sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_node *chain_root, +sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, u64 min_hit, struct callchain_param *param __used) { - __sort_chain_graph_abs(chain_root, min_hit); - rb_root->rb_node = chain_root->rb_root.rb_node; + __sort_chain_graph_abs(&chain_root->node, min_hit); + rb_root->rb_node = chain_root->node.rb_root.rb_node; } static void __sort_chain_graph_rel(struct callchain_node *node, @@ -133,11 +136,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node, } static void -sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, +sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root, u64 min_hit __used, struct 
callchain_param *param) { - __sort_chain_graph_rel(chain_root, param->min_percent / 100.0); - rb_root->rb_node = chain_root->rb_root.rb_node; + __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0); + rb_root->rb_node = chain_root->node.rb_root.rb_node; } int register_callchain_param(struct callchain_param *param) @@ -284,19 +287,18 @@ split_add_child(struct callchain_node *parent, struct resolved_chain *chain, } static int -__append_chain(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start, u64 period); +append_chain(struct callchain_node *root, struct resolved_chain *chain, + unsigned int start, u64 period); static void -__append_chain_children(struct callchain_node *root, - struct resolved_chain *chain, - unsigned int start, u64 period) +append_chain_children(struct callchain_node *root, struct resolved_chain *chain, + unsigned int start, u64 period) { struct callchain_node *rnode; /* lookup in childrens */ chain_for_each_child(rnode, root) { - unsigned int ret = __append_chain(rnode, chain, start, period); + unsigned int ret = append_chain(rnode, chain, start, period); if (!ret) goto inc_children_hit; @@ -309,8 +311,8 @@ inc_children_hit: } static int -__append_chain(struct callchain_node *root, struct resolved_chain *chain, - unsigned int start, u64 period) +append_chain(struct callchain_node *root, struct resolved_chain *chain, + unsigned int start, u64 period) { struct callchain_list *cnode; unsigned int i = start; @@ -357,7 +359,7 @@ __append_chain(struct callchain_node *root, struct resolved_chain *chain, } /* We match the node and still have a part remaining */ - __append_chain_children(root, chain, i, period); + append_chain_children(root, chain, i, period); return 0; } @@ -380,8 +382,8 @@ static void filter_context(struct ip_callchain *old, struct resolved_chain *new, } -int append_chain(struct callchain_node *root, struct ip_callchain *chain, - struct map_symbol *syms, u64 period) +int callchain_append(struct callchain_root *root, struct ip_callchain *chain, + struct map_symbol *syms, u64 period) { struct resolved_chain *filtered; @@ -398,9 +400,65 @@ int append_chain(struct callchain_node *root, struct ip_callchain *chain, if (!filtered->nr) goto end; - __append_chain_children(root, filtered, 0, period); + append_chain_children(&root->node, filtered, 0, period); + + if (filtered->nr > root->max_depth) + root->max_depth = filtered->nr; end: free(filtered); return 0; } + +static int +merge_chain_branch(struct callchain_node *dst, struct callchain_node *src, + struct resolved_chain *chain) +{ + struct callchain_node *child, *next_child; + struct callchain_list *list, *next_list; + int old_pos = chain->nr; + int err = 0; + + list_for_each_entry_safe(list, next_list, &src->val, list) { + chain->ips[chain->nr].ip = list->ip; + chain->ips[chain->nr].ms = list->ms; + chain->nr++; + list_del(&list->list); + free(list); + } + + if (src->hit) + append_chain_children(dst, chain, 0, src->hit); + + chain_for_each_child_safe(child, next_child, src) { + err = merge_chain_branch(dst, child, chain); + if (err) + break; + + list_del(&child->brothers); + free(child); + } + + chain->nr = old_pos; + + return err; +} + +int callchain_merge(struct callchain_root *dst, struct callchain_root *src) +{ + struct resolved_chain *chain; + int err; + + chain = malloc(sizeof(*chain) + + src->max_depth * sizeof(struct resolved_ip)); + if (!chain) + return -ENOMEM; + + chain->nr = 0; + + err = merge_chain_branch(&dst->node, &src->node, chain); + + free(chain); + + 
return err; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 6de4313..c15fb8c 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -26,9 +26,14 @@ struct callchain_node { u64 children_hit; }; +struct callchain_root { + u64 max_depth; + struct callchain_node node; +}; + struct callchain_param; -typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_node *, +typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *, u64, struct callchain_param *); struct callchain_param { @@ -44,15 +49,16 @@ struct callchain_list { struct list_head list; }; -static inline void callchain_init(struct callchain_node *node) +static inline void callchain_init(struct callchain_root *root) { - INIT_LIST_HEAD(&node->brothers); - INIT_LIST_HEAD(&node->children); - INIT_LIST_HEAD(&node->val); + INIT_LIST_HEAD(&root->node.brothers); + INIT_LIST_HEAD(&root->node.children); + INIT_LIST_HEAD(&root->node.val); - node->children_hit = 0; - node->parent = NULL; - node->hit = 0; + root->node.parent = NULL; + root->node.hit = 0; + root->node.children_hit = 0; + root->max_depth = 0; } static inline u64 cumul_hits(struct callchain_node *node) @@ -61,8 +67,9 @@ static inline u64 cumul_hits(struct callchain_node *node) } int register_callchain_param(struct callchain_param *param); -int append_chain(struct callchain_node *root, struct ip_callchain *chain, - struct map_symbol *syms, u64 period); +int callchain_append(struct callchain_root *root, struct ip_callchain *chain, + struct map_symbol *syms, u64 period); +int callchain_merge(struct callchain_root *dst, struct callchain_root *src); bool ip_callchain__valid(struct ip_callchain *chain, const event_t *event); #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index be22ae6..2022e87 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -87,7 +87,7 @@ static void hist_entry__add_cpumode_period(struct hist_entry *self, static struct hist_entry *hist_entry__new(struct hist_entry *template) { - size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_node) : 0; + size_t callchain_size = symbol_conf.use_callchain ? 
sizeof(struct callchain_root) : 0; struct hist_entry *self = malloc(sizeof(*self) + callchain_size); if (self != NULL) { @@ -226,6 +226,8 @@ static bool collapse__insert_entry(struct rb_root *root, struct hist_entry *he) if (!cmp) { iter->period += he->period; + if (symbol_conf.use_callchain) + callchain_merge(iter->callchain, he->callchain); hist_entry__free(he); return false; } diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 58a470d..bd74977 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -22,6 +22,7 @@ static const char *get_perf_dir(void) return "."; } +#ifdef NO_STRLCPY size_t strlcpy(char *dest, const char *src, size_t size) { size_t ret = strlen(src); @@ -33,7 +34,7 @@ size_t strlcpy(char *dest, const char *src, size_t size) } return ret; } - +#endif static char *get_pathname(void) { diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 46e531d..0b91053 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -70,7 +70,7 @@ struct hist_entry { struct hist_entry *pair; struct rb_root sorted_chain; }; - struct callchain_node callchain[0]; + struct callchain_root callchain[0]; }; enum sort_type { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index b2f5ae9..b39f499 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -388,6 +388,20 @@ size_t dso__fprintf_buildid(struct dso *self, FILE *fp) return fprintf(fp, "%s", sbuild_id); } +size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp) +{ + size_t ret = 0; + struct rb_node *nd; + struct symbol_name_rb_node *pos; + + for (nd = rb_first(&self->symbol_names[type]); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); + fprintf(fp, "%s\n", pos->sym.name); + } + + return ret; +} + size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp) { struct rb_node *nd; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index ea95c27..038f220 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -182,6 +182,7 @@ size_t machines__fprintf_dsos(struct rb_root *self, FILE *fp); size_t machines__fprintf_dsos_buildid(struct rb_root *self, FILE *fp, bool with_hits); size_t dso__fprintf_buildid(struct dso *self, FILE *fp); +size_t dso__fprintf_symbols_by_name(struct dso *self, enum map_type type, FILE *fp); size_t dso__fprintf(struct dso *self, enum map_type type, FILE *fp); enum dso_origin { diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 7ea983a..f7af2fc 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -97,7 +97,7 @@ void setup_python_scripting(void) register_python_scripting(&python_scripting_unsupported_ops); } #else -struct scripting_ops python_scripting_ops; +extern struct scripting_ops python_scripting_ops; void setup_python_scripting(void) { @@ -158,7 +158,7 @@ void setup_perl_scripting(void) register_perl_scripting(&perl_scripting_unsupported_ops); } #else -struct scripting_ops perl_scripting_ops; +extern struct scripting_ops perl_scripting_ops; void setup_perl_scripting(void) { diff --git a/tools/perf/util/ui/browser.c b/tools/perf/util/ui/browser.c index 66f2d58..6d0df80 100644 --- a/tools/perf/util/ui/browser.c +++ b/tools/perf/util/ui/browser.c @@ -1,16 +1,6 @@ -#define _GNU_SOURCE -#include <stdio.h> -#undef _GNU_SOURCE -/* - * slang versions <= 2.0.6 have a "#if HAVE_LONG_LONG" that breaks - * the build if it isn't defined. 
Use the equivalent one that glibc - * has on features.h. - */ -#include <features.h> -#ifndef HAVE_LONG_LONG -#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG -#endif #include <slang.h> +#include "libslang.h" +#include <linux/compiler.h> #include <linux/list.h> #include <linux/rbtree.h> #include <stdlib.h> @@ -19,17 +9,9 @@ #include "helpline.h" #include "../color.h" #include "../util.h" +#include <stdio.h> -#if SLANG_VERSION < 20104 -#define sltt_set_color(obj, name, fg, bg) \ - SLtt_set_color(obj,(char *)name, (char *)fg, (char *)bg) -#else -#define sltt_set_color SLtt_set_color -#endif - -newtComponent newt_form__new(void); - -int ui_browser__percent_color(double percent, bool current) +static int ui_browser__percent_color(double percent, bool current) { if (current) return HE_COLORSET_SELECTED; @@ -40,6 +22,23 @@ int ui_browser__percent_color(double percent, bool current) return HE_COLORSET_NORMAL; } +void ui_browser__set_color(struct ui_browser *self __used, int color) +{ + SLsmg_set_color(color); +} + +void ui_browser__set_percent_color(struct ui_browser *self, + double percent, bool current) +{ + int color = ui_browser__percent_color(percent, current); + ui_browser__set_color(self, color); +} + +void ui_browser__gotorc(struct ui_browser *self, int y, int x) +{ + SLsmg_gotorc(self->y + y, self->x + x); +} + void ui_browser__list_head_seek(struct ui_browser *self, off_t offset, int whence) { struct list_head *head = self->entries; @@ -111,7 +110,7 @@ unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self) nd = self->top; while (nd != NULL) { - SLsmg_gotorc(self->y + row, self->x); + ui_browser__gotorc(self, row, 0); self->write(self, nd, row); if (++row == self->height) break; @@ -131,13 +130,10 @@ void ui_browser__refresh_dimensions(struct ui_browser *self) int cols, rows; newtGetScreenSize(&cols, &rows); - if (self->width > cols - 4) - self->width = cols - 4; - self->height = rows - 5; - if (self->height > self->nr_entries) - self->height = self->nr_entries; - self->y = (rows - self->height) / 2; - self->x = (cols - self->width) / 2; + self->width = cols - 1; + self->height = rows - 2; + self->y = 1; + self->x = 0; } void ui_browser__reset_index(struct ui_browser *self) @@ -146,34 +142,48 @@ void ui_browser__reset_index(struct ui_browser *self) self->seek(self, 0, SEEK_SET); } +void ui_browser__add_exit_key(struct ui_browser *self, int key) +{ + newtFormAddHotKey(self->form, key); +} + +void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]) +{ + int i = 0; + + while (keys[i] && i < 64) { + ui_browser__add_exit_key(self, keys[i]); + ++i; + } +} + int ui_browser__show(struct ui_browser *self, const char *title, const char *helpline, ...) 
{ va_list ap; + int keys[] = { NEWT_KEY_UP, NEWT_KEY_DOWN, NEWT_KEY_PGUP, + NEWT_KEY_PGDN, NEWT_KEY_HOME, NEWT_KEY_END, ' ', + NEWT_KEY_LEFT, NEWT_KEY_ESCAPE, 'q', CTRL('c'), 0 }; - if (self->form != NULL) { + if (self->form != NULL) newtFormDestroy(self->form); - newtPopWindow(); - } + ui_browser__refresh_dimensions(self); - newtCenteredWindow(self->width, self->height, title); - self->form = newt_form__new(); + self->form = newtForm(NULL, NULL, 0); if (self->form == NULL) return -1; - self->sb = newtVerticalScrollbar(self->width, 0, self->height, + self->sb = newtVerticalScrollbar(self->width, 1, self->height, HE_COLORSET_NORMAL, HE_COLORSET_SELECTED); if (self->sb == NULL) return -1; - newtFormAddHotKey(self->form, NEWT_KEY_UP); - newtFormAddHotKey(self->form, NEWT_KEY_DOWN); - newtFormAddHotKey(self->form, NEWT_KEY_PGUP); - newtFormAddHotKey(self->form, NEWT_KEY_PGDN); - newtFormAddHotKey(self->form, NEWT_KEY_HOME); - newtFormAddHotKey(self->form, NEWT_KEY_END); - newtFormAddHotKey(self->form, ' '); + SLsmg_gotorc(0, 0); + ui_browser__set_color(self, NEWT_COLORSET_ROOT); + slsmg_write_nstring(title, self->width); + + ui_browser__add_exit_keys(self, keys); newtFormAddComponent(self->form, self->sb); va_start(ap, helpline); @@ -185,7 +195,6 @@ int ui_browser__show(struct ui_browser *self, const char *title, void ui_browser__hide(struct ui_browser *self) { newtFormDestroy(self->form); - newtPopWindow(); self->form = NULL; ui_helpline__pop(); } @@ -196,28 +205,28 @@ int ui_browser__refresh(struct ui_browser *self) newtScrollbarSet(self->sb, self->index, self->nr_entries - 1); row = self->refresh(self); - SLsmg_set_color(HE_COLORSET_NORMAL); + ui_browser__set_color(self, HE_COLORSET_NORMAL); SLsmg_fill_region(self->y + row, self->x, self->height - row, self->width, ' '); return 0; } -int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) +int ui_browser__run(struct ui_browser *self) { + struct newtExitStruct es; + if (ui_browser__refresh(self) < 0) return -1; while (1) { off_t offset; - newtFormRun(self->form, es); + newtFormRun(self->form, &es); - if (es->reason != NEWT_EXIT_HOTKEY) + if (es.reason != NEWT_EXIT_HOTKEY) break; - if (is_exit_key(es->u.key)) - return es->u.key; - switch (es->u.key) { + switch (es.u.key) { case NEWT_KEY_DOWN: if (self->index == self->nr_entries - 1) break; @@ -274,12 +283,12 @@ int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es) self->seek(self, -offset, SEEK_END); break; default: - return es->u.key; + return es.u.key; } if (ui_browser__refresh(self) < 0) return -1; } - return 0; + return -1; } unsigned int ui_browser__list_head_refresh(struct ui_browser *self) @@ -294,7 +303,7 @@ unsigned int ui_browser__list_head_refresh(struct ui_browser *self) pos = self->top; list_for_each_from(pos, head) { - SLsmg_gotorc(self->y + row, self->x); + ui_browser__gotorc(self, row, 0); self->write(self, pos, row); if (++row == self->height) break; diff --git a/tools/perf/util/ui/browser.h b/tools/perf/util/ui/browser.h index 0b9f829..0dc7e4d 100644 --- a/tools/perf/util/ui/browser.h +++ b/tools/perf/util/ui/browser.h @@ -25,16 +25,21 @@ struct ui_browser { }; -int ui_browser__percent_color(double percent, bool current); +void ui_browser__set_color(struct ui_browser *self, int color); +void ui_browser__set_percent_color(struct ui_browser *self, + double percent, bool current); bool ui_browser__is_current_entry(struct ui_browser *self, unsigned row); void ui_browser__refresh_dimensions(struct ui_browser *self); void 
ui_browser__reset_index(struct ui_browser *self); +void ui_browser__gotorc(struct ui_browser *self, int y, int x); +void ui_browser__add_exit_key(struct ui_browser *self, int key); +void ui_browser__add_exit_keys(struct ui_browser *self, int keys[]); int ui_browser__show(struct ui_browser *self, const char *title, const char *helpline, ...); void ui_browser__hide(struct ui_browser *self); int ui_browser__refresh(struct ui_browser *self); -int ui_browser__run(struct ui_browser *self, struct newtExitStruct *es); +int ui_browser__run(struct ui_browser *self); void ui_browser__rb_tree_seek(struct ui_browser *self, off_t offset, int whence); unsigned int ui_browser__rb_tree_refresh(struct ui_browser *self); diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index a90273e..82b78f9 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -40,14 +40,12 @@ static void annotate_browser__write(struct ui_browser *self, void *entry, int ro if (ol->offset != -1) { struct objdump_line_rb_node *olrb = objdump_line__rb(ol); - int color = ui_browser__percent_color(olrb->percent, current_entry); - SLsmg_set_color(color); + ui_browser__set_percent_color(self, olrb->percent, current_entry); slsmg_printf(" %7.2f ", olrb->percent); if (!current_entry) - SLsmg_set_color(HE_COLORSET_CODE); + ui_browser__set_color(self, HE_COLORSET_CODE); } else { - int color = ui_browser__percent_color(0, current_entry); - SLsmg_set_color(color); + ui_browser__set_percent_color(self, 0, current_entry); slsmg_write_nstring(" ", 9); } @@ -135,32 +133,31 @@ static void annotate_browser__set_top(struct annotate_browser *self, self->curr_hot = nd; } -static int annotate_browser__run(struct annotate_browser *self, - struct newtExitStruct *es) +static int annotate_browser__run(struct annotate_browser *self) { struct rb_node *nd; struct hist_entry *he = self->b.priv; + int key; if (ui_browser__show(&self->b, he->ms.sym->name, - "<- or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) + "<-, -> or ESC: exit, TAB/shift+TAB: cycle thru samples") < 0) return -1; - - newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); - newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); + /* + * To allow builtin-annotate to cycle thru multiple symbols by + * examining the exit key for this function. 
+ */ + ui_browser__add_exit_key(&self->b, NEWT_KEY_RIGHT); nd = self->curr_hot; if (nd) { - newtFormAddHotKey(self->b.form, NEWT_KEY_TAB); - newtFormAddHotKey(self->b.form, NEWT_KEY_UNTAB); + int tabs[] = { NEWT_KEY_TAB, NEWT_KEY_UNTAB, 0 }; + ui_browser__add_exit_keys(&self->b, tabs); } while (1) { - ui_browser__run(&self->b, es); - - if (es->reason != NEWT_EXIT_HOTKEY) - break; + key = ui_browser__run(&self->b); - switch (es->u.key) { + switch (key) { case NEWT_KEY_TAB: nd = rb_prev(nd); if (nd == NULL) @@ -179,12 +176,11 @@ static int annotate_browser__run(struct annotate_browser *self, } out: ui_browser__hide(&self->b); - return es->u.key; + return key; } int hist_entry__tui_annotate(struct hist_entry *self) { - struct newtExitStruct es; struct objdump_line *pos, *n; struct objdump_line_rb_node *rbpos; LIST_HEAD(head); @@ -232,7 +228,7 @@ int hist_entry__tui_annotate(struct hist_entry *self) annotate_browser__set_top(&browser, browser.curr_hot); browser.b.width += 18; /* Percentage */ - ret = annotate_browser__run(&browser, &es); + ret = annotate_browser__run(&browser); list_for_each_entry_safe(pos, n, &head, node) { list_del(&pos->node); objdump_line__free(pos); diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c index dafdf67..ebda8c3 100644 --- a/tools/perf/util/ui/browsers/hists.c +++ b/tools/perf/util/ui/browsers/hists.c @@ -58,6 +58,11 @@ static char callchain_list__folded(const struct callchain_list *self) return map_symbol__folded(&self->ms); } +static void map_symbol__set_folding(struct map_symbol *self, bool unfold) +{ + self->unfolded = unfold ? self->has_children : false; +} + static int callchain_node__count_rows_rb_tree(struct callchain_node *self) { int n = 0; @@ -129,16 +134,16 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *se for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); struct callchain_list *chain; - int first = true; + bool first = true; list_for_each_entry(chain, &child->val, list) { if (first) { first = false; chain->ms.has_children = chain->list.next != &child->val || - rb_first(&child->rb_root) != NULL; + !RB_EMPTY_ROOT(&child->rb_root); } else chain->ms.has_children = chain->list.next == &child->val && - rb_first(&child->rb_root) != NULL; + !RB_EMPTY_ROOT(&child->rb_root); } callchain_node__init_have_children_rb_tree(child); @@ -150,7 +155,7 @@ static void callchain_node__init_have_children(struct callchain_node *self) struct callchain_list *chain; list_for_each_entry(chain, &self->val, list) - chain->ms.has_children = rb_first(&self->rb_root) != NULL; + chain->ms.has_children = !RB_EMPTY_ROOT(&self->rb_root); callchain_node__init_have_children_rb_tree(self); } @@ -168,6 +173,7 @@ static void callchain__init_have_children(struct rb_root *self) static void hist_entry__init_have_children(struct hist_entry *self) { if (!self->init_have_children) { + self->ms.has_children = !RB_EMPTY_ROOT(&self->sorted_chain); callchain__init_have_children(&self->sorted_chain); self->init_have_children = true; } @@ -195,43 +201,114 @@ static bool hist_browser__toggle_fold(struct hist_browser *self) return false; } -static int hist_browser__run(struct hist_browser *self, const char *title, - struct newtExitStruct *es) +static int callchain_node__set_folding_rb_tree(struct callchain_node *self, bool unfold) +{ + int n = 0; + struct rb_node *nd; + + for (nd = rb_first(&self->rb_root); nd; nd = rb_next(nd)) { + struct 
callchain_node *child = rb_entry(nd, struct callchain_node, rb_node); + struct callchain_list *chain; + bool has_children = false; + + list_for_each_entry(chain, &child->val, list) { + ++n; + map_symbol__set_folding(&chain->ms, unfold); + has_children = chain->ms.has_children; + } + + if (has_children) + n += callchain_node__set_folding_rb_tree(child, unfold); + } + + return n; +} + +static int callchain_node__set_folding(struct callchain_node *node, bool unfold) +{ + struct callchain_list *chain; + bool has_children = false; + int n = 0; + + list_for_each_entry(chain, &node->val, list) { + ++n; + map_symbol__set_folding(&chain->ms, unfold); + has_children = chain->ms.has_children; + } + + if (has_children) + n += callchain_node__set_folding_rb_tree(node, unfold); + + return n; +} + +static int callchain__set_folding(struct rb_root *chain, bool unfold) +{ + struct rb_node *nd; + int n = 0; + + for (nd = rb_first(chain); nd; nd = rb_next(nd)) { + struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node); + n += callchain_node__set_folding(node, unfold); + } + + return n; +} + +static void hist_entry__set_folding(struct hist_entry *self, bool unfold) +{ + hist_entry__init_have_children(self); + map_symbol__set_folding(&self->ms, unfold); + + if (self->ms.has_children) { + int n = callchain__set_folding(&self->sorted_chain, unfold); + self->nr_rows = unfold ? n : 0; + } else + self->nr_rows = 0; +} + +static void hists__set_folding(struct hists *self, bool unfold) +{ + struct rb_node *nd; + + self->nr_entries = 0; + + for (nd = rb_first(&self->entries); nd; nd = rb_next(nd)) { + struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); + hist_entry__set_folding(he, unfold); + self->nr_entries += 1 + he->nr_rows; + } +} + +static void hist_browser__set_folding(struct hist_browser *self, bool unfold) +{ + hists__set_folding(self->hists, unfold); + self->b.nr_entries = self->hists->nr_entries; + /* Go to the start, we may be way after valid entries after a collapse */ + ui_browser__reset_index(&self->b); +} + +static int hist_browser__run(struct hist_browser *self, const char *title) { - char str[256], unit; - unsigned long nr_events = self->hists->stats.nr_events[PERF_RECORD_SAMPLE]; + int key; + int exit_keys[] = { 'a', '?', 'h', 'C', 'd', 'D', 'E', 't', + NEWT_KEY_ENTER, NEWT_KEY_RIGHT, NEWT_KEY_LEFT, 0, }; self->b.entries = &self->hists->entries; self->b.nr_entries = self->hists->nr_entries; hist_browser__refresh_dimensions(self); - nr_events = convert_unit(nr_events, &unit); - snprintf(str, sizeof(str), "Events: %lu%c ", - nr_events, unit); - newtDrawRootText(0, 0, str); - if (ui_browser__show(&self->b, title, "Press '?' 
for help on key bindings") < 0) return -1; - newtFormAddHotKey(self->b.form, 'a'); - newtFormAddHotKey(self->b.form, '?'); - newtFormAddHotKey(self->b.form, 'h'); - newtFormAddHotKey(self->b.form, 'd'); - newtFormAddHotKey(self->b.form, 'D'); - newtFormAddHotKey(self->b.form, 't'); - - newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); - newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); - newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER); + ui_browser__add_exit_keys(&self->b, exit_keys); while (1) { - ui_browser__run(&self->b, es); + key = ui_browser__run(&self->b); - if (es->reason != NEWT_EXIT_HOTKEY) - break; - switch (es->u.key) { + switch (key) { case 'D': { /* Debug */ static int seq; struct hist_entry *h = rb_entry(self->b.top, @@ -245,18 +322,26 @@ static int hist_browser__run(struct hist_browser *self, const char *title, self->b.top_idx, h->row_offset, h->nr_rows); } - continue; + break; + case 'C': + /* Collapse the whole world. */ + hist_browser__set_folding(self, false); + break; + case 'E': + /* Expand the whole world. */ + hist_browser__set_folding(self, true); + break; case NEWT_KEY_ENTER: if (hist_browser__toggle_fold(self)) break; /* fall thru */ default: - return 0; + goto out; } } - +out: ui_browser__hide(&self->b); - return 0; + return key; } static char *callchain_list__sym_name(struct callchain_list *self, @@ -306,15 +391,10 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self, int color; bool was_first = first; - if (first) { + if (first) first = false; - chain->ms.has_children = chain->list.next != &child->val || - rb_first(&child->rb_root) != NULL; - } else { + else extra_offset = LEVEL_OFFSET_STEP; - chain->ms.has_children = chain->list.next == &child->val && - rb_first(&child->rb_root) != NULL; - } folded_sign = callchain_list__folded(chain); if (*row_offset != 0) { @@ -341,8 +421,8 @@ static int hist_browser__show_callchain_node_rb_tree(struct hist_browser *self, *is_current_entry = true; } - SLsmg_set_color(color); - SLsmg_gotorc(self->b.y + row, self->b.x); + ui_browser__set_color(&self->b, color); + ui_browser__gotorc(&self->b, row, 0); slsmg_write_nstring(" ", offset + extra_offset); slsmg_printf("%c ", folded_sign); slsmg_write_nstring(str, width); @@ -384,12 +464,7 @@ static int hist_browser__show_callchain_node(struct hist_browser *self, list_for_each_entry(chain, &node->val, list) { char ipstr[BITS_PER_LONG / 4 + 1], *s; int color; - /* - * FIXME: This should be moved to somewhere else, - * probably when the callchain is created, so as not to - * traverse it all over again - */ - chain->ms.has_children = rb_first(&node->rb_root) != NULL; + folded_sign = callchain_list__folded(chain); if (*row_offset != 0) { @@ -405,8 +480,8 @@ static int hist_browser__show_callchain_node(struct hist_browser *self, } s = callchain_list__sym_name(chain, ipstr, sizeof(ipstr)); - SLsmg_gotorc(self->b.y + row, self->b.x); - SLsmg_set_color(color); + ui_browser__gotorc(&self->b, row, 0); + ui_browser__set_color(&self->b, color); slsmg_write_nstring(" ", offset); slsmg_printf("%c ", folded_sign); slsmg_write_nstring(s, width - 2); @@ -465,7 +540,7 @@ static int hist_browser__show_entry(struct hist_browser *self, } if (symbol_conf.use_callchain) { - entry->ms.has_children = !RB_EMPTY_ROOT(&entry->sorted_chain); + hist_entry__init_have_children(entry); folded_sign = hist_entry__folded(entry); } @@ -484,8 +559,8 @@ static int hist_browser__show_entry(struct hist_browser *self, color = HE_COLORSET_NORMAL; } - SLsmg_set_color(color); - SLsmg_gotorc(self->b.y + 
row, self->b.x); + ui_browser__set_color(&self->b, color); + ui_browser__gotorc(&self->b, row, 0); if (symbol_conf.use_callchain) { slsmg_printf("%c ", folded_sign); width -= 2; @@ -687,8 +762,6 @@ static struct hist_browser *hist_browser__new(struct hists *hists) static void hist_browser__delete(struct hist_browser *self) { - newtFormDestroy(self->b.form); - newtPopWindow(); free(self); } @@ -702,21 +775,26 @@ static struct thread *hist_browser__selected_thread(struct hist_browser *self) return self->he_selection->thread; } -static int hist_browser__title(char *bf, size_t size, const char *ev_name, - const struct dso *dso, const struct thread *thread) +static int hists__browser_title(struct hists *self, char *bf, size_t size, + const char *ev_name, const struct dso *dso, + const struct thread *thread) { - int printed = 0; + char unit; + int printed; + unsigned long nr_events = self->stats.nr_events[PERF_RECORD_SAMPLE]; + + nr_events = convert_unit(nr_events, &unit); + printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name); if (thread) printed += snprintf(bf + printed, size - printed, - "Thread: %s(%d)", - (thread->comm_set ? thread->comm : ""), + ", Thread: %s(%d)", + (thread->comm_set ? thread->comm : ""), thread->pid); if (dso) printed += snprintf(bf + printed, size - printed, - "%sDSO: %s", thread ? " " : "", - dso->short_name); - return printed ?: snprintf(bf, size, "Event: %s", ev_name); + ", DSO: %s", dso->short_name); + return printed; } int hists__browse(struct hists *self, const char *helpline, const char *ev_name) @@ -725,7 +803,6 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name) struct pstack *fstack; const struct thread *thread_filter = NULL; const struct dso *dso_filter = NULL; - struct newtExitStruct es; char msg[160]; int key = -1; @@ -738,9 +815,8 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name) ui_helpline__push(helpline); - hist_browser__title(msg, sizeof(msg), ev_name, - dso_filter, thread_filter); - + hists__browser_title(self, msg, sizeof(msg), ev_name, + dso_filter, thread_filter); while (1) { const struct thread *thread; const struct dso *dso; @@ -749,70 +825,63 @@ int hists__browse(struct hists *self, const char *helpline, const char *ev_name) annotate = -2, zoom_dso = -2, zoom_thread = -2, browse_map = -2; - if (hist_browser__run(browser, msg, &es)) - break; + key = hist_browser__run(browser, msg); thread = hist_browser__selected_thread(browser); dso = browser->selection->map ? 
browser->selection->map->dso : NULL; - if (es.reason == NEWT_EXIT_HOTKEY) { - key = es.u.key; - - switch (key) { - case NEWT_KEY_F1: - goto do_help; - case NEWT_KEY_TAB: - case NEWT_KEY_UNTAB: - /* - * Exit the browser, let hists__browser_tree - * go to the next or previous - */ - goto out_free_stack; - default:; - } - - switch (key) { - case 'a': - if (browser->selection->map == NULL && - browser->selection->map->dso->annotate_warned) - continue; - goto do_annotate; - case 'd': - goto zoom_dso; - case 't': - goto zoom_thread; - case 'h': - case '?': -do_help: - ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n" - "<- Zoom out\n" - "a Annotate current symbol\n" - "h/?/F1 Show this window\n" - "d Zoom into current DSO\n" - "t Zoom into current Thread\n" - "q/CTRL+C Exit browser"); + switch (key) { + case NEWT_KEY_TAB: + case NEWT_KEY_UNTAB: + /* + * Exit the browser, let hists__browser_tree + * go to the next or previous + */ + goto out_free_stack; + case 'a': + if (browser->selection->map == NULL && + browser->selection->map->dso->annotate_warned) continue; - default:; - } - if (is_exit_key(key)) { - if (key == NEWT_KEY_ESCAPE && - !ui__dialog_yesno("Do you really want to exit?")) - continue; - break; - } - - if (es.u.key == NEWT_KEY_LEFT) { - const void *top; + goto do_annotate; + case 'd': + goto zoom_dso; + case 't': + goto zoom_thread; + case NEWT_KEY_F1: + case 'h': + case '?': + ui__help_window("-> Zoom into DSO/Threads & Annotate current symbol\n" + "<- Zoom out\n" + "a Annotate current symbol\n" + "h/?/F1 Show this window\n" + "C Collapse all callchains\n" + "E Expand all callchains\n" + "d Zoom into current DSO\n" + "t Zoom into current Thread\n" + "q/CTRL+C Exit browser"); + continue; + case NEWT_KEY_ENTER: + case NEWT_KEY_RIGHT: + /* menu */ + break; + case NEWT_KEY_LEFT: { + const void *top; - if (pstack__empty(fstack)) - continue; - top = pstack__pop(fstack); - if (top == &dso_filter) - goto zoom_out_dso; - if (top == &thread_filter) - goto zoom_out_thread; + if (pstack__empty(fstack)) continue; - } + top = pstack__pop(fstack); + if (top == &dso_filter) + goto zoom_out_dso; + if (top == &thread_filter) + goto zoom_out_thread; + continue; + } + case NEWT_KEY_ESCAPE: + if (!ui__dialog_yesno("Do you really want to exit?")) + continue; + /* Fall thru */ + default: + goto out_free_stack; } if (browser->selection->sym != NULL && @@ -885,8 +954,8 @@ zoom_out_dso: pstack__push(fstack, &dso_filter); } hists__filter_by_dso(self, dso_filter); - hist_browser__title(msg, sizeof(msg), ev_name, - dso_filter, thread_filter); + hists__browser_title(self, msg, sizeof(msg), ev_name, + dso_filter, thread_filter); hist_browser__reset(browser); } else if (choice == zoom_thread) { zoom_thread: @@ -903,8 +972,8 @@ zoom_out_thread: pstack__push(fstack, &thread_filter); } hists__filter_by_thread(self, thread_filter); - hist_browser__title(msg, sizeof(msg), ev_name, - dso_filter, thread_filter); + hists__browser_title(self, msg, sizeof(msg), ev_name, + dso_filter, thread_filter); hist_browser__reset(browser); } } @@ -925,10 +994,6 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help) const char *ev_name = __event_name(hists->type, hists->config); key = hists__browse(hists, help, ev_name); - - if (is_exit_key(key)) - break; - switch (key) { case NEWT_KEY_TAB: next = rb_next(nd); @@ -940,7 +1005,7 @@ int hists__tui_browse_tree(struct rb_root *self, const char *help) continue; nd = rb_prev(nd); default: - break; + return key; } } diff --git 
a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c index 142b825..e35437d 100644 --- a/tools/perf/util/ui/browsers/map.c +++ b/tools/perf/util/ui/browsers/map.c @@ -1,6 +1,5 @@ #include "../libslang.h" #include <elf.h> -#include <newt.h> #include <sys/ttydefaults.h> #include <ctype.h> #include <string.h> @@ -47,7 +46,6 @@ out_free_form: struct map_browser { struct ui_browser b; struct map *map; - u16 namelen; u8 addrlen; }; @@ -56,14 +54,16 @@ static void map_browser__write(struct ui_browser *self, void *nd, int row) struct symbol *sym = rb_entry(nd, struct symbol, rb_node); struct map_browser *mb = container_of(self, struct map_browser, b); bool current_entry = ui_browser__is_current_entry(self, row); - int color = ui_browser__percent_color(0, current_entry); + int width; - SLsmg_set_color(color); + ui_browser__set_percent_color(self, 0, current_entry); slsmg_printf("%*llx %*llx %c ", mb->addrlen, sym->start, mb->addrlen, sym->end, sym->binding == STB_GLOBAL ? 'g' : sym->binding == STB_LOCAL ? 'l' : 'w'); - slsmg_write_nstring(sym->name, mb->namelen); + width = self->width - ((mb->addrlen * 2) + 4); + if (width > 0) + slsmg_write_nstring(sym->name, width); } /* FIXME uber-kludgy, see comment on cmd_report... */ @@ -98,31 +98,29 @@ static int map_browser__search(struct map_browser *self) return 0; } -static int map_browser__run(struct map_browser *self, struct newtExitStruct *es) +static int map_browser__run(struct map_browser *self) { + int key; + if (ui_browser__show(&self->b, self->map->dso->long_name, "Press <- or ESC to exit, %s / to search", verbose ? "" : "restart with -v to use") < 0) return -1; - newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); - newtFormAddHotKey(self->b.form, NEWT_KEY_ENTER); if (verbose) - newtFormAddHotKey(self->b.form, '/'); + ui_browser__add_exit_key(&self->b, '/'); while (1) { - ui_browser__run(&self->b, es); + key = ui_browser__run(&self->b); - if (es->reason != NEWT_EXIT_HOTKEY) - break; - if (verbose && es->u.key == '/') + if (verbose && key == '/') map_browser__search(self); else break; } ui_browser__hide(&self->b); - return 0; + return key; } int map__browse(struct map *self) @@ -136,7 +134,6 @@ int map__browse(struct map *self) }, .map = self, }; - struct newtExitStruct es; struct rb_node *nd; char tmp[BITS_PER_LONG / 4]; u64 maxaddr = 0; @@ -144,8 +141,6 @@ int map__browse(struct map *self) for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) { struct symbol *pos = rb_entry(nd, struct symbol, rb_node); - if (mb.namelen < pos->namelen) - mb.namelen = pos->namelen; if (maxaddr < pos->end) maxaddr = pos->end; if (verbose) { @@ -156,6 +151,5 @@ int map__browse(struct map *self) } mb.addrlen = snprintf(tmp, sizeof(tmp), "%llx", maxaddr); - mb.b.width += mb.addrlen * 2 + 4 + mb.namelen; - return map_browser__run(&mb, &es); + return map_browser__run(&mb); } diff --git a/tools/perf/util/ui/util.c b/tools/perf/util/ui/util.c index 04600e2..9706d9d 100644 --- a/tools/perf/util/ui/util.c +++ b/tools/perf/util/ui/util.c @@ -11,8 +11,6 @@ #include "helpline.h" #include "util.h" -newtComponent newt_form__new(void); - static void newt_form__set_exit_keys(newtComponent self) { newtFormAddHotKey(self, NEWT_KEY_LEFT); @@ -22,7 +20,7 @@ static void newt_form__set_exit_keys(newtComponent self) newtFormAddHotKey(self, CTRL('c')); } -newtComponent newt_form__new(void) +static newtComponent newt_form__new(void) { newtComponent self = newtForm(NULL, NULL, 0); if (self) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index f380fed..7562707 
100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -266,19 +266,6 @@ bool strglobmatch(const char *str, const char *pat); bool strlazymatch(const char *str, const char *pat); unsigned long convert_unit(unsigned long value, char *unit); -#ifndef ESC -#define ESC 27 -#endif - -static inline bool is_exit_key(int key) -{ - char up; - if (key == CTRL('c') || key == ESC) - return true; - up = toupper(key); - return up == 'Q'; -} - #define _STR(x) #x #define STR(x) _STR(x) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 66cf65b..c1f1e3c 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -218,7 +218,6 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) events = file->f_op->poll(file, &irqfd->pt); list_add_tail(&irqfd->list, &kvm->irqfds.items); - spin_unlock_irq(&kvm->irqfds.lock); /* * Check if there was an event already pending on the eventfd @@ -227,6 +226,8 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) if (events & POLLIN) schedule_work(&irqfd->inject); + spin_unlock_irq(&kvm->irqfds.lock); + /* * do not drop the file until the irqfd is fully initialized, otherwise * we might race against the POLLHUP diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d4853a5..5186e72 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1970,10 +1970,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, asmlinkage void kvm_handle_fault_on_reboot(void) { - if (kvm_rebooting) + if (kvm_rebooting) { /* spin while reset goes on */ + local_irq_enable(); while (true) ; + } /* Fault while not rebooting. We want the trace. */ BUG(); }
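
The perf/ui hunks above replace the per-call newtFormAddHotKey() setup and the newtExitStruct plumbing with ui_browser__add_exit_keys() plus a ui_browser__run() that returns the terminating key directly, so callers such as the annotate and hists browsers dispatch on that return value. A minimal, self-contained sketch of that caller contract follows; the key constants and browser_run_stub() below are illustrative stand-ins, not the real perf or newt definitions.

/*
 * Illustrative stand-ins only; NOT the perf/newt API. The point is the
 * control flow introduced by the patch: the run loop returns whichever
 * registered exit key ended the form, and the caller dispatches on it.
 */
#include <stdio.h>

enum { STUB_KEY_TAB = 9, STUB_KEY_RIGHT = 1001, STUB_KEY_ESC = 27 };

/* Stand-in for ui_browser__run(): pretend the user pressed these keys. */
static int browser_run_stub(void)
{
	static const int pressed[] = { STUB_KEY_TAB, STUB_KEY_RIGHT, STUB_KEY_ESC };
	static unsigned int i;

	return pressed[i++];
}

int main(void)
{
	while (1) {
		int key = browser_run_stub();

		switch (key) {
		case STUB_KEY_TAB:
			printf("TAB: cycle to the next entry\n");
			continue;
		case STUB_KEY_RIGHT:
			printf("RIGHT: zoom into the current selection\n");
			continue;
		default:
			printf("key %d not handled here, returning it to the caller\n", key);
			return 0;
		}
	}
}

The patched browsers follow the same shape: keys they handle restart the loop, and anything else is passed back so the outer loop (for example hists__tui_browse_tree()) can decide whether to move to the next event or exit.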