152 files changed, 2237 insertions, 1215 deletions
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 6a0ad47..300e170 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -8,7 +8,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \ kernel-hacking.xml kernel-locking.xml deviceiobook.xml \ - procfs-guide.xml writing_usb_driver.xml \ + procfs-guide.xml writing_usb_driver.xml networking.xml \ kernel-api.xml filesystems.xml lsm.xml usb.xml \ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl index 5eaef87..5e87ad5 100644 --- a/Documentation/DocBook/filesystems.tmpl +++ b/Documentation/DocBook/filesystems.tmpl @@ -398,4 +398,24 @@ an example. </chapter> + <chapter id="splice"> + <title>splice API</title> + <para> + splice is a method for moving blocks of data around inside the + kernel, without continually transferring them between the kernel + and user space. + </para> +!Ffs/splice.c + </chapter> + + <chapter id="pipes"> + <title>pipes API</title> + <para> + Pipe interfaces are all for in-kernel (builtin image) use. + They are not exported for use by modules. + </para> +!Iinclude/linux/pipe_fs_i.h +!Ffs/pipe.c + </chapter> + </book> diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 059aaf2..f31601e 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -204,65 +204,6 @@ X!Ilib/string.c </sect1> </chapter> - <chapter id="netcore"> - <title>Linux Networking</title> - <sect1><title>Networking Base Types</title> -!Iinclude/linux/net.h - </sect1> - <sect1><title>Socket Buffer Functions</title> -!Iinclude/linux/skbuff.h -!Iinclude/net/sock.h -!Enet/socket.c -!Enet/core/skbuff.c -!Enet/core/sock.c -!Enet/core/datagram.c -!Enet/core/stream.c - </sect1> - <sect1><title>Socket Filter</title> -!Enet/core/filter.c - </sect1> - <sect1><title>Generic Network Statistics</title> -!Iinclude/linux/gen_stats.h -!Enet/core/gen_stats.c -!Enet/core/gen_estimator.c - </sect1> - <sect1><title>SUN RPC subsystem</title> -<!-- The !D functionality is not perfect, garbage has to be protected by comments -!Dnet/sunrpc/sunrpc_syms.c ---> -!Enet/sunrpc/xdr.c -!Enet/sunrpc/svcsock.c -!Enet/sunrpc/sched.c - </sect1> - </chapter> - - <chapter id="netdev"> - <title>Network device support</title> - <sect1><title>Driver Support</title> -!Enet/core/dev.c -!Enet/ethernet/eth.c -!Enet/sched/sch_generic.c -!Iinclude/linux/etherdevice.h -!Iinclude/linux/netdevice.h - </sect1> - <sect1><title>PHY Support</title> -!Edrivers/net/phy/phy.c -!Idrivers/net/phy/phy.c -!Edrivers/net/phy/phy_device.c -!Idrivers/net/phy/phy_device.c -!Edrivers/net/phy/mdio_bus.c -!Idrivers/net/phy/mdio_bus.c - </sect1> -<!-- FIXME: Removed for now since no structured comments in source - <sect1><title>Wireless</title> -X!Enet/core/wireless.c - </sect1> ---> - <sect1><title>Synchronous PPP</title> -!Edrivers/net/wan/syncppp.c - </sect1> - </chapter> - <chapter id="modload"> <title>Module Support</title> <sect1><title>Module Loading</title> @@ -508,11 +449,6 @@ X!Isound/sound_firmware.c !Edrivers/serial/8250.c </chapter> - <chapter id="z85230"> - <title>Z85230 Support Library</title> -!Edrivers/net/wan/z85230.c - </chapter> - <chapter id="fbdev"> <title>Frame Buffer Library</title> @@ -712,24 +648,4 @@ X!Idrivers/video/console/fonts.c !Edrivers/i2c/i2c-core.c </chapter> - <chapter id="splice"> 
- <title>splice API</title> - <para> - splice is a method for moving blocks of data around inside the - kernel, without continually transferring them between the kernel - and user space. - </para> -!Ffs/splice.c - </chapter> - - <chapter id="pipes"> - <title>pipes API</title> - <para> - Pipe interfaces are all for in-kernel (builtin image) use. - They are not exported for use by modules. - </para> -!Iinclude/linux/pipe_fs_i.h -!Ffs/pipe.c - </chapter> - </book> diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl new file mode 100644 index 0000000..f24f9e8 --- /dev/null +++ b/Documentation/DocBook/networking.tmpl @@ -0,0 +1,106 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN" + "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []> + +<book id="LinuxNetworking"> + <bookinfo> + <title>Linux Networking and Network Devices APIs</title> + + <legalnotice> + <para> + This documentation is free software; you can redistribute + it and/or modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 2 of the License, or (at your option) any later + version. + </para> + + <para> + This program is distributed in the hope that it will be + useful, but WITHOUT ANY WARRANTY; without even the implied + warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the GNU General Public License for more details. + </para> + + <para> + You should have received a copy of the GNU General Public + License along with this program; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, + MA 02111-1307 USA + </para> + + <para> + For more details see the file COPYING in the source + distribution of Linux. 
+ </para> + </legalnotice> + </bookinfo> + +<toc></toc> + + <chapter id="netcore"> + <title>Linux Networking</title> + <sect1><title>Networking Base Types</title> +!Iinclude/linux/net.h + </sect1> + <sect1><title>Socket Buffer Functions</title> +!Iinclude/linux/skbuff.h +!Iinclude/net/sock.h +!Enet/socket.c +!Enet/core/skbuff.c +!Enet/core/sock.c +!Enet/core/datagram.c +!Enet/core/stream.c + </sect1> + <sect1><title>Socket Filter</title> +!Enet/core/filter.c + </sect1> + <sect1><title>Generic Network Statistics</title> +!Iinclude/linux/gen_stats.h +!Enet/core/gen_stats.c +!Enet/core/gen_estimator.c + </sect1> + <sect1><title>SUN RPC subsystem</title> +<!-- The !D functionality is not perfect, garbage has to be protected by comments +!Dnet/sunrpc/sunrpc_syms.c +--> +!Enet/sunrpc/xdr.c +!Enet/sunrpc/svc_xprt.c +!Enet/sunrpc/xprt.c +!Enet/sunrpc/sched.c +!Enet/sunrpc/socklib.c +!Enet/sunrpc/stats.c +!Enet/sunrpc/rpc_pipe.c +!Enet/sunrpc/rpcb_clnt.c +!Enet/sunrpc/clnt.c + </sect1> + </chapter> + + <chapter id="netdev"> + <title>Network device support</title> + <sect1><title>Driver Support</title> +!Enet/core/dev.c +!Enet/ethernet/eth.c +!Enet/sched/sch_generic.c +!Iinclude/linux/etherdevice.h +!Iinclude/linux/netdevice.h + </sect1> + <sect1><title>PHY Support</title> +!Edrivers/net/phy/phy.c +!Idrivers/net/phy/phy.c +!Edrivers/net/phy/phy_device.c +!Idrivers/net/phy/phy_device.c +!Edrivers/net/phy/mdio_bus.c +!Idrivers/net/phy/mdio_bus.c + </sect1> +<!-- FIXME: Removed for now since no structured comments in source + <sect1><title>Wireless</title> +X!Enet/core/wireless.c + </sect1> +--> + <sect1><title>Synchronous PPP</title> +!Edrivers/net/wan/syncppp.c + </sect1> + </chapter> + +</book> diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index d0634a5..c64158e 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt @@ -25,7 +25,7 @@ the NMI handler to take the default machine-specific action. This nmi_callback variable is a global function pointer to the current NMI handler. - fastcall void do_nmi(struct pt_regs * regs, long error_code) + void do_nmi(struct pt_regs * regs, long error_code) { int cpu; diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index 34e06d2..da10e07 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist @@ -20,7 +20,11 @@ kernel patches. 4: ppc64 is a good architecture for cross-compilation checking because it tends to use `unsigned long' for 64-bit quantities. -5: Matches kernel coding style(!) +5: Check your patch for general style as detailed in + Documentation/CodingStyle. Check for trivial violations with the + patch style checker prior to submission (scripts/checkpatch.pl). + You should be able to justify all violations that remain in + your patch. 6: Any new or modified CONFIG options don't muck up the config menu. @@ -79,13 +83,3 @@ kernel patches. 23: Tested after it has been merged into the -mm patchset to make sure that it still works with all of the other queued patches and various changes in the VM, VFS, and other subsystems. - -24: Avoid whitespace damage such as indenting with spaces or whitespace - at the end of lines. You can test this by feeding the patch to - "git apply --check --whitespace=error-all" - -25: Check your patch for general style as detailed in - Documentation/CodingStyle. Check for trivial violations with the - patch style checker prior to submission (scripts/checkpatch.pl). 
- You should be able to justify all violations that remain in - your patch. diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt index 30c1017..83f515c 100644 --- a/Documentation/kprobes.txt +++ b/Documentation/kprobes.txt @@ -92,9 +92,8 @@ handler has run. Up to MAX_STACK_SIZE bytes are copied -- e.g., 64 bytes on i386. Note that the probed function's args may be passed on the stack -or in registers (e.g., for x86_64 or for an i386 fastcall function). -The jprobe will work in either case, so long as the handler's -prototype matches that of the probed function. +or in registers. The jprobe will work in either case, so long as the +handler's prototype matches that of the probed function. 1.3 Return Probes @@ -270,9 +269,9 @@ Kprobes runs the handler whose address is jp->entry. The handler should have the same arg list and return type as the probed function; and just before it returns, it must call jprobe_return(). (The handler never actually returns, since jprobe_return() returns -control to Kprobes.) If the probed function is declared asmlinkage, -fastcall, or anything else that affects how args are passed, the -handler's declaration must match. +control to Kprobes.) If the probed function is declared asmlinkage +or anything else that affects how args are passed, the handler's +declaration must match. register_jprobe() returns 0 on success, or a negative errno otherwise. diff --git a/Documentation/sched-rt-group.txt b/Documentation/sched-rt-group.txt new file mode 100644 index 0000000..1c6332f --- /dev/null +++ b/Documentation/sched-rt-group.txt @@ -0,0 +1,59 @@ + + +Real-Time group scheduling. + +The problem space: + +In order to schedule multiple groups of realtime tasks each group must +be assigned a fixed portion of the CPU time available. Without a minimum +guarantee a realtime group can obviously fall short. A fuzzy upper limit +is of no use since it cannot be relied upon. Which leaves us with just +the single fixed portion. + +CPU time is divided by means of specifying how much time can be spent +running in a given period. Say a frame fixed realtime renderer must +deliver 25 frames a second, which yields a period of 0.04s. Now say +it will also have to play some music and respond to input, leaving it +with around 80% for the graphics. We can then give this group a runtime +of 0.8 * 0.04s = 0.032s. + +This way the graphics group will have a 0.04s period with a 0.032s runtime +limit. + +Now if the audio thread needs to refill the DMA buffer every 0.005s, but +needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s += 0.00015s. + + +The Interface: + +system wide: + +/proc/sys/kernel/sched_rt_period_ms +/proc/sys/kernel/sched_rt_runtime_us + +CONFIG_FAIR_USER_SCHED + +/sys/kernel/uids/<uid>/cpu_rt_runtime_us + +or + +CONFIG_FAIR_CGROUP_SCHED + +/cgroup/<cgroup>/cpu.rt_runtime_us + +[ time is specified in us because the interface is s32; this gives an + operating range of ~35m to 1us ] + +The period takes values in [ 1, INT_MAX ], runtime in [ -1, INT_MAX - 1 ]. + +A runtime of -1 specifies runtime == period, ie. no limit. + +New groups get the period from /proc/sys/kernel/sched_rt_period_us and +a runtime of 0. + +Settings are constrained to: + + \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period + +in order to keep the configuration schedulable. 
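The new sched-rt-group.txt above boils down to one computation: a group's runtime is its CPU share times its period, expressed in microseconds. A minimal user-space sketch of that arithmetic, assuming a CONFIG_FAIR_CGROUP_SCHED setup with the cgroup filesystem mounted at /cgroup and a hypothetical "graphics" group (both the mount point and the group name are illustrative, not part of the patch; cpu.rt_runtime_us is the interface file named in the document):

	/*
	 * Sketch only: derive a group's rt_runtime_us from its period and
	 * desired CPU share, then write it to the (hypothetical) cgroup file.
	 */
	#include <stdio.h>

	int main(void)
	{
		long period_us = 40000;         /* 0.04 s frame period */
		double share = 0.8;             /* 80% of the CPU for graphics */
		long runtime_us = (long)(share * period_us);    /* 32000 us = 0.032 s */

		FILE *f = fopen("/cgroup/graphics/cpu.rt_runtime_us", "w");
		if (!f) {
			perror("fopen");
			return 1;
		}
		fprintf(f, "%ld\n", runtime_us);
		fclose(f);

		printf("runtime = %ld us out of a %ld us period\n",
		       runtime_us, period_us);
		return 0;
	}

Run against a group with a 40000 us period this yields the 0.032 s runtime limit worked out in the text; the audio-thread case (3% of a 5000 us period) follows from the same two lines with different inputs.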
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index dc8801d..276a7e6 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -29,7 +29,7 @@ show up in /proc/sys/kernel: - java-interpreter [ binfmt_java, obsolete ] - kstack_depth_to_print [ X86 only ] - l2cr [ PPC only ] -- modprobe ==> Documentation/kmod.txt +- modprobe ==> Documentation/debugging-modules.txt - msgmax - msgmnb - msgmni diff --git a/MAINTAINERS b/MAINTAINERS index c40f0ae..6680ec4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3561,6 +3561,8 @@ P: Christoph Lameter M: clameter@sgi.com P: Pekka Enberg M: penberg@cs.helsinki.fi +P: Matt Mackall +M: mpm@selenic.com L: linux-mm@kvack.org S: Maintained diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 1dd50d0..75480ca 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -119,13 +119,8 @@ irqreturn_t timer_interrupt(int irq, void *dev) state.partial_tick = delta & ((1UL << FIX_SHIFT) - 1); nticks = delta >> FIX_SHIFT; - while (nticks > 0) { - do_timer(1); -#ifndef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif - nticks--; - } + if (nticks) + do_timer(nticks); /* * If we have an externally synchronized Linux clock, then update @@ -141,6 +136,12 @@ irqreturn_t timer_interrupt(int irq, void *dev) } write_sequnlock(&xtime_lock); + +#ifndef CONFIG_SMP + while (nticks--) + update_process_times(user_mode(get_irq_regs())); +#endif + return IRQ_HANDLED; } diff --git a/arch/blackfin/kernel/time.c b/arch/blackfin/kernel/time.c index 5bd64e3..9bdc8f9 100644 --- a/arch/blackfin/kernel/time.c +++ b/arch/blackfin/kernel/time.c @@ -137,9 +137,6 @@ irqreturn_t timer_interrupt(int irq, void *dummy) do_timer(1); -#ifndef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif profile_tick(CPU_PROFILING); /* @@ -161,6 +158,11 @@ irqreturn_t timer_interrupt(int irq, void *dummy) last_rtc_update = xtime.tv_sec - 600; } write_sequnlock(&xtime_lock); + +#ifndef CONFIG_SMP + update_process_times(user_mode(get_irq_regs())); +#endif + return IRQ_HANDLED; } diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index 925fb01..69f6a4e 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -63,6 +63,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) /* last time the cmos clock got updated */ static long last_rtc_update = 0; + profile_tick(CPU_PROFILING); /* * Here we are in the timer irq handler. 
We just have irqs locally * disabled but we don't know if the timer_bh is running on the other @@ -73,8 +74,6 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) write_seqlock(&xtime_lock); do_timer(1); - update_process_times(user_mode(get_irq_regs())); - profile_tick(CPU_PROFILING); /* * If we have an externally synchronized Linux clock, then update @@ -99,6 +98,9 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) #endif /* CONFIG_HEARTBEAT */ write_sequnlock(&xtime_lock); + + update_process_times(user_mode(get_irq_regs())); + return IRQ_HANDLED; } diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index ef7527b..17725a5 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -105,11 +105,9 @@ SECTIONS SCHED_TEXT LOCK_TEXT #ifdef CONFIG_DEBUG_INFO - *( INIT_TEXT EXIT_TEXT - .exitcall.exit - ) + *(.exitcall.exit) #endif *(.fixup) *(.gnu.warning) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 2d4fcd0..dff9edf 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -232,7 +232,14 @@ config PGTABLE_4 endchoice +if IA64_HP_SIM +config HZ + default 32 +endif + +if !IA64_HP_SIM source kernel/Kconfig.hz +endif config IA64_BRL_EMU bool diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index 89cdbca..0ccfb2a 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -42,14 +42,12 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy) /* last time the cmos clock got updated */ static long last_rtc_update=0; + if (current->pid) + profile_tick(CPU_PROFILING); + write_seqlock(&xtime_lock); do_timer(1); -#ifndef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif - if (current->pid) - profile_tick(CPU_PROFILING); /* * If we have an externally synchronized Linux clock, then update @@ -67,6 +65,10 @@ irqreturn_t arch_timer_interrupt(int irq, void *dummy) } write_sequnlock(&xtime_lock); + +#ifndef CONFIG_SMP + update_process_times(user_mode(get_irq_regs())); +#endif return(IRQ_HANDLED); } diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c index 2b1953f..01974f7 100644 --- a/arch/powerpc/platforms/cell/spufs/sputrace.c +++ b/arch/powerpc/platforms/cell/spufs/sputrace.c @@ -146,34 +146,28 @@ static void sputrace_log_item(const char *name, struct spu_context *ctx, wake_up(&sputrace_wait); } -static void spu_context_event(const struct marker *mdata, - void *private, const char *format, ...) +static void spu_context_event(void *probe_private, void *call_data, + const char *format, va_list *args) { - struct spu_probe *p = mdata->private; - va_list ap; + struct spu_probe *p = probe_private; struct spu_context *ctx; struct spu *spu; - va_start(ap, format); - ctx = va_arg(ap, struct spu_context *); - spu = va_arg(ap, struct spu *); + ctx = va_arg(*args, struct spu_context *); + spu = va_arg(*args, struct spu *); sputrace_log_item(p->name, ctx, spu); - va_end(ap); } -static void spu_context_nospu_event(const struct marker *mdata, - void *private, const char *format, ...) 
+static void spu_context_nospu_event(void *probe_private, void *call_data, + const char *format, va_list *args) { - struct spu_probe *p = mdata->private; - va_list ap; + struct spu_probe *p = probe_private; struct spu_context *ctx; - va_start(ap, format); - ctx = va_arg(ap, struct spu_context *); + ctx = va_arg(*args, struct spu_context *); sputrace_log_item(p->name, ctx, NULL); - va_end(ap); } struct spu_probe spu_probes[] = { @@ -219,10 +213,6 @@ static int __init sputrace_init(void) if (error) printk(KERN_INFO "Unable to register probe %s\n", p->name); - - error = marker_arm(p->name); - if (error) - printk(KERN_INFO "Unable to arm probe %s\n", p->name); } return 0; @@ -238,7 +228,8 @@ static void __exit sputrace_exit(void) int i; for (i = 0; i < ARRAY_SIZE(spu_probes); i++) - marker_probe_unregister(spu_probes[i].name); + marker_probe_unregister(spu_probes[i].name, + spu_probes[i].probe_func, &spu_probes[i]); remove_proc_entry("sputrace", NULL); kfree(sputrace_log); diff --git a/arch/sh/kernel/timers/timer-cmt.c b/arch/sh/kernel/timers/timer-cmt.c index 499e07b..71312324 100644 --- a/arch/sh/kernel/timers/timer-cmt.c +++ b/arch/sh/kernel/timers/timer-cmt.c @@ -100,16 +100,7 @@ static irqreturn_t cmt_timer_interrupt(int irq, void *dev_id) timer_status &= ~0x80; ctrl_outw(timer_status, CMT_CMCSR_0); - /* - * Here we are in the timer irq handler. We just have irqs locally - * disabled but we don't know if the timer_bh is running on the other - * CPU. We need to avoid to SMP race with it. NOTE: we don' t need - * the irq version of write_lock because as just said we have irq - * locally disabled. -arca - */ - write_seqlock(&xtime_lock); handle_timer_tick(); - write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff --git a/arch/sh/kernel/timers/timer-mtu2.c b/arch/sh/kernel/timers/timer-mtu2.c index b7499a2..463cd08 100644 --- a/arch/sh/kernel/timers/timer-mtu2.c +++ b/arch/sh/kernel/timers/timer-mtu2.c @@ -100,9 +100,7 @@ static irqreturn_t mtu2_timer_interrupt(int irq, void *dev_id) ctrl_outb(timer_status, MTU2_TSR_1); /* Do timer tick */ - write_seqlock(&xtime_lock); handle_timer_tick(); - write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 4cd5d78..a6a6f98 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -713,10 +713,10 @@ static irqreturn_t pcic_timer_handler (int irq, void *h) write_seqlock(&xtime_lock); /* Dummy, to show that we remember */ pcic_clear_clock_irq(); do_timer(1); + write_sequnlock(&xtime_lock); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); #endif - write_sequnlock(&xtime_lock); return IRQ_HANDLED; } diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c index 00b393c..cfaf22c 100644 --- a/arch/sparc/kernel/time.c +++ b/arch/sparc/kernel/time.c @@ -128,10 +128,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) clear_clock_irq(); do_timer(1); -#ifndef CONFIG_SMP - update_process_times(user_mode(get_irq_regs())); -#endif - /* Determine when to update the Mostek clock. 
*/ if (ntp_synced() && @@ -145,6 +141,9 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) } write_sequnlock(&xtime_lock); +#ifndef CONFIG_SMP + update_process_times(user_mode(get_irq_regs())); +#endif return IRQ_HANDLED; } diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 32dd62b..cbdf9ba 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -384,9 +384,6 @@ static void __init runtime_code_page_mkexec(void) efi_memory_desc_t *md; void *p; - if (!(__supported_pte_mask & _PAGE_NX)) - return; - /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; @@ -428,9 +425,6 @@ void __init efi_enter_virtual_mode(void) else va = efi_ioremap(md->phys_addr, size); - if (md->attribute & EFI_MEMORY_WB) - set_memory_uc(md->virt_addr, size); - md->virt_addr = (u64) (unsigned long) va; if (!va) { @@ -439,6 +433,9 @@ void __init efi_enter_virtual_mode(void) continue; } + if (!(md->attribute & EFI_MEMORY_WB)) + set_memory_uc(md->virt_addr, size); + systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { systab += md->virt_addr - md->phys_addr; @@ -476,7 +473,8 @@ void __init efi_enter_virtual_mode(void) efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; efi.reset_system = virt_efi_reset_system; efi.set_virtual_address_map = virt_efi_set_virtual_address_map; - runtime_code_page_mkexec(); + if (__supported_pte_mask & _PAGE_NX) + runtime_code_page_mkexec(); early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; } diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index 09d5c23..d143a1e 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -35,6 +35,7 @@ #include <asm/tlbflush.h> #include <asm/proto.h> #include <asm/efi.h> +#include <asm/cacheflush.h> static pgd_t save_pgd __initdata; static unsigned long efi_flags __initdata; @@ -43,22 +44,15 @@ static void __init early_mapping_set_exec(unsigned long start, unsigned long end, int executable) { - pte_t *kpte; - unsigned int level; - - while (start < end) { - kpte = lookup_address((unsigned long)__va(start), &level); - BUG_ON(!kpte); - if (executable) - set_pte(kpte, pte_mkexec(*kpte)); - else - set_pte(kpte, __pte((pte_val(*kpte) | _PAGE_NX) & \ - __supported_pte_mask)); - if (level == PG_LEVEL_4K) - start = (start + PAGE_SIZE) & PAGE_MASK; - else - start = (start + PMD_SIZE) & PMD_MASK; - } + unsigned long num_pages; + + start &= PMD_MASK; + end = (end + PMD_SIZE - 1) & PMD_MASK; + num_pages = (end - start) >> PAGE_SHIFT; + if (executable) + set_memory_x((unsigned long)__va(start), num_pages); + else + set_memory_nx((unsigned long)__va(start), num_pages); } static void __init early_runtime_code_mapping_set_exec(int executable) @@ -74,7 +68,7 @@ static void __init early_runtime_code_mapping_set_exec(int executable) md = p; if (md->type == EFI_RUNTIME_SERVICES_CODE) { unsigned long end; - end = md->phys_addr + (md->num_pages << PAGE_SHIFT); + end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); early_mapping_set_exec(md->phys_addr, end, executable); } } @@ -84,8 +78,8 @@ void __init efi_call_phys_prelog(void) { unsigned long vaddress; - local_irq_save(efi_flags); early_runtime_code_mapping_set_exec(1); + local_irq_save(efi_flags); vaddress = (unsigned long)__va(0x0UL); save_pgd = *pgd_offset_k(0x0UL); set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress)); @@ -98,9 +92,9 @@ void __init efi_call_phys_epilog(void) * After the lock is released, the original 
page table is restored. */ set_pgd(pgd_offset_k(0x0UL), save_pgd); - early_runtime_code_mapping_set_exec(0); __flush_tlb_all(); local_irq_restore(efi_flags); + early_runtime_code_mapping_set_exec(0); } void __init efi_reserve_bootmem(void) diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index ef62b07..8540abe 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -95,7 +95,7 @@ static int pit_next_event(unsigned long delta, struct clock_event_device *evt) * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - * !using_apic_timer decisions in do_timer_interrupt_hook() */ -struct clock_event_device pit_clockevent = { +static struct clock_event_device pit_clockevent = { .name = "pit", .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, .set_mode = init_pit_timer, diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 1941482..c47208f 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -11,7 +11,7 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) { u8 config, rev; - u32 word; + u16 word; /* BIOS may enable hardware IRQ balancing for * E7520/E7320/E7525(revision ID 0x9 and below) @@ -26,8 +26,11 @@ static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) pci_read_config_byte(dev, 0xf4, &config); pci_write_config_byte(dev, 0xf4, config|0x2); - /* read xTPR register */ - raw_pci_read(0, 0, 0x40, 0x4c, 2, &word); + /* + * read xTPR register. We may not have a pci_dev for device 8 + * because it might be hidden until the above write. + */ + pci_bus_read_config_word(dev->bus, PCI_DEVFN(8, 0), 0x4c, &word); if (!(word & (1 << 13))) { dev_info(&dev->dev, "Intel E7520/7320/7525 detected; " diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 5818dc2..7fd6ac4 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -326,7 +326,7 @@ static inline void kb_wait(void) } } -void machine_emergency_restart(void) +static void native_machine_emergency_restart(void) { int i; @@ -376,7 +376,7 @@ void machine_emergency_restart(void) } } -void machine_shutdown(void) +static void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_SMP @@ -420,7 +420,7 @@ void machine_shutdown(void) #endif } -void machine_restart(char *__unused) +static void native_machine_restart(char *__unused) { printk("machine restart\n"); @@ -429,11 +429,11 @@ void machine_restart(char *__unused) machine_emergency_restart(); } -void machine_halt(void) +static void native_machine_halt(void) { } -void machine_power_off(void) +static void native_machine_power_off(void) { if (pm_power_off) { if (!reboot_force) @@ -443,9 +443,35 @@ void machine_power_off(void) } struct machine_ops machine_ops = { - .power_off = machine_power_off, - .shutdown = machine_shutdown, - .emergency_restart = machine_emergency_restart, - .restart = machine_restart, - .halt = machine_halt + .power_off = native_machine_power_off, + .shutdown = native_machine_shutdown, + .emergency_restart = native_machine_emergency_restart, + .restart = native_machine_restart, + .halt = native_machine_halt }; + +void machine_power_off(void) +{ + machine_ops.power_off(); +} + +void machine_shutdown(void) +{ + machine_ops.shutdown(); +} + +void machine_emergency_restart(void) +{ + machine_ops.emergency_restart(); +} + +void machine_restart(char *cmd) +{ + machine_ops.restart(cmd); +} + +void machine_halt(void) +{ + machine_ops.halt(); +} + diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a4897a8..9f42d7e 
100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -265,7 +265,9 @@ static __initdata pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) { - pgd_t *pgd = &swapper_pg_dir[pgd_index(addr)]; + /* Don't assume we're using swapper_pg_dir at this point */ + pgd_t *base = __va(read_cr3()); + pgd_t *pgd = &base[pgd_index(addr)]; pud_t *pud = pud_offset(pgd, addr); pmd_t *pmd = pmd_offset(pud, addr); diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index ed82016..75f1b10 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -40,7 +40,6 @@ struct split_state { static int print_split(struct split_state *s) { long i, expected, missed = 0; - int printed = 0; int err = 0; s->lpg = s->gpg = s->spg = s->exec = 0; @@ -53,12 +52,6 @@ static int print_split(struct split_state *s) pte = lookup_address(addr, &level); if (!pte) { - if (!printed) { - dump_pagetable(addr); - printk(KERN_INFO "CPA %lx no pte level %d\n", - addr, level); - printed = 1; - } missed++; i++; continue; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 440210a..bd61ed1 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -275,8 +275,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, break; #ifdef CONFIG_X86_64 case PG_LEVEL_1G: - psize = PMD_PAGE_SIZE; - pmask = PMD_PAGE_MASK; + psize = PUD_PAGE_SIZE; + pmask = PUD_PAGE_MASK; break; #endif default: diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index d28dda5..f385a4b 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -7,7 +7,7 @@ VDSO32-$(CONFIG_X86_32) := y VDSO32-$(CONFIG_COMPAT) := y vdso-install-$(VDSO64-y) += vdso.so -vdso-install-$(VDSO32-y) += $(vdso32-y:=.so) +vdso-install-$(VDSO32-y) += $(vdso32-images) # files to link into the vdso @@ -63,6 +63,8 @@ vdso32.so-$(CONFIG_X86_32) += int80 vdso32.so-$(CONFIG_COMPAT) += syscall vdso32.so-$(VDSO32-y) += sysenter +vdso32-images = $(vdso32.so-y:%=vdso32-%.so) + CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 @@ -71,21 +73,21 @@ VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds -targets += $(vdso32.so-y:%=vdso32-%.so.dbg) $(vdso32.so-y:%=vdso32-%.so) +targets += $(vdso32-images) $(vdso32-images:=.dbg) targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o) -extra-y += $(vdso32.so-y:%=vdso32-%.so) +extra-y += $(vdso32-images) -$(obj)/vdso32.o: $(vdso32.so-y:%=$(obj)/vdso32-%.so) +$(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) -$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): asflags-$(CONFIG_X86_64) += -m32 +$(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) +$(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 -$(vdso32.so-y:%=$(obj)/vdso32-%.so.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ - $(obj)/vdso32/vdso32.lds \ - $(obj)/vdso32/note.o \ - $(obj)/vdso32/%.o +$(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ + $(obj)/vdso32/vdso32.lds \ + $(obj)/vdso32/note.o \ + $(obj)/vdso32/%.o $(call if_changed,vdso) # Make vdso32-*-syms.lds from each image, and then make sure they match. 
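Stepping back to the timer_interrupt hunks earlier in this diff (alpha, blackfin, frv, m68knommu, sparc): they all apply the same transformation, keeping only the tick bookkeeping under the xtime_lock seqlock and calling update_process_times() after the lock is dropped. A sketch of the resulting handler shape, using the kernel symbols that appear in those hunks (the handler name itself is illustrative, and the include set is a plausible minimum rather than taken from any one arch file):

	#include <linux/interrupt.h>
	#include <linux/profile.h>
	#include <linux/sched.h>
	#include <linux/time.h>
	#include <asm/irq_regs.h>

	static irqreturn_t example_timer_interrupt(int irq, void *dev_id)
	{
		profile_tick(CPU_PROFILING);    /* needs no xtime_lock either */

		write_seqlock(&xtime_lock);
		do_timer(1);                    /* advance jiffies/xtime only */
		/* ... RTC sync as in the per-arch handlers ... */
		write_sequnlock(&xtime_lock);

	#ifndef CONFIG_SMP
		/* moved out from under the seqlock by the hunks above */
		update_process_times(user_mode(get_irq_regs()));
	#endif
		return IRQ_HANDLED;
	}

The sh hunks (timer-cmt.c, timer-mtu2.c) are the complementary half of the same cleanup: handle_timer_tick() now takes xtime_lock itself, so its callers drop their own write_seqlock/write_sequnlock pair instead.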
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index de647bc..49e5358 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -798,6 +798,10 @@ static __init void xen_pagetable_setup_start(pgd_t *base) * added to the table can be prepared properly for Xen. */ xen_write_cr3(__pa(base)); + + /* Unpin initial Xen pagetable */ + pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, + PFN_DOWN(__pa(xen_start_info->pt_base))); } static __init void xen_pagetable_setup_done(pgd_t *base) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 34b3386..15e6023 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -623,7 +623,7 @@ acpi_os_write_memory(acpi_physical_address phys_addr, u32 value, u32 width) acpi_status acpi_os_read_pci_configuration(struct acpi_pci_id * pci_id, u32 reg, - void *value, u32 width) + u32 *value, u32 width) { int result, size; @@ -689,7 +689,6 @@ static void acpi_os_derive_pci_id_2(acpi_handle rhandle, /* upper bound */ acpi_status status; unsigned long temp; acpi_object_type type; - u8 tu8; acpi_get_parent(chandle, &handle); if (handle != rhandle) { @@ -704,6 +703,7 @@ static void acpi_os_derive_pci_id_2(acpi_handle rhandle, /* upper bound */ acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &temp); if (ACPI_SUCCESS(status)) { + u32 val; pci_id->device = ACPI_HIWORD(ACPI_LODWORD(temp)); pci_id->function = ACPI_LOWORD(ACPI_LODWORD(temp)); @@ -712,24 +712,24 @@ static void acpi_os_derive_pci_id_2(acpi_handle rhandle, /* upper bound */ /* any nicer way to get bus number of bridge ? */ status = - acpi_os_read_pci_configuration(pci_id, 0x0e, &tu8, + acpi_os_read_pci_configuration(pci_id, 0x0e, &val, 8); if (ACPI_SUCCESS(status) - && ((tu8 & 0x7f) == 1 || (tu8 & 0x7f) == 2)) { + && ((val & 0x7f) == 1 || (val & 0x7f) == 2)) { status = acpi_os_read_pci_configuration(pci_id, 0x18, - &tu8, 8); + &val, 8); if (!ACPI_SUCCESS(status)) { /* Certainly broken... 
FIX ME */ return; } *is_bridge = 1; - pci_id->bus = tu8; + pci_id->bus = val; status = acpi_os_read_pci_configuration(pci_id, 0x19, - &tu8, 8); + &val, 8); if (ACPI_SUCCESS(status)) { - *bus_number = tu8; + *bus_number = val; } } else *is_bridge = 0; diff --git a/drivers/acpi/wmi.c b/drivers/acpi/wmi.c index 36b84ab..efacc9f 100644 --- a/drivers/acpi/wmi.c +++ b/drivers/acpi/wmi.c @@ -247,7 +247,7 @@ u32 method_id, const struct acpi_buffer *in, struct acpi_buffer *out) block = &wblock->gblock; handle = wblock->handle; - if (!block->flags & ACPI_WMI_METHOD) + if (!(block->flags & ACPI_WMI_METHOD)) return AE_BAD_DATA; if (block->instance_count < instance) @@ -673,11 +673,11 @@ static int __init acpi_wmi_init(void) { acpi_status result; + INIT_LIST_HEAD(&wmi_blocks.list); + if (acpi_disabled) return -ENODEV; - INIT_LIST_HEAD(&wmi_blocks.list); - result = acpi_bus_register_driver(&acpi_wmi_driver); if (result < 0) { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 3011919..004dae4 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3048,6 +3048,8 @@ int ata_down_xfermask_limit(struct ata_device *dev, unsigned int sel) static int ata_dev_set_mode(struct ata_device *dev) { struct ata_eh_context *ehc = &dev->link->eh_context; + const char *dev_err_whine = ""; + int ign_dev_err = 0; unsigned int err_mask; int rc; @@ -3057,41 +3059,57 @@ static int ata_dev_set_mode(struct ata_device *dev) err_mask = ata_dev_set_xfermode(dev); + if (err_mask & ~AC_ERR_DEV) + goto fail; + + /* revalidate */ + ehc->i.flags |= ATA_EHI_POST_SETMODE; + rc = ata_dev_revalidate(dev, ATA_DEV_UNKNOWN, 0); + ehc->i.flags &= ~ATA_EHI_POST_SETMODE; + if (rc) + return rc; + /* Old CFA may refuse this command, which is just fine */ if (dev->xfer_shift == ATA_SHIFT_PIO && ata_id_is_cfa(dev->id)) - err_mask &= ~AC_ERR_DEV; + ign_dev_err = 1; /* Some very old devices and some bad newer ones fail any kind of SET_XFERMODE request but support PIO0-2 timings and no IORDY */ if (dev->xfer_shift == ATA_SHIFT_PIO && !ata_id_has_iordy(dev->id) && dev->pio_mode <= XFER_PIO_2) - err_mask &= ~AC_ERR_DEV; + ign_dev_err = 1; /* Early MWDMA devices do DMA but don't allow DMA mode setting. 
Don't fail an MWDMA0 set IFF the device indicates it is in MWDMA0 */ if (dev->xfer_shift == ATA_SHIFT_MWDMA && dev->dma_mode == XFER_MW_DMA_0 && (dev->id[63] >> 8) & 1) - err_mask &= ~AC_ERR_DEV; + ign_dev_err = 1; - if (err_mask) { - ata_dev_printk(dev, KERN_ERR, "failed to set xfermode " - "(err_mask=0x%x)\n", err_mask); - return -EIO; - } + /* if the device is actually configured correctly, ignore dev err */ + if (dev->xfer_mode == ata_xfer_mask2mode(ata_id_xfermask(dev->id))) + ign_dev_err = 1; - ehc->i.flags |= ATA_EHI_POST_SETMODE; - rc = ata_dev_revalidate(dev, ATA_DEV_UNKNOWN, 0); - ehc->i.flags &= ~ATA_EHI_POST_SETMODE; - if (rc) - return rc; + if (err_mask & AC_ERR_DEV) { + if (!ign_dev_err) + goto fail; + else + dev_err_whine = " (device error ignored)"; + } DPRINTK("xfer_shift=%u, xfer_mode=0x%x\n", dev->xfer_shift, (int)dev->xfer_mode); - ata_dev_printk(dev, KERN_INFO, "configured for %s\n", - ata_mode_string(ata_xfer_mode2mask(dev->xfer_mode))); + ata_dev_printk(dev, KERN_INFO, "configured for %s%s\n", + ata_mode_string(ata_xfer_mode2mask(dev->xfer_mode)), + dev_err_whine); + return 0; + + fail: + ata_dev_printk(dev, KERN_ERR, "failed to set xfermode " + "(err_mask=0x%x)\n", err_mask); + return -EIO; } /** diff --git a/drivers/ata/pata_amd.c b/drivers/ata/pata_amd.c index 761a666..ea567e2 100644 --- a/drivers/ata/pata_amd.c +++ b/drivers/ata/pata_amd.c @@ -772,7 +772,7 @@ static void __exit amd_exit(void) } MODULE_AUTHOR("Alan Cox"); -MODULE_DESCRIPTION("low-level driver for AMD PATA IDE"); +MODULE_DESCRIPTION("low-level driver for AMD and Nvidia PATA IDE"); MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(pci, amd); MODULE_VERSION(DRV_VERSION); diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 333dc15..6c59969 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -127,7 +127,7 @@ static int opti82c611a; /* Opti82c611A on primary 1, sec 2, both 3 */ static int opti82c46x; /* Opti 82c465MV present(pri/sec autodetect) */ static int qdi; /* Set to probe QDI controllers */ static int winbond; /* Set to probe Winbond controllers, - give I/O port if non stdanard */ + give I/O port if non standard */ static int autospeed; /* Chip present which snoops speed changes */ static int pio_mask = 0x1F; /* PIO range for autospeed devices */ static int iordy_mask = 0xFFFFFFFF; /* Use iordy if available */ diff --git a/drivers/ata/pata_ninja32.c b/drivers/ata/pata_ninja32.c index 1c1b835..15dd649 100644 --- a/drivers/ata/pata_ninja32.c +++ b/drivers/ata/pata_ninja32.c @@ -17,6 +17,7 @@ * Base + 0x00 IRQ Status * Base + 0x01 IRQ control * Base + 0x02 Chipset control + * Base + 0x03 Unknown * Base + 0x04 VDMA and reset control + wait bits * Base + 0x08 BMIMBA * Base + 0x0C DMA Length @@ -174,8 +175,12 @@ static int ninja32_init_one(struct pci_dev *dev, const struct pci_device_id *id) ata_std_ports(&ap->ioaddr); iowrite8(0x05, base + 0x01); /* Enable interrupt lines */ - iowrite8(0xB3, base + 0x02); /* Burst, ?? setup */ - iowrite8(0x00, base + 0x04); /* WAIT0 ? */ + iowrite8(0xBE, base + 0x02); /* Burst, ?? setup */ + iowrite8(0x01, base + 0x03); /* Unknown */ + iowrite8(0x20, base + 0x04); /* WAIT0 */ + iowrite8(0x8f, base + 0x05); /* Unknown */ + iowrite8(0xa4, base + 0x1c); /* Unknown */ + iowrite8(0x83, base + 0x1d); /* BMDMA control: WAIT0 */ /* FIXME: Should we disable them at remove ? 
*/ return ata_host_activate(host, dev->irq, ata_interrupt, IRQF_SHARED, &ninja32_sht); diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 39627ab..d119a68 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -84,6 +84,7 @@ enum { VIA_BAD_ID = 0x100, /* Has wrong vendor ID (0x1107) */ VIA_BAD_AST = 0x200, /* Don't touch Address Setup Timing */ VIA_NO_ENABLES = 0x400, /* Has no enablebits */ + VIA_SATA_PATA = 0x800, /* SATA/PATA combined configuration */ }; /* @@ -100,7 +101,7 @@ static const struct via_isa_bridge { { "vx800", PCI_DEVICE_ID_VIA_VX800, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237s", PCI_DEVICE_ID_VIA_8237S, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8251", PCI_DEVICE_ID_VIA_8251, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, - { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, + { "cx700", PCI_DEVICE_ID_VIA_CX700, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_SATA_PATA }, { "vt6410", PCI_DEVICE_ID_VIA_6410, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST | VIA_NO_ENABLES}, { "vt8237a", PCI_DEVICE_ID_VIA_8237A, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, { "vt8237", PCI_DEVICE_ID_VIA_8237, 0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST }, @@ -172,6 +173,9 @@ static int via_cable_detect(struct ata_port *ap) { if (via_cable_override(pdev)) return ATA_CBL_PATA40_SHORT; + if ((config->flags & VIA_SATA_PATA) && ap->port_no == 0) + return ATA_CBL_SATA; + /* Early chips are 40 wire */ if ((config->flags & VIA_UDMA) < VIA_UDMA_66) return ATA_CBL_PATA40; diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 080b836..04b5717 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -1716,14 +1716,16 @@ static void mv_host_intr(struct ata_host *host, u32 relevant, unsigned int hc) VPRINTK("ENTER, hc%u relevant=0x%08x HC IRQ cause=0x%08x\n", hc, relevant, hc_irq_cause); - for (port = port0; port < port0 + last_port; port++) { + for (port = port0; port < last_port; port++) { struct ata_port *ap = host->ports[port]; - struct mv_port_priv *pp = ap->private_data; + struct mv_port_priv *pp; int have_err_bits, hard_port, shift; if ((!ap) || (ap->flags & ATA_FLAG_DISABLED)) continue; + pp = ap->private_data; + shift = port << 1; /* (port * 2) */ if (port >= MV_PORTS_PER_HC) { shift++; /* skip bit 8 in the HC Main IRQ reg */ @@ -2879,6 +2881,26 @@ done: return rc; } +static int mv_create_dma_pools(struct mv_host_priv *hpriv, struct device *dev) +{ + hpriv->crqb_pool = dmam_pool_create("crqb_q", dev, MV_CRQB_Q_SZ, + MV_CRQB_Q_SZ, 0); + if (!hpriv->crqb_pool) + return -ENOMEM; + + hpriv->crpb_pool = dmam_pool_create("crpb_q", dev, MV_CRPB_Q_SZ, + MV_CRPB_Q_SZ, 0); + if (!hpriv->crpb_pool) + return -ENOMEM; + + hpriv->sg_tbl_pool = dmam_pool_create("sg_tbl", dev, MV_SG_TBL_SZ, + MV_SG_TBL_SZ, 0); + if (!hpriv->sg_tbl_pool) + return -ENOMEM; + + return 0; +} + /** * mv_platform_probe - handle a positive probe of an soc Marvell * host @@ -2932,6 +2954,10 @@ static int mv_platform_probe(struct platform_device *pdev) hpriv->base = ioremap(res->start, res->end - res->start + 1); hpriv->base -= MV_SATAHC0_REG_BASE; + rc = mv_create_dma_pools(hpriv, &pdev->dev); + if (rc) + return rc; + /* initialize adapter */ rc = mv_init_host(host, chip_soc); if (rc) @@ -3068,26 +3094,6 @@ static void mv_print_info(struct ata_host *host) scc_s, (MV_HP_FLAG_MSI & hpriv->hp_flags) ? 
"MSI" : "INTx"); } -static int mv_create_dma_pools(struct mv_host_priv *hpriv, struct device *dev) -{ - hpriv->crqb_pool = dmam_pool_create("crqb_q", dev, MV_CRQB_Q_SZ, - MV_CRQB_Q_SZ, 0); - if (!hpriv->crqb_pool) - return -ENOMEM; - - hpriv->crpb_pool = dmam_pool_create("crpb_q", dev, MV_CRPB_Q_SZ, - MV_CRPB_Q_SZ, 0); - if (!hpriv->crpb_pool) - return -ENOMEM; - - hpriv->sg_tbl_pool = dmam_pool_create("sg_tbl", dev, MV_SG_TBL_SZ, - MV_SG_TBL_SZ, 0); - if (!hpriv->sg_tbl_pool) - return -ENOMEM; - - return 0; -} - /** * mv_pci_init_one - handle a positive probe of a PCI Marvell host * @pdev: PCI device found diff --git a/drivers/char/drm/i830_dma.c b/drivers/char/drm/i830_dma.c index 379cbda..9df0810 100644 --- a/drivers/char/drm/i830_dma.c +++ b/drivers/char/drm/i830_dma.c @@ -36,7 +36,7 @@ #include "i830_drm.h" #include "i830_drv.h" #include <linux/interrupt.h> /* For task queue support */ -#include <linux/pagemap.h> /* For FASTCALL on unlock_page() */ +#include <linux/pagemap.h> #include <linux/delay.h> #include <asm/uaccess.h> diff --git a/drivers/char/pcmcia/Kconfig b/drivers/char/pcmcia/Kconfig index 00b8a84..ffa0efc 100644 --- a/drivers/char/pcmcia/Kconfig +++ b/drivers/char/pcmcia/Kconfig @@ -45,7 +45,7 @@ config CARDMAN_4040 config IPWIRELESS tristate "IPWireless 3G UMTS PCMCIA card support" - depends on PCMCIA + depends on PCMCIA && NETDEVICES select PPP help This is a driver for 3G UMTS PCMCIA card from IPWireless company. In diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 043c34a..df752e6 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -378,6 +378,9 @@ config BLK_DEV_IDEPNP would like the kernel to automatically detect and activate it, say Y here. +config BLK_DEV_IDEDMA_SFF + bool + if PCI comment "PCI IDE chipsets support" @@ -459,6 +462,7 @@ config BLK_DEV_RZ1000 config BLK_DEV_IDEDMA_PCI bool select BLK_DEV_IDEPCI + select BLK_DEV_IDEDMA_SFF config BLK_DEV_AEC62XX tristate "AEC62XX chipset support" @@ -688,23 +692,6 @@ config BLK_DEV_PDC202XX_OLD If unsure, say N. -config PDC202XX_BURST - bool "Special UDMA Feature" - depends on BLK_DEV_PDC202XX_OLD - help - This option causes the pdc202xx driver to enable UDMA modes on the - PDC202xx even when the PDC202xx BIOS has not done so. - - It was originally designed for the PDC20246/Ultra33, whose BIOS will - only setup UDMA on the first two PDC20246 cards. It has also been - used successfully on a PDC20265/Ultra100, allowing use of UDMA modes - when the PDC20265 BIOS has been disabled (for faster boot up). - - Please read the comments at the top of - <file:drivers/ide/pci/pdc202xx_old.c>. - - If unsure, say N. 
- config BLK_DEV_PDC202XX_NEW tristate "PROMISE PDC202{68|69|70|71|75|76|77} support" select BLK_DEV_IDEDMA_PCI @@ -1016,7 +1003,7 @@ config BLK_DEV_Q40IDE config BLK_DEV_PALMCHIP_BK3710 tristate "Palmchip bk3710 IDE controller support" depends on ARCH_DAVINCI - select BLK_DEV_IDEDMA_PCI + select BLK_DEV_IDEDMA_SFF help Say Y here if you want to support the onchip IDE controller on the TI DaVinci SoC @@ -1124,7 +1111,8 @@ config BLK_DEV_UMC8672 endif config BLK_DEV_IDEDMA - def_bool BLK_DEV_IDEDMA_PCI || BLK_DEV_IDEDMA_PMAC || BLK_DEV_IDEDMA_ICS || BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA + def_bool BLK_DEV_IDEDMA_SFF || BLK_DEV_IDEDMA_PMAC || \ + BLK_DEV_IDEDMA_ICS || BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA config IDE_ARCH_OBSOLETE_INIT def_bool ALPHA || (ARM && !ARCH_L7200) || BLACKFIN || X86 || IA64 || M32R || MIPS || PARISC || PPC || (SUPERH64 && BLK_DEV_IDEPCI) || SPARC diff --git a/drivers/ide/arm/bast-ide.c b/drivers/ide/arm/bast-ide.c index 0e7574c..161d30c 100644 --- a/drivers/ide/arm/bast-ide.c +++ b/drivers/ide/arm/bast-ide.c @@ -21,12 +21,7 @@ #include <asm/arch/bast-map.h> #include <asm/arch/bast-irq.h> -/* list of registered interfaces */ -static ide_hwif_t *ifs[2]; - -static int __init -bastide_register(unsigned int base, unsigned int aux, int irq, - ide_hwif_t **hwif) +static int __init bastide_register(unsigned int base, unsigned int aux, int irq) { ide_hwif_t *hwif; hw_regs_t hw; @@ -76,8 +71,9 @@ static int __init bastide_init(void) printk("BAST: IDE driver, (c) 2003-2004 Simtec Electronics\n"); - bastide_register(BAST_VA_IDEPRI, BAST_VA_IDEPRIAUX, IRQ_IDE0, &ifs[0]); - bastide_register(BAST_VA_IDESEC, BAST_VA_IDESECAUX, IRQ_IDE1, &ifs[1]); + bastide_register(BAST_VA_IDEPRI, BAST_VA_IDEPRIAUX, IRQ_IDE0); + bastide_register(BAST_VA_IDESEC, BAST_VA_IDESECAUX, IRQ_IDE1); + return 0; } diff --git a/drivers/ide/arm/palm_bk3710.c b/drivers/ide/arm/palm_bk3710.c index c306997..8e1f6bd 100644 --- a/drivers/ide/arm/palm_bk3710.c +++ b/drivers/ide/arm/palm_bk3710.c @@ -311,15 +311,37 @@ static void __devinit palm_bk3710_chipinit(void __iomem *base) palm_bk3710_setpiomode(base, NULL, 0, 600, 0); palm_bk3710_setpiomode(base, NULL, 1, 600, 0); } + +static u8 __devinit palm_bk3710_cable_detect(ide_hwif_t *hwif) +{ + return ATA_CBL_PATA80; +} + +static void __devinit palm_bk3710_init_hwif(ide_hwif_t *hwif) +{ + hwif->set_pio_mode = palm_bk3710_set_pio_mode; + hwif->set_dma_mode = palm_bk3710_set_dma_mode; + + hwif->cable_detect = palm_bk3710_cable_detect; +} + +static const struct ide_port_info __devinitdata palm_bk3710_port_info = { + .init_hwif = palm_bk3710_init_hwif, + .host_flags = IDE_HFLAG_NO_DMA, /* hack (no PCI) */ + .pio_mask = ATA_PIO4, + .udma_mask = ATA_UDMA4, /* (input clk 99MHz) */ + .mwdma_mask = ATA_MWDMA2, +}; + static int __devinit palm_bk3710_probe(struct platform_device *pdev) { - hw_regs_t ide_ctlr_info; - int index = 0; - int pribase; struct clk *clkp; struct resource *mem, *irq; ide_hwif_t *hwif; void __iomem *base; + int pribase, i; + hw_regs_t hw; + u8 idx[4] = { 0xff, 0xff, 0xff, 0xff }; clkp = clk_get(NULL, "IDECLK"); if (IS_ERR(clkp)) @@ -330,7 +352,7 @@ static int __devinit palm_bk3710_probe(struct platform_device *pdev) ide_palm_clk = clk_get_rate(ideclkp)/100000; ide_palm_clk = (10000/ide_palm_clk) + 1; /* Register the IDE interface with Linux ATA Interface */ - memset(&ide_ctlr_info, 0, sizeof(ide_ctlr_info)); + memset(&hw, 0, sizeof(hw)); mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (mem == NULL) { @@ -349,32 +371,42 @@ static int __devinit 
palm_bk3710_probe(struct platform_device *pdev) palm_bk3710_chipinit(base); pribase = mem->start + IDE_PALM_ATA_PRI_REG_OFFSET; - for (index = 0; index < IDE_NR_PORTS - 2; index++) - ide_ctlr_info.io_ports[index] = pribase + index; - ide_ctlr_info.io_ports[IDE_CONTROL_OFFSET] = mem->start + + for (i = 0; i < IDE_NR_PORTS - 2; i++) + hw.io_ports[i] = pribase + i; + hw.io_ports[IDE_CONTROL_OFFSET] = mem->start + IDE_PALM_ATA_PRI_CTL_OFFSET; - ide_ctlr_info.irq = irq->start; - ide_ctlr_info.chipset = ide_palm3710; + hw.irq = irq->start; + hw.chipset = ide_palm3710; - if (ide_register_hw(&ide_ctlr_info, NULL, &hwif) < 0) { - printk(KERN_WARNING "Palm Chip BK3710 IDE Register Fail\n"); - return -ENODEV; - } + hwif = ide_deprecated_find_port(hw.io_ports[IDE_DATA_OFFSET]); + if (hwif == NULL) + goto out; + + i = hwif->index; + + if (hwif->present) + ide_unregister(i, 0, 0); + else if (!hwif->hold) + ide_init_port_data(hwif, i); + + ide_init_port_hw(hwif, &hw); - hwif->set_pio_mode = &palm_bk3710_set_pio_mode; - hwif->set_dma_mode = &palm_bk3710_set_dma_mode; hwif->mmio = 1; default_hwif_mmiops(hwif); - hwif->cbl = ATA_CBL_PATA80; - hwif->ultra_mask = 0x1f; /* Ultra DMA Mode 4 Max - (input clk 99MHz) */ - hwif->mwdma_mask = 0x7; - hwif->drives[0].autotune = 1; - hwif->drives[1].autotune = 1; ide_setup_dma(hwif, mem->start); + idx[0] = i; + + ide_device_add(idx, &palm_bk3710_port_info); + + if (!hwif->present) + goto out; + return 0; +out: + printk(KERN_WARNING "Palm Chip BK3710 IDE Register Fail\n"); + return -ENODEV; } static struct platform_driver platform_bk_driver = { diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 5e42c19..354c91d 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1555,7 +1555,7 @@ int ide_cd_read_toc(ide_drive_t *drive, struct request_sense *sense) if (stat) return stat; - toc->hdr.toc_length = ntohs (toc->hdr.toc_length); + toc->hdr.toc_length = be16_to_cpu(toc->hdr.toc_length); if (info->cd_flags & IDE_CD_FLAG_TOCTRACKS_AS_BCD) { toc->hdr.first_track = BCD2BIN(toc->hdr.first_track); diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 3c69822..aed8b31c 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -590,20 +590,24 @@ static ide_proc_entry_t idedisk_proc[] = { static void idedisk_prepare_flush(struct request_queue *q, struct request *rq) { ide_drive_t *drive = q->queuedata; - ide_task_t task; + ide_task_t *task = kmalloc(sizeof(*task), GFP_ATOMIC); - memset(&task, 0, sizeof(task)); + /* FIXME: map struct ide_taskfile on rq->cmd[] */ + BUG_ON(task == NULL); + + memset(task, 0, sizeof(*task)); if (ide_id_has_flush_cache_ext(drive->id) && (drive->capacity64 >= (1UL << 28))) - task.tf.command = WIN_FLUSH_CACHE_EXT; + task->tf.command = WIN_FLUSH_CACHE_EXT; else - task.tf.command = WIN_FLUSH_CACHE; - task.tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE; - task.data_phase = TASKFILE_NO_DATA; + task->tf.command = WIN_FLUSH_CACHE; + task->tf_flags = IDE_TFLAG_OUT_TF | IDE_TFLAG_OUT_DEVICE | + IDE_TFLAG_DYN; + task->data_phase = TASKFILE_NO_DATA; rq->cmd_type = REQ_TYPE_ATA_TASKFILE; rq->cmd_flags |= REQ_SOFTBARRIER; - rq->special = &task; + rq->special = task; } /* diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index a4bb328..d0e7b53 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -198,7 +198,7 @@ int ide_build_sglist(ide_drive_t *drive, struct request *rq) EXPORT_SYMBOL_GPL(ide_build_sglist); -#ifdef CONFIG_BLK_DEV_IDEDMA_PCI +#ifdef CONFIG_BLK_DEV_IDEDMA_SFF /** * ide_build_dmatable - 
build IDE DMA table * @@ -316,7 +316,7 @@ void ide_destroy_dmatable (ide_drive_t *drive) EXPORT_SYMBOL_GPL(ide_destroy_dmatable); -#ifdef CONFIG_BLK_DEV_IDEDMA_PCI +#ifdef CONFIG_BLK_DEV_IDEDMA_SFF /** * config_drive_for_dma - attempt to activate IDE DMA * @drive: the drive to place in DMA mode @@ -424,7 +424,7 @@ void ide_dma_host_set(ide_drive_t *drive, int on) } EXPORT_SYMBOL_GPL(ide_dma_host_set); -#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */ +#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */ /** * ide_dma_off_quietly - Generic DMA kill @@ -474,7 +474,7 @@ void ide_dma_on(ide_drive_t *drive) drive->hwif->dma_host_set(drive, 1); } -#ifdef CONFIG_BLK_DEV_IDEDMA_PCI +#ifdef CONFIG_BLK_DEV_IDEDMA_SFF /** * ide_dma_setup - begin a DMA phase * @drive: target device @@ -591,7 +591,7 @@ static int __ide_dma_test_irq(ide_drive_t *drive) } #else static inline int config_drive_for_dma(ide_drive_t *drive) { return 0; } -#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */ +#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */ int __ide_dma_bad_drive (ide_drive_t *drive) { @@ -840,7 +840,7 @@ void ide_check_dma_crc(ide_drive_t *drive) ide_dma_on(drive); } -#ifdef CONFIG_BLK_DEV_IDEDMA_PCI +#ifdef CONFIG_BLK_DEV_IDEDMA_SFF void ide_dma_lost_irq (ide_drive_t *drive) { printk("%s: DMA interrupt recovery\n", drive->name); @@ -1002,4 +1002,4 @@ void ide_setup_dma(ide_hwif_t *hwif, unsigned long base) } EXPORT_SYMBOL_GPL(ide_setup_dma); -#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */ +#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */ diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 3addbe4..7153796 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -361,17 +361,21 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) spin_unlock_irqrestore(&ide_lock, flags); if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) { - ide_task_t *args = (ide_task_t *) rq->special; + ide_task_t *task = (ide_task_t *)rq->special; + if (rq->errors == 0) - rq->errors = !OK_STAT(stat,READY_STAT,BAD_STAT); - - if (args) { - struct ide_taskfile *tf = &args->tf; + rq->errors = !OK_STAT(stat, READY_STAT, BAD_STAT); + + if (task) { + struct ide_taskfile *tf = &task->tf; tf->error = err; tf->status = stat; - ide_tf_read(drive, args); + ide_tf_read(drive, task); + + if (task->tf_flags & IDE_TFLAG_DYN) + kfree(task); } } else if (blk_pm_request(rq)) { struct request_pm_state *pm = rq->data; @@ -388,7 +392,8 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) spin_lock_irqsave(&ide_lock, flags); HWGROUP(drive)->rq = NULL; rq->errors = err; - if (__blk_end_request(rq, (rq->errors ? -EIO : 0), 0)) + if (unlikely(__blk_end_request(rq, (rq->errors ? 
-EIO : 0), + blk_rq_bytes(rq)))) BUG(); spin_unlock_irqrestore(&ide_lock, flags); } diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index c32e759..c419266 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -786,15 +786,11 @@ static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler, { ide_hwgroup_t *hwgroup = HWGROUP(drive); - if (hwgroup->handler != NULL) { - printk(KERN_CRIT "%s: ide_set_handler: handler not null; " - "old=%p, new=%p\n", - drive->name, hwgroup->handler, handler); - } + BUG_ON(hwgroup->handler); hwgroup->handler = handler; hwgroup->expiry = expiry; hwgroup->timer.expires = jiffies + timeout; - hwgroup->req_gen_timer = hwgroup->req_gen; + hwgroup->req_gen_timer = hwgroup->req_gen; add_timer(&hwgroup->timer); } @@ -827,11 +823,9 @@ void ide_execute_command(ide_drive_t *drive, u8 cmd, ide_handler_t *handler, unsigned timeout, ide_expiry_t *expiry) { unsigned long flags; - ide_hwgroup_t *hwgroup = HWGROUP(drive); ide_hwif_t *hwif = HWIF(drive); spin_lock_irqsave(&ide_lock, flags); - BUG_ON(hwgroup->handler); __ide_set_handler(drive, handler, timeout, expiry); hwif->OUTBSYNC(drive, cmd, IDE_COMMAND_REG); /* diff --git a/drivers/ide/ide-lib.c b/drivers/ide/ide-lib.c index 1ff676cc..29e2c97 100644 --- a/drivers/ide/ide-lib.c +++ b/drivers/ide/ide-lib.c @@ -21,15 +21,6 @@ #include <asm/uaccess.h> #include <asm/io.h> -/* - * IDE library routines. These are plug in code that most - * drivers can use but occasionally may be weird enough - * to want to do their own thing with - * - * Add common non I/O op stuff here. Make sure it has proper - * kernel-doc function headers or your patch will be rejected - */ - static const char *udma_str[] = { "UDMA/16", "UDMA/25", "UDMA/33", "UDMA/44", "UDMA/66", "UDMA/100", "UDMA/133", "UDMA7" }; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 6daea89..4a2cb28 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1051,7 +1051,7 @@ static int init_irq (ide_hwif_t *hwif) int sa = 0; #if defined(__mc68000__) sa = IRQF_SHARED; -#endif /* __mc68000__ || CONFIG_APUS */ +#endif /* __mc68000__ */ if (IDE_CHIPSET_IS_PCI(hwif->chipset)) sa = IRQF_SHARED; @@ -1355,7 +1355,7 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port, hwif->ultra_mask = d->udma_mask; /* reset DMA masks only for SFF-style DMA controllers */ - if ((d->host_flags && IDE_HFLAG_NO_DMA) == 0 && hwif->dma_base == 0) + if ((d->host_flags & IDE_HFLAG_NO_DMA) == 0 && hwif->dma_base == 0) hwif->swdma_mask = hwif->mwdma_mask = hwif->ultra_mask = 0; if (d->host_flags & IDE_HFLAG_RQSIZE_256) diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index 49dd2e7..0598ecf 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -466,9 +466,6 @@ static void ide_tape_put(struct ide_tape_obj *tape) /* 0 = no tape is loaded, so we don't rewind after ejecting */ #define IDETAPE_MEDIUM_PRESENT 9 -/* A define for the READ BUFFER command */ -#define IDETAPE_RETRIEVE_FAULTY_BLOCK 6 - /* Some defines for the SPACE command */ #define IDETAPE_SPACE_OVER_FILEMARK 1 #define IDETAPE_SPACE_TO_EOD 3 @@ -490,7 +487,6 @@ enum { REQ_IDETAPE_PC2 = (1 << 1), /* packet command (second stage) */ REQ_IDETAPE_READ = (1 << 2), REQ_IDETAPE_WRITE = (1 << 3), - REQ_IDETAPE_READ_BUFFER = (1 << 4), }; /* Error codes returned in rq->errors to the higher part of the driver. 
*/ @@ -1523,29 +1519,6 @@ static void idetape_create_read_cmd(idetape_tape_t *tape, idetape_pc_t *pc, set_bit(PC_DMA_RECOMMENDED, &pc->flags); } -static void idetape_create_read_buffer_cmd(idetape_tape_t *tape, - idetape_pc_t *pc, struct idetape_bh *bh) -{ - int size = 32768; - struct idetape_bh *p = bh; - - idetape_init_pc(pc); - pc->c[0] = READ_BUFFER; - pc->c[1] = IDETAPE_RETRIEVE_FAULTY_BLOCK; - pc->c[7] = size >> 8; - pc->c[8] = size & 0xff; - pc->callback = &idetape_pc_callback; - pc->bh = bh; - atomic_set(&bh->b_count, 0); - pc->buffer = NULL; - while (p) { - atomic_set(&p->b_count, 0); - p = p->b_reqnext; - } - pc->request_transfer = size; - pc->buffer_size = size; -} - static void idetape_create_write_cmd(idetape_tape_t *tape, idetape_pc_t *pc, unsigned int length, struct idetape_bh *bh) { @@ -1655,13 +1628,6 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive, (struct idetape_bh *)rq->special); goto out; } - if (rq->cmd[0] & REQ_IDETAPE_READ_BUFFER) { - tape->postpone_cnt = 0; - pc = idetape_next_pc_storage(drive); - idetape_create_read_buffer_cmd(tape, pc, - (struct idetape_bh *)rq->special); - goto out; - } if (rq->cmd[0] & REQ_IDETAPE_PC1) { pc = (idetape_pc_t *) rq->buffer; rq->cmd[0] &= ~(REQ_IDETAPE_PC1); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index ad0e995..4a8952a 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -44,8 +44,6 @@ * inspiration from lots of linux users, esp. hamish@zot.apana.org.au */ -#define REVISION "Revision: 7.00alpha2" - #define _IDE_C /* Tell ide.h it's really us */ #include <linux/module.h> @@ -1618,7 +1616,7 @@ static int __init ide_init(void) { int ret; - printk(KERN_INFO "Uniform Multi-Platform E-IDE driver " REVISION "\n"); + printk(KERN_INFO "Uniform Multi-Platform E-IDE driver\n"); system_bus_speed = ide_system_bus_speed(); printk(KERN_INFO "ide: Assuming %dMHz system bus speed " diff --git a/drivers/ide/legacy/gayle.c b/drivers/ide/legacy/gayle.c index 9d3851d..b7d81090 100644 --- a/drivers/ide/legacy/gayle.c +++ b/drivers/ide/legacy/gayle.c @@ -94,7 +94,7 @@ static int gayle_ack_intr_a1200(ide_hwif_t *hwif) static void __init gayle_setup_ports(hw_regs_t *hw, unsigned long base, unsigned long ctl, unsigned long irq_port, - ide_ack_intr_t *ack_intr); + ide_ack_intr_t *ack_intr) { int i; diff --git a/drivers/ide/pci/cs5520.c b/drivers/ide/pci/cs5520.c index 0be1a82..1c163e4 100644 --- a/drivers/ide/pci/cs5520.c +++ b/drivers/ide/pci/cs5520.c @@ -147,11 +147,6 @@ static int __devinit cs5520_init_one(struct pci_dev *dev, const struct pci_devic /* We must not grab the entire device, it has 'ISA' space in its * BARS too and we will freak out other bits of the kernel - * - * pci_enable_device_bars() is going away. I replaced it with - * IO only enable for now but I'll need confirmation this is - * allright for that device. If not, it will need some kind of - * quirk. --BenH. */ if (pci_enable_device_io(dev)) { printk(KERN_WARNING "%s: Unable to enable 55x0.\n", d->name); diff --git a/drivers/ide/pci/pdc202xx_old.c b/drivers/ide/pci/pdc202xx_old.c index da43297..150422e 100644 --- a/drivers/ide/pci/pdc202xx_old.c +++ b/drivers/ide/pci/pdc202xx_old.c @@ -3,26 +3,6 @@ * Copyright (C) 2006-2007 MontaVista Software, Inc. * Copyright (C) 2007 Bartlomiej Zolnierkiewicz * - * Promise Ultra33 cards with BIOS v1.20 through 1.28 will need this - * compiled into the kernel if you have more than one card installed. - * Note that BIOS v1.29 is reported to fix the problem. 
Since this is - * safe chipset tuning, including this support is harmless - * - * Promise Ultra66 cards with BIOS v1.11 this - * compiled into the kernel if you have more than one card installed. - * - * Promise Ultra100 cards. - * - * The latest chipset code will support the following :: - * Three Ultra33 controllers and 12 drives. - * 8 are UDMA supported and 4 are limited to DMA mode 2 multi-word. - * The 8/4 ratio is a BIOS code limit by promise. - * - * UNLESS you enable "CONFIG_PDC202XX_BURST" - * - */ - -/* * Portions Copyright (C) 1999 Promise Technology, Inc. * Author: Frank Tiernan (frankt@promise.com) * Released under terms of General Public License @@ -344,7 +324,6 @@ static void __devinit init_dma_pdc202xx(ide_hwif_t *hwif, unsigned long dmabase) (primary_mode & 1) ? "MASTER" : "PCI", (secondary_mode & 1) ? "MASTER" : "PCI" ); -#ifdef CONFIG_PDC202XX_BURST if (!(udma_speed_flag & 1)) { printk(KERN_INFO "%s: FORCING BURST BIT 0x%02x->0x%02x ", hwif->cds->name, udma_speed_flag, @@ -352,7 +331,6 @@ static void __devinit init_dma_pdc202xx(ide_hwif_t *hwif, unsigned long dmabase) outb(udma_speed_flag | 1, dmabase | 0x1f); printk("%sACTIVE\n", (inb(dmabase | 0x1f) & 1) ? "" : "IN"); } -#endif /* CONFIG_PDC202XX_BURST */ ide_setup_dma(hwif, dmabase); } diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index edc057f..2928ef2 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -124,7 +124,7 @@ enum dm_raid1_error { struct mirror { struct mirror_set *ms; atomic_t error_count; - uint32_t error_type; + unsigned long error_type; struct dm_dev *dev; sector_t offset; }; diff --git a/drivers/memstick/host/tifm_ms.c b/drivers/memstick/host/tifm_ms.c index f55b71a..4fb2421 100644 --- a/drivers/memstick/host/tifm_ms.c +++ b/drivers/memstick/host/tifm_ms.c @@ -282,7 +282,7 @@ static int tifm_ms_issue_cmd(struct tifm_ms *host) writel(TIFM_MS_SYS_LATCH | readl(sock->addr + SOCK_MS_SYSTEM), - sock + SOCK_MS_SYSTEM); + sock->addr + SOCK_MS_SYSTEM); writel(0, sock->addr + SOCK_MS_DATA); dev_dbg(&sock->dev, "writing %x\n", 0); diff --git a/drivers/net/mlx4/alloc.c b/drivers/net/mlx4/alloc.c index 521dc03..75ef9d0 100644 --- a/drivers/net/mlx4/alloc.c +++ b/drivers/net/mlx4/alloc.c @@ -34,6 +34,7 @@ #include <linux/slab.h> #include <linux/bitmap.h> #include <linux/dma-mapping.h> +#include <linux/vmalloc.h> #include "mlx4.h" diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 238628d..d76d37b 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -1768,7 +1768,7 @@ static int parport_PS2_supported(struct parport *pb) } #ifdef CONFIG_PARPORT_PC_FIFO -static int __devinit parport_ECP_supported(struct parport *pb) +static int parport_ECP_supported(struct parport *pb) { int i; int config, configb; @@ -1992,7 +1992,7 @@ static int parport_ECPEPP_supported(struct parport *pb) /* Don't bother probing for modes we know we won't use. 
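The dm-raid1 hunk above widens error_type from uint32_t to unsigned long, presumably because the kernel's bit helpers (set_bit()/test_bit()) operate on unsigned long words; handing them a narrower field can touch bytes outside it, notably on 64-bit big-endian machines. A plain-C sketch of the constraint, with a simplified non-atomic stand-in for set_bit():

#include <limits.h>
#include <stdio.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

/* simplified, non-atomic stand-in for the kernel's set_bit() */
static void set_bit_ul(int nr, unsigned long *addr)
{
	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

struct mirror {
	unsigned long error_type;	/* was uint32_t: narrower than the API's word */
};

int main(void)
{
	struct mirror m = { 0 };

	set_bit_ul(1, &m.error_type);	/* safe now: the field is a full word */
	printf("error_type = %lx\n", m.error_type);
	return 0;
}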
*/ static int __devinit parport_PS2_supported(struct parport *pb) { return 0; } #ifdef CONFIG_PARPORT_PC_FIFO -static int __devinit parport_ECP_supported(struct parport *pb) { return 0; } +static int parport_ECP_supported(struct parport *pb) { return 0; } #endif static int __devinit parport_EPP_supported(struct parport *pb) { return 0; } static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;} diff --git a/fs/lockd/host.c b/fs/lockd/host.c index ca6b16f..f1ef49f 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -243,10 +243,18 @@ nlm_bind_host(struct nlm_host *host) .program = &nlm_program, .version = host->h_version, .authflavor = RPC_AUTH_UNIX, - .flags = (RPC_CLNT_CREATE_HARDRTRY | + .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_AUTOBIND), }; + /* + * lockd retries server side blocks automatically so we want + * those to be soft RPC calls. Client side calls need to be + * hard RPC tasks. + */ + if (!host->h_server) + args.flags |= RPC_CLNT_CREATE_HARDRTRY; + clnt = rpc_create(&args); if (!IS_ERR(clnt)) host->h_rpcclnt = clnt; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 2f4d8fa..fe9bdb4 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -763,11 +763,20 @@ callback: dprintk("lockd: GRANTing blocked lock.\n"); block->b_granted = 1; - /* Schedule next grant callback in 30 seconds */ - nlmsvc_insert_block(block, 30 * HZ); + /* keep block on the list, but don't reattempt until the RPC + * completes or the submission fails + */ + nlmsvc_insert_block(block, NLM_NEVER); + + /* Call the client -- use a soft RPC task since nlmsvc_retry_blocked + * will queue up a new one if this one times out + */ + error = nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, + &nlmsvc_grant_ops); - /* Call the client */ - nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops); + /* RPC submission failed, wait a bit and retry */ + if (error < 0) + nlmsvc_insert_block(block, 10 * HZ); } /* @@ -786,6 +795,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) dprintk("lockd: GRANT_MSG RPC callback\n"); + /* if the block is not on a list at this point then it has + * been invalidated. Don't try to requeue it. + * + * FIXME: it's possible that the block is removed from the list + * after this check but before the nlmsvc_insert_block. In that + * case it will be added back. Perhaps we need better locking + * for nlm_blocked? + */ + if (list_empty(&block->b_list)) + return; + /* Technically, we should down the file semaphore here. Since we * move the block towards the head of the queue only, no harm * can be done, though. */ @@ -171,7 +171,7 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe, * * Description: * This function returns a kernel virtual address mapping for the - * passed in @pipe_buffer. If @atomic is set, an atomic map is provided + * pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided * and the caller has to be careful not to fault before calling * the unmap function. * @@ -208,15 +208,15 @@ void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, } /** - * generic_pipe_buf_steal - attempt to take ownership of a @pipe_buffer + * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to attempt to steal * * Description: - * This function attempts to steal the @struct page attached to + * This function attempts to steal the &struct page attached to * @buf. 
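The svclock.c change above splits grant retries into two cases: while the async GRANTED_MSG call is in flight the block is parked with an effectively infinite timeout, and only a failed submission schedules a quick retry. A simplified sketch of that flow; the queue, names, and delays are stand-ins for lockd's internals:

#include <stdio.h>

#define NEVER		(~0UL)
#define RETRY_DELAY	10	/* "seconds", standing in for 10 * HZ */

struct block { unsigned long timeout; };

static void insert_block(struct block *b, unsigned long when)
{
	b->timeout = when;	/* the real code re-sorts a pending list */
}

static int async_grant_call(struct block *b)
{
	(void)b;
	return -1;	/* pretend submission failed, to show the fallback */
}

int main(void)
{
	struct block b;
	int error;

	insert_block(&b, NEVER);	/* don't reattempt while the RPC runs */
	error = async_grant_call(&b);
	if (error < 0)
		insert_block(&b, RETRY_DELAY);	/* submission failed: retry soon */
	printf("timeout=%lu\n", b.timeout);
	return 0;
}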
If successful, this function returns 0 and returns with * the page locked. The caller may then reuse the page for whatever - * he wishes, the typical use is insertion into a different file + * he wishes; the typical use is insertion into a different file * page cache. */ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, @@ -238,7 +238,7 @@ int generic_pipe_buf_steal(struct pipe_inode_info *pipe, } /** - * generic_pipe_buf_get - get a reference to a @struct pipe_buffer + * generic_pipe_buf_get - get a reference to a &struct pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to * diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 4e5c22c..376ef3e 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -505,7 +505,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) if (warn_count < 5) { warn_count++; printk(KERN_EMERG "smbfs is deprecated and will be removed" - "from the 2.6.27 kernel. Please migrate to cifs\n"); + " from the 2.6.27 kernel. Please migrate to cifs\n"); } if (!raw_data) diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index d721a1a..f855dcb 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -145,7 +145,7 @@ static bool udf_add_free_space(struct udf_sb_info *sbi, { struct logicalVolIntegrityDesc *lvid; - if (sbi->s_lvid_bh) + if (sbi->s_lvid_bh == NULL) return false; lvid = (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 4b44e23..8d8643ada 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -43,13 +43,13 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, struct fileIdentDesc *fi = NULL; struct fileIdentDesc cfi; int block, iblock; - loff_t nf_pos = filp->f_pos - 1; + loff_t nf_pos = (filp->f_pos - 1) << 2; int flen; char fname[UDF_NAME_LEN]; char *nameptr; uint16_t liu; uint8_t lfi; - loff_t size = (udf_ext0_offset(dir) + dir->i_size) >> 2; + loff_t size = udf_ext0_offset(dir) + dir->i_size; struct buffer_head *tmp, *bha[16]; kernel_lb_addr eloc; uint32_t elen; @@ -63,13 +63,13 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, return 0; if (nf_pos == 0) - nf_pos = (udf_ext0_offset(dir) >> 2); + nf_pos = udf_ext0_offset(dir); - fibh.soffset = fibh.eoffset = (nf_pos & ((dir->i_sb->s_blocksize - 1) >> 2)) << 2; + fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); iinfo = UDF_I(dir); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { fibh.sbh = fibh.ebh = NULL; - } else if (inode_bmap(dir, nf_pos >> (dir->i_sb->s_blocksize_bits - 2), + } else if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, &epos, &eloc, &elen, &offset) == (EXT_RECORDED_ALLOCATED >> 30)) { block = udf_get_lb_pblock(dir->i_sb, eloc, offset); if ((++offset << dir->i_sb->s_blocksize_bits) < elen) { @@ -111,7 +111,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, } while (nf_pos < size) { - filp->f_pos = nf_pos + 1; + filp->f_pos = (nf_pos >> 2) + 1; fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); @@ -178,7 +178,7 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, } } /* end while */ - filp->f_pos = nf_pos + 1; + filp->f_pos = (nf_pos >> 2) + 1; if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 35582fe..1f3da5b 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -1648,14 +1648,14 @@ xfs_qm_quotacheck_dqadjust( * Adjust the inode count and the block count to reflect this inode's * 
resource usage. */ - be64_add(&dqp->q_core.d_icount, 1); + be64_add_cpu(&dqp->q_core.d_icount, 1); dqp->q_res_icount++; if (nblks) { - be64_add(&dqp->q_core.d_bcount, nblks); + be64_add_cpu(&dqp->q_core.d_bcount, nblks); dqp->q_res_bcount += nblks; } if (rtblks) { - be64_add(&dqp->q_core.d_rtbcount, rtblks); + be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); dqp->q_res_rtbcount += rtblks; } diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c index 7de6874..f441f83 100644 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ b/fs/xfs/quota/xfs_trans_dquot.c @@ -421,13 +421,13 @@ xfs_trans_apply_dquot_deltas( (xfs_qcnt_t) -qtrx->qt_icount_delta); #endif if (totalbdelta) - be64_add(&d->d_bcount, (xfs_qcnt_t)totalbdelta); + be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); if (qtrx->qt_icount_delta) - be64_add(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); + be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); if (totalrtbdelta) - be64_add(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); + be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); /* * Get any default limits in use. diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index ea6aa60..bdbfbbe 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -592,7 +592,7 @@ xfs_alloc_ag_vextent( if (!(args->wasfromfl)) { agf = XFS_BUF_TO_AGF(args->agbp); - be32_add(&agf->agf_freeblks, -(args->len)); + be32_add_cpu(&agf->agf_freeblks, -(args->len)); xfs_trans_agblocks_delta(args->tp, -((long)(args->len))); args->pag->pagf_freeblks -= args->len; @@ -1720,7 +1720,7 @@ xfs_free_ag_extent( agf = XFS_BUF_TO_AGF(agbp); pag = &mp->m_perag[agno]; - be32_add(&agf->agf_freeblks, len); + be32_add_cpu(&agf->agf_freeblks, len); xfs_trans_agblocks_delta(tp, len); pag->pagf_freeblks += len; XFS_WANT_CORRUPTED_GOTO( @@ -2008,18 +2008,18 @@ xfs_alloc_get_freelist( * Get the block number and update the data structures. 
*/ bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]); - be32_add(&agf->agf_flfirst, 1); + be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) agf->agf_flfirst = 0; pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; - be32_add(&agf->agf_flcount, -1); + be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; if (btreeblk) { - be32_add(&agf->agf_btreeblks, 1); + be32_add_cpu(&agf->agf_btreeblks, 1); pag->pagf_btreeblks++; logflags |= XFS_AGF_BTREEBLKS; } @@ -2117,17 +2117,17 @@ xfs_alloc_put_freelist( be32_to_cpu(agf->agf_seqno), &agflbp))) return error; agfl = XFS_BUF_TO_AGFL(agflbp); - be32_add(&agf->agf_fllast, 1); + be32_add_cpu(&agf->agf_fllast, 1); if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) agf->agf_fllast = 0; pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; - be32_add(&agf->agf_flcount, 1); + be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT; if (btreeblk) { - be32_add(&agf->agf_btreeblks, -1); + be32_add_cpu(&agf->agf_btreeblks, -1); pag->pagf_btreeblks--; logflags |= XFS_AGF_BTREEBLKS; } diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 1603ce5..3ce2645 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -221,7 +221,7 @@ xfs_alloc_delrec( */ bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]); agf->agf_roots[cur->bc_btnum] = *lpp; - be32_add(&agf->agf_levels[cur->bc_btnum], -1); + be32_add_cpu(&agf->agf_levels[cur->bc_btnum], -1); mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--; /* * Put this buffer/block on the ag's freelist. @@ -1256,9 +1256,9 @@ xfs_alloc_lshift( /* * Bump and log left's numrecs, decrement and log right's numrecs. */ - be16_add(&left->bb_numrecs, 1); + be16_add_cpu(&left->bb_numrecs, 1); xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add(&right->bb_numrecs, -1); + be16_add_cpu(&right->bb_numrecs, -1); xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); /* * Slide the contents of right down one entry. @@ -1346,7 +1346,7 @@ xfs_alloc_newroot( agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp); agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno); - be32_add(&agf->agf_levels[cur->bc_btnum], 1); + be32_add_cpu(&agf->agf_levels[cur->bc_btnum], 1); seqno = be32_to_cpu(agf->agf_seqno); mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++; xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp, @@ -1558,9 +1558,9 @@ xfs_alloc_rshift( /* * Decrement and log left's numrecs, bump and log right's numrecs. */ - be16_add(&left->bb_numrecs, -1); + be16_add_cpu(&left->bb_numrecs, -1); xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); /* * Using a temporary cursor, update the parent key values of the @@ -1643,7 +1643,7 @@ xfs_alloc_split( */ if ((be16_to_cpu(left->bb_numrecs) & 1) && cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; /* * For non-leaf blocks, copy keys and addresses over to the new block. @@ -1689,7 +1689,7 @@ xfs_alloc_split( * Adjust numrecs, sibling pointers. 
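xfs_alloc_get_freelist()/xfs_alloc_put_freelist() above treat the AGFL as a ring: agf_flfirst and agf_fllast are advanced and wrapped back to 0 on reaching XFS_AGFL_SIZE(). Minimal ring-index sketch (size and contents are illustrative):

#include <stdio.h>

#define AGFL_SIZE 4	/* illustrative; the real size depends on geometry */

int main(void)
{
	unsigned int bno[AGFL_SIZE] = { 10, 11, 12, 13 };
	unsigned int first = 3;	/* head currently at the last slot */

	/* take the block at the head, then advance and wrap */
	unsigned int taken = bno[first];
	if (++first == AGFL_SIZE)
		first = 0;
	printf("took %u, head now %u\n", taken, first);	/* took 13, head 0 */
	return 0;
}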
*/ lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp)); - be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); + be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); right->bb_rightsib = left->bb_rightsib; left->bb_rightsib = cpu_to_be32(rbno); right->bb_leftsib = cpu_to_be32(lbno); diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index c483689..f9472a2 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -170,21 +170,6 @@ } \ } -static inline void be16_add(__be16 *a, __s16 b) -{ - *a = cpu_to_be16(be16_to_cpu(*a) + b); -} - -static inline void be32_add(__be32 *a, __s32 b) -{ - *a = cpu_to_be32(be32_to_cpu(*a) + b); -} - -static inline void be64_add(__be64 *a, __s64 b) -{ - *a = cpu_to_be64(be64_to_cpu(*a) + b); -} - /* * In directories inode numbers are stored as unaligned arrays of unsigned * 8bit integers on disk. diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index eb3815e..b08e2a2 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -317,7 +317,7 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) memcpy(sfe->nameval, args->name, args->namelen); memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); sf->hdr.count++; - be16_add(&sf->hdr.totsize, size); + be16_add_cpu(&sf->hdr.totsize, size); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); xfs_sbversion_add_attr2(mp, args->trans); @@ -363,7 +363,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) if (end != totsize) memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end); sf->hdr.count--; - be16_add(&sf->hdr.totsize, -size); + be16_add_cpu(&sf->hdr.totsize, -size); /* * Fix up the start offset of the attribute fork @@ -1133,7 +1133,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); } - be16_add(&hdr->count, 1); + be16_add_cpu(&hdr->count, 1); /* * Allocate space for the new string (at the end of the run). 
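The xfs_arch.h hunk above deletes the filesystem-local be16/32/64_add() helpers now that the generic be*_add_cpu() family covers the same job: read a big-endian on-disk field, add a CPU-order delta, write it back big-endian. A portable userspace sketch of the 32-bit case, assuming a little-endian host so the conversion is a plain byte swap:

#include <stdint.h>
#include <stdio.h>

static uint32_t swab32(uint32_t v)
{
	return (v >> 24) | ((v >> 8) & 0xff00) |
	       ((v << 8) & 0xff0000) | (v << 24);
}

/* demo assumes a little-endian host, so be32 <-> cpu is one swap */
static uint32_t be32_to_cpu(uint32_t v) { return swab32(v); }
static uint32_t cpu_to_be32(uint32_t v) { return swab32(v); }

static void be32_add_cpu(uint32_t *a, int32_t b)
{
	*a = cpu_to_be32(be32_to_cpu(*a) + b);
}

int main(void)
{
	uint32_t freeblks = cpu_to_be32(100);	/* on-disk representation */

	be32_add_cpu(&freeblks, -4);		/* e.g. allocate 4 blocks */
	printf("%u\n", be32_to_cpu(freeblks));	/* 96 */
	return 0;
}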
@@ -1147,7 +1147,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) mp->m_sb.sb_blocksize, NULL)); ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); ASSERT((be16_to_cpu(map->size) & 0x3) == 0); - be16_add(&map->size, + be16_add_cpu(&map->size, -xfs_attr_leaf_newentsize(args->namelen, args->valuelen, mp->m_sb.sb_blocksize, &tmp)); entry->nameidx = cpu_to_be16(be16_to_cpu(map->base) + @@ -1214,12 +1214,12 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) map = &hdr->freemap[0]; for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; map++, i++) { if (be16_to_cpu(map->base) == tmp) { - be16_add(&map->base, sizeof(xfs_attr_leaf_entry_t)); - be16_add(&map->size, + be16_add_cpu(&map->base, sizeof(xfs_attr_leaf_entry_t)); + be16_add_cpu(&map->size, -((int)sizeof(xfs_attr_leaf_entry_t))); } } - be16_add(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); + be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index)); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr))); return(0); @@ -1727,9 +1727,9 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp)); ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp)); if (be16_to_cpu(map->base) == tablesize) { - be16_add(&map->base, + be16_add_cpu(&map->base, -((int)sizeof(xfs_attr_leaf_entry_t))); - be16_add(&map->size, sizeof(xfs_attr_leaf_entry_t)); + be16_add_cpu(&map->size, sizeof(xfs_attr_leaf_entry_t)); } if ((be16_to_cpu(map->base) + be16_to_cpu(map->size)) @@ -1751,19 +1751,19 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) if ((before >= 0) || (after >= 0)) { if ((before >= 0) && (after >= 0)) { map = &hdr->freemap[before]; - be16_add(&map->size, entsize); - be16_add(&map->size, + be16_add_cpu(&map->size, entsize); + be16_add_cpu(&map->size, be16_to_cpu(hdr->freemap[after].size)); hdr->freemap[after].base = 0; hdr->freemap[after].size = 0; } else if (before >= 0) { map = &hdr->freemap[before]; - be16_add(&map->size, entsize); + be16_add_cpu(&map->size, entsize); } else { map = &hdr->freemap[after]; /* both on-disk, don't endian flip twice */ map->base = entry->nameidx; - be16_add(&map->size, entsize); + be16_add_cpu(&map->size, entsize); } } else { /* @@ -1788,7 +1788,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) * Compress the remaining entries and zero out the removed stuff. */ memset(XFS_ATTR_LEAF_NAME(leaf, args->index), 0, entsize); - be16_add(&hdr->usedbytes, -entsize); + be16_add_cpu(&hdr->usedbytes, -entsize); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index), entsize)); @@ -1796,7 +1796,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) tmp = (be16_to_cpu(hdr->count) - args->index) * sizeof(xfs_attr_leaf_entry_t); memmove((char *)entry, (char *)(entry+1), tmp); - be16_add(&hdr->count, -1); + be16_add_cpu(&hdr->count, -1); xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry))); entry = &leaf->entries[be16_to_cpu(hdr->count)]; @@ -2182,15 +2182,15 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, */ if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? 
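The xfs_attr_leaf_remove() freemap hunks above grow a free region by the removed entry's size and, where possible, merge it with an adjacent free region on either side. A simplified coalescing sketch; the real freemap is a small fixed array of {base, size} pairs:

#include <stdio.h>

struct freemap { unsigned short base, size; };

/* returns 1 if [base, base+size) was merged into the existing region */
static int coalesce(struct freemap *m, unsigned short base, unsigned short size)
{
	if (m->base + m->size == base) {	/* free space ends where we begin */
		m->size += size;
		return 1;
	}
	if (base + size == m->base) {		/* we end where free space begins */
		m->base = base;
		m->size += size;
		return 1;
	}
	return 0;
}

int main(void)
{
	struct freemap m = { .base = 100, .size = 16 };

	coalesce(&m, 116, 8);	/* entry freed immediately after the region */
	printf("base=%u size=%u\n", m.base, m.size);	/* base=100 size=24 */
	return 0;
}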
*/ memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); - be16_add(&hdr_s->usedbytes, -tmp); - be16_add(&hdr_s->count, -1); + be16_add_cpu(&hdr_s->usedbytes, -tmp); + be16_add_cpu(&hdr_s->count, -1); entry_d--; /* to compensate for ++ in loop hdr */ desti--; if ((start_s + i) < offset) result++; /* insertion index adjustment */ } else { #endif /* GROT */ - be16_add(&hdr_d->firstused, -tmp); + be16_add_cpu(&hdr_d->firstused, -tmp); /* both on-disk, don't endian flip twice */ entry_d->hashval = entry_s->hashval; /* both on-disk, don't endian flip twice */ @@ -2203,10 +2203,10 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, ASSERT(be16_to_cpu(entry_s->nameidx) + tmp <= XFS_LBSIZE(mp)); memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); - be16_add(&hdr_s->usedbytes, -tmp); - be16_add(&hdr_d->usedbytes, tmp); - be16_add(&hdr_s->count, -1); - be16_add(&hdr_d->count, 1); + be16_add_cpu(&hdr_s->usedbytes, -tmp); + be16_add_cpu(&hdr_d->usedbytes, tmp); + be16_add_cpu(&hdr_s->count, -1); + be16_add_cpu(&hdr_d->count, 1); tmp = be16_to_cpu(hdr_d->count) * sizeof(xfs_attr_leaf_entry_t) + sizeof(xfs_attr_leaf_hdr_t); @@ -2247,7 +2247,7 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, * Fill in the freemap information */ hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_attr_leaf_hdr_t)); - be16_add(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) * + be16_add_cpu(&hdr_d->freemap[0].base, be16_to_cpu(hdr_d->count) * sizeof(xfs_attr_leaf_entry_t)); hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) - be16_to_cpu(hdr_d->freemap[0].base)); diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index c4181d8..bd18987 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -631,7 +631,7 @@ xfs_bmbt_delrec( memcpy(lrp, rrp, numrrecs * sizeof(*lrp)); xfs_bmbt_log_recs(cur, lbp, numlrecs + 1, numlrecs + numrrecs); } - be16_add(&left->bb_numrecs, numrrecs); + be16_add_cpu(&left->bb_numrecs, numrrecs); left->bb_rightsib = right->bb_rightsib; xfs_bmbt_log_block(cur, lbp, XFS_BB_RIGHTSIB | XFS_BB_NUMRECS); if (be64_to_cpu(left->bb_rightsib) != NULLDFSBNO) { @@ -924,7 +924,7 @@ xfs_bmbt_killroot( xfs_iroot_realloc(ip, i, cur->bc_private.b.whichfork); block = ifp->if_broot; } - be16_add(&block->bb_numrecs, i); + be16_add_cpu(&block->bb_numrecs, i); ASSERT(block->bb_numrecs == cblock->bb_numrecs); kp = XFS_BMAP_KEY_IADDR(block, 1, cur); ckp = XFS_BMAP_KEY_IADDR(cblock, 1, cur); @@ -947,7 +947,7 @@ xfs_bmbt_killroot( XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(cur->bc_tp, cbp); cur->bc_bufs[level - 1] = NULL; - be16_add(&block->bb_level, -1); + be16_add_cpu(&block->bb_level, -1); xfs_trans_log_inode(cur->bc_tp, ip, XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork)); cur->bc_nlevels--; @@ -1401,9 +1401,9 @@ xfs_bmbt_rshift( key.br_startoff = cpu_to_be64(xfs_bmbt_disk_get_startoff(rrp)); rkp = &key; } - be16_add(&left->bb_numrecs, -1); + be16_add_cpu(&left->bb_numrecs, -1); xfs_bmbt_log_block(cur, lbp, XFS_BB_NUMRECS); - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); #ifdef DEBUG if (level > 0) xfs_btree_check_key(XFS_BTNUM_BMAP, rkp, rkp + 1); @@ -1535,7 +1535,7 @@ xfs_bmbt_split( right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2); if ((be16_to_cpu(left->bb_numrecs) & 1) && cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; if 
(level > 0) { lkp = XFS_BMAP_KEY_IADDR(left, i, cur); @@ -1562,7 +1562,7 @@ xfs_bmbt_split( xfs_bmbt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs)); *startoff = xfs_bmbt_disk_get_startoff(rrp); } - be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); + be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); right->bb_rightsib = left->bb_rightsib; left->bb_rightsib = cpu_to_be64(args.fsbno); right->bb_leftsib = cpu_to_be64(lbno); @@ -2240,7 +2240,7 @@ xfs_bmbt_newroot( bp = xfs_btree_get_bufl(args.mp, cur->bc_tp, args.fsbno, 0); cblock = XFS_BUF_TO_BMBT_BLOCK(bp); *cblock = *block; - be16_add(&block->bb_level, 1); + be16_add_cpu(&block->bb_level, 1); block->bb_numrecs = cpu_to_be16(1); cur->bc_nlevels++; cur->bc_ptrs[level + 1] = 1; diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 1b44684..021a8f7 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -511,12 +511,12 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, * Move the req'd B-tree elements from high in node1 to * low in node2. */ - be16_add(&node2->hdr.count, count); + be16_add_cpu(&node2->hdr.count, count); tmp = count * (uint)sizeof(xfs_da_node_entry_t); btree_s = &node1->btree[be16_to_cpu(node1->hdr.count) - count]; btree_d = &node2->btree[0]; memcpy(btree_d, btree_s, tmp); - be16_add(&node1->hdr.count, -count); + be16_add_cpu(&node1->hdr.count, -count); } else { /* * Move the req'd B-tree elements from low in node2 to @@ -527,7 +527,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, btree_s = &node2->btree[0]; btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)]; memcpy(btree_d, btree_s, tmp); - be16_add(&node1->hdr.count, count); + be16_add_cpu(&node1->hdr.count, count); xfs_da_log_buf(tp, blk1->bp, XFS_DA_LOGRANGE(node1, btree_d, tmp)); @@ -539,7 +539,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1, btree_s = &node2->btree[count]; btree_d = &node2->btree[0]; memmove(btree_d, btree_s, tmp); - be16_add(&node2->hdr.count, -count); + be16_add_cpu(&node2->hdr.count, -count); } /* @@ -604,7 +604,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk, btree->before = cpu_to_be32(newblk->blkno); xfs_da_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree))); - be16_add(&node->hdr.count, 1); + be16_add_cpu(&node->hdr.count, 1); xfs_da_log_buf(state->args->trans, oldblk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); @@ -959,7 +959,7 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk) memset((char *)btree, 0, sizeof(xfs_da_node_entry_t)); xfs_da_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, btree, sizeof(*btree))); - be16_add(&node->hdr.count, -1); + be16_add_cpu(&node->hdr.count, -1); xfs_da_log_buf(state->args->trans, drop_blk->bp, XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr))); @@ -1018,7 +1018,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, */ tmp = be16_to_cpu(drop_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t); memcpy(btree, &drop_node->btree[0], tmp); - be16_add(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); + be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count)); xfs_da_log_buf(tp, save_blk->bp, XFS_DA_LOGRANGE(save_node, &save_node->hdr, diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index a5f4f4f..fb5a556 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -271,7 +271,7 @@ 
xfs_dir2_block_addname( } lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1); lfloghigh -= be32_to_cpu(btp->stale) - 1; - be32_add(&btp->count, -(be32_to_cpu(btp->stale) - 1)); + be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); xfs_dir2_data_make_free(tp, bp, (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), @@ -326,7 +326,7 @@ xfs_dir2_block_addname( /* * Update the tail (entry count). */ - be32_add(&btp->count, 1); + be32_add_cpu(&btp->count, 1); /* * If we now need to rebuild the bestfree map, do so. * This needs to happen before the next call to use_free. @@ -387,7 +387,7 @@ xfs_dir2_block_addname( lfloglow = MIN(mid, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - be32_add(&btp->stale, -1); + be32_add_cpu(&btp->stale, -1); } /* * Point to the new data entry. @@ -767,7 +767,7 @@ xfs_dir2_block_removename( /* * Fix up the block tail. */ - be32_add(&btp->stale, 1); + be32_add_cpu(&btp->stale, 1); xfs_dir2_block_log_tail(tp, bp); /* * Remove the leaf entry by marking it stale. diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index d245269..fb8c9e0 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -587,7 +587,7 @@ xfs_dir2_data_make_free( /* * Fix up the new big freespace. */ - be16_add(&prevdup->length, len + be16_to_cpu(postdup->length)); + be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); @@ -621,7 +621,7 @@ xfs_dir2_data_make_free( */ else if (prevdup) { dfp = xfs_dir2_data_freefind(d, prevdup); - be16_add(&prevdup->length, len); + be16_add_cpu(&prevdup->length, len); *xfs_dir2_data_unused_tag_p(prevdup) = cpu_to_be16((char *)prevdup - (char *)d); xfs_dir2_data_log_unused(tp, bp, prevdup); diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 0ca0020..bc52b80 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -359,7 +359,7 @@ xfs_dir2_leaf_addname( bestsp--; memmove(&bestsp[0], &bestsp[1], be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0])); - be32_add(<p->bestcount, 1); + be32_add_cpu(<p->bestcount, 1); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); } @@ -445,7 +445,7 @@ xfs_dir2_leaf_addname( */ lfloglow = index; lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add(&leaf->hdr.count, 1); + be16_add_cpu(&leaf->hdr.count, 1); } /* * There are stale entries. @@ -523,7 +523,7 @@ xfs_dir2_leaf_addname( lfloglow = MIN(index, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - be16_add(&leaf->hdr.stale, -1); + be16_add_cpu(&leaf->hdr.stale, -1); } /* * Fill in the new leaf entry. @@ -626,7 +626,7 @@ xfs_dir2_leaf_compact( * Update and log the header, log the leaf entries. */ ASSERT(be16_to_cpu(leaf->hdr.stale) == from - to); - be16_add(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale))); + be16_add_cpu(&leaf->hdr.count, -(be16_to_cpu(leaf->hdr.stale))); leaf->hdr.stale = 0; xfs_dir2_leaf_log_header(args->trans, bp); if (loglow != -1) @@ -728,7 +728,7 @@ xfs_dir2_leaf_compact_x1( /* * Adjust the leaf header values. */ - be16_add(&leaf->hdr.count, -(from - to)); + be16_add_cpu(&leaf->hdr.count, -(from - to)); leaf->hdr.stale = cpu_to_be16(1); /* * Remember the low/high stale value only in the "right" @@ -1470,7 +1470,7 @@ xfs_dir2_leaf_removename( /* * We just mark the leaf entry stale by putting a null in it. 
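Several dir2 hunks here maintain paired count/stale fields: a delete just marks the leaf entry stale (as the comment above says) and bumps hdr.stale, and a later compaction squeezes the stale slots out, subtracting them from hdr.count in one step. Minimal sketch of that lazy-deletion idea, with simplified names:

#include <stdio.h>

#define NULLPTR 0xffffffffu	/* stand-in for XFS_DIR2_NULL_DATAPTR */

struct hdr { unsigned short count, stale; };

static void compact(struct hdr *h, unsigned int *ents)
{
	unsigned int to = 0;

	for (unsigned int from = 0; from < h->count; from++)
		if (ents[from] != NULLPTR)	/* keep only live entries */
			ents[to++] = ents[from];
	h->count -= h->stale;	/* mirrors be16_add_cpu(&hdr.count, -stale) */
	h->stale = 0;
}

int main(void)
{
	unsigned int ents[4] = { 1, NULLPTR, 3, NULLPTR };
	struct hdr h = { .count = 4, .stale = 2 };

	compact(&h, ents);
	printf("count=%u first=%u second=%u\n", h.count, ents[0], ents[1]);
	return 0;
}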
*/ - be16_add(&leaf->hdr.stale, 1); + be16_add_cpu(&leaf->hdr.stale, 1); xfs_dir2_leaf_log_header(tp, lbp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); xfs_dir2_leaf_log_ents(tp, lbp, index, index); @@ -1531,7 +1531,7 @@ xfs_dir2_leaf_removename( */ memmove(&bestsp[db - i], bestsp, (be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp)); - be32_add(<p->bestcount, -(db - i)); + be32_add_cpu(<p->bestcount, -(db - i)); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); } else @@ -1712,7 +1712,7 @@ xfs_dir2_leaf_trim_data( * Eliminate the last bests entry from the table. */ bestsp = xfs_dir2_leaf_bests_p(ltp); - be32_add(<p->bestcount, -1); + be32_add_cpu(<p->bestcount, -1); memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); xfs_dir2_leaf_log_tail(tp, lbp); xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index eb18e39..8dade71 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -254,7 +254,7 @@ xfs_dir2_leafn_add( (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep)); lfloglow = index; lfloghigh = be16_to_cpu(leaf->hdr.count); - be16_add(&leaf->hdr.count, 1); + be16_add_cpu(&leaf->hdr.count, 1); } /* * There are stale entries. We'll use one for the new entry. @@ -322,7 +322,7 @@ xfs_dir2_leafn_add( lfloglow = MIN(index, lfloglow); lfloghigh = MAX(highstale, lfloghigh); } - be16_add(&leaf->hdr.stale, -1); + be16_add_cpu(&leaf->hdr.stale, -1); } /* * Insert the new entry, log everything. @@ -697,10 +697,10 @@ xfs_dir2_leafn_moveents( /* * Update the headers and log them. */ - be16_add(&leaf_s->hdr.count, -(count)); - be16_add(&leaf_s->hdr.stale, -(stale)); - be16_add(&leaf_d->hdr.count, count); - be16_add(&leaf_d->hdr.stale, stale); + be16_add_cpu(&leaf_s->hdr.count, -(count)); + be16_add_cpu(&leaf_s->hdr.stale, -(stale)); + be16_add_cpu(&leaf_d->hdr.count, count); + be16_add_cpu(&leaf_d->hdr.stale, stale); xfs_dir2_leaf_log_header(tp, bp_s); xfs_dir2_leaf_log_header(tp, bp_d); xfs_dir2_leafn_check(args->dp, bp_s); @@ -885,7 +885,7 @@ xfs_dir2_leafn_remove( * Kill the leaf entry by marking it stale. * Log the leaf block changes. */ - be16_add(&leaf->hdr.stale, 1); + be16_add_cpu(&leaf->hdr.stale, 1); xfs_dir2_leaf_log_header(tp, bp); lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); xfs_dir2_leaf_log_ents(tp, bp, index, index); @@ -971,7 +971,7 @@ xfs_dir2_leafn_remove( /* * One less used entry in the free table. */ - be32_add(&free->hdr.nused, -1); + be32_add_cpu(&free->hdr.nused, -1); xfs_dir2_free_log_header(tp, fbp); /* * If this was the last entry in the table, we can @@ -1642,7 +1642,7 @@ xfs_dir2_node_addname_int( * (this should always be true) then update the header. 
*/ if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { - be32_add(&free->hdr.nused, 1); + be32_add_cpu(&free->hdr.nused, 1); xfs_dir2_free_log_header(tp, fbp); } /* diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b8de7f3..eadc159 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -318,7 +318,7 @@ xfs_growfs_data_private( } ASSERT(bp); agi = XFS_BUF_TO_AGI(bp); - be32_add(&agi->agi_length, new); + be32_add_cpu(&agi->agi_length, new); ASSERT(nagcount == oagcount || be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks); xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH); @@ -331,7 +331,7 @@ xfs_growfs_data_private( } ASSERT(bp); agf = XFS_BUF_TO_AGF(bp); - be32_add(&agf->agf_length, new); + be32_add_cpu(&agf->agf_length, new); ASSERT(be32_to_cpu(agf->agf_length) == be32_to_cpu(agi->agi_length)); xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 1409c2d..c5836b9 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -301,8 +301,8 @@ xfs_ialloc_ag_alloc( } xfs_trans_inode_alloc_buf(tp, fbuf); } - be32_add(&agi->agi_count, newlen); - be32_add(&agi->agi_freecount, newlen); + be32_add_cpu(&agi->agi_count, newlen); + be32_add_cpu(&agi->agi_freecount, newlen); agno = be32_to_cpu(agi->agi_seqno); down_read(&args.mp->m_peraglock); args.mp->m_perag[agno].pagi_freecount += newlen; @@ -885,7 +885,7 @@ nextag: if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) goto error0; - be32_add(&agi->agi_freecount, -1); + be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); mp->m_perag[tagno].pagi_freecount--; @@ -1065,8 +1065,8 @@ xfs_difree( * to be freed when the transaction is committed. */ ilen = XFS_IALLOC_INODES(mp); - be32_add(&agi->agi_count, -ilen); - be32_add(&agi->agi_freecount, -(ilen - 1)); + be32_add_cpu(&agi->agi_count, -ilen); + be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); mp->m_perag[agno].pagi_freecount -= ilen - 1; @@ -1095,7 +1095,7 @@ xfs_difree( /* * Change the inode free counts and log the ag/sb changes. */ - be32_add(&agi->agi_freecount, 1); + be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); down_read(&mp->m_peraglock); mp->m_perag[agno].pagi_freecount++; diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 8cdeeaf..e5310c90 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -189,7 +189,7 @@ xfs_inobt_delrec( */ bno = be32_to_cpu(agi->agi_root); agi->agi_root = *pp; - be32_add(&agi->agi_level, -1); + be32_add_cpu(&agi->agi_level, -1); /* * Free the block. */ @@ -1132,7 +1132,7 @@ xfs_inobt_lshift( /* * Bump and log left's numrecs, decrement and log right's numrecs. */ - be16_add(&left->bb_numrecs, 1); + be16_add_cpu(&left->bb_numrecs, 1); xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); #ifdef DEBUG if (level > 0) @@ -1140,7 +1140,7 @@ xfs_inobt_lshift( else xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp); #endif - be16_add(&right->bb_numrecs, -1); + be16_add_cpu(&right->bb_numrecs, -1); xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS); /* * Slide the contents of right down one entry. @@ -1232,7 +1232,7 @@ xfs_inobt_newroot( * Set the root data in the a.g. inode structure. 
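For the xlog_sync() hunk a little further below: the cycle word stamped into each sector is incremented, and incremented again if the result would collide with the log-header magic number, so ordinary data can never be mistaken for a header. Sketch of that skip-the-sentinel pattern (the magic value here is illustrative, not quoted from the XFS headers):

#include <stdio.h>

#define MAGIC 0xFEEDbabe	/* illustrative sentinel value */

static unsigned int next_cycle(unsigned int v)
{
	if (++v == MAGIC)
		v++;		/* never hand out the sentinel itself */
	return v;
}

int main(void)
{
	printf("%x\n", next_cycle(MAGIC - 1));	/* skips straight past MAGIC */
	return 0;
}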
*/ agi->agi_root = cpu_to_be32(args.agbno); - be32_add(&agi->agi_level, 1); + be32_add_cpu(&agi->agi_level, 1); xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); /* @@ -1426,9 +1426,9 @@ xfs_inobt_rshift( /* * Decrement and log left's numrecs, bump and log right's numrecs. */ - be16_add(&left->bb_numrecs, -1); + be16_add_cpu(&left->bb_numrecs, -1); xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS); - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); #ifdef DEBUG if (level > 0) xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1); @@ -1529,7 +1529,7 @@ xfs_inobt_split( */ if ((be16_to_cpu(left->bb_numrecs) & 1) && cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1) - be16_add(&right->bb_numrecs, 1); + be16_add_cpu(&right->bb_numrecs, 1); i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1; /* * For non-leaf blocks, copy keys and addresses over to the new block. @@ -1565,7 +1565,7 @@ xfs_inobt_split( * Find the left block number by looking in the buffer. * Adjust numrecs, sibling pointers. */ - be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); + be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs))); right->bb_rightsib = left->bb_rightsib; left->bb_rightsib = cpu_to_be32(args.agbno); right->bb_leftsib = cpu_to_be32(lbno); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b3ac380..a75edca 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -1509,9 +1509,9 @@ xlog_sync(xlog_t *log, * case, though. */ for (i = 0; i < split; i += BBSIZE) { - be32_add((__be32 *)dptr, 1); + be32_add_cpu((__be32 *)dptr, 1); if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM) - be32_add((__be32 *)dptr, 1); + be32_add_cpu((__be32 *)dptr, 1); dptr += BBSIZE; } @@ -1600,7 +1600,7 @@ xlog_state_finish_copy(xlog_t *log, { spin_lock(&log->l_icloglock); - be32_add(&iclog->ic_header.h_num_logops, record_cnt); + be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt); iclog->ic_offset += copy_bytes; spin_unlock(&log->l_icloglock); diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 71e4c8d..1403864 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -567,26 +567,26 @@ xfs_trans_apply_sb_deltas( */ if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) { if (tp->t_icount_delta) - be64_add(&sbp->sb_icount, tp->t_icount_delta); + be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta); if (tp->t_ifree_delta) - be64_add(&sbp->sb_ifree, tp->t_ifree_delta); + be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta); if (tp->t_fdblocks_delta) - be64_add(&sbp->sb_fdblocks, tp->t_fdblocks_delta); + be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta); if (tp->t_res_fdblocks_delta) - be64_add(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta); + be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta); } if (tp->t_frextents_delta) - be64_add(&sbp->sb_frextents, tp->t_frextents_delta); + be64_add_cpu(&sbp->sb_frextents, tp->t_frextents_delta); if (tp->t_res_frextents_delta) - be64_add(&sbp->sb_frextents, tp->t_res_frextents_delta); + be64_add_cpu(&sbp->sb_frextents, tp->t_res_frextents_delta); if (tp->t_dblocks_delta) { - be64_add(&sbp->sb_dblocks, tp->t_dblocks_delta); + be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta); whole = 1; } if (tp->t_agcount_delta) { - be32_add(&sbp->sb_agcount, tp->t_agcount_delta); + be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta); whole = 1; } if (tp->t_imaxpct_delta) { @@ -594,19 +594,19 @@ xfs_trans_apply_sb_deltas( whole = 1; } if (tp->t_rextsize_delta) { - 
be32_add(&sbp->sb_rextsize, tp->t_rextsize_delta); + be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta); whole = 1; } if (tp->t_rbmblocks_delta) { - be32_add(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta); + be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta); whole = 1; } if (tp->t_rblocks_delta) { - be64_add(&sbp->sb_rblocks, tp->t_rblocks_delta); + be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta); whole = 1; } if (tp->t_rextents_delta) { - be64_add(&sbp->sb_rextents, tp->t_rextents_delta); + be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta); whole = 1; } if (tp->t_rextslog_delta) { diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index 022a5fd..4839f2a 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -222,7 +222,7 @@ acpi_os_write_memory(acpi_physical_address address, u32 value, u32 width); */ acpi_status acpi_os_read_pci_configuration(struct acpi_pci_id *pci_id, - u32 reg, void *value, u32 width); + u32 reg, u32 *value, u32 width); acpi_status acpi_os_write_pci_configuration(struct acpi_pci_id *pci_id, diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 5d9d70c..342a2a0 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -30,19 +30,19 @@ /* Other architectures wishing to use this simple topology API should fill in the below functions as appropriate in their own <asm/topology.h> file. */ #ifndef cpu_to_node -#define cpu_to_node(cpu) (0) +#define cpu_to_node(cpu) ((void)(cpu),0) #endif #ifndef parent_node -#define parent_node(node) (0) +#define parent_node(node) ((void)(node),0) #endif #ifndef node_to_cpumask -#define node_to_cpumask(node) (cpu_online_map) +#define node_to_cpumask(node) ((void)node, cpu_online_map) #endif #ifndef node_to_first_cpu -#define node_to_first_cpu(node) (0) +#define node_to_first_cpu(node) ((void)(node),0) #endif #ifndef pcibus_to_node -#define pcibus_to_node(node) (-1) +#define pcibus_to_node(bus) ((void)(bus), -1) #endif #ifndef pcibus_to_cpumask diff --git a/include/asm-ia64/param.h b/include/asm-ia64/param.h index 49c62dd..0964c32 100644 --- a/include/asm-ia64/param.h +++ b/include/asm-ia64/param.h @@ -19,15 +19,7 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ #ifdef __KERNEL__ -# ifdef CONFIG_IA64_HP_SIM - /* - * Yeah, simulating stuff is slow, so let us catch some breath between - * timer interrupts... 
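The asm-generic/topology.h hunk above rewrites each stub as ((void)(arg), 0): the comma expression still yields the constant, but the argument is now evaluated, so it must be a valid expression and the variable it names counts as used. Small demonstration of the expansion difference:

#include <stdio.h>

#define cpu_to_node_old(cpu)	(0)		/* argument vanishes entirely */
#define cpu_to_node_new(cpu)	((void)(cpu), 0)	/* argument still evaluated */

int main(void)
{
	int cpu = 1;

	/*
	 * With the old form, a variable whose only use is the macro can
	 * trigger set-but-unused warnings, and a typo in the argument
	 * goes undiagnosed; the new form keeps both checks alive.
	 */
	printf("%d %d\n", cpu_to_node_old(cpu), cpu_to_node_new(cpu));
	return 0;
}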
- */ -# define HZ 32 -# else -# define HZ CONFIG_HZ -# endif +# define HZ CONFIG_HZ # define USER_HZ HZ # define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ #else diff --git a/include/asm-mn10300/highmem.h b/include/asm-mn10300/highmem.h index 383c0c4..5256854 100644 --- a/include/asm-mn10300/highmem.h +++ b/include/asm-mn10300/highmem.h @@ -42,8 +42,8 @@ extern void __init kmap_init(void); #define PKMAP_NR(virt) ((virt - PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) -extern unsigned long __fastcall kmap_high(struct page *page); -extern void __fastcall kunmap_high(struct page *page); +extern unsigned long kmap_high(struct page *page); +extern void kunmap_high(struct page *page); static inline unsigned long kmap(struct page *page) { diff --git a/include/asm-mn10300/linkage.h b/include/asm-mn10300/linkage.h index 29a32e4..dda3002 100644 --- a/include/asm-mn10300/linkage.h +++ b/include/asm-mn10300/linkage.h @@ -13,8 +13,6 @@ /* don't override anything */ #define asmlinkage -#define FASTCALL(x) x -#define fastcall #define __ALIGN .align 4,0xcb #define __ALIGN_STR ".align 4,0xcb" diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index 681dead..d743947 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -58,6 +58,7 @@ struct _fpstate { #define X86_FXSR_MAGIC 0x0000 +#ifdef __KERNEL__ struct sigcontext { unsigned short gs, __gsh; unsigned short fs, __fsh; @@ -82,6 +83,35 @@ struct sigcontext { unsigned long oldmask; unsigned long cr2; }; +#else /* __KERNEL__ */ +/* + * User-space might still rely on the old definition: + */ +struct sigcontext { + unsigned short gs, __gsh; + unsigned short fs, __fsh; + unsigned short es, __esh; + unsigned short ds, __dsh; + unsigned long edi; + unsigned long esi; + unsigned long ebp; + unsigned long esp; + unsigned long ebx; + unsigned long edx; + unsigned long ecx; + unsigned long eax; + unsigned long trapno; + unsigned long err; + unsigned long eip; + unsigned short cs, __csh; + unsigned long eflags; + unsigned long esp_at_signal; + unsigned short ss, __ssh; + struct _fpstate __user * fpstate; + unsigned long oldmask; + unsigned long cr2; +}; +#endif /* !__KERNEL__ */ #else /* __i386__ */ @@ -102,6 +132,7 @@ struct _fpstate { __u32 reserved2[24]; }; +#ifdef __KERNEL__ struct sigcontext { unsigned long r8; unsigned long r9; @@ -132,6 +163,41 @@ struct sigcontext { struct _fpstate __user *fpstate; /* zero when no FPU context */ unsigned long reserved1[8]; }; +#else /* __KERNEL__ */ +/* + * User-space might still rely on the old definition: + */ +struct sigcontext { + unsigned long r8; + unsigned long r9; + unsigned long r10; + unsigned long r11; + unsigned long r12; + unsigned long r13; + unsigned long r14; + unsigned long r15; + unsigned long rdi; + unsigned long rsi; + unsigned long rbp; + unsigned long rbx; + unsigned long rdx; + unsigned long rax; + unsigned long rcx; + unsigned long rsp; + unsigned long rip; + unsigned long eflags; /* RFLAGS */ + unsigned short cs; + unsigned short gs; + unsigned short fs; + unsigned short __pad0; + unsigned long err; + unsigned long trapno; + unsigned long oldmask; + unsigned long cr2; + struct _fpstate __user *fpstate; /* zero when no FPU context */ + unsigned long reserved1[8]; +}; +#endif /* !__KERNEL__ */ #endif /* !__i386__ */ diff --git a/include/linux/aio.h b/include/linux/aio.h index 7ef8de6..a9931e2 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -206,21 +206,21 @@ struct kioctx { /* prototypes */ 
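The linkage.h hunk above and the header cleanups that follow (aio.h, buffer_head.h, file.h, gfp.h, interrupt.h) drop the FASTCALL()/fastcall annotations. Historically, and reconstructed from memory rather than from this patch, i386 expanded them to a regparm(3) attribute so arguments travelled in registers, while every other architecture defined them away; with that distinction gone the wrappers are pure noise. A sketch of the old shape:

/* illustrative reconstruction, not copied from any kernel tree */
#ifdef __i386__
#define FASTCALL(x)	x __attribute__((regparm(3)))
#define fastcall	__attribute__((regparm(3)))
#else
#define FASTCALL(x)	x
#define fastcall
#endif

/* before the cleanup: extern void FASTCALL(raise_softirq(unsigned int nr)); */
extern void raise_softirq(unsigned int nr);	/* after: plain C, no wrapper */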
extern unsigned aio_max_size; -extern ssize_t FASTCALL(wait_on_sync_kiocb(struct kiocb *iocb)); -extern int FASTCALL(aio_put_req(struct kiocb *iocb)); -extern void FASTCALL(kick_iocb(struct kiocb *iocb)); -extern int FASTCALL(aio_complete(struct kiocb *iocb, long res, long res2)); -extern void FASTCALL(__put_ioctx(struct kioctx *ctx)); +extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb); +extern int aio_put_req(struct kiocb *iocb); +extern void kick_iocb(struct kiocb *iocb); +extern int aio_complete(struct kiocb *iocb, long res, long res2); +extern void __put_ioctx(struct kioctx *ctx); struct mm_struct; -extern void FASTCALL(exit_aio(struct mm_struct *mm)); +extern void exit_aio(struct mm_struct *mm); extern struct kioctx *lookup_ioctx(unsigned long ctx_id); -extern int FASTCALL(io_submit_one(struct kioctx *ctx, - struct iocb __user *user_iocb, struct iocb *iocb)); +extern int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, + struct iocb *iocb); /* semi private, but used by the 32bit emulations: */ struct kioctx *lookup_ioctx(unsigned long ctx_id); -int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, - struct iocb *iocb)); +int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, + struct iocb *iocb); #define get_ioctx(kioctx) do { \ BUG_ON(atomic_read(&(kioctx)->users) <= 0); \ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e98801f..932eb02 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -144,7 +144,7 @@ BUFFER_FNS(Unwritten, unwritten) * Declarations */ -void FASTCALL(mark_buffer_dirty(struct buffer_head *bh)); +void mark_buffer_dirty(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); @@ -185,8 +185,8 @@ struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); -void FASTCALL(unlock_buffer(struct buffer_head *bh)); -void FASTCALL(__lock_buffer(struct buffer_head *bh)); +void unlock_buffer(struct buffer_head *bh); +void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int submit_bh(int, struct buffer_head *); diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 228235c..ac6aad9 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -25,7 +25,7 @@ SUBSYS(ns) /* */ -#ifdef CONFIG_FAIR_CGROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED SUBSYS(cpu_cgroup) #endif diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index f8c9a27..0a26be3 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -26,8 +26,6 @@ extern nodemask_t cpuset_mems_allowed(struct task_struct *p); #define cpuset_current_mems_allowed (current->mems_allowed) void cpuset_init_current_mems_allowed(void); void cpuset_update_task_memory_state(void); -#define cpuset_nodes_subset_current_mems_allowed(nodes) \ - nodes_subset((nodes), current->mems_allowed) int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); extern int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask); @@ -103,7 +101,6 @@ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) #define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY]) static inline void 
cpuset_init_current_mems_allowed(void) {} static inline void cpuset_update_task_memory_state(void) {} -#define cpuset_nodes_subset_current_mems_allowed(nodes) (1) static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) { diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index b7558ec..25d62e6 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -70,8 +70,7 @@ static inline int is_multicast_ether_addr(const u8 *addr) } /** - * is_local_ether_addr - Determine if the Ethernet address is locally-assigned - * one (IEEE 802). + * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802). * @addr: Pointer to a six-byte array containing the Ethernet address * * Return true if the address is a local address. diff --git a/include/linux/file.h b/include/linux/file.h index 56023c7..7239baa 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -59,8 +59,8 @@ struct files_struct { extern struct kmem_cache *filp_cachep; -extern void FASTCALL(__fput(struct file *)); -extern void FASTCALL(fput(struct file *)); +extern void __fput(struct file *); +extern void fput(struct file *); struct file_operations; struct vfsmount; @@ -77,13 +77,13 @@ static inline void fput_light(struct file *file, int fput_needed) fput(file); } -extern struct file * FASTCALL(fget(unsigned int fd)); -extern struct file * FASTCALL(fget_light(unsigned int fd, int *fput_needed)); -extern void FASTCALL(set_close_on_exec(unsigned int fd, int flag)); +extern struct file *fget(unsigned int fd); +extern struct file *fget_light(unsigned int fd, int *fput_needed); +extern void set_close_on_exec(unsigned int fd, int flag); extern void put_filp(struct file *); extern int get_unused_fd(void); extern int get_unused_fd_flags(int flags); -extern void FASTCALL(put_unused_fd(unsigned int fd)); +extern void put_unused_fd(unsigned int fd); struct kmem_cache; extern int expand_files(struct files_struct *, int nr); @@ -110,12 +110,12 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in */ #define fcheck(fd) fcheck_files(current->files, fd) -extern void FASTCALL(fd_install(unsigned int fd, struct file * file)); +extern void fd_install(unsigned int fd, struct file *file); struct task_struct; struct files_struct *get_files_struct(struct task_struct *); -void FASTCALL(put_files_struct(struct files_struct *fs)); +void put_files_struct(struct files_struct *fs); void reset_files_struct(struct task_struct *, struct files_struct *); extern struct kmem_cache *files_cachep; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 0c6ce51..164be9d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -172,8 +172,7 @@ static inline void arch_free_page(struct page *page, int order) { } static inline void arch_alloc_page(struct page *page, int order) { } #endif -extern struct page * -FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *)); +extern struct page *__alloc_pages(gfp_t, unsigned int, struct zonelist *); static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) @@ -209,8 +208,8 @@ extern struct page *alloc_page_vma(gfp_t gfp_mask, #endif #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) -extern unsigned long FASTCALL(__get_free_pages(gfp_t gfp_mask, unsigned int order)); -extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask)); +extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); +extern unsigned long get_zeroed_page(gfp_t gfp_mask); #define 
__get_free_page(gfp_mask) \ __get_free_pages((gfp_mask),0) @@ -218,10 +217,10 @@ extern unsigned long FASTCALL(get_zeroed_page(gfp_t gfp_mask)); #define __get_dma_pages(gfp_mask, order) \ __get_free_pages((gfp_mask) | GFP_DMA,(order)) -extern void FASTCALL(__free_pages(struct page *page, unsigned int order)); -extern void FASTCALL(free_pages(unsigned long addr, unsigned int order)); -extern void FASTCALL(free_hot_page(struct page *page)); -extern void FASTCALL(free_cold_page(struct page *page)); +extern void __free_pages(struct page *page, unsigned int order); +extern void free_pages(unsigned long addr, unsigned int order); +extern void free_hot_page(struct page *page); +extern void free_cold_page(struct page *page); #define __free_page(page) __free_pages((page), 0) #define free_page(addr) free_pages((addr),0) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 7ca198b..addca4c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -33,8 +33,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to); void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed); extern unsigned long max_huge_pages; +extern unsigned long sysctl_overcommit_huge_pages; extern unsigned long hugepages_treat_as_movable; -extern unsigned long nr_overcommit_huge_pages; extern const unsigned long hugetlb_zero, hugetlb_infinity; extern int sysctl_hugetlb_shm_group; diff --git a/include/linux/ide.h b/include/linux/ide.h index acec99d..a3b69c1 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -906,6 +906,8 @@ enum { IDE_TFLAG_IN_DEVICE, /* force 16-bit I/O operations */ IDE_TFLAG_IO_16BIT = (1 << 30), + /* ide_task_t was allocated using kmalloc() */ + IDE_TFLAG_DYN = (1 << 31), }; struct ide_taskfile { @@ -998,8 +1000,7 @@ extern int __ide_pci_register_driver(struct pci_driver *driver, struct module *o void ide_pci_setup_ports(struct pci_dev *, const struct ide_port_info *, int, u8 *); void ide_setup_pci_noise(struct pci_dev *, const struct ide_port_info *); -/* FIXME: palm_bk3710 uses BLK_DEV_IDEDMA_PCI without BLK_DEV_IDEPCI! 
*/ -#if defined(CONFIG_BLK_DEV_IDEPCI) && defined(CONFIG_BLK_DEV_IDEDMA_PCI) +#ifdef CONFIG_BLK_DEV_IDEDMA_PCI void ide_hwif_setup_dma(ide_hwif_t *, const struct ide_port_info *); #else static inline void ide_hwif_setup_dma(ide_hwif_t *hwif, @@ -1146,7 +1147,7 @@ ide_startstop_t ide_dma_intr(ide_drive_t *); int ide_build_sglist(ide_drive_t *, struct request *); void ide_destroy_dmatable(ide_drive_t *); -#ifdef CONFIG_BLK_DEV_IDEDMA_PCI +#ifdef CONFIG_BLK_DEV_IDEDMA_SFF extern int ide_build_dmatable(ide_drive_t *, struct request *); extern int ide_release_dma(ide_hwif_t *); extern void ide_setup_dma(ide_hwif_t *, unsigned long); @@ -1157,7 +1158,7 @@ extern void ide_dma_start(ide_drive_t *); extern int __ide_dma_end(ide_drive_t *); extern void ide_dma_lost_irq(ide_drive_t *); extern void ide_dma_timeout(ide_drive_t *); -#endif /* CONFIG_BLK_DEV_IDEDMA_PCI */ +#endif /* CONFIG_BLK_DEV_IDEDMA_SFF */ #else static inline int ide_id_dma_bug(ide_drive_t *drive) { return 0; } @@ -1171,7 +1172,7 @@ static inline int ide_set_dma(ide_drive_t *drive) { return 1; } static inline void ide_check_dma_crc(ide_drive_t *drive) { ; } #endif /* CONFIG_BLK_DEV_IDEDMA */ -#ifndef CONFIG_BLK_DEV_IDEDMA_PCI +#ifndef CONFIG_BLK_DEV_IDEDMA_SFF static inline void ide_release_dma(ide_hwif_t *drive) {;} #endif @@ -1294,7 +1295,7 @@ static inline void ide_dump_identify(u8 *id) static inline int hwif_to_node(ide_hwif_t *hwif) { struct pci_dev *dev = to_pci_dev(hwif->dev); - return dev ? pcibus_to_node(dev->bus) : -1; + return hwif->dev ? pcibus_to_node(dev->bus) : -1; } static inline ide_drive_t *ide_get_paired_drive(ide_drive_t *drive) diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index dea7598..f8ab4ce 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -273,8 +273,8 @@ asmlinkage void do_softirq(void); extern void open_softirq(int nr, void (*action)(struct softirq_action*), void *data); extern void softirq_init(void); #define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) -extern void FASTCALL(raise_softirq_irqoff(unsigned int nr)); -extern void FASTCALL(raise_softirq(unsigned int nr)); +extern void raise_softirq_irqoff(unsigned int nr); +extern void raise_softirq(unsigned int nr); /* Tasklets --- multithreaded analogue of BHs. @@ -341,7 +341,7 @@ static inline void tasklet_unlock_wait(struct tasklet_struct *t) #define tasklet_unlock(t) do { } while (0) #endif -extern void FASTCALL(__tasklet_schedule(struct tasklet_struct *t)); +extern void __tasklet_schedule(struct tasklet_struct *t); static inline void tasklet_schedule(struct tasklet_struct *t) { @@ -349,7 +349,7 @@ static inline void tasklet_schedule(struct tasklet_struct *t) __tasklet_schedule(t); } -extern void FASTCALL(__tasklet_hi_schedule(struct tasklet_struct *t)); +extern void __tasklet_hi_schedule(struct tasklet_struct *t); static inline void tasklet_hi_schedule(struct tasklet_struct *t) { diff --git a/include/linux/irq.h b/include/linux/irq.h index bfd9efb..176e5e7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -285,7 +285,6 @@ extern void handle_bad_irq(unsigned int irq, struct irq_desc *desc); /* * Monolithic do_IRQ implementation. 
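The FASTCALL()/fastcall removals running through these headers are mechanical: the annotation only ever changed code generation on i386, where it forced register-based argument passing, while the generic fallback deleted from <linux/linkage.h> just below expanded to nothing. For orientation, a sketch of the two definitions; the i386 variant is quoted from memory and should be treated as approximate:

/* i386 <asm/linkage.h> (approximate): pass up to three args in registers */
#define FASTCALL(x)	x __attribute__((regparm(3)))
#define fastcall	__attribute__((regparm(3)))

/* Generic fallback, removed from <linux/linkage.h> below: a no-op */
#ifndef FASTCALL
#define FASTCALL(x)	x
#define fastcall
#endif

Since i386 is now compiled with -mregparm=3 globally, even the i386 definitions had become redundant, so the annotations can be dropped without changing any calling convention.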
- * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly) */ #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ extern unsigned int __do_IRQ(unsigned int irq); diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 3faf599..0592936 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -73,9 +73,4 @@ #define ATTRIB_NORET __attribute__((noreturn)) #define NORET_AND noreturn, -#ifndef FASTCALL -#define FASTCALL(x) x -#define fastcall -#endif - #endif diff --git a/include/linux/marker.h b/include/linux/marker.h index 5f36cf9..5df879d 100644 --- a/include/linux/marker.h +++ b/include/linux/marker.h @@ -19,16 +19,23 @@ struct marker; /** * marker_probe_func - Type of a marker probe function - * @mdata: pointer of type struct marker - * @private_data: caller site private data + * @probe_private: probe private data + * @call_private: call site private data * @fmt: format string - * @...: variable argument list + * @args: variable argument list pointer. Use a pointer to overcome C's + * inability to pass this around as a pointer in a portable manner in + * the callee otherwise. * * Type of marker probe functions. They receive the mdata and need to parse the * format string to recover the variable argument list. */ -typedef void marker_probe_func(const struct marker *mdata, - void *private_data, const char *fmt, ...); +typedef void marker_probe_func(void *probe_private, void *call_private, + const char *fmt, va_list *args); + +struct marker_probe_closure { + marker_probe_func *func; /* Callback */ + void *probe_private; /* Private probe data */ +}; struct marker { const char *name; /* Marker name */ @@ -36,8 +43,11 @@ struct marker { * variable argument list. */ char state; /* Marker state. */ - marker_probe_func *call;/* Probe handler function pointer */ - void *private; /* Private probe data */ + char ptype; /* probe type : 0 : single, 1 : multi */ + void (*call)(const struct marker *mdata, /* Probe wrapper */ + void *call_private, const char *fmt, ...); + struct marker_probe_closure single; + struct marker_probe_closure *multi; } __attribute__((aligned(8))); #ifdef CONFIG_MARKERS @@ -49,35 +59,31 @@ struct marker { * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. */ -#define __trace_mark(name, call_data, format, args...) \ +#define __trace_mark(name, call_private, format, args...) \ do { \ - static const char __mstrtab_name_##name[] \ - __attribute__((section("__markers_strings"))) \ - = #name; \ - static const char __mstrtab_format_##name[] \ + static const char __mstrtab_##name[] \ __attribute__((section("__markers_strings"))) \ - = format; \ + = #name "\0" format; \ static struct marker __mark_##name \ __attribute__((section("__markers"), aligned(8))) = \ - { __mstrtab_name_##name, __mstrtab_format_##name, \ - 0, __mark_empty_function, NULL }; \ + { __mstrtab_##name, &__mstrtab_##name[sizeof(#name)], \ + 0, 0, marker_probe_cb, \ + { __mark_empty_function, NULL}, NULL }; \ __mark_check_format(format, ## args); \ if (unlikely(__mark_##name.state)) { \ - preempt_disable(); \ (*__mark_##name.call) \ - (&__mark_##name, call_data, \ + (&__mark_##name, call_private, \ format, ## args); \ - preempt_enable(); \ } \ } while (0) extern void marker_update_probe_range(struct marker *begin, - struct marker *end, struct module *probe_module, int *refcount); + struct marker *end); #else /* !CONFIG_MARKERS */ -#define __trace_mark(name, call_data, format, args...) 
\ +#define __trace_mark(name, call_private, format, args...) \ __mark_check_format(format, ## args) static inline void marker_update_probe_range(struct marker *begin, - struct marker *end, struct module *probe_module, int *refcount) + struct marker *end) { } #endif /* CONFIG_MARKERS */ @@ -92,8 +98,6 @@ static inline void marker_update_probe_range(struct marker *begin, #define trace_mark(name, format, args...) \ __trace_mark(name, NULL, format, ## args) -#define MARK_MAX_FORMAT_LEN 1024 - /** * MARK_NOARGS - Format string for a marker with no argument. */ @@ -106,24 +110,30 @@ static inline void __printf(1, 2) __mark_check_format(const char *fmt, ...) extern marker_probe_func __mark_empty_function; +extern void marker_probe_cb(const struct marker *mdata, + void *call_private, const char *fmt, ...); +extern void marker_probe_cb_noarg(const struct marker *mdata, + void *call_private, const char *fmt, ...); + /* * Connect a probe to a marker. * private data pointer must be a valid allocated memory address, or NULL. */ extern int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *private); + marker_probe_func *probe, void *probe_private); /* * Returns the private data given to marker_probe_register. */ -extern void *marker_probe_unregister(const char *name); +extern int marker_probe_unregister(const char *name, + marker_probe_func *probe, void *probe_private); /* * Unregister a marker by providing the registered private data. */ -extern void *marker_probe_unregister_private_data(void *private); +extern int marker_probe_unregister_private_data(marker_probe_func *probe, + void *probe_private); -extern int marker_arm(const char *name); -extern int marker_disarm(const char *name); -extern void *marker_get_private_data(const char *name); +extern void *marker_get_private_data(const char *name, marker_probe_func *probe, + int num); #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index e8abb38..26c7124 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -786,7 +786,7 @@ int __set_page_dirty_nobuffers(struct page *page); int __set_page_dirty_no_writeback(struct page *page); int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page); -int FASTCALL(set_page_dirty(struct page *page)); +int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); @@ -829,7 +829,7 @@ extern void unregister_shrinker(struct shrinker *); int vma_wants_writenotify(struct vm_area_struct *vma); -extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)); +extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl); #ifdef __PAGETABLE_PUD_FOLDED static inline int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, diff --git a/include/linux/module.h b/include/linux/module.h index ac28e87..330bec0 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -465,7 +465,7 @@ int unregister_module_notifier(struct notifier_block * nb); extern void print_modules(void); -extern void module_update_markers(struct module *probe_module, int *refcount); +extern void module_update_markers(void); #else /* !CONFIG_MODULES... 
*/ #define EXPORT_SYMBOL(sym) diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 8126e55..ec62438 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -62,6 +62,16 @@ struct kparam_array void *elem; }; +/* On alpha, ia64 and ppc64 relocations to global data cannot go into + read-only sections (which is part of respective UNIX ABI on these + platforms). So 'const' makes no sense and even causes compile failures + with some compilers. */ +#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64) +#define __moduleparam_const +#else +#define __moduleparam_const const +#endif + /* This is the fundamental function for registering boot/module parameters. perm sets the visibility in sysfs: 000 means it's not there, read bits mean it's readable, write bits mean it's @@ -71,7 +81,7 @@ struct kparam_array static int __param_perm_check_##name __attribute__((unused)) = \ BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)); \ static const char __param_str_##name[] = prefix #name; \ - static struct kernel_param const __param_##name \ + static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ = { __param_str_##name, perm, set, get, { arg } } diff --git a/include/linux/mutex-debug.h b/include/linux/mutex-debug.h index 2537285..731d77d 100644 --- a/include/linux/mutex-debug.h +++ b/include/linux/mutex-debug.h @@ -18,6 +18,6 @@ do { \ __mutex_init((mutex), #mutex, &__key); \ } while (0) -extern void FASTCALL(mutex_destroy(struct mutex *lock)); +extern void mutex_destroy(struct mutex *lock); #endif diff --git a/include/linux/namei.h b/include/linux/namei.h index 4cb4f8d..c13e411 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -62,13 +62,13 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_ACCESS (0x0400) #define LOOKUP_CHDIR (0x0800) -extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); -extern int FASTCALL(__user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *)); +extern int __user_walk(const char __user *, unsigned, struct nameidata *); +extern int __user_walk_fd(int dfd, const char __user *, unsigned, struct nameidata *); #define user_path_walk(name,nd) \ __user_walk_fd(AT_FDCWD, name, LOOKUP_FOLLOW, nd) #define user_path_walk_link(name,nd) \ __user_walk_fd(AT_FDCWD, name, 0, nd) -extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *)); +extern int path_lookup(const char *, unsigned, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct nameidata *); extern void path_release(struct nameidata *); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 047d432..7128a02 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -322,7 +322,7 @@ enum NAPI_STATE_DISABLE, /* Disable pending */ }; -extern void FASTCALL(__napi_schedule(struct napi_struct *n)); +extern void __napi_schedule(struct napi_struct *n); static inline int napi_disable_pending(struct napi_struct *n) { diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4b62a10..d2fca80 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -156,10 +156,10 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); } -extern void FASTCALL(__lock_page(struct page *page)); -extern int 
FASTCALL(__lock_page_killable(struct page *page)); -extern void FASTCALL(__lock_page_nosync(struct page *page)); -extern void FASTCALL(unlock_page(struct page *page)); +extern void __lock_page(struct page *page); +extern int __lock_page_killable(struct page *page); +extern void __lock_page_nosync(struct page *page); +extern void unlock_page(struct page *page); /* * lock_page may only be called if we have the page's inode pinned. @@ -199,7 +199,7 @@ static inline void lock_page_nosync(struct page *page) * This is exported only for wait_on_page_locked/wait_on_page_writeback. * Never use this directly! */ -extern void FASTCALL(wait_on_page_bit(struct page *page, int bit_nr)); +extern void wait_on_page_bit(struct page *page, int bit_nr); /* * Wait for a page to be unlocked. diff --git a/include/linux/pid.h b/include/linux/pid.h index f84d532..c798081 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -79,10 +79,9 @@ static inline struct pid *get_pid(struct pid *pid) return pid; } -extern void FASTCALL(put_pid(struct pid *pid)); -extern struct task_struct *FASTCALL(pid_task(struct pid *pid, enum pid_type)); -extern struct task_struct *FASTCALL(get_pid_task(struct pid *pid, - enum pid_type)); +extern void put_pid(struct pid *pid); +extern struct task_struct *pid_task(struct pid *pid, enum pid_type); +extern struct task_struct *get_pid_task(struct pid *pid, enum pid_type); extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); @@ -90,11 +89,11 @@ extern struct pid *get_task_pid(struct task_struct *task, enum pid_type type); * attach_pid() and detach_pid() must be called with the tasklist_lock * write-held. */ -extern int FASTCALL(attach_pid(struct task_struct *task, - enum pid_type type, struct pid *pid)); -extern void FASTCALL(detach_pid(struct task_struct *task, enum pid_type)); -extern void FASTCALL(transfer_pid(struct task_struct *old, - struct task_struct *new, enum pid_type)); +extern int attach_pid(struct task_struct *task, enum pid_type type, + struct pid *pid); +extern void detach_pid(struct task_struct *task, enum pid_type); +extern void transfer_pid(struct task_struct *old, struct task_struct *new, + enum pid_type); struct pid_namespace; extern struct pid_namespace init_pid_ns; @@ -109,7 +108,7 @@ extern struct pid_namespace init_pid_ns; * * see also find_task_by_pid() set in include/linux/sched.h */ -extern struct pid *FASTCALL(find_pid_ns(int nr, struct pid_namespace *ns)); +extern struct pid *find_pid_ns(int nr, struct pid_namespace *ns); extern struct pid *find_vpid(int nr); extern struct pid *find_pid(int nr); @@ -121,7 +120,7 @@ extern struct pid *find_ge_pid(int nr, struct pid_namespace *); int next_pidmap(struct pid_namespace *pid_ns, int last); extern struct pid *alloc_pid(struct pid_namespace *ns); -extern void FASTCALL(free_pid(struct pid *pid)); +extern void free_pid(struct pid *pid); /* * the helpers to get the pid's id seen from different namespaces diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index 813cee1..6c3c0f6 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -60,14 +60,14 @@ do { \ __init_rwsem((sem), #sem, &__key); \ } while (0) -extern void FASTCALL(__down_read(struct rw_semaphore *sem)); -extern int FASTCALL(__down_read_trylock(struct rw_semaphore *sem)); -extern void FASTCALL(__down_write(struct rw_semaphore *sem)); -extern void FASTCALL(__down_write_nested(struct rw_semaphore *sem, int subclass)); -extern int FASTCALL(__down_write_trylock(struct rw_semaphore 
*sem)); -extern void FASTCALL(__up_read(struct rw_semaphore *sem)); -extern void FASTCALL(__up_write(struct rw_semaphore *sem)); -extern void FASTCALL(__downgrade_write(struct rw_semaphore *sem)); +extern void __down_read(struct rw_semaphore *sem); +extern int __down_read_trylock(struct rw_semaphore *sem); +extern void __down_write(struct rw_semaphore *sem); +extern void __down_write_nested(struct rw_semaphore *sem, int subclass); +extern int __down_write_trylock(struct rw_semaphore *sem); +extern void __up_read(struct rw_semaphore *sem); +extern void __up_write(struct rw_semaphore *sem); +extern void __downgrade_write(struct rw_semaphore *sem); static inline int rwsem_is_locked(struct rw_semaphore *sem) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 00e1441..e217d18 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -323,7 +323,7 @@ extern char __sched_text_start[], __sched_text_end[]; extern int in_sched_functions(unsigned long addr); #define MAX_SCHEDULE_TIMEOUT LONG_MAX -extern signed long FASTCALL(schedule_timeout(signed long timeout)); +extern signed long schedule_timeout(signed long timeout); extern signed long schedule_timeout_interruptible(signed long timeout); extern signed long schedule_timeout_killable(signed long timeout); extern signed long schedule_timeout_uninterruptible(signed long timeout); @@ -590,7 +590,7 @@ struct user_struct { struct hlist_node uidhash_node; uid_t uid; -#ifdef CONFIG_FAIR_USER_SCHED +#ifdef CONFIG_USER_SCHED struct task_group *tg; #ifdef CONFIG_SYSFS struct kobject kobj; @@ -973,7 +973,7 @@ struct sched_rt_entity { unsigned long timeout; int nr_cpus_allowed; -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ struct rt_rq *rt_rq; @@ -1541,8 +1541,6 @@ extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_rt_period; -extern unsigned int sysctl_sched_rt_ratio; #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) extern unsigned int sysctl_sched_min_bal_int_shares; extern unsigned int sysctl_sched_max_bal_int_shares; @@ -1552,6 +1550,8 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos); #endif +extern unsigned int sysctl_sched_rt_period; +extern int sysctl_sched_rt_runtime; extern unsigned int sysctl_sched_compat_yield; @@ -1648,10 +1648,10 @@ extern void release_uids(struct user_namespace *ns); extern void do_timer(unsigned long ticks); -extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state)); -extern int FASTCALL(wake_up_process(struct task_struct * tsk)); -extern void FASTCALL(wake_up_new_task(struct task_struct * tsk, - unsigned long clone_flags)); +extern int wake_up_state(struct task_struct *tsk, unsigned int state); +extern int wake_up_process(struct task_struct *tsk); +extern void wake_up_new_task(struct task_struct *tsk, + unsigned long clone_flags); #ifdef CONFIG_SMP extern void kick_process(struct task_struct *tsk); #else @@ -1741,7 +1741,7 @@ static inline int sas_ss_flags(unsigned long sp) extern struct mm_struct * mm_alloc(void); /* mmdrop drops the mm and the page tables */ -extern void FASTCALL(__mmdrop(struct mm_struct *)); +extern void __mmdrop(struct mm_struct *); static inline void mmdrop(struct mm_struct * mm) { if 
(unlikely(atomic_dec_and_test(&mm->mm_count))) @@ -1925,7 +1925,7 @@ static inline int signal_pending(struct task_struct *p) return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING)); } -extern int FASTCALL(__fatal_signal_pending(struct task_struct *p)); +extern int __fatal_signal_pending(struct task_struct *p); static inline int fatal_signal_pending(struct task_struct *p) { @@ -2027,16 +2027,22 @@ extern int sched_mc_power_savings, sched_smt_power_savings; extern void normalize_rt_tasks(void); -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_GROUP_SCHED extern struct task_group init_task_group; extern struct task_group *sched_create_group(void); extern void sched_destroy_group(struct task_group *tg); extern void sched_move_task(struct task_struct *tsk); +#ifdef CONFIG_FAIR_GROUP_SCHED extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); extern unsigned long sched_group_shares(struct task_group *tg); - +#endif +#ifdef CONFIG_RT_GROUP_SCHED +extern int sched_group_set_rt_runtime(struct task_group *tg, + long rt_runtime_us); +extern long sched_group_rt_runtime(struct task_group *tg); +#endif #endif #ifdef CONFIG_TASK_XACCT diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 1a0b6cf..289942f 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -149,6 +149,8 @@ /* Freescale ColdFire */ #define PORT_MCF 78 +#define PORT_SC26XX 79 + /* MN10300 on-chip UART numbers */ #define PORT_MN10300 80 diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 64c7710..64c9755 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -409,16 +409,13 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t); * for all cases without actually generating the checksum, so we just use a * static value. 
*/ -static inline void -svc_reserve_auth(struct svc_rqst *rqstp, int space) +static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space) { - int added_space = 0; + int added_space = 0; - switch(rqstp->rq_authop->flavour) { - case RPC_AUTH_GSS: - added_space = RPC_MAX_AUTH_SIZE; - } - return svc_reserve(rqstp, space + added_space); + if (rqstp->rq_authop->flavour) + added_space = RPC_MAX_AUTH_SIZE; + svc_reserve(rqstp, space + added_space); } #endif /* SUNRPC_SVC_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index 3ca5c4b..878459a 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -171,10 +171,10 @@ extern unsigned int nr_free_pagecache_pages(void); /* linux/mm/swap.c */ -extern void FASTCALL(lru_cache_add(struct page *)); -extern void FASTCALL(lru_cache_add_active(struct page *)); -extern void FASTCALL(activate_page(struct page *)); -extern void FASTCALL(mark_page_accessed(struct page *)); +extern void lru_cache_add(struct page *); +extern void lru_cache_add_active(struct page *); +extern void activate_page(struct page *); +extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern int lru_add_drain_all(void); extern int rotate_reclaimable_page(struct page *page); diff --git a/include/linux/wait.h b/include/linux/wait.h index 33a2aa9..0081147 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -117,9 +117,9 @@ static inline int waitqueue_active(wait_queue_head_t *q) */ #define is_sync_wait(wait) (!(wait) || ((wait)->private)) -extern void FASTCALL(add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); -extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); -extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); +extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); +extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); +extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) { @@ -141,16 +141,16 @@ static inline void __remove_wait_queue(wait_queue_head_t *head, list_del(&old->task_list); } -void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key)); -extern void FASTCALL(__wake_up_locked(wait_queue_head_t *q, unsigned int mode)); -extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr)); -void FASTCALL(__wake_up_bit(wait_queue_head_t *, void *, int)); -int FASTCALL(__wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned)); -int FASTCALL(__wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned)); -void FASTCALL(wake_up_bit(void *, int)); -int FASTCALL(out_of_line_wait_on_bit(void *, int, int (*)(void *), unsigned)); -int FASTCALL(out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned)); -wait_queue_head_t *FASTCALL(bit_waitqueue(void *, int)); +void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); +extern void __wake_up_locked(wait_queue_head_t *q, unsigned int mode); +extern void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); +void __wake_up_bit(wait_queue_head_t *, void *, int); +int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); +int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, int (*)(void *), unsigned); +void wake_up_bit(void *, int); +int out_of_line_wait_on_bit(void *, 
int, int (*)(void *), unsigned); +int out_of_line_wait_on_bit_lock(void *, int, int (*)(void *), unsigned); +wait_queue_head_t *bit_waitqueue(void *, int); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -437,11 +437,9 @@ extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -void FASTCALL(prepare_to_wait(wait_queue_head_t *q, - wait_queue_t *wait, int state)); -void FASTCALL(prepare_to_wait_exclusive(wait_queue_head_t *q, - wait_queue_t *wait, int state)); -void FASTCALL(finish_wait(wait_queue_head_t *q, wait_queue_t *wait)); +void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); +void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); +void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 7f28c32..542526c 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -178,18 +178,17 @@ __create_workqueue_key(const char *name, int singlethread, extern void destroy_workqueue(struct workqueue_struct *wq); -extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work)); -extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *work, unsigned long delay)); +extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); +extern int queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *work, unsigned long delay); extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay); -extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq)); +extern void flush_workqueue(struct workqueue_struct *wq); extern void flush_scheduled_work(void); -extern int FASTCALL(schedule_work(struct work_struct *work)); -extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, - unsigned long delay)); +extern int schedule_work(struct work_struct *work); +extern int schedule_delayed_work(struct delayed_work *work, unsigned long delay); extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay); extern int schedule_on_each_cpu(work_func_t func); diff --git a/init/Kconfig b/init/Kconfig index 824d48c..dcef8b5 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -311,25 +311,36 @@ config CPUSETS Say N if unsure. -config FAIR_GROUP_SCHED - bool "Fair group CPU scheduler" +config GROUP_SCHED + bool "Group CPU scheduler" default y help This feature lets CPU scheduler recognize task groups and control CPU bandwidth allocation to such task groups. +config FAIR_GROUP_SCHED + bool "Group scheduling for SCHED_OTHER" + depends on GROUP_SCHED + default y + +config RT_GROUP_SCHED + bool "Group scheduling for SCHED_RR/FIFO" + depends on EXPERIMENTAL + depends on GROUP_SCHED + default n + choice - depends on FAIR_GROUP_SCHED + depends on GROUP_SCHED prompt "Basis for grouping tasks" - default FAIR_USER_SCHED + default USER_SCHED -config FAIR_USER_SCHED +config USER_SCHED bool "user id" help This option will choose userid as the basis for grouping tasks, thus providing equal CPU bandwidth to each user. 
-config FAIR_CGROUP_SCHED +config CGROUP_SCHED bool "Control groups" depends on CGROUPS help diff --git a/init/Makefile b/init/Makefile index c5f157c..4a243df 100644 --- a/init/Makefile +++ b/init/Makefile @@ -27,6 +27,7 @@ $(obj)/version.o: include/linux/compile.h # mkcompile_h will make sure to only update the # actual file if its content has changed. + chk_compile.h = : quiet_chk_compile.h = echo ' CHK $@' silent_chk_compile.h = : include/linux/compile.h: FORCE diff --git a/kernel/marker.c b/kernel/marker.c index 5323cfa..c4c2cd8 100644 --- a/kernel/marker.c +++ b/kernel/marker.c @@ -27,35 +27,42 @@ extern struct marker __start___markers[]; extern struct marker __stop___markers[]; +/* Set to 1 to enable marker debug output */ +const int marker_debug; + /* * markers_mutex nests inside module_mutex. Markers mutex protects the builtin - * and module markers, the hash table and deferred_sync. + * and module markers and the hash table. */ static DEFINE_MUTEX(markers_mutex); /* - * Marker deferred synchronization. - * Upon marker probe_unregister, we delay call to synchronize_sched() to - * accelerate mass unregistration (only when there is no more reference to a - * given module do we call synchronize_sched()). However, we need to make sure - * every critical region has ended before we re-arm a marker that has been - * unregistered and then registered back with a different probe data. - */ -static int deferred_sync; - -/* * Marker hash table, containing the active markers. * Protected by module_mutex. */ #define MARKER_HASH_BITS 6 #define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS) +/* + * Note about RCU : + * It is used to make sure every handler has finished using its private data + * between two consecutive operation (add or remove) on a given marker. It is + * also used to delay the free of multiple probes array until a quiescent state + * is reached. + * marker entries modifications are protected by the markers_mutex. + */ struct marker_entry { struct hlist_node hlist; char *format; - marker_probe_func *probe; - void *private; + void (*call)(const struct marker *mdata, /* Probe wrapper */ + void *call_private, const char *fmt, ...); + struct marker_probe_closure single; + struct marker_probe_closure *multi; int refcount; /* Number of times armed. 0 if disarmed. */ + struct rcu_head rcu; + void *oldptr; + char rcu_pending:1; + char ptype:1; char name[0]; /* Contains name'\0'format'\0' */ }; @@ -63,7 +70,8 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; /** * __mark_empty_function - Empty probe callback - * @mdata: pointer of type const struct marker + * @probe_private: probe private data + * @call_private: call site private data * @fmt: format string * @...: variable argument list * @@ -72,13 +80,267 @@ static struct hlist_head marker_table[MARKER_TABLE_SIZE]; * though the function pointer change and the marker enabling are two distinct * operations that modifies the execution flow of preemptible code. */ -void __mark_empty_function(const struct marker *mdata, void *private, - const char *fmt, ...) +void __mark_empty_function(void *probe_private, void *call_private, + const char *fmt, va_list *args) { } EXPORT_SYMBOL_GPL(__mark_empty_function); /* + * marker_probe_cb Callback that prepares the variable argument list for probes. + * @mdata: pointer of type struct marker + * @call_private: caller site private data + * @fmt: format string + * @...: Variable argument list. 
+ * + * Since we do not use "typical" pointer based RCU in the 1 argument case, we + * need to put a full smp_rmb() in this branch. This is why we do not use + * rcu_dereference() for the pointer read. + */ +void marker_probe_cb(const struct marker *mdata, void *call_private, + const char *fmt, ...) +{ + va_list args; + char ptype; + + /* + * disabling preemption to make sure the teardown of the callbacks can + * be done correctly when they are in modules and they ensure RCU read + * coherency. + */ + preempt_disable(); + ptype = ACCESS_ONCE(mdata->ptype); + if (likely(!ptype)) { + marker_probe_func *func; + /* Must read the ptype before ptr. They are not data dependent, + * so we put an explicit smp_rmb() here. */ + smp_rmb(); + func = ACCESS_ONCE(mdata->single.func); + /* Must read the ptr before private data. They are not data + * dependent, so we put an explicit smp_rmb() here. */ + smp_rmb(); + va_start(args, fmt); + func(mdata->single.probe_private, call_private, fmt, &args); + va_end(args); + } else { + struct marker_probe_closure *multi; + int i; + /* + * multi points to an array, therefore accessing the array + * depends on reading multi. However, even in this case, + * we must ensure that the pointer is read _before_ the array + * data. Same as rcu_dereference, but we need a full smp_rmb() + * in the fast path, so put the explicit barrier here. + */ + smp_read_barrier_depends(); + multi = ACCESS_ONCE(mdata->multi); + for (i = 0; multi[i].func; i++) { + va_start(args, fmt); + multi[i].func(multi[i].probe_private, call_private, fmt, + &args); + va_end(args); + } + } + preempt_enable(); +} +EXPORT_SYMBOL_GPL(marker_probe_cb); + +/* + * marker_probe_cb_noarg - Callback that does not prepare the variable argument list. + * @mdata: pointer of type struct marker + * @call_private: caller site private data + * @fmt: format string + * @...: Variable argument list. + * + * Should be connected to markers "MARK_NOARGS". + */ +void marker_probe_cb_noarg(const struct marker *mdata, + void *call_private, const char *fmt, ...) +{ + va_list args; /* not initialized */ + char ptype; + + preempt_disable(); + ptype = ACCESS_ONCE(mdata->ptype); + if (likely(!ptype)) { + marker_probe_func *func; + /* Must read the ptype before ptr. They are not data dependent, + * so we put an explicit smp_rmb() here. */ + smp_rmb(); + func = ACCESS_ONCE(mdata->single.func); + /* Must read the ptr before private data. They are not data + * dependent, so we put an explicit smp_rmb() here. */ + smp_rmb(); + func(mdata->single.probe_private, call_private, fmt, &args); + } else { + struct marker_probe_closure *multi; + int i; + /* + * multi points to an array, therefore accessing the array + * depends on reading multi. However, even in this case, + * we must ensure that the pointer is read _before_ the array + * data. Same as rcu_dereference, but we need a full smp_rmb() + * in the fast path, so put the explicit barrier here.
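To make the new calling convention concrete, here is a minimal sketch of a probe matching the reworked marker_probe_func typedef; the marker name, format string, and function name are hypothetical. The probe receives its registration-time private data, the call-site private data, and a pointer to the va_list that marker_probe_cb() prepared:

/* Hypothetical probe for a site such as:
 *	trace_mark(subsys_event, "%d %s", value, string);
 */
static void subsys_event_probe(void *probe_private, void *call_private,
			       const char *fmt, va_list *args)
{
	int value = va_arg(*args, int);			  /* matches "%d" */
	const char *string = va_arg(*args, const char *); /* matches "%s" */

	printk(KERN_DEBUG "subsys_event: %d %s\n", value, string);
}

Note that marker_probe_cb() brackets each probe invocation of the multi case with its own va_start()/va_end(), so every chained probe sees the argument list from the beginning.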
+ */ + smp_read_barrier_depends(); + multi = ACCESS_ONCE(mdata->multi); + for (i = 0; multi[i].func; i++) + multi[i].func(multi[i].probe_private, call_private, fmt, + &args); + } + preempt_enable(); +} +EXPORT_SYMBOL_GPL(marker_probe_cb_noarg); + +static void free_old_closure(struct rcu_head *head) +{ + struct marker_entry *entry = container_of(head, + struct marker_entry, rcu); + kfree(entry->oldptr); + /* Make sure we free the data before setting the pending flag to 0 */ + smp_wmb(); + entry->rcu_pending = 0; +} + +static void debug_print_probes(struct marker_entry *entry) +{ + int i; + + if (!marker_debug) + return; + + if (!entry->ptype) { + printk(KERN_DEBUG "Single probe : %p %p\n", + entry->single.func, + entry->single.probe_private); + } else { + for (i = 0; entry->multi[i].func; i++) + printk(KERN_DEBUG "Multi probe %d : %p %p\n", i, + entry->multi[i].func, + entry->multi[i].probe_private); + } +} + +static struct marker_probe_closure * +marker_entry_add_probe(struct marker_entry *entry, + marker_probe_func *probe, void *probe_private) +{ + int nr_probes = 0; + struct marker_probe_closure *old, *new; + + WARN_ON(!probe); + + debug_print_probes(entry); + old = entry->multi; + if (!entry->ptype) { + if (entry->single.func == probe && + entry->single.probe_private == probe_private) + return ERR_PTR(-EBUSY); + if (entry->single.func == __mark_empty_function) { + /* 0 -> 1 probes */ + entry->single.func = probe; + entry->single.probe_private = probe_private; + entry->refcount = 1; + entry->ptype = 0; + debug_print_probes(entry); + return NULL; + } else { + /* 1 -> 2 probes */ + nr_probes = 1; + old = NULL; + } + } else { + /* (N -> N+1), (N != 0, 1) probes */ + for (nr_probes = 0; old[nr_probes].func; nr_probes++) + if (old[nr_probes].func == probe + && old[nr_probes].probe_private + == probe_private) + return ERR_PTR(-EBUSY); + } + /* + 2 : one for new probe, one for NULL func */ + new = kzalloc((nr_probes + 2) * sizeof(struct marker_probe_closure), + GFP_KERNEL); + if (new == NULL) + return ERR_PTR(-ENOMEM); + if (!old) + new[0] = entry->single; + else + memcpy(new, old, + nr_probes * sizeof(struct marker_probe_closure)); + new[nr_probes].func = probe; + new[nr_probes].probe_private = probe_private; + entry->refcount = nr_probes + 1; + entry->multi = new; + entry->ptype = 1; + debug_print_probes(entry); + return old; +} + +static struct marker_probe_closure * +marker_entry_remove_probe(struct marker_entry *entry, + marker_probe_func *probe, void *probe_private) +{ + int nr_probes = 0, nr_del = 0, i; + struct marker_probe_closure *old, *new; + + old = entry->multi; + + debug_print_probes(entry); + if (!entry->ptype) { + /* 0 -> N is an error */ + WARN_ON(entry->single.func == __mark_empty_function); + /* 1 -> 0 probes */ + WARN_ON(probe && entry->single.func != probe); + WARN_ON(entry->single.probe_private != probe_private); + entry->single.func = __mark_empty_function; + entry->refcount = 0; + entry->ptype = 0; + debug_print_probes(entry); + return NULL; + } else { + /* (N -> M), (N > 1, M >= 0) probes */ + for (nr_probes = 0; old[nr_probes].func; nr_probes++) { + if ((!probe || old[nr_probes].func == probe) + && old[nr_probes].probe_private + == probe_private) + nr_del++; + } + } + + if (nr_probes - nr_del == 0) { + /* N -> 0, (N > 1) */ + entry->single.func = __mark_empty_function; + entry->refcount = 0; + entry->ptype = 0; + } else if (nr_probes - nr_del == 1) { + /* N -> 1, (N > 1) */ + for (i = 0; old[i].func; i++) + if ((probe && old[i].func != probe) || + old[i].probe_private 
!= probe_private) + entry->single = old[i]; + entry->refcount = 1; + entry->ptype = 0; + } else { + int j = 0; + /* N -> M, (N > 1, M > 1) */ + /* + 1 for NULL */ + new = kzalloc((nr_probes - nr_del + 1) + * sizeof(struct marker_probe_closure), GFP_KERNEL); + if (new == NULL) + return ERR_PTR(-ENOMEM); + for (i = 0; old[i].func; i++) + if ((probe && old[i].func != probe) || + old[i].probe_private != probe_private) + new[j++] = old[i]; + entry->refcount = nr_probes - nr_del; + entry->ptype = 1; + entry->multi = new; + } + debug_print_probes(entry); + return old; +} + +/* * Get marker if the marker is present in the marker hash table. * Must be called with markers_mutex held. * Returns NULL if not present. @@ -102,8 +364,7 @@ static struct marker_entry *get_marker(const char *name) * Add the marker to the marker hash table. Must be called with markers_mutex * held. */ -static int add_marker(const char *name, const char *format, - marker_probe_func *probe, void *private) +static struct marker_entry *add_marker(const char *name, const char *format) { struct hlist_head *head; struct hlist_node *node; @@ -118,9 +379,8 @@ static int add_marker(const char *name, const char *format, hlist_for_each_entry(e, node, head, hlist) { if (!strcmp(name, e->name)) { printk(KERN_NOTICE - "Marker %s busy, probe %p already installed\n", - name, e->probe); - return -EBUSY; /* Already there */ + "Marker %s busy\n", name); + return ERR_PTR(-EBUSY); /* Already there */ } } /* @@ -130,34 +390,42 @@ static int add_marker(const char *name, const char *format, e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, GFP_KERNEL); if (!e) - return -ENOMEM; + return ERR_PTR(-ENOMEM); memcpy(&e->name[0], name, name_len); if (format) { e->format = &e->name[name_len]; memcpy(e->format, format, format_len); + if (strcmp(e->format, MARK_NOARGS) == 0) + e->call = marker_probe_cb_noarg; + else + e->call = marker_probe_cb; trace_mark(core_marker_format, "name %s format %s", e->name, e->format); - } else + } else { e->format = NULL; - e->probe = probe; - e->private = private; + e->call = marker_probe_cb; + } + e->single.func = __mark_empty_function; + e->single.probe_private = NULL; + e->multi = NULL; + e->ptype = 0; e->refcount = 0; + e->rcu_pending = 0; hlist_add_head(&e->hlist, head); - return 0; + return e; } /* * Remove the marker from the marker hash table. Must be called with mutex_lock * held. 
*/ -static void *remove_marker(const char *name) +static int remove_marker(const char *name) { struct hlist_head *head; struct hlist_node *node; struct marker_entry *e; int found = 0; size_t len = strlen(name) + 1; - void *private = NULL; u32 hash = jhash(name, len-1, 0); head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; @@ -167,12 +435,16 @@ static void *remove_marker(const char *name) break; } } - if (found) { - private = e->private; - hlist_del(&e->hlist); - kfree(e); - } - return private; + if (!found) + return -ENOENT; + if (e->single.func != __mark_empty_function) + return -EBUSY; + hlist_del(&e->hlist); + /* Make sure the call_rcu has been executed */ + if (e->rcu_pending) + rcu_barrier(); + kfree(e); + return 0; } /* @@ -184,6 +456,7 @@ static int marker_set_format(struct marker_entry **entry, const char *format) size_t name_len = strlen((*entry)->name) + 1; size_t format_len = strlen(format) + 1; + e = kmalloc(sizeof(struct marker_entry) + name_len + format_len, GFP_KERNEL); if (!e) @@ -191,11 +464,20 @@ static int marker_set_format(struct marker_entry **entry, const char *format) memcpy(&e->name[0], (*entry)->name, name_len); e->format = &e->name[name_len]; memcpy(e->format, format, format_len); - e->probe = (*entry)->probe; - e->private = (*entry)->private; + if (strcmp(e->format, MARK_NOARGS) == 0) + e->call = marker_probe_cb_noarg; + else + e->call = marker_probe_cb; + e->single = (*entry)->single; + e->multi = (*entry)->multi; + e->ptype = (*entry)->ptype; e->refcount = (*entry)->refcount; + e->rcu_pending = 0; hlist_add_before(&e->hlist, &(*entry)->hlist); hlist_del(&(*entry)->hlist); + /* Make sure the call_rcu has been executed */ + if ((*entry)->rcu_pending) + rcu_barrier(); kfree(*entry); *entry = e; trace_mark(core_marker_format, "name %s format %s", @@ -206,7 +488,8 @@ static int marker_set_format(struct marker_entry **entry, const char *format) /* * Sets the probe callback corresponding to one marker. */ -static int set_marker(struct marker_entry **entry, struct marker *elem) +static int set_marker(struct marker_entry **entry, struct marker *elem, + int active) { int ret; WARN_ON(strcmp((*entry)->name, elem->name) != 0); @@ -226,9 +509,43 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) if (ret) return ret; } - elem->call = (*entry)->probe; - elem->private = (*entry)->private; - elem->state = 1; + + /* + * probe_cb setup (statically known) is done here. It is + * asynchronous with the rest of execution, therefore we only + * pass from a "safe" callback (with argument) to an "unsafe" + * callback (does not set arguments). + */ + elem->call = (*entry)->call; + /* + * Sanity check : + * We only update the single probe private data when the ptr is + * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1) + */ + WARN_ON(elem->single.func != __mark_empty_function + && elem->single.probe_private + != (*entry)->single.probe_private && + !elem->ptype); + elem->single.probe_private = (*entry)->single.probe_private; + /* + * Make sure the private data is valid when we update the + * single probe ptr. + */ + smp_wmb(); + elem->single.func = (*entry)->single.func; + /* + * We also make sure that the new probe callbacks array is consistent + * before setting a pointer to it. + */ + rcu_assign_pointer(elem->multi, (*entry)->multi); + /* + * Update the function or multi probe array pointer before setting the + * ptype. 
+ */ + smp_wmb(); + elem->ptype = (*entry)->ptype; + elem->state = active; + return 0; } @@ -240,8 +557,12 @@ static int set_marker(struct marker_entry **entry, struct marker *elem) */ static void disable_marker(struct marker *elem) { + /* leave "call" as is. It is known statically. */ elem->state = 0; - elem->call = __mark_empty_function; + elem->single.func = __mark_empty_function; + /* Update the function before setting the ptype */ + smp_wmb(); + elem->ptype = 0; /* single probe */ /* * Leave the private data and id there, because removal is racy and * should be done only after a synchronize_sched(). These are never used @@ -253,14 +574,11 @@ static void disable_marker(struct marker *elem) * marker_update_probe_range - Update a probe range * @begin: beginning of the range * @end: end of the range - * @probe_module: module address of the probe being updated - * @refcount: number of references left to the given probe_module (out) * * Updates the probe callback corresponding to a range of markers. */ void marker_update_probe_range(struct marker *begin, - struct marker *end, struct module *probe_module, - int *refcount) + struct marker *end) { struct marker *iter; struct marker_entry *mark_entry; @@ -268,15 +586,12 @@ void marker_update_probe_range(struct marker *begin, mutex_lock(&markers_mutex); for (iter = begin; iter < end; iter++) { mark_entry = get_marker(iter->name); - if (mark_entry && mark_entry->refcount) { - set_marker(&mark_entry, iter); + if (mark_entry) { + set_marker(&mark_entry, iter, + !!mark_entry->refcount); /* * ignore error, continue */ - if (probe_module) - if (probe_module == - __module_text_address((unsigned long)mark_entry->probe)) - (*refcount)++; } else { disable_marker(iter); } @@ -289,20 +604,27 @@ void marker_update_probe_range(struct marker *begin, * Issues a synchronize_sched() when no reference to the module passed * as parameter is found in the probes so the probe module can be * safely unloaded from now on. + * + * Internal callback only changed before the first probe is connected to it. + * Single probe private data can only be changed on 0 -> 1 and 2 -> 1 + * transitions. All other transitions will leave the old private data valid. + * This makes the non-atomicity of the callback/private data updates valid. + * + * "special case" updates : + * 0 -> 1 callback + * 1 -> 0 callback + * 1 -> 2 callbacks + * 2 -> 1 callbacks + * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates. + * Site effect : marker_set_format may delete the marker entry (creating a + * replacement). */ -static void marker_update_probes(struct module *probe_module) +static void marker_update_probes(void) { - int refcount = 0; - /* Core kernel markers */ - marker_update_probe_range(__start___markers, - __stop___markers, probe_module, &refcount); + marker_update_probe_range(__start___markers, __stop___markers); /* Markers in modules. */ - module_update_markers(probe_module, &refcount); - if (probe_module && refcount == 0) { - synchronize_sched(); - deferred_sync = 0; - } + module_update_markers(); } /** @@ -310,33 +632,49 @@ static void marker_update_probes(struct module *probe_module) * @name: marker name * @format: format string * @probe: probe handler - * @private: probe private data + * @probe_private: probe private data * * private data must be a valid allocated memory address, or NULL. * Returns 0 if ok, error value on error. + * The probe address must at least be aligned on the architecture pointer size. 
*/ int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *private) + marker_probe_func *probe, void *probe_private) { struct marker_entry *entry; int ret = 0; + struct marker_probe_closure *old; mutex_lock(&markers_mutex); entry = get_marker(name); - if (entry && entry->refcount) { - ret = -EBUSY; - goto end; - } - if (deferred_sync) { - synchronize_sched(); - deferred_sync = 0; + if (!entry) { + entry = add_marker(name, format); + if (IS_ERR(entry)) { + ret = PTR_ERR(entry); + goto end; + } } - ret = add_marker(name, format, probe, private); - if (ret) + /* + * If we detect that a call_rcu is pending for this marker, + * make sure it's executed now. + */ + if (entry->rcu_pending) + rcu_barrier(); + old = marker_entry_add_probe(entry, probe, probe_private); + if (IS_ERR(old)) { + ret = PTR_ERR(old); goto end; + } mutex_unlock(&markers_mutex); - marker_update_probes(NULL); - return ret; + marker_update_probes(); /* may update entry */ + mutex_lock(&markers_mutex); + entry = get_marker(name); + WARN_ON(!entry); + entry->oldptr = old; + entry->rcu_pending = 1; + /* write rcu_pending before calling the RCU callback */ + smp_wmb(); + call_rcu(&entry->rcu, free_old_closure); end: mutex_unlock(&markers_mutex); return ret; } EXPORT_SYMBOL_GPL(marker_probe_register); /** * marker_probe_unregister - Disconnect a probe from a marker * @name: marker name + * @probe: probe function pointer + * @probe_private: probe private data * * Returns the private data given to marker_probe_register, or an ERR_PTR(). + * We do not need to call a synchronize_sched to make sure the probes have + * finished running before doing a module unload, because the module unload + * itself uses stop_machine(), which ensures that every preempt disabled section + * has finished. */ -void *marker_probe_unregister(const char *name) +int marker_probe_unregister(const char *name, + marker_probe_func *probe, void *probe_private) { - struct module *probe_module; struct marker_entry *entry; - void *private; + struct marker_probe_closure *old; + int ret = 0; mutex_lock(&markers_mutex); entry = get_marker(name); if (!entry) { - private = ERR_PTR(-ENOENT); + ret = -ENOENT; goto end; } - entry->refcount = 0; - /* In what module is the probe handler ? */ - probe_module = __module_text_address((unsigned long)entry->probe); - private = remove_marker(name); - deferred_sync = 1; + if (entry->rcu_pending) + rcu_barrier(); + old = marker_entry_remove_probe(entry, probe, probe_private); mutex_unlock(&markers_mutex); - marker_update_probes(probe_module); - return private; + marker_update_probes(); /* may update entry */ + mutex_lock(&markers_mutex); + entry = get_marker(name); + entry->oldptr = old; + entry->rcu_pending = 1; + /* write rcu_pending before calling the RCU callback */ + smp_wmb(); + call_rcu(&entry->rcu, free_old_closure); + remove_marker(name); /* Ignore busy error message */ end: mutex_unlock(&markers_mutex); - return private; + return ret; } EXPORT_SYMBOL_GPL(marker_probe_unregister); -/** - * marker_probe_unregister_private_data - Disconnect a probe from a marker - * @private: probe private data - * - * Unregister a marker by providing the registered private data. - * Returns the private data given to marker_probe_register, or an ERR_PTR().
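Putting the reworked registration API together, a hedged sketch of how the hypothetical probe above would be attached and detached. The (probe, probe_private) pair now identifies the connection, which is what lets several probes coexist on one marker; marker_arm()/marker_disarm() are gone because connecting the first probe arms the marker and removing the last one disarms it:

static int __init subsys_probe_init(void)
{
	/* The format must match the trace_mark() site exactly. */
	return marker_probe_register("subsys_event", "%d %s",
				     subsys_event_probe, NULL);
}

static void __exit subsys_probe_exit(void)
{
	/* Detach by the same (probe, probe_private) pair. */
	marker_probe_unregister("subsys_event",
				subsys_event_probe, NULL);
}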
- */ -void *marker_probe_unregister_private_data(void *private) +static struct marker_entry * +get_marker_from_private_data(marker_probe_func *probe, void *probe_private) { - struct module *probe_module; - struct hlist_head *head; - struct hlist_node *node; struct marker_entry *entry; - int found = 0; unsigned int i; + struct hlist_head *head; + struct hlist_node *node; - mutex_lock(&markers_mutex); for (i = 0; i < MARKER_TABLE_SIZE; i++) { head = &marker_table[i]; hlist_for_each_entry(entry, node, head, hlist) { - if (entry->private == private) { - found = 1; - goto iter_end; + if (!entry->ptype) { + if (entry->single.func == probe + && entry->single.probe_private + == probe_private) + return entry; + } else { + struct marker_probe_closure *closure; + closure = entry->multi; + for (i = 0; closure[i].func; i++) { + if (closure[i].func == probe && + closure[i].probe_private + == probe_private) + return entry; + } } } } -iter_end: - if (!found) { - private = ERR_PTR(-ENOENT); - goto end; - } - entry->refcount = 0; - /* In what module is the probe handler ? */ - probe_module = __module_text_address((unsigned long)entry->probe); - private = remove_marker(entry->name); - deferred_sync = 1; - mutex_unlock(&markers_mutex); - marker_update_probes(probe_module); - return private; -end: - mutex_unlock(&markers_mutex); - return private; + return NULL; } -EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); /** - * marker_arm - Arm a marker - * @name: marker name + * marker_probe_unregister_private_data - Disconnect a probe from a marker + * @probe: probe function + * @probe_private: probe private data * - * Activate a marker. It keeps a reference count of the number of - * arming/disarming done. - * Returns 0 if ok, error value on error. + * Unregister a probe by providing the registered private data. + * Only removes the first marker found in the hash table. + * Returns 0 on success or an error value. + * We do not need to call a synchronize_sched to make sure the probes have + * finished running before doing a module unload, because the module unload + * itself uses stop_machine(), which ensures that every preempt disabled section + * has finished. */ -int marker_arm(const char *name) +int marker_probe_unregister_private_data(marker_probe_func *probe, + void *probe_private) { struct marker_entry *entry; int ret = 0; + struct marker_probe_closure *old; mutex_lock(&markers_mutex); - entry = get_marker(name); + entry = get_marker_from_private_data(probe, probe_private); if (!entry) { ret = -ENOENT; goto end; } - /* - * Only need to update probes when refcount passes from 0 to 1. - */ - if (entry->refcount++) - goto end; -end: + if (entry->rcu_pending) + rcu_barrier(); + old = marker_entry_remove_probe(entry, NULL, probe_private); mutex_unlock(&markers_mutex); - marker_update_probes(NULL); - return ret; -} -EXPORT_SYMBOL_GPL(marker_arm); - -/** - * marker_disarm - Disarm a marker - * @name: marker name - * - * Disarm a marker. It keeps a reference count of the number of arming/disarming - * done. - * Returns 0 if ok, error value on error. - */ -int marker_disarm(const char *name) -{ - struct marker_entry *entry; - int ret = 0; - + marker_update_probes(); /* may update entry */ mutex_lock(&markers_mutex); - entry = get_marker(name); - if (!entry) { - ret = -ENOENT; - goto end; - } - /* - * Only permit decrement refcount if higher than 0. - * Do probe update only on 1 -> 0 transition.
- */ - if (entry->refcount) { - if (--entry->refcount) - goto end; - } else { - ret = -EPERM; - goto end; - } + entry = get_marker_from_private_data(probe, probe_private); + WARN_ON(!entry); + entry->oldptr = old; + entry->rcu_pending = 1; + /* write rcu_pending before calling the RCU callback */ + smp_wmb(); + call_rcu(&entry->rcu, free_old_closure); + remove_marker(entry->name); /* Ignore busy error message */ end: mutex_unlock(&markers_mutex); - marker_update_probes(NULL); return ret; } -EXPORT_SYMBOL_GPL(marker_disarm); +EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data); /** * marker_get_private_data - Get a marker's probe private data * @name: marker name + * @probe: probe to match + * @num: get the nth matching probe's private data * + * Returns the nth private data pointer (starting from 0) matching, or an + * ERR_PTR. * Returns the private data pointer, or an ERR_PTR. * The private data pointer should _only_ be dereferenced if the caller is the * owner of the data, or its content could vanish. This is mostly used to * confirm that a caller is the owner of a registered probe. */ -void *marker_get_private_data(const char *name) +void *marker_get_private_data(const char *name, marker_probe_func *probe, + int num) { struct hlist_head *head; struct hlist_node *node; struct marker_entry *e; size_t name_len = strlen(name) + 1; u32 hash = jhash(name, name_len-1, 0); - int found = 0; + int i; head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)]; hlist_for_each_entry(e, node, head, hlist) { if (!strcmp(name, e->name)) { - found = 1; - return e->private; + if (!e->ptype) { + if (num == 0 && e->single.func == probe) + return e->single.probe_private; + else + break; + } else { + struct marker_probe_closure *closure; + int match = 0; + closure = e->multi; + for (i = 0; closure[i].func; i++) { + if (closure[i].func != probe) + continue; + if (match++ == num) + return closure[i].probe_private; + } + } } } return ERR_PTR(-ENOENT); diff --git a/kernel/module.c b/kernel/module.c index 4202da9..92595ba 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2038,7 +2038,7 @@ static struct module *load_module(void __user *umod, #ifdef CONFIG_MARKERS if (!mod->taints) marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers, NULL, NULL); + mod->markers + mod->num_markers); #endif err = module_finalize(hdr, sechdrs, mod); if (err < 0) @@ -2564,7 +2564,7 @@ EXPORT_SYMBOL(struct_module); #endif #ifdef CONFIG_MARKERS -void module_update_markers(struct module *probe_module, int *refcount) +void module_update_markers(void) { struct module *mod; @@ -2572,8 +2572,7 @@ void module_update_markers(struct module *probe_module, int *refcount) list_for_each_entry(mod, &modules, list) if (!mod->taints) marker_update_probe_range(mod->markers, - mod->markers + mod->num_markers, - probe_module, refcount); + mod->markers + mod->num_markers); mutex_unlock(&module_mutex); } #endif diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 760dfc2..c09605f 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -56,7 +56,10 @@ static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; -/* Because of FASTCALL declaration of complete, we use this wrapper */ +/* + * Awaken the corresponding synchronize_rcu() instance now that a + * grace period has elapsed. 
+ */ static void wakeme_after_rcu(struct rcu_head *head) { struct rcu_synchronize *rcu; diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c index 0deef71..6522ae5 100644 --- a/kernel/rtmutex.c +++ b/kernel/rtmutex.c @@ -630,9 +630,12 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, set_current_state(state); /* Setup the timer, when timeout != NULL */ - if (unlikely(timeout)) + if (unlikely(timeout)) { hrtimer_start(&timeout->timer, timeout->timer.expires, HRTIMER_MODE_ABS); + if (!hrtimer_active(&timeout->timer)) + timeout->task = NULL; + } for (;;) { /* Try to acquire the lock: */ diff --git a/kernel/sched.c b/kernel/sched.c index 3eedd52..f28f19e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -155,7 +155,7 @@ struct rt_prio_array { struct list_head queue[MAX_RT_PRIO]; }; -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_GROUP_SCHED #include <linux/cgroup.h> @@ -165,19 +165,16 @@ static LIST_HEAD(task_groups); /* task group related information */ struct task_group { -#ifdef CONFIG_FAIR_CGROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED struct cgroup_subsys_state css; #endif + +#ifdef CONFIG_FAIR_GROUP_SCHED /* schedulable entities of this group on each cpu */ struct sched_entity **se; /* runqueue "owned" by this group on each cpu */ struct cfs_rq **cfs_rq; - struct sched_rt_entity **rt_se; - struct rt_rq **rt_rq; - - unsigned int rt_ratio; - /* * shares assigned to a task group governs how much of cpu bandwidth * is allocated to the group. The more shares a group has, the more is @@ -213,33 +210,46 @@ struct task_group { * */ unsigned long shares; +#endif + +#ifdef CONFIG_RT_GROUP_SCHED + struct sched_rt_entity **rt_se; + struct rt_rq **rt_rq; + + u64 rt_runtime; +#endif struct rcu_head rcu; struct list_head list; }; +#ifdef CONFIG_FAIR_GROUP_SCHED /* Default task group's sched entity on each cpu */ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); /* Default task group's cfs_rq on each cpu */ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; -static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); -static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; - static struct sched_entity *init_sched_entity_p[NR_CPUS]; static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; +#endif + +#ifdef CONFIG_RT_GROUP_SCHED +static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); +static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp; static struct sched_rt_entity *init_sched_rt_entity_p[NR_CPUS]; static struct rt_rq *init_rt_rq_p[NR_CPUS]; +#endif -/* task_group_mutex serializes add/remove of task groups and also changes to +/* task_group_lock serializes add/remove of task groups and also changes to * a task group's cpu shares. */ -static DEFINE_MUTEX(task_group_mutex); +static DEFINE_SPINLOCK(task_group_lock); /* doms_cur_mutex serializes access to doms_cur[] array */ static DEFINE_MUTEX(doms_cur_mutex); +#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_SMP /* kernel thread that runs rebalance_shares() periodically */ static struct task_struct *lb_monitor_task; @@ -248,35 +258,40 @@ static int load_balance_monitor(void *unused); static void set_se_shares(struct sched_entity *se, unsigned long shares); +#ifdef CONFIG_USER_SCHED +# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) +#else +# define INIT_TASK_GROUP_LOAD NICE_0_LOAD +#endif + +#define MIN_GROUP_SHARES 2 + +static int init_task_group_load = INIT_TASK_GROUP_LOAD; +#endif + /* Default task group. * Every task in system belong to this group at bootup. 
*/ struct task_group init_task_group = { +#ifdef CONFIG_FAIR_GROUP_SCHED .se = init_sched_entity_p, .cfs_rq = init_cfs_rq_p, +#endif +#ifdef CONFIG_RT_GROUP_SCHED .rt_se = init_sched_rt_entity_p, .rt_rq = init_rt_rq_p, -}; - -#ifdef CONFIG_FAIR_USER_SCHED -# define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) -#else -# define INIT_TASK_GROUP_LOAD NICE_0_LOAD #endif - -#define MIN_GROUP_SHARES 2 - -static int init_task_group_load = INIT_TASK_GROUP_LOAD; +}; /* return group to which a task belongs */ static inline struct task_group *task_group(struct task_struct *p) { struct task_group *tg; -#ifdef CONFIG_FAIR_USER_SCHED +#ifdef CONFIG_USER_SCHED tg = p->user->tg; -#elif defined(CONFIG_FAIR_CGROUP_SCHED) +#elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); #else @@ -288,21 +303,15 @@ static inline struct task_group *task_group(struct task_struct *p) /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { +#ifdef CONFIG_FAIR_GROUP_SCHED p->se.cfs_rq = task_group(p)->cfs_rq[cpu]; p->se.parent = task_group(p)->se[cpu]; +#endif +#ifdef CONFIG_RT_GROUP_SCHED p->rt.rt_rq = task_group(p)->rt_rq[cpu]; p->rt.parent = task_group(p)->rt_se[cpu]; -} - -static inline void lock_task_group_list(void) -{ - mutex_lock(&task_group_mutex); -} - -static inline void unlock_task_group_list(void) -{ - mutex_unlock(&task_group_mutex); +#endif } static inline void lock_doms_cur(void) @@ -318,12 +327,10 @@ static inline void unlock_doms_cur(void) #else static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } -static inline void lock_task_group_list(void) { } -static inline void unlock_task_group_list(void) { } static inline void lock_doms_cur(void) { } static inline void unlock_doms_cur(void) { } -#endif /* CONFIG_FAIR_GROUP_SCHED */ +#endif /* CONFIG_GROUP_SCHED */ /* CFS-related fields in a runqueue */ struct cfs_rq { @@ -363,7 +370,7 @@ struct cfs_rq { struct rt_rq { struct rt_prio_array active; unsigned long rt_nr_running; -#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED int highest_prio; /* highest queued rt task prio */ #endif #ifdef CONFIG_SMP @@ -373,7 +380,9 @@ struct rt_rq { int rt_throttled; u64 rt_time; -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED + unsigned long rt_nr_boosted; + struct rq *rq; struct list_head leaf_rt_rq_list; struct task_group *tg; @@ -447,6 +456,8 @@ struct rq { #ifdef CONFIG_FAIR_GROUP_SCHED /* list of leaf cfs_rq on this cpu: */ struct list_head leaf_cfs_rq_list; +#endif +#ifdef CONFIG_RT_GROUP_SCHED struct list_head leaf_rt_rq_list; #endif @@ -652,19 +663,21 @@ const_debug unsigned int sysctl_sched_features = const_debug unsigned int sysctl_sched_nr_migrate = 32; /* - * period over which we measure -rt task cpu usage in ms. + * period over which we measure -rt task cpu usage in us. * default: 1s */ -const_debug unsigned int sysctl_sched_rt_period = 1000; +unsigned int sysctl_sched_rt_period = 1000000; -#define SCHED_RT_FRAC_SHIFT 16 -#define SCHED_RT_FRAC (1UL << SCHED_RT_FRAC_SHIFT) +/* + * part of the period that we allow rt tasks to run in us. + * default: 0.95s + */ +int sysctl_sched_rt_runtime = 950000; /* - * ratio of time -rt tasks may consume. - * default: 95% + * single value that denotes runtime == period, ie unlimited time. 
*/ -const_debug unsigned int sysctl_sched_rt_ratio = 62259; +#define RUNTIME_INF ((u64)~0ULL) /* * For kernel-internal use: high-speed (but slightly incorrect) per-cpu @@ -4571,6 +4584,15 @@ recheck: return -EPERM; } +#ifdef CONFIG_RT_GROUP_SCHED + /* + * Do not allow realtime tasks into groups that have no runtime + * assigned. + */ + if (rt_policy(policy) && task_group(p)->rt_runtime == 0) + return -EPERM; +#endif + retval = security_task_setscheduler(p, policy, param); if (retval) return retval; @@ -7112,7 +7134,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) /* delimiter for bitsearch: */ __set_bit(MAX_RT_PRIO, array->bitmap); -#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED rt_rq->highest_prio = MAX_RT_PRIO; #endif #ifdef CONFIG_SMP @@ -7123,7 +7145,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) rt_rq->rt_time = 0; rt_rq->rt_throttled = 0; -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED + rt_rq->rt_nr_boosted = 0; rt_rq->rq = rq; #endif } @@ -7146,7 +7169,9 @@ static void init_tg_cfs_entry(struct rq *rq, struct task_group *tg, se->load.inv_weight = div64_64(1ULL<<32, se->load.weight); se->parent = NULL; } +#endif +#ifdef CONFIG_RT_GROUP_SCHED static void init_tg_rt_entry(struct rq *rq, struct task_group *tg, struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int cpu, int add) @@ -7175,7 +7200,7 @@ void __init sched_init(void) init_defrootdomain(); #endif -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_GROUP_SCHED list_add(&init_task_group.list, &task_groups); #endif @@ -7196,7 +7221,10 @@ void __init sched_init(void) &per_cpu(init_cfs_rq, i), &per_cpu(init_sched_entity, i), i, 1); - init_task_group.rt_ratio = sysctl_sched_rt_ratio; /* XXX */ +#endif +#ifdef CONFIG_RT_GROUP_SCHED + init_task_group.rt_runtime = + sysctl_sched_rt_runtime * NSEC_PER_USEC; INIT_LIST_HEAD(&rq->leaf_rt_rq_list); init_tg_rt_entry(rq, &init_task_group, &per_cpu(init_rt_rq, i), @@ -7303,7 +7331,7 @@ void normalize_rt_tasks(void) unsigned long flags; struct rq *rq; - read_lock_irq(&tasklist_lock); + read_lock_irqsave(&tasklist_lock, flags); do_each_thread(g, p) { /* * Only normalize user tasks: @@ -7329,16 +7357,16 @@ void normalize_rt_tasks(void) continue; } - spin_lock_irqsave(&p->pi_lock, flags); + spin_lock(&p->pi_lock); rq = __task_rq_lock(p); normalize_task(rq, p); __task_rq_unlock(rq); - spin_unlock_irqrestore(&p->pi_lock, flags); + spin_unlock(&p->pi_lock); } while_each_thread(g, p); - read_unlock_irq(&tasklist_lock); + read_unlock_irqrestore(&tasklist_lock, flags); } #endif /* CONFIG_MAGIC_SYSRQ */ @@ -7387,9 +7415,9 @@ void set_curr_task(int cpu, struct task_struct *p) #endif -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_GROUP_SCHED -#ifdef CONFIG_SMP +#if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP /* * distribute shares of all task groups among their schedulable entities, * to reflect load distribution across cpus. 
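A note on the conversion above: the single ratio knob is replaced by a runtime/period pair, so realtime tasks may consume sysctl_sched_rt_runtime microseconds out of every sysctl_sched_rt_period microseconds (950000 out of 1000000 by default, i.e. 95%), with -1 meaning RUNTIME_INF. The to_ratio()/__rt_schedulable() helpers added further down in this patch compare group budgets in 16.16 fixed point; 0.95 * 65536 rounds to 62259, exactly the old sysctl_sched_rt_ratio default removed here. The following standalone sketch of that arithmetic is illustrative only (the 400000 us group request is hypothetical; the kernel itself uses div64_64):

	#include <stdio.h>

	#define RUNTIME_INF (~0ULL)

	/* same scaling as the patch's to_ratio(): runtime/period in 16.16 */
	static unsigned long to_ratio(unsigned long long period,
				      unsigned long long runtime)
	{
		if (runtime == RUNTIME_INF)
			return 1UL << 16;
		return (unsigned long)((runtime << 16) / period);
	}

	int main(void)
	{
		/* defaults: 950000us of runtime per 1000000us period */
		unsigned long global = to_ratio(1000000, 950000);
		/* a group asking for 400000us of the same period */
		unsigned long group = to_ratio(1000000, 400000);

		printf("global=%lu group=%lu schedulable=%s\n",
		       global, group, group < global ? "yes" : "no");
		return 0;
	}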
@@ -7540,7 +7568,8 @@ static int load_balance_monitor(void *unused) } #endif /* CONFIG_SMP */ -static void free_sched_group(struct task_group *tg) +#ifdef CONFIG_FAIR_GROUP_SCHED +static void free_fair_sched_group(struct task_group *tg) { int i; @@ -7549,49 +7578,27 @@ static void free_sched_group(struct task_group *tg) kfree(tg->cfs_rq[i]); if (tg->se) kfree(tg->se[i]); - if (tg->rt_rq) - kfree(tg->rt_rq[i]); - if (tg->rt_se) - kfree(tg->rt_se[i]); } kfree(tg->cfs_rq); kfree(tg->se); - kfree(tg->rt_rq); - kfree(tg->rt_se); - kfree(tg); } -/* allocate runqueue etc for a new task group */ -struct task_group *sched_create_group(void) +static int alloc_fair_sched_group(struct task_group *tg) { - struct task_group *tg; struct cfs_rq *cfs_rq; struct sched_entity *se; - struct rt_rq *rt_rq; - struct sched_rt_entity *rt_se; struct rq *rq; int i; - tg = kzalloc(sizeof(*tg), GFP_KERNEL); - if (!tg) - return ERR_PTR(-ENOMEM); - tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); if (!tg->cfs_rq) goto err; tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); if (!tg->se) goto err; - tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); - if (!tg->rt_rq) - goto err; - tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL); - if (!tg->rt_se) - goto err; tg->shares = NICE_0_LOAD; - tg->rt_ratio = 0; /* XXX */ for_each_possible_cpu(i) { rq = cpu_rq(i); @@ -7606,6 +7613,79 @@ struct task_group *sched_create_group(void) if (!se) goto err; + init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); + } + + return 1; + + err: + return 0; +} + +static inline void register_fair_sched_group(struct task_group *tg, int cpu) +{ + list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list, + &cpu_rq(cpu)->leaf_cfs_rq_list); +} + +static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) +{ + list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list); +} +#else +static inline void free_fair_sched_group(struct task_group *tg) +{ +} + +static inline int alloc_fair_sched_group(struct task_group *tg) +{ + return 1; +} + +static inline void register_fair_sched_group(struct task_group *tg, int cpu) +{ +} + +static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) +{ +} +#endif + +#ifdef CONFIG_RT_GROUP_SCHED +static void free_rt_sched_group(struct task_group *tg) +{ + int i; + + for_each_possible_cpu(i) { + if (tg->rt_rq) + kfree(tg->rt_rq[i]); + if (tg->rt_se) + kfree(tg->rt_se[i]); + } + + kfree(tg->rt_rq); + kfree(tg->rt_se); +} + +static int alloc_rt_sched_group(struct task_group *tg) +{ + struct rt_rq *rt_rq; + struct sched_rt_entity *rt_se; + struct rq *rq; + int i; + + tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL); + if (!tg->rt_rq) + goto err; + tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL); + if (!tg->rt_se) + goto err; + + tg->rt_runtime = 0; + + for_each_possible_cpu(i) { + rq = cpu_rq(i); + rt_rq = kmalloc_node(sizeof(struct rt_rq), GFP_KERNEL|__GFP_ZERO, cpu_to_node(i)); if (!rt_rq) @@ -7616,20 +7696,75 @@ struct task_group *sched_create_group(void) if (!rt_se) goto err; - init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0); init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0); } - lock_task_group_list(); + return 1; + + err: + return 0; +} + +static inline void register_rt_sched_group(struct task_group *tg, int cpu) +{ + list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list, + &cpu_rq(cpu)->leaf_rt_rq_list); +} + +static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) +{ + list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list); +} +#else +static inline void 
free_rt_sched_group(struct task_group *tg) +{ +} + +static inline int alloc_rt_sched_group(struct task_group *tg) +{ + return 1; +} + +static inline void register_rt_sched_group(struct task_group *tg, int cpu) +{ +} + +static inline void unregister_rt_sched_group(struct task_group *tg, int cpu) +{ +} +#endif + +static void free_sched_group(struct task_group *tg) +{ + free_fair_sched_group(tg); + free_rt_sched_group(tg); + kfree(tg); +} + +/* allocate runqueue etc for a new task group */ +struct task_group *sched_create_group(void) +{ + struct task_group *tg; + unsigned long flags; + int i; + + tg = kzalloc(sizeof(*tg), GFP_KERNEL); + if (!tg) + return ERR_PTR(-ENOMEM); + + if (!alloc_fair_sched_group(tg)) + goto err; + + if (!alloc_rt_sched_group(tg)) + goto err; + + spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) { - rq = cpu_rq(i); - cfs_rq = tg->cfs_rq[i]; - list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); - rt_rq = tg->rt_rq[i]; - list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list); + register_fair_sched_group(tg, i); + register_rt_sched_group(tg, i); } list_add_rcu(&tg->list, &task_groups); - unlock_task_group_list(); + spin_unlock_irqrestore(&task_group_lock, flags); return tg; @@ -7648,21 +7783,16 @@ static void free_sched_group_rcu(struct rcu_head *rhp) /* Destroy runqueue etc associated with a task group */ void sched_destroy_group(struct task_group *tg) { - struct cfs_rq *cfs_rq = NULL; - struct rt_rq *rt_rq = NULL; + unsigned long flags; int i; - lock_task_group_list(); + spin_lock_irqsave(&task_group_lock, flags); for_each_possible_cpu(i) { - cfs_rq = tg->cfs_rq[i]; - list_del_rcu(&cfs_rq->leaf_cfs_rq_list); - rt_rq = tg->rt_rq[i]; - list_del_rcu(&rt_rq->leaf_rt_rq_list); + unregister_fair_sched_group(tg, i); + unregister_rt_sched_group(tg, i); } list_del_rcu(&tg->list); - unlock_task_group_list(); - - BUG_ON(!cfs_rq); + spin_unlock_irqrestore(&task_group_lock, flags); /* wait for possible concurrent references to cfs_rqs complete */ call_rcu(&tg->rcu, free_sched_group_rcu); @@ -7703,6 +7833,7 @@ void sched_move_task(struct task_struct *tsk) task_rq_unlock(rq, &flags); } +#ifdef CONFIG_FAIR_GROUP_SCHED /* rq->lock to be locked by caller */ static void set_se_shares(struct sched_entity *se, unsigned long shares) { @@ -7728,13 +7859,14 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) } } +static DEFINE_MUTEX(shares_mutex); + int sched_group_set_shares(struct task_group *tg, unsigned long shares) { int i; - struct cfs_rq *cfs_rq; - struct rq *rq; + unsigned long flags; - lock_task_group_list(); + mutex_lock(&shares_mutex); if (tg->shares == shares) goto done; @@ -7746,10 +7878,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) * load_balance_fair) from referring to this group first, * by taking it off the rq->leaf_cfs_rq_list on each cpu. */ - for_each_possible_cpu(i) { - cfs_rq = tg->cfs_rq[i]; - list_del_rcu(&cfs_rq->leaf_cfs_rq_list); - } + spin_lock_irqsave(&task_group_lock, flags); + for_each_possible_cpu(i) + unregister_fair_sched_group(tg, i); + spin_unlock_irqrestore(&task_group_lock, flags); /* wait for any ongoing reference to this group to finish */ synchronize_sched(); @@ -7769,13 +7901,12 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) * Enable load balance activity on this group, by inserting it back on * each cpu's rq->leaf_cfs_rq_list. 
 */ - for_each_possible_cpu(i) { - rq = cpu_rq(i); - cfs_rq = tg->cfs_rq[i]; - list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); - } + spin_lock_irqsave(&task_group_lock, flags); + for_each_possible_cpu(i) + register_fair_sched_group(tg, i); + spin_unlock_irqrestore(&task_group_lock, flags); done: - unlock_task_group_list(); + mutex_unlock(&shares_mutex); return 0; } @@ -7783,35 +7914,84 @@ unsigned long sched_group_shares(struct task_group *tg) { return tg->shares; } +#endif +#ifdef CONFIG_RT_GROUP_SCHED /* - * Ensure the total rt_ratio <= sysctl_sched_rt_ratio + * Ensure that the real time constraints are schedulable. */ -int sched_group_set_rt_ratio(struct task_group *tg, unsigned long rt_ratio) +static DEFINE_MUTEX(rt_constraints_mutex); + +static unsigned long to_ratio(u64 period, u64 runtime) +{ + if (runtime == RUNTIME_INF) + return 1ULL << 16; + + runtime *= (1ULL << 16); + runtime = div64_64(runtime, period); + return runtime; +} + +static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) { struct task_group *tgi; unsigned long total = 0; + unsigned long global_ratio = + to_ratio(sysctl_sched_rt_period, + sysctl_sched_rt_runtime < 0 ? + RUNTIME_INF : sysctl_sched_rt_runtime); rcu_read_lock(); - list_for_each_entry_rcu(tgi, &task_groups, list) - total += tgi->rt_ratio; - rcu_read_unlock(); + list_for_each_entry_rcu(tgi, &task_groups, list) { + if (tgi == tg) + continue; - if (total + rt_ratio - tg->rt_ratio > sysctl_sched_rt_ratio) - return -EINVAL; + total += to_ratio(period, tgi->rt_runtime); + } + rcu_read_unlock(); - tg->rt_ratio = rt_ratio; - return 0; + return total + to_ratio(period, runtime) < global_ratio; } -unsigned long sched_group_rt_ratio(struct task_group *tg) +int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us) { - return tg->rt_ratio; + u64 rt_runtime, rt_period; + int err = 0; + + rt_period = sysctl_sched_rt_period * NSEC_PER_USEC; + rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC; + if (rt_runtime_us == -1) + rt_runtime = rt_period; + + mutex_lock(&rt_constraints_mutex); + if (!__rt_schedulable(tg, rt_period, rt_runtime)) { + err = -EINVAL; + goto unlock; + } + if (rt_runtime_us == -1) + rt_runtime = RUNTIME_INF; + tg->rt_runtime = rt_runtime; + unlock: + mutex_unlock(&rt_constraints_mutex); + + return err; } -#endif /* CONFIG_FAIR_GROUP_SCHED */ +long sched_group_rt_runtime(struct task_group *tg) +{ + u64 rt_runtime_us; + + if (tg->rt_runtime == RUNTIME_INF) + return -1; + + rt_runtime_us = tg->rt_runtime; + do_div(rt_runtime_us, NSEC_PER_USEC); + return rt_runtime_us; +} +#endif +#endif /* CONFIG_GROUP_SCHED */ -#ifdef CONFIG_FAIR_CGROUP_SCHED +#ifdef CONFIG_CGROUP_SCHED /* return corresponding task_group object of a cgroup */ static inline struct task_group *cgroup_tg(struct cgroup *cgrp) @@ -7857,9 +8037,15 @@ static int cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *tsk) { +#ifdef CONFIG_RT_GROUP_SCHED + /* Don't accept realtime tasks when there is no way for them to run */ + if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0) + return -EINVAL; +#else /* We don't support RT-tasks being in separate groups */ if (tsk->sched_class != &fair_sched_class) return -EINVAL; +#endif return 0; } @@ -7871,6 +8057,7 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, sched_move_task(tsk); } +#ifdef CONFIG_FAIR_GROUP_SCHED static int cpu_shares_write_uint(struct cgroup *cgrp, struct cftype *cftype, u64 shareval) { @@ -7883,31 +8070,70 @@ static u64
cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft) return (u64) tg->shares; } +#endif -static int cpu_rt_ratio_write_uint(struct cgroup *cgrp, struct cftype *cftype, - u64 rt_ratio_val) +#ifdef CONFIG_RT_GROUP_SCHED +static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + const char __user *userbuf, + size_t nbytes, loff_t *unused_ppos) { - return sched_group_set_rt_ratio(cgroup_tg(cgrp), rt_ratio_val); + char buffer[64]; + int retval = 0; + s64 val; + char *end; + + if (!nbytes) + return -EINVAL; + if (nbytes >= sizeof(buffer)) + return -E2BIG; + if (copy_from_user(buffer, userbuf, nbytes)) + return -EFAULT; + + buffer[nbytes] = 0; /* nul-terminate */ + + /* strip newline if necessary */ + if (nbytes && (buffer[nbytes-1] == '\n')) + buffer[nbytes-1] = 0; + val = simple_strtoll(buffer, &end, 0); + if (*end) + return -EINVAL; + + /* Pass to subsystem */ + retval = sched_group_set_rt_runtime(cgroup_tg(cgrp), val); + if (!retval) + retval = nbytes; + return retval; } -static u64 cpu_rt_ratio_read_uint(struct cgroup *cgrp, struct cftype *cft) +static ssize_t cpu_rt_runtime_read(struct cgroup *cgrp, struct cftype *cft, + struct file *file, + char __user *buf, size_t nbytes, + loff_t *ppos) { - struct task_group *tg = cgroup_tg(cgrp); + char tmp[64]; + long val = sched_group_rt_runtime(cgroup_tg(cgrp)); + int len = sprintf(tmp, "%ld\n", val); - return (u64) tg->rt_ratio; + return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); } +#endif static struct cftype cpu_files[] = { +#ifdef CONFIG_FAIR_GROUP_SCHED { .name = "shares", .read_uint = cpu_shares_read_uint, .write_uint = cpu_shares_write_uint, }, +#endif +#ifdef CONFIG_RT_GROUP_SCHED { - .name = "rt_ratio", - .read_uint = cpu_rt_ratio_read_uint, - .write_uint = cpu_rt_ratio_write_uint, + .name = "rt_runtime_us", + .read = cpu_rt_runtime_read, + .write = cpu_rt_runtime_write, }, +#endif }; static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont) @@ -7926,7 +8152,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { .early_init = 1, }; -#endif /* CONFIG_FAIR_CGROUP_SCHED */ +#endif /* CONFIG_CGROUP_SCHED */ #ifdef CONFIG_CGROUP_CPUACCT diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 274b40d..f54792b 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -55,14 +55,14 @@ static inline int on_rt_rq(struct sched_rt_entity *rt_se) return !list_empty(&rt_se->run_list); } -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED -static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) +static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) { if (!rt_rq->tg) - return SCHED_RT_FRAC; + return RUNTIME_INF; - return rt_rq->tg->rt_ratio; + return rt_rq->tg->rt_runtime; } #define for_each_leaf_rt_rq(rt_rq, rq) \ @@ -89,7 +89,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) static void enqueue_rt_entity(struct sched_rt_entity *rt_se); static void dequeue_rt_entity(struct sched_rt_entity *rt_se); -static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) +static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { struct sched_rt_entity *rt_se = rt_rq->rt_se; @@ -102,7 +102,7 @@ static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) } } -static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) +static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) { struct sched_rt_entity *rt_se = rt_rq->rt_se; @@ -110,11 +110,31 @@ static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) dequeue_rt_entity(rt_se); } +static inline int rt_rq_throttled(struct rt_rq 
*rt_rq) +{ + return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; +} + +static int rt_se_boosted(struct sched_rt_entity *rt_se) +{ + struct rt_rq *rt_rq = group_rt_rq(rt_se); + struct task_struct *p; + + if (rt_rq) + return !!rt_rq->rt_nr_boosted; + + p = rt_task_of(rt_se); + return p->prio != p->normal_prio; +} + #else -static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq) +static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) { - return sysctl_sched_rt_ratio; + if (sysctl_sched_rt_runtime == -1) + return RUNTIME_INF; + + return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; } #define for_each_leaf_rt_rq(rt_rq, rq) \ @@ -141,19 +161,23 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) return NULL; } -static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq) +static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) { } -static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq) +static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) { } +static inline int rt_rq_throttled(struct rt_rq *rt_rq) +{ + return rt_rq->rt_throttled; +} #endif static inline int rt_se_prio(struct sched_rt_entity *rt_se) { -#ifdef CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_RT_GROUP_SCHED struct rt_rq *rt_rq = group_rt_rq(rt_se); if (rt_rq) @@ -163,28 +187,26 @@ static inline int rt_se_prio(struct sched_rt_entity *rt_se) return rt_task_of(rt_se)->prio; } -static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq) +static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) { - unsigned int rt_ratio = sched_rt_ratio(rt_rq); - u64 period, ratio; + u64 runtime = sched_rt_runtime(rt_rq); - if (rt_ratio == SCHED_RT_FRAC) + if (runtime == RUNTIME_INF) return 0; if (rt_rq->rt_throttled) - return 1; - - period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; - ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; + return rt_rq_throttled(rt_rq); - if (rt_rq->rt_time > ratio) { + if (rt_rq->rt_time > runtime) { struct rq *rq = rq_of_rt_rq(rt_rq); rq->rt_throttled = 1; rt_rq->rt_throttled = 1; - sched_rt_ratio_dequeue(rt_rq); - return 1; + if (rt_rq_throttled(rt_rq)) { + sched_rt_rq_dequeue(rt_rq); + return 1; + } } return 0; @@ -196,17 +218,16 @@ static void update_sched_rt_period(struct rq *rq) u64 period; while (rq->clock > rq->rt_period_expire) { - period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC; + period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; rq->rt_period_expire += period; for_each_leaf_rt_rq(rt_rq, rq) { - unsigned long rt_ratio = sched_rt_ratio(rt_rq); - u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT; + u64 runtime = sched_rt_runtime(rt_rq); - rt_rq->rt_time -= min(rt_rq->rt_time, ratio); - if (rt_rq->rt_throttled) { + rt_rq->rt_time -= min(rt_rq->rt_time, runtime); + if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { rt_rq->rt_throttled = 0; - sched_rt_ratio_enqueue(rt_rq); + sched_rt_rq_enqueue(rt_rq); } } @@ -239,12 +260,7 @@ static void update_curr_rt(struct rq *rq) cpuacct_charge(curr, delta_exec); rt_rq->rt_time += delta_exec; - /* - * might make it a tad more accurate: - * - * update_sched_rt_period(rq); - */ - if (sched_rt_ratio_exceeded(rt_rq)) + if (sched_rt_runtime_exceeded(rt_rq)) resched_task(curr); } @@ -253,7 +269,7 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { WARN_ON(!rt_prio(rt_se_prio(rt_se))); rt_rq->rt_nr_running++; -#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED if (rt_se_prio(rt_se) < rt_rq->highest_prio) rt_rq->highest_prio = 
rt_se_prio(rt_se); #endif @@ -265,6 +281,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) update_rt_migration(rq_of_rt_rq(rt_rq)); #endif +#ifdef CONFIG_RT_GROUP_SCHED + if (rt_se_boosted(rt_se)) + rt_rq->rt_nr_boosted++; +#endif } static inline @@ -273,7 +293,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) WARN_ON(!rt_prio(rt_se_prio(rt_se))); WARN_ON(!rt_rq->rt_nr_running); rt_rq->rt_nr_running--; -#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED +#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED if (rt_rq->rt_nr_running) { struct rt_prio_array *array; @@ -295,6 +315,12 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) update_rt_migration(rq_of_rt_rq(rt_rq)); #endif /* CONFIG_SMP */ +#ifdef CONFIG_RT_GROUP_SCHED + if (rt_se_boosted(rt_se)) + rt_rq->rt_nr_boosted--; + + WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); +#endif } static void enqueue_rt_entity(struct sched_rt_entity *rt_se) @@ -303,7 +329,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se) struct rt_prio_array *array = &rt_rq->active; struct rt_rq *group_rq = group_rt_rq(rt_se); - if (group_rq && group_rq->rt_throttled) + if (group_rq && rt_rq_throttled(group_rq)) return; list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); @@ -496,7 +522,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) if (unlikely(!rt_rq->rt_nr_running)) return NULL; - if (sched_rt_ratio_exceeded(rt_rq)) + if (rt_rq_throttled(rt_rq)) return NULL; do { diff --git a/kernel/signal.c b/kernel/signal.c index 2c1f08d..84917fe 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -972,7 +972,7 @@ void zap_other_threads(struct task_struct *p) } } -int fastcall __fatal_signal_pending(struct task_struct *tsk) +int __fatal_signal_pending(struct task_struct *tsk) { return sigismember(&tsk->pending.signal, SIGKILL); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d41ef6b..8b7e954 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -311,22 +311,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "sched_rt_period_ms", - .data = &sysctl_sched_rt_period, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "sched_rt_ratio", - .data = &sysctl_sched_rt_ratio, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP) { .ctl_name = CTL_UNNUMBERED, @@ -348,6 +332,22 @@ static struct ctl_table kern_table[] = { #endif { .ctl_name = CTL_UNNUMBERED, + .procname = "sched_rt_period_us", + .data = &sysctl_sched_rt_period, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "sched_rt_runtime_us", + .data = &sysctl_sched_rt_runtime, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, .procname = "sched_compat_yield", .data = &sysctl_sched_compat_yield, .maxlen = sizeof(unsigned int), @@ -978,8 +978,8 @@ static struct ctl_table vm_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nr_overcommit_hugepages", - .data = &nr_overcommit_huge_pages, - .maxlen = sizeof(nr_overcommit_huge_pages), + .data = &sysctl_overcommit_huge_pages, + .maxlen = sizeof(sysctl_overcommit_huge_pages), .mode = 0644, .proc_handler = 
&hugetlb_overcommit_handler, }, diff --git a/kernel/timeconst.pl b/kernel/timeconst.pl index 62b1287..4146803 100644 --- a/kernel/timeconst.pl +++ b/kernel/timeconst.pl @@ -339,7 +339,7 @@ sub output($@) print "\n"; foreach $pfx ('HZ_TO_MSEC','MSEC_TO_HZ', - 'USEC_TO_HZ','HZ_TO_USEC') { + 'HZ_TO_USEC','USEC_TO_HZ') { foreach $bit (32, 64) { foreach $suf ('MUL', 'ADJ', 'SHR') { printf "#define %-23s %s\n", diff --git a/kernel/user.c b/kernel/user.c index 7d7900c..7132022 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -57,7 +57,7 @@ struct user_struct root_user = { .uid_keyring = &root_user_keyring, .session_keyring = &root_session_keyring, #endif -#ifdef CONFIG_FAIR_USER_SCHED +#ifdef CONFIG_USER_SCHED .tg = &init_task_group, #endif }; @@ -90,7 +90,7 @@ static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent) return NULL; } -#ifdef CONFIG_FAIR_USER_SCHED +#ifdef CONFIG_USER_SCHED static void sched_destroy_user(struct user_struct *up) { @@ -113,15 +113,15 @@ static void sched_switch_user(struct task_struct *p) sched_move_task(p); } -#else /* CONFIG_FAIR_USER_SCHED */ +#else /* CONFIG_USER_SCHED */ static void sched_destroy_user(struct user_struct *up) { } static int sched_create_user(struct user_struct *up) { return 0; } static void sched_switch_user(struct task_struct *p) { } -#endif /* CONFIG_FAIR_USER_SCHED */ +#endif /* CONFIG_USER_SCHED */ -#if defined(CONFIG_FAIR_USER_SCHED) && defined(CONFIG_SYSFS) +#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS) static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */ static DEFINE_MUTEX(uids_mutex); @@ -137,6 +137,7 @@ static inline void uids_mutex_unlock(void) } /* uid directory attributes */ +#ifdef CONFIG_FAIR_GROUP_SCHED static ssize_t cpu_shares_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -163,10 +164,45 @@ static ssize_t cpu_shares_store(struct kobject *kobj, static struct kobj_attribute cpu_share_attr = __ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store); +#endif + +#ifdef CONFIG_RT_GROUP_SCHED +static ssize_t cpu_rt_runtime_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct user_struct *up = container_of(kobj, struct user_struct, kobj); + + return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg)); +} + +static ssize_t cpu_rt_runtime_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t size) +{ + struct user_struct *up = container_of(kobj, struct user_struct, kobj); + unsigned long rt_runtime; + int rc; + + sscanf(buf, "%lu", &rt_runtime); + + rc = sched_group_set_rt_runtime(up->tg, rt_runtime); + + return (rc ? 
rc : size); +} + +static struct kobj_attribute cpu_rt_runtime_attr = + __ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store); +#endif /* default attributes per uid directory */ static struct attribute *uids_attributes[] = { +#ifdef CONFIG_FAIR_GROUP_SCHED &cpu_share_attr.attr, +#endif +#ifdef CONFIG_RT_GROUP_SCHED + &cpu_rt_runtime_attr.attr, +#endif NULL }; @@ -269,7 +305,7 @@ static inline void free_user(struct user_struct *up, unsigned long flags) schedule_work(&up->work); } -#else /* CONFIG_FAIR_USER_SCHED && CONFIG_SYSFS */ +#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */ int uids_sysfs_init(void) { return 0; } static inline int uids_user_create(struct user_struct *up) { return 0; } @@ -373,7 +409,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) spin_lock_irq(&uidhash_lock); up = uid_hash_find(uid, hashent); if (up) { - /* This case is not possible when CONFIG_FAIR_USER_SCHED + /* This case is not possible when CONFIG_USER_SCHED * is defined, since we serialize alloc_uid() using * uids_mutex. Hence no need to call * sched_destroy_user() or remove_user_sysfs_dir(). diff --git a/mm/filemap.c b/mm/filemap.c index b7b1be6..5c74b68 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -604,7 +604,7 @@ void __lock_page(struct page *page) } EXPORT_SYMBOL(__lock_page); -int fastcall __lock_page_killable(struct page *page) +int __lock_page_killable(struct page *page) { DEFINE_WAIT_BIT(wait, &page->flags, PG_locked); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d9a3803..cb1b3a7 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -24,14 +24,15 @@ const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; static unsigned long nr_huge_pages, free_huge_pages, resv_huge_pages; static unsigned long surplus_huge_pages; +static unsigned long nr_overcommit_huge_pages; unsigned long max_huge_pages; +unsigned long sysctl_overcommit_huge_pages; static struct list_head hugepage_freelists[MAX_NUMNODES]; static unsigned int nr_huge_pages_node[MAX_NUMNODES]; static unsigned int free_huge_pages_node[MAX_NUMNODES]; static unsigned int surplus_huge_pages_node[MAX_NUMNODES]; static gfp_t htlb_alloc_mask = GFP_HIGHUSER; unsigned long hugepages_treat_as_movable; -unsigned long nr_overcommit_huge_pages; static int hugetlb_next_nid; /* @@ -609,8 +610,9 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { - spin_lock(&hugetlb_lock); proc_doulongvec_minmax(table, write, file, buffer, length, ppos); + spin_lock(&hugetlb_lock); + nr_overcommit_huge_pages = sysctl_overcommit_huge_pages; spin_unlock(&hugetlb_lock); return 0; } diff --git a/mm/memory.c b/mm/memory.c index e5628a5..717aa0e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -989,6 +989,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int i; unsigned int vm_flags; + if (len <= 0) + return 0; /* * Require read or write permissions. * If 'force' is set, we only require the "MAY" flags. 
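The hugetlb_overcommit_handler() reordering a few hunks above is worth a note: proc_doulongvec_minmax() copies from user space and may sleep, so it cannot run while hugetlb_lock is held; the handler now parses into the sysctl_overcommit_huge_pages shadow variable first and only publishes the result to nr_overcommit_huge_pages under the lock. A minimal userspace analogue of that pattern follows (all names here are invented for illustration):

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static unsigned long nr_overcommit;	/* protected by lock */

	static void store_overcommit(const char *input)
	{
		/* possibly-blocking work (parse/copy) happens outside the lock */
		unsigned long val = strtoul(input, NULL, 10);

		pthread_mutex_lock(&lock);
		nr_overcommit = val;		/* publish under the lock */
		pthread_mutex_unlock(&lock);
	}

	int main(void)
	{
		store_overcommit("128");
		printf("nr_overcommit = %lu\n", nr_overcommit);
		return 0;
	}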
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 83c69f8..8d246c3 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -116,22 +116,51 @@ static void mpol_rebind_policy(struct mempolicy *pol, /* Do sanity checking on a policy */ static int mpol_check_policy(int mode, nodemask_t *nodes) { - int empty = nodes_empty(*nodes); + int was_empty, is_empty; + + if (!nodes) + return 0; + + /* + * "Contextualize" the in-coming nodemask for cpusets: + * Remember whether the in-coming nodemask was empty. If not, + * restrict the nodes to the allowed nodes in the cpuset. + * This is guaranteed to be a subset of nodes with memory. + */ + cpuset_update_task_memory_state(); + is_empty = was_empty = nodes_empty(*nodes); + if (!was_empty) { + nodes_and(*nodes, *nodes, cpuset_current_mems_allowed); + is_empty = nodes_empty(*nodes); /* after "contextualization" */ + } switch (mode) { case MPOL_DEFAULT: - if (!empty) + /* + * require caller to specify an empty nodemask + * before "contextualization" + */ + if (!was_empty) return -EINVAL; break; case MPOL_BIND: case MPOL_INTERLEAVE: - /* Preferred will only use the first bit, but allow - more for now. */ - if (empty) + /* + * require at least 1 valid node after "contextualization" + */ + if (is_empty) + return -EINVAL; + break; + case MPOL_PREFERRED: + /* + * Did caller specify invalid nodes? + * Don't silently accept this as "local allocation". + */ + if (!was_empty && is_empty) return -EINVAL; break; } - return nodes_subset(*nodes, node_states[N_HIGH_MEMORY]) ? 0 : -EINVAL; + return 0; } /* Generate a custom zonelist for the BIND policy. */ @@ -188,8 +217,6 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes) switch (mode) { case MPOL_INTERLEAVE: policy->v.nodes = *nodes; - nodes_and(policy->v.nodes, policy->v.nodes, - node_states[N_HIGH_MEMORY]); if (nodes_weight(policy->v.nodes) == 0) { kmem_cache_free(policy_cache, policy); return ERR_PTR(-EINVAL); @@ -421,18 +448,6 @@ static int mbind_range(struct vm_area_struct *vma, unsigned long start, return err; } -static int contextualize_policy(int mode, nodemask_t *nodes) -{ - if (!nodes) - return 0; - - cpuset_update_task_memory_state(); - if (!cpuset_nodes_subset_current_mems_allowed(*nodes)) - return -EINVAL; - return mpol_check_policy(mode, nodes); -} - - /* * Update task->flags PF_MEMPOLICY bit: set iff non-default * mempolicy.
Allows more rapid checking of this (combined perhaps @@ -468,7 +483,7 @@ static long do_set_mempolicy(int mode, nodemask_t *nodes) { struct mempolicy *new; - if (contextualize_policy(mode, nodes)) + if (mpol_check_policy(mode, nodes)) return -EINVAL; new = mpol_new(mode, nodes); if (IS_ERR(new)) @@ -915,10 +930,6 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len, err = get_nodes(&nodes, nmask, maxnode); if (err) return err; -#ifdef CONFIG_CPUSETS - /* Restrict the nodes to the allowed nodes in the cpuset */ - nodes_and(nodes, nodes, current->mems_allowed); -#endif return do_mbind(start, len, mode, &nodes, flags); } diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index d3e4e18..0c2c937 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -465,7 +465,7 @@ int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) return len; } -void fastcall __rfcomm_dlc_throttle(struct rfcomm_dlc *d) +void __rfcomm_dlc_throttle(struct rfcomm_dlc *d) { BT_DBG("dlc %p state %ld", d, d->state); @@ -476,7 +476,7 @@ void fastcall __rfcomm_dlc_throttle(struct rfcomm_dlc *d) rfcomm_schedule(RFCOMM_SCHED_TX); } -void fastcall __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d) +void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d) { BT_DBG("dlc %p state %ld", d, d->state); diff --git a/net/core/dev.c b/net/core/dev.c index 9549417..b3e19ae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2143,7 +2143,7 @@ static int process_backlog(struct napi_struct *napi, int quota) * * The entry's receive function will be scheduled to run */ -void fastcall __napi_schedule(struct napi_struct *n) +void __napi_schedule(struct napi_struct *n) { unsigned long flags; @@ -3038,8 +3038,7 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from) EXPORT_SYMBOL(dev_unicast_sync); /** - * dev_unicast_unsync - Remove synchronized addresses from the destination - * device + * dev_unicast_unsync - Remove synchronized addresses from the destination device * @to: destination device * @from: source device * diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4e35422..cfc07da 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1907,11 +1907,11 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, * of bytes already consumed and the next call to * skb_seq_read() will return the remaining part of the block. * - * Note: The size of each block of data returned can be arbitary, + * Note 1: The size of each block of data returned can be arbitrary, * this limitation is the cost for zerocopy seqeuental * reads of potentially non linear data. - * Note: Fragment lists within fragments are not implemented + * Note 2: Fragment lists within fragments are not implemented * at the moment, state->root_skb could be replaced with * a stack for this purpose. */
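For reference, the sequential-read helpers whose kernel-doc notes are renumbered above are normally driven in a loop like the sketch below (written against the existing skb_prepare_seq_read()/skb_seq_read() API; kernel context assumed, not part of this patch):

	static void scan_skb_range(struct sk_buff *skb, unsigned int from,
				   unsigned int to)
	{
		struct skb_seq_state st;
		const u8 *data;
		unsigned int consumed = 0, len;

		skb_prepare_seq_read(skb, from, to, &st);
		while ((len = skb_seq_read(consumed, &data, &st)) != 0) {
			/* process 'len' bytes at 'data'; block size is arbitrary */
			consumed += len;
		}
		/*
		 * A return of 0 from skb_seq_read() tears the state down;
		 * use skb_abort_seq_read(&st) when stopping early instead.
		 */
	}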
diff --git a/net/core/sock.c b/net/core/sock.c index 433715f..09cb3a7 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1731,7 +1731,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) atomic_set(&sk->sk_drops, 0); } -void fastcall lock_sock_nested(struct sock *sk, int subclass) +void lock_sock_nested(struct sock *sk, int subclass) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); @@ -1748,7 +1748,7 @@ void fastcall lock_sock_nested(struct sock *sk, int subclass) EXPORT_SYMBOL(lock_sock_nested); -void fastcall release_sock(struct sock *sk) +void release_sock(struct sock *sk) { /* * The sk_lock has mutex_unlock() semantics: diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0998e6d..8c6a7f1 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -464,9 +464,9 @@ rpc_release_client(struct rpc_clnt *clnt) /** * rpc_bind_new_program - bind a new RPC program to an existing client - * @old - old rpc_client - * @program - rpc program to set - * @vers - rpc program version + * @old: old rpc_client + * @program: rpc program to set + * @vers: rpc program version * * Clones the rpc client and sets up a new RPC program. This is mainly * of use for enabling different RPC programs to share the same transport. @@ -575,7 +575,7 @@ EXPORT_SYMBOL_GPL(rpc_call_sync); * @clnt: pointer to RPC client * @msg: RPC call parameters * @flags: RPC call flags - * @ops: RPC call ops + * @tk_ops: RPC call ops * @data: user call data */ int @@ -610,7 +610,7 @@ EXPORT_SYMBOL_GPL(rpc_call_start); * rpc_peeraddr - extract remote peer address from clnt's xprt * @clnt: RPC client structure * @buf: target buffer - * @size: length of target buffer + * @bufsize: length of target buffer * * Returns the number of bytes that are actually in the stored address. */ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 7e19716..0e3ead7 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -677,7 +677,7 @@ rpc_lookup_negative(char *path, struct nameidata *nd) /** * rpc_mkdir - Create a new directory in rpc_pipefs * @path: path from the rpc_pipefs root to the new directory - * @rpc_clnt: rpc client to associate with this directory + * @rpc_client: rpc client to associate with this directory * * This creates a directory at the given @path associated with * @rpc_client, which will contain a file named "info" with some basic @@ -748,6 +748,7 @@ rpc_rmdir(struct dentry *dentry) * @private: private data to associate with the pipe, for the caller's use * @ops: operations defining the behavior of the pipe: upcall, downcall, * release_pipe, and destroy_msg. + * @flags: rpc_inode flags * * Data is made available for userspace to read by calls to * rpc_queue_upcall().
The actual reads will result in calls to diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index cfcade9..d5553b8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -124,7 +124,7 @@ EXPORT_SYMBOL_GPL(xprt_register_transport); /** * xprt_unregister_transport - unregister a transport implementation - * transport: transport to unregister + * @transport: transport to unregister * * Returns: * 0: transport successfully unregistered diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 3e32194..0598b22 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -159,7 +159,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, BUG_ON(sge_count >= 32); dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, " "write_len=%d, xdr_sge=%p, sge_count=%d\n", - rmr, to, xdr_off, write_len, xdr_sge, sge_count); + rmr, (unsigned long long)to, xdr_off, + write_len, xdr_sge, sge_count); ctxt = svc_rdma_get_context(xprt); ctxt->count = 0; diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c index a367975..c8e099d 100644 --- a/samples/markers/probe-example.c +++ b/samples/markers/probe-example.c @@ -20,31 +20,27 @@ struct probe_data { marker_probe_func *probe_func; }; -void probe_subsystem_event(const struct marker *mdata, void *private, - const char *format, ...) +void probe_subsystem_event(void *probe_data, void *call_data, + const char *format, va_list *args) { - va_list ap; /* Declare args */ unsigned int value; const char *mystr; /* Assign args */ - va_start(ap, format); - value = va_arg(ap, typeof(value)); - mystr = va_arg(ap, typeof(mystr)); + value = va_arg(*args, typeof(value)); + mystr = va_arg(*args, typeof(mystr)); /* Call printk */ - printk(KERN_DEBUG "Value %u, string %s\n", value, mystr); + printk(KERN_INFO "Value %u, string %s\n", value, mystr); /* or count, check rights, serialize data in a buffer */ - - va_end(ap); } atomic_t eventb_count = ATOMIC_INIT(0); -void probe_subsystem_eventb(const struct marker *mdata, void *private, - const char *format, ...) 
+void probe_subsystem_eventb(void *probe_data, void *call_data, + const char *format, va_list *args) { /* Increment counter */ atomic_inc(&eventb_count); @@ -72,10 +68,6 @@ static int __init probe_init(void) if (result) printk(KERN_INFO "Unable to register probe %s\n", probe_array[i].name); - result = marker_arm(probe_array[i].name); - if (result) - printk(KERN_INFO "Unable to arm probe %s\n", - probe_array[i].name); } return 0; } @@ -85,7 +77,8 @@ static void __exit probe_fini(void) int i; for (i = 0; i < ARRAY_SIZE(probe_array); i++) - marker_probe_unregister(probe_array[i].name); + marker_probe_unregister(probe_array[i].name, + probe_array[i].probe_func, &probe_array[i]); printk(KERN_INFO "Number of event b : %u\n", atomic_read(&eventb_count)); } diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index da3559e..d64e6ba 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -39,10 +39,13 @@ escsq = $(subst $(squote),'\$(squote)',$1) # - If they are equal no change, and no timestamp update # - stdin is piped in from the first prerequisite ($<) so one has # to specify a valid file as first prerequisite (often the kbuild file) + chk_filechk = : quiet_chk_filechk = echo ' CHK $@' silent_chk_filechk = : + upd_filechk = : quiet_upd_filechk = echo ' UPD $@' silent_upd_filechk = : + define filechk $(Q)set -e; \ $($(quiet)chk_filechk); \ diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 65e707e..cfc004e 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -13,6 +13,7 @@ # 2) modpost is then used to # 3) create one <module>.mod.c file pr. module # 4) create one Module.symvers file with CRC for all exported symbols +# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers # 5) compile all <module>.mod.c files # 6) final link of the module to a <module.ko> file @@ -45,6 +46,10 @@ include scripts/Makefile.lib kernelsymfile := $(objtree)/Module.symvers modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers +kernelmarkersfile := $(objtree)/Module.markers +modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers + +markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile)) # Step 1), find all modules listed in $(MODVERDIR)/ __modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod))) @@ -63,6 +68,8 @@ modpost = scripts/mod/modpost \ $(if $(KBUILD_EXTMOD),-I $(modulesymfile)) \ $(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \ $(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \ + $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \ + $(if $(CONFIG_MARKERS),-M $(markersfile)) \ $(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) quiet_cmd_modpost = MODPOST $(words $(filter-out vmlinux FORCE, $^)) modules @@ -82,6 +89,10 @@ vmlinux.o: FORCE $(symverfile): __modpost ; $(modules:.ko=.mod.c): __modpost ; +ifdef CONFIG_MARKERS +$(markersfile): __modpost ; +endif + # Step 5), compile all *.mod.c files diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index dbe1fb5..6174277 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -11,6 +11,8 @@ * Usage: modpost vmlinux module1.o module2.o ... 
*/ +#define _GNU_SOURCE +#include <stdio.h> #include <ctype.h> #include "modpost.h" #include "../../include/linux/license.h" @@ -435,6 +437,8 @@ static int parse_elf(struct elf_info *info, const char *filename) info->export_unused_gpl_sec = i; else if (strcmp(secname, "__ksymtab_gpl_future") == 0) info->export_gpl_future_sec = i; + else if (strcmp(secname, "__markers_strings") == 0) + info->markers_strings_sec = i; if (sechdrs[i].sh_type != SHT_SYMTAB) continue; @@ -1470,6 +1474,62 @@ static void check_sec_ref(struct module *mod, const char *modname, } } +static void get_markers(struct elf_info *info, struct module *mod) +{ + const Elf_Shdr *sh = &info->sechdrs[info->markers_strings_sec]; + const char *strings = (const char *) info->hdr + sh->sh_offset; + const Elf_Sym *sym, *first_sym, *last_sym; + size_t n; + + if (!info->markers_strings_sec) + return; + + /* + * First count the strings. We look for all the symbols defined + * in the __markers_strings section named __mstrtab_*. For + * these local names, the compiler puts a random .NNN suffix on, + * so the names don't correspond exactly. + */ + first_sym = last_sym = NULL; + n = 0; + for (sym = info->symtab_start; sym < info->symtab_stop; sym++) + if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && + sym->st_shndx == info->markers_strings_sec && + !strncmp(info->strtab + sym->st_name, + "__mstrtab_", sizeof "__mstrtab_" - 1)) { + if (first_sym == NULL) + first_sym = sym; + last_sym = sym; + ++n; + } + + if (n == 0) + return; + + /* + * Now collect each name and format into a line for the output. + * Lines look like: + * marker_name vmlinux marker %s format %d + * The format string after the second \t can use whitespace. + */ + mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n)); + mod->nmarkers = n; + + n = 0; + for (sym = first_sym; sym <= last_sym; sym++) + if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT && + sym->st_shndx == info->markers_strings_sec && + !strncmp(info->strtab + sym->st_name, + "__mstrtab_", sizeof "__mstrtab_" - 1)) { + const char *name = strings + sym->st_value; + const char *fmt = strchr(name, '\0') + 1; + char *line = NULL; + asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); + NOFAIL(line); + mod->markers[n++] = line; + } +} + static void read_symbols(char *modname) { const char *symname; @@ -1521,6 +1581,8 @@ static void read_symbols(char *modname) get_src_version(modname, mod->srcversion, sizeof(mod->srcversion)-1); + get_markers(&info, mod); + parse_elf_finish(&info); /* Our trick to get versioning for struct_module - it's @@ -1867,16 +1929,104 @@ static void write_dump(const char *fname) write_if_changed(&buf, fname); } +static void add_marker(struct module *mod, const char *name, const char *fmt) +{ + char *line = NULL; + asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt); + NOFAIL(line); + + mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) * + sizeof mod->markers[0]))); + mod->markers[mod->nmarkers++] = line; +} + +static void read_markers(const char *fname) +{ + unsigned long size, pos = 0; + void *file = grab_file(fname, &size); + char *line; + + if (!file) /* No old markers, silently ignore */ + return; + + while ((line = get_next_line(&pos, file, size))) { + char *marker, *modname, *fmt; + struct module *mod; + + marker = line; + modname = strchr(marker, '\t'); + if (!modname) + goto fail; + *modname++ = '\0'; + fmt = strchr(modname, '\t'); + if (!fmt) + goto fail; + *fmt++ = '\0'; + if (*marker == '\0' || *modname == '\0') + goto fail; + + mod = find_module(modname); + if (!mod) { 
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 999f15e..565c587 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h
@@ -112,6 +112,8 @@ struct module {
 	int has_init;
 	int has_cleanup;
 	struct buffer dev_table_buf;
+	char **markers;
+	size_t nmarkers;
 	char srcversion[25];
 };
 
@@ -126,6 +128,7 @@ struct elf_info {
 	Elf_Section  export_gpl_sec;
 	Elf_Section  export_unused_gpl_sec;
 	Elf_Section  export_gpl_future_sec;
+	Elf_Section  markers_strings_sec;
 	const char   *strtab;
 	char         *modinfo;
 	unsigned int modinfo_len;
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e5ed075..44f16d9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c
@@ -1272,12 +1272,18 @@ static int task_has_perm(struct task_struct *tsk1,
 			    SECCLASS_PROCESS, perms, NULL);
 }
 
+#if CAP_LAST_CAP > 63
+#error Fix SELinux to handle capabilities > 63.
+#endif
+
 /* Check whether a task is allowed to use a capability. */
 static int task_has_capability(struct task_struct *tsk,
 			       int cap)
 {
 	struct task_security_struct *tsec;
 	struct avc_audit_data ad;
+	u16 sclass;
+	u32 av = CAP_TO_MASK(cap);
 
 	tsec = tsk->security;
 
@@ -1285,8 +1291,19 @@ static int task_has_capability(struct task_struct *tsk,
 	ad.tsk = tsk;
 	ad.u.cap = cap;
 
-	return avc_has_perm(tsec->sid, tsec->sid,
-			    SECCLASS_CAPABILITY, CAP_TO_MASK(cap), &ad);
+	switch (CAP_TO_INDEX(cap)) {
+	case 0:
+		sclass = SECCLASS_CAPABILITY;
+		break;
+	case 1:
+		sclass = SECCLASS_CAPABILITY2;
+		break;
+	default:
+		printk(KERN_ERR
+		       "SELinux: out of range capability %d\n", cap);
+		BUG();
+	}
+	return avc_has_perm(tsec->sid, tsec->sid, sclass, av, &ad);
 }
 
 /* Check whether a task is allowed to use a system operation. */
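The CAP_TO_INDEX()/CAP_TO_MASK() split that task_has_capability() now depends on: the capability number's high bits select a 32-bit word (hence the capability vs. capability2 class) and the low five bits select a bit within it. A standalone sketch of the arithmetic follows; the macro definitions match linux/capability.h and the capability numbers are the real ones, but the program itself is illustrative, not kernel code.

#include <stdio.h>

#define CAP_TO_INDEX(x)	((x) >> 5)
#define CAP_TO_MASK(x)	(1U << ((x) & 31))

int main(void)
{
	static const int caps[] = {
		31,	/* CAP_SETFCAP      -> index 0, bit 0x80000000 */
		32,	/* CAP_MAC_OVERRIDE -> index 1, bit 0x00000001 */
		33,	/* CAP_MAC_ADMIN    -> index 1, bit 0x00000002 */
	};
	unsigned int i;

	/* Print the (class index, permission bit) pair for each cap. */
	for (i = 0; i < sizeof caps / sizeof caps[0]; i++)
		printf("cap %2d -> index %d, mask 0x%08x\n",
		       caps[i], CAP_TO_INDEX(caps[i]), CAP_TO_MASK(caps[i]));
	return 0;
}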
diff --git a/security/selinux/include/av_perm_to_string.h b/security/selinux/include/av_perm_to_string.h index 399f868..d569669 100644 --- a/security/selinux/include/av_perm_to_string.h +++ b/security/selinux/include/av_perm_to_string.h
@@ -132,6 +132,9 @@
    S_(SECCLASS_CAPABILITY, CAPABILITY__LEASE, "lease")
    S_(SECCLASS_CAPABILITY, CAPABILITY__AUDIT_WRITE, "audit_write")
    S_(SECCLASS_CAPABILITY, CAPABILITY__AUDIT_CONTROL, "audit_control")
+   S_(SECCLASS_CAPABILITY, CAPABILITY__SETFCAP, "setfcap")
+   S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_OVERRIDE, "mac_override")
+   S_(SECCLASS_CAPABILITY2, CAPABILITY2__MAC_ADMIN, "mac_admin")
    S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_READ, "nlmsg_read")
    S_(SECCLASS_NETLINK_ROUTE_SOCKET, NETLINK_ROUTE_SOCKET__NLMSG_WRITE, "nlmsg_write")
    S_(SECCLASS_NETLINK_FIREWALL_SOCKET, NETLINK_FIREWALL_SOCKET__NLMSG_READ, "nlmsg_read")
diff --git a/security/selinux/include/av_permissions.h b/security/selinux/include/av_permissions.h index 84c9abc..75b4131 100644 --- a/security/selinux/include/av_permissions.h +++ b/security/selinux/include/av_permissions.h
@@ -533,6 +533,9 @@
 #define CAPABILITY__LEASE                         0x10000000UL
 #define CAPABILITY__AUDIT_WRITE                   0x20000000UL
 #define CAPABILITY__AUDIT_CONTROL                 0x40000000UL
+#define CAPABILITY__SETFCAP                       0x80000000UL
+#define CAPABILITY2__MAC_OVERRIDE                 0x00000001UL
+#define CAPABILITY2__MAC_ADMIN                    0x00000002UL
 #define NETLINK_ROUTE_SOCKET__IOCTL               0x00000001UL
 #define NETLINK_ROUTE_SOCKET__READ                0x00000002UL
 #define NETLINK_ROUTE_SOCKET__WRITE               0x00000004UL
diff --git a/security/selinux/include/class_to_string.h b/security/selinux/include/class_to_string.h index b1b0d1d..bd813c3 100644 --- a/security/selinux/include/class_to_string.h +++ b/security/selinux/include/class_to_string.h
@@ -71,3 +71,4 @@
     S_(NULL)
     S_(NULL)
     S_("peer")
+    S_("capability2")
diff --git a/security/selinux/include/flask.h b/security/selinux/include/flask.h index 09e9dd23..febf886 100644 --- a/security/selinux/include/flask.h +++ b/security/selinux/include/flask.h
@@ -51,6 +51,7 @@
 #define SECCLASS_DCCP_SOCKET                             60
 #define SECCLASS_MEMPROTECT                              61
 #define SECCLASS_PEER                                    68
+#define SECCLASS_CAPABILITY2                             69
 
 /*
  * Security identifier indices for initial entities
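These header updates keep three generated tables in step: the new permission bits, their names, and class 69 for capability2. The bit values must equal CAP_TO_MASK() of the corresponding capability number, since task_has_capability() passes that mask straight to avc_has_perm(). Below is a hypothetical compile-time check of that invariant, not part of the patch; the constants are copied from the hunks above and the typedef names are invented.

#define CAP_SETFCAP			31
#define CAP_MAC_OVERRIDE		32
#define CAP_TO_MASK(x)			(1U << ((x) & 31))
#define CAPABILITY__SETFCAP		0x80000000UL
#define CAPABILITY2__MAC_OVERRIDE	0x00000001UL

/* A negative array size here would make the build fail. */
typedef char setfcap_bit_ok[
	CAPABILITY__SETFCAP == CAP_TO_MASK(CAP_SETFCAP) ? 1 : -1];
typedef char mac_override_bit_ok[
	CAPABILITY2__MAC_OVERRIDE == CAP_TO_MASK(CAP_MAC_OVERRIDE) ? 1 : -1];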
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 1c11e42..5b69048 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c
@@ -701,7 +701,7 @@ static int smack_inode_getsecurity(const struct inode *inode,
 		return -EOPNOTSUPP;
 
 	sock = SOCKET_I(ip);
-	if (sock == NULL)
+	if (sock == NULL || sock->sk == NULL)
 		return -EOPNOTSUPP;
 
 	ssp = sock->sk->sk_security;
@@ -1280,10 +1280,11 @@ static void smack_to_secattr(char *smack, struct netlbl_lsm_secattr *nlsp)
  */
 static int smack_netlabel(struct sock *sk)
 {
-	struct socket_smack *ssp = sk->sk_security;
+	struct socket_smack *ssp;
 	struct netlbl_lsm_secattr secattr;
 	int rc = 0;
 
+	ssp = sk->sk_security;
 	netlbl_secattr_init(&secattr);
 	smack_to_secattr(ssp->smk_out, &secattr);
 	if (secattr.flags != NETLBL_SECATTR_NONE)
@@ -1331,7 +1332,7 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name,
 		return -EOPNOTSUPP;
 
 	sock = SOCKET_I(inode);
-	if (sock == NULL)
+	if (sock == NULL || sock->sk == NULL)
 		return -EOPNOTSUPP;
 
 	ssp = sock->sk->sk_security;
@@ -1362,7 +1363,7 @@ static int smack_inode_setsecurity(struct inode *inode, const char *name,
 static int smack_socket_post_create(struct socket *sock, int family,
 				    int type, int protocol, int kern)
 {
-	if (family != PF_INET)
+	if (family != PF_INET || sock->sk == NULL)
 		return 0;
 	/*
 	 * Set the outbound netlbl.
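The Smack hunks all apply one pattern: an inode can resolve to a struct socket whose sock has not been attached yet (or has already gone away), so sock->sk must be checked before sk_security is touched. A reduced sketch of the guard follows; the struct definitions are stand-ins, not the kernel's.

#include <stddef.h>

struct sock { void *sk_security; };
struct socket { struct sock *sk; };

static void *socket_security(const struct socket *sock)
{
	/* Guard both the socket and its sock before dereferencing. */
	if (sock == NULL || sock->sk == NULL)
		return NULL;	/* callers map this to -EOPNOTSUPP */
	return sock->sk->sk_security;
}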