diff options
624 files changed, 9943 insertions, 6098 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 5b5aba4..7306081 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -251,8 +251,6 @@ mono.txt - how to execute Mono-based .NET binaries with the help of BINFMT_MISC. moxa-smartio - file with info on installing/using Moxa multiport serial driver. -mtrr.txt - - how to use PPro Memory Type Range Registers to increase performance. mutex-design.txt - info on the generic mutex subsystem. namespaces/ diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index d8b63d1..b8e8646 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -337,7 +337,7 @@ With scatterlists, you use the resulting mapping like this: int i, count = dma_map_sg(dev, sglist, nents, direction); struct scatterlist *sg; - for (i = 0, sg = sglist; i < count; i++, sg++) { + for_each_sg(sglist, sg, count, i) { hw_address[i] = sg_dma_address(sg); hw_len[i] = sg_dma_len(sg); } diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index b7b1482..f5696ba 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -364,6 +364,10 @@ X!Edrivers/pnp/system.c !Eblock/blk-barrier.c !Eblock/blk-tag.c !Iblock/blk-tag.c +!Eblock/blk-integrity.c +!Iblock/blktrace.c +!Iblock/genhd.c +!Eblock/genhd.c </chapter> <chapter id="chrdev"> diff --git a/Documentation/HOWTO b/Documentation/HOWTO index c2371c5..48a3955 100644 --- a/Documentation/HOWTO +++ b/Documentation/HOWTO @@ -77,7 +77,8 @@ documentation files are also added which explain how to use the feature. When a kernel change causes the interface that the kernel exposes to userspace to change, it is recommended that you send the information or a patch to the manual pages explaining the change to the manual pages -maintainer at mtk.manpages@gmail.com. +maintainer at mtk.manpages@gmail.com, and CC the list +linux-api@vger.kernel.org. Here is a list of files that are in the kernel source tree that are required reading: diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist index da10e07..21f0795 100644 --- a/Documentation/SubmitChecklist +++ b/Documentation/SubmitChecklist @@ -67,6 +67,8 @@ kernel patches. 19: All new userspace interfaces are documented in Documentation/ABI/. See Documentation/ABI/README for more information. + Patches that change userspace interfaces should be CCed to + linux-api@vger.kernel.org. 20: Check that it all passes `make headers_check'. diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt index c23cab1..7257676 100644 --- a/Documentation/block/deadline-iosched.txt +++ b/Documentation/block/deadline-iosched.txt @@ -30,12 +30,18 @@ write_expire (in ms) Similar to read_expire mentioned above, but for writes. -fifo_batch +fifo_batch (number of requests) ---------- -When a read request expires its deadline, we must move some requests from -the sorted io scheduler list to the block device dispatch queue. fifo_batch -controls how many requests we move. +Requests are grouped into ``batches'' of a particular data direction (read or +write) which are serviced in increasing sector order. To limit extra seeking, +deadline expiries are only checked between batches. fifo_batch controls the +maximum number of requests per batch. + +This parameter tunes the balance between per-request latency and aggregate +throughput. When low latency is the primary concern, smaller is better (where +a value of 1 yields first-come first-served behaviour). Increasing fifo_batch +generally improves throughput, at the cost of latency variation. writes_starved (number of dispatches) diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd index 91c0dcc..2c558cd 100644 --- a/Documentation/cdrom/ide-cd +++ b/Documentation/cdrom/ide-cd @@ -145,8 +145,7 @@ useful for reading photocds. To play an audio CD, you should first unmount and remove any data CDROM. Any of the CDROM player programs should then work (workman, -workbone, cdplayer, etc.). Lacking anything else, you could use the -cdtester program in Documentation/cdrom/sbpcd. +workbone, cdplayer, etc.). On a few drives, you can read digital audio directly using a program such as cdda2wav. The only types of drive which I've heard support diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt index ffdb532..3d0b915 100644 --- a/Documentation/cpu-freq/index.txt +++ b/Documentation/cpu-freq/index.txt @@ -35,11 +35,9 @@ Mailing List ------------ There is a CPU frequency changing CVS commit and general list where you can report bugs, problems or submit patches. To post a message, -send an email to cpufreq@lists.linux.org.uk, to subscribe go to -http://lists.linux.org.uk/mailman/listinfo/cpufreq. Previous post to the -mailing list are available to subscribers at -http://lists.linux.org.uk/mailman/private/cpufreq/. - +send an email to cpufreq@vger.kernel.org, to subscribe go to +http://vger.kernel.org/vger-lists.html#cpufreq and follow the +instructions there. Links ----- @@ -50,7 +48,7 @@ how to access the CVS repository: * http://cvs.arm.linux.org.uk/ the CPUFreq Mailing list: -* http://lists.linux.org.uk/mailman/listinfo/cpufreq +* http://vger.kernel.org/vger-lists.html#cpufreq Clock and voltage scaling for the SA-1100: * http://www.lartmaker.nl/projects/scaling diff --git a/Documentation/hwmon/adt7473 b/Documentation/hwmon/adt7473 index 2126de3..1cbf671 100644 --- a/Documentation/hwmon/adt7473 +++ b/Documentation/hwmon/adt7473 @@ -14,14 +14,14 @@ Description This driver implements support for the Analog Devices ADT7473 chip family. -The LM85 uses the 2-wire interface compatible with the SMBUS 2.0 +The ADT7473 uses the 2-wire interface compatible with the SMBUS 2.0 specification. Using an analog to digital converter it measures three (3) -temperatures and two (2) voltages. It has three (3) 16-bit counters for +temperatures and two (2) voltages. It has four (4) 16-bit counters for measuring fan speed. There are three (3) PWM outputs that can be used to control fan speed. A sophisticated control system for the PWM outputs is designed into the -LM85 that allows fan speed to be adjusted automatically based on any of the +ADT7473 that allows fan speed to be adjusted automatically based on any of the three temperature sensors. Each PWM output is individually adjustable and programmable. Once configured, the ADT7473 will adjust the PWM outputs in response to the measured temperatures without further host intervention. @@ -46,14 +46,6 @@ from the raw value to get the temperature value. The Analog Devices datasheet is very detailed and describes a procedure for determining an optimal configuration for the automatic PWM control. -Hardware Configurations ------------------------ - -The ADT7473 chips have an optional SMBALERT output that can be used to -signal the chipset in case a limit is exceeded or the temperature sensors -fail. Individual sensor interrupts can be masked so they won't trigger -SMBALERT. The SMBALERT output if configured replaces the PWM2 function. - Configuration Notes ------------------- @@ -61,8 +53,8 @@ Besides standard interfaces driver adds the following: * PWM Control -* pwm#_auto_point1_pwm and pwm#_auto_point1_temp and -* pwm#_auto_point2_pwm and pwm#_auto_point2_temp - +* pwm#_auto_point1_pwm and temp#_auto_point1_temp and +* pwm#_auto_point2_pwm and temp#_auto_point2_temp - point1: Set the pwm speed at a lower temperature bound. point2: Set the pwm speed at a higher temperature bound. diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface index 2d84573..6dbfd5e 100644 --- a/Documentation/hwmon/sysfs-interface +++ b/Documentation/hwmon/sysfs-interface @@ -329,6 +329,10 @@ power[1-*]_average Average power use Unit: microWatt RO +power[1-*]_average_interval Power use averaging interval + Unit: milliseconds + RW + power[1-*]_average_highest Historical average maximum power use Unit: microWatt RO @@ -354,6 +358,14 @@ power[1-*]_reset_history Reset input_highest, input_lowest, WO ********** +* Energy * +********** + +energy[1-*]_input Cumulative energy use + Unit: microJoule + RO + +********** * Alarms * ********** diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 1150444..329dcab 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -463,12 +463,6 @@ and is between 256 and 4096 characters. It is defined in the file Range: 0 - 8192 Default: 64 - disable_8254_timer - enable_8254_timer - [IA32/X86_64] Disable/Enable interrupt 0 timer routing - over the 8254 in addition to over the IO-APIC. The - kernel tries to set a sensible default. - hpet= [X86-32,HPET] option to control HPET usage Format: { enable (default) | disable | force } disable: disable HPET and use PIT instead @@ -1882,6 +1876,12 @@ and is between 256 and 4096 characters. It is defined in the file shapers= [NET] Maximal number of shapers. + show_msr= [x86] show boot-time MSR settings + Format: { <integer> } + Show boot-time (BIOS-initialized) MSR settings. + The parameter means the number of CPUs to show, + for example 1 means boot CPU only. + sim710= [SCSI,HW] See header of drivers/scsi/sim710.c. diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx index 89c7f32..53449cb 100644 --- a/Documentation/video4linux/CARDLIST.em28xx +++ b/Documentation/video4linux/CARDLIST.em28xx @@ -46,7 +46,7 @@ 45 -> Pinnacle PCTV DVB-T (em2870) 46 -> Compro, VideoMate U3 (em2870) [185b:2870] 47 -> KWorld DVB-T 305U (em2880) [eb1a:e305] - 48 -> KWorld DVB-T 310U (em2880) + 48 -> KWorld DVB-T 310U (em2880) [eb1a:e310] 49 -> MSI DigiVox A/D (em2880) [eb1a:e310] 50 -> MSI DigiVox A/D II (em2880) [eb1a:e320] 51 -> Terratec Hybrid XS Secam (em2880) [0ccd:004c] diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt index 0f03900..9a3e4d7 100644 --- a/Documentation/video4linux/gspca.txt +++ b/Documentation/video4linux/gspca.txt @@ -190,6 +190,7 @@ pac7311 093a:260f SnakeCam pac7311 093a:2621 PAC731x pac7311 093a:2624 PAC7302 pac7311 093a:2626 Labtec 2200 +pac7311 093a:262a Webcam 300k zc3xx 0ac8:0302 Z-star Vimicro zc0302 vc032x 0ac8:0321 Vimicro generic vc0321 vc032x 0ac8:0323 Vimicro Vc0323 diff --git a/Documentation/x86/00-INDEX b/Documentation/x86/00-INDEX new file mode 100644 index 0000000..dbe3377 --- /dev/null +++ b/Documentation/x86/00-INDEX @@ -0,0 +1,4 @@ +00-INDEX + - this file +mtrr.txt + - how to use x86 Memory Type Range Registers to increase performance diff --git a/Documentation/x86/i386/boot.txt b/Documentation/x86/boot.txt index 147bfe5..83c0033 100644 --- a/Documentation/x86/i386/boot.txt +++ b/Documentation/x86/boot.txt @@ -308,7 +308,7 @@ Protocol: 2.00+ Field name: start_sys Type: read -Offset/size: 0x20c/4 +Offset/size: 0x20c/2 Protocol: 2.00+ The load low segment (0x1000). Obsolete. diff --git a/Documentation/mtrr.txt b/Documentation/x86/mtrr.txt index c39ac39..cc071dc 100644 --- a/Documentation/mtrr.txt +++ b/Documentation/x86/mtrr.txt @@ -18,7 +18,7 @@ Richard Gooch The AMD K6-2 (stepping 8 and above) and K6-3 processors have two MTRRs. These are supported. The AMD Athlon family provide 8 Intel style MTRRs. - + The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These are supported. @@ -87,7 +87,7 @@ reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1 reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1 reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1 -Some cards (especially Voodoo Graphics boards) need this 4 kB area +Some cards (especially Voodoo Graphics boards) need this 4 kB area excluded from the beginning of the region because it is used for registers. diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt index 17965f9..c93ff5f 100644 --- a/Documentation/x86/pat.txt +++ b/Documentation/x86/pat.txt @@ -14,6 +14,10 @@ PAT allows for different types of memory attributes. The most commonly used ones that will be supported at this time are Write-back, Uncached, Write-combined and Uncached Minus. + +PAT APIs +-------- + There are many different APIs in the kernel that allows setting of memory attributes at the page level. In order to avoid aliasing, these interfaces should be used thoughtfully. Below is a table of interfaces available, @@ -26,38 +30,38 @@ address range to avoid any aliasing. API | RAM | ACPI,... | Reserved/Holes | -----------------------|----------|------------|------------------| | | | | -ioremap | -- | UC | UC | +ioremap | -- | UC- | UC- | | | | | ioremap_cache | -- | WB | WB | | | | | -ioremap_nocache | -- | UC | UC | +ioremap_nocache | -- | UC- | UC- | | | | | ioremap_wc | -- | -- | WC | | | | | -set_memory_uc | UC | -- | -- | +set_memory_uc | UC- | -- | -- | set_memory_wb | | | | | | | | set_memory_wc | WC | -- | -- | set_memory_wb | | | | | | | | -pci sysfs resource | -- | -- | UC | +pci sysfs resource | -- | -- | UC- | | | | | pci sysfs resource_wc | -- | -- | WC | is IORESOURCE_PREFETCH| | | | | | | | -pci proc | -- | -- | UC | +pci proc | -- | -- | UC- | !PCIIOC_WRITE_COMBINE | | | | | | | | pci proc | -- | -- | WC | PCIIOC_WRITE_COMBINE | | | | | | | | -/dev/mem | -- | UC | UC | +/dev/mem | -- | WB/WC/UC- | WB/WC/UC- | read-write | | | | | | | | -/dev/mem | -- | UC | UC | +/dev/mem | -- | UC- | UC- | mmap SYNC flag | | | | | | | | -/dev/mem | -- | WB/WC/UC | WB/WC/UC | +/dev/mem | -- | WB/WC/UC- | WB/WC/UC- | mmap !SYNC flag | |(from exist-| (from exist- | and | | ing alias)| ing alias) | any alias to this area| | | | @@ -68,7 +72,7 @@ pci proc | -- | -- | WC | and | | | | MTRR says WB | | | | | | | | -/dev/mem | -- | -- | UC_MINUS | +/dev/mem | -- | -- | UC- | mmap !SYNC flag | | | | no alias to this area | | | | and | | | | @@ -98,3 +102,35 @@ types. Drivers should use set_memory_[uc|wc] to set access type for RAM ranges. + +PAT debugging +------------- + +With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by + +# mount -t debugfs debugfs /sys/kernel/debug +# cat /sys/kernel/debug/x86/pat_memtype_list +PAT memtype list: +uncached-minus @ 0x7fadf000-0x7fae0000 +uncached-minus @ 0x7fb19000-0x7fb1a000 +uncached-minus @ 0x7fb1a000-0x7fb1b000 +uncached-minus @ 0x7fb1b000-0x7fb1c000 +uncached-minus @ 0x7fb1c000-0x7fb1d000 +uncached-minus @ 0x7fb1d000-0x7fb1e000 +uncached-minus @ 0x7fb1e000-0x7fb25000 +uncached-minus @ 0x7fb25000-0x7fb26000 +uncached-minus @ 0x7fb26000-0x7fb27000 +uncached-minus @ 0x7fb27000-0x7fb28000 +uncached-minus @ 0x7fb28000-0x7fb2e000 +uncached-minus @ 0x7fb2e000-0x7fb2f000 +uncached-minus @ 0x7fb2f000-0x7fb30000 +uncached-minus @ 0x7fb31000-0x7fb32000 +uncached-minus @ 0x80000000-0x90000000 + +This list shows physical address ranges and various PAT settings used to +access those physical address ranges. + +Another, more verbose way of getting PAT related debug messages is with +"debugpat" boot parameter. With this parameter, various debug messages are +printed to dmesg log. + diff --git a/Documentation/x86/i386/usb-legacy-support.txt b/Documentation/x86/usb-legacy-support.txt index 1894cdf..1894cdf 100644 --- a/Documentation/x86/i386/usb-legacy-support.txt +++ b/Documentation/x86/usb-legacy-support.txt diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt index b0c7b6c..72ffb53 100644 --- a/Documentation/x86/x86_64/boot-options.txt +++ b/Documentation/x86/x86_64/boot-options.txt @@ -54,10 +54,6 @@ APICs apicmaintimer. Useful when your PIT timer is totally broken. - disable_8254_timer / enable_8254_timer - Enable interrupt 0 timer routing over the 8254 in addition to over - the IO-APIC. The kernel tries to set a sensible default. - Early Console syntax: earlyprintk=vga diff --git a/Documentation/x86/i386/zero-page.txt b/Documentation/x86/zero-page.txt index 169ad42..169ad42 100644 --- a/Documentation/x86/i386/zero-page.txt +++ b/Documentation/x86/zero-page.txt diff --git a/MAINTAINERS b/MAINTAINERS index 3596d17..8dae455 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1198,9 +1198,7 @@ M: hpa@zytor.com S: Maintained CPUSETS -P: Paul Jackson P: Paul Menage -M: pj@sgi.com M: menage@google.com L: linux-kernel@vger.kernel.org W: http://www.bullopensource.org/cpuset/ @@ -2706,6 +2704,7 @@ MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7 P: Michael Kerrisk M: mtk.manpages@gmail.com W: http://www.kernel.org/doc/man-pages +L: linux-man@vger.kernel.org S: Supported MARVELL LIBERTAS WIRELESS DRIVER @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 27 -EXTRAVERSION = -rc8 +EXTRAVERSION = NAME = Rotary Wombat # *DOCUMENTATION* diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 49896a2..1e06d23 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -211,6 +211,7 @@ config MIPS_MALTA select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN + select SYS_SUPPORTS_MIPS_CMP if BROKEN # because SYNC_R4K is broken select SYS_SUPPORTS_MULTITHREADING select SYS_SUPPORTS_SMARTMIPS help @@ -1403,7 +1404,6 @@ config MIPS_MT_SMTC depends on CPU_MIPS32_R2 #depends on CPU_MIPS64_R2 # once there is hardware ... depends on SYS_SUPPORTS_MULTITHREADING - select GENERIC_CLOCKEVENTS_BROADCAST select CPU_MIPSR2_IRQ_VI select CPU_MIPSR2_IRQ_EI select MIPS_MT @@ -1451,32 +1451,17 @@ config MIPS_VPE_LOADER Includes a loader for loading an elf relocatable object onto another VPE and running it. -config MIPS_MT_SMTC_INSTANT_REPLAY - bool "Low-latency Dispatch of Deferred SMTC IPIs" - depends on MIPS_MT_SMTC && !PREEMPT - default y - help - SMTC pseudo-interrupts between TCs are deferred and queued - if the target TC is interrupt-inhibited (IXMT). In the first - SMTC prototypes, these queued IPIs were serviced on return - to user mode, or on entry into the kernel idle loop. The - INSTANT_REPLAY option dispatches them as part of local_irq_restore() - processing, which adds runtime overhead (hence the option to turn - it off), but ensures that IPIs are handled promptly even under - heavy I/O interrupt load. - config MIPS_MT_SMTC_IM_BACKSTOP bool "Use per-TC register bits as backstop for inhibited IM bits" depends on MIPS_MT_SMTC - default y + default n help To support multiple TC microthreads acting as "CPUs" within a VPE, VPE-wide interrupt mask bits must be specially manipulated during interrupt handling. To support legacy drivers and interrupt controller management code, SMTC has a "backstop" to track and if necessary restore the interrupt mask. This has some performance - impact on interrupt service overhead. Disable it only if you know - what you are doing. + impact on interrupt service overhead. config MIPS_MT_SMTC_IRQAFF bool "Support IRQ affinity API" @@ -1486,10 +1471,8 @@ config MIPS_MT_SMTC_IRQAFF Enables SMP IRQ affinity API (/proc/irq/*/smp_affinity, etc.) for SMTC Linux kernel. Requires platform support, of which an example can be found in the MIPS kernel i8259 and Malta - platform code. It is recommended that MIPS_MT_SMTC_INSTANT_REPLAY - be enabled if MIPS_MT_SMTC_IRQAFF is used. Adds overhead to - interrupt dispatch, and should be used only if you know what - you are doing. + platform code. Adds some overhead to interrupt dispatch, and + should be used only if you know what you are doing. config MIPS_VPE_LOADER_TOM bool "Load VPE program into memory hidden from linux" @@ -1517,6 +1500,18 @@ config MIPS_APSP_KSPD "exit" syscall notifying other kernel modules the SP program is exiting. You probably want to say yes here. +config MIPS_CMP + bool "MIPS CMP framework support" + depends on SYS_SUPPORTS_MIPS_CMP + select SYNC_R4K if BROKEN + select SYS_SUPPORTS_SMP + select SYS_SUPPORTS_SCHED_SMT if SMP + select WEAK_ORDERING + default n + help + This is a placeholder option for the GCMP work. It will need to + be handled differently... + config SB1_PASS_1_WORKAROUNDS bool depends on CPU_SB1_PASS_1 @@ -1693,6 +1688,9 @@ config SMP config SMP_UP bool +config SYS_SUPPORTS_MIPS_CMP + bool + config SYS_SUPPORTS_SMP bool @@ -1740,17 +1738,6 @@ config NR_CPUS performance should round up your number of processors to the next power of two. -config MIPS_CMP - bool "MIPS CMP framework support" - depends on SMP - select SYNC_R4K - select SYS_SUPPORTS_SCHED_SMT - select WEAK_ORDERING - default n - help - This is a placeholder option for the GCMP work. It will need to - be handled differently... - source "kernel/time/Kconfig" # diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 706f939..25775cb 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -10,6 +10,7 @@ obj-y += cpu-probe.o branch.o entry.o genex.o irq.o process.o \ obj-$(CONFIG_CEVT_BCM1480) += cevt-bcm1480.o obj-$(CONFIG_CEVT_R4K) += cevt-r4k.o +obj-$(CONFIG_MIPS_MT_SMTC) += cevt-smtc.o obj-$(CONFIG_CEVT_DS1287) += cevt-ds1287.o obj-$(CONFIG_CEVT_GT641XX) += cevt-gt641xx.o obj-$(CONFIG_CEVT_SB1250) += cevt-sb1250.o diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 24a2d90..4a4c59f 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -12,6 +12,14 @@ #include <asm/smtc_ipi.h> #include <asm/time.h> +#include <asm/cevt-r4k.h> + +/* + * The SMTC Kernel for the 34K, 1004K, et. al. replaces several + * of these routines with SMTC-specific variants. + */ + +#ifndef CONFIG_MIPS_MT_SMTC static int mips_next_event(unsigned long delta, struct clock_event_device *evt) @@ -19,60 +27,27 @@ static int mips_next_event(unsigned long delta, unsigned int cnt; int res; -#ifdef CONFIG_MIPS_MT_SMTC - { - unsigned long flags, vpflags; - local_irq_save(flags); - vpflags = dvpe(); -#endif cnt = read_c0_count(); cnt += delta; write_c0_compare(cnt); res = ((int)(read_c0_count() - cnt) > 0) ? -ETIME : 0; -#ifdef CONFIG_MIPS_MT_SMTC - evpe(vpflags); - local_irq_restore(flags); - } -#endif return res; } -static void mips_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +#endif /* CONFIG_MIPS_MT_SMTC */ + +void mips_set_clock_mode(enum clock_event_mode mode, + struct clock_event_device *evt) { /* Nothing to do ... */ } -static DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device); -static int cp0_timer_irq_installed; +DEFINE_PER_CPU(struct clock_event_device, mips_clockevent_device); +int cp0_timer_irq_installed; -/* - * Timer ack for an R4k-compatible timer of a known frequency. - */ -static void c0_timer_ack(void) -{ - write_c0_compare(read_c0_compare()); -} +#ifndef CONFIG_MIPS_MT_SMTC -/* - * Possibly handle a performance counter interrupt. - * Return true if the timer interrupt should not be checked - */ -static inline int handle_perf_irq(int r2) -{ - /* - * The performance counter overflow interrupt may be shared with the - * timer interrupt (cp0_perfcount_irq < 0). If it is and a - * performance counter has overflowed (perf_irq() == IRQ_HANDLED) - * and we can't reliably determine if a counter interrupt has also - * happened (!r2) then don't check for a timer interrupt. - */ - return (cp0_perfcount_irq < 0) && - perf_irq() == IRQ_HANDLED && - !r2; -} - -static irqreturn_t c0_compare_interrupt(int irq, void *dev_id) +irqreturn_t c0_compare_interrupt(int irq, void *dev_id) { const int r2 = cpu_has_mips_r2; struct clock_event_device *cd; @@ -93,12 +68,8 @@ static irqreturn_t c0_compare_interrupt(int irq, void *dev_id) * interrupt. Being the paranoiacs we are we check anyway. */ if (!r2 || (read_c0_cause() & (1 << 30))) { - c0_timer_ack(); -#ifdef CONFIG_MIPS_MT_SMTC - if (cpu_data[cpu].vpe_id) - goto out; - cpu = 0; -#endif + /* Clear Count/Compare Interrupt */ + write_c0_compare(read_c0_compare()); cd = &per_cpu(mips_clockevent_device, cpu); cd->event_handler(cd); } @@ -107,65 +78,16 @@ out: return IRQ_HANDLED; } -static struct irqaction c0_compare_irqaction = { +#endif /* Not CONFIG_MIPS_MT_SMTC */ + +struct irqaction c0_compare_irqaction = { .handler = c0_compare_interrupt, -#ifdef CONFIG_MIPS_MT_SMTC - .flags = IRQF_DISABLED, -#else .flags = IRQF_DISABLED | IRQF_PERCPU, -#endif .name = "timer", }; -#ifdef CONFIG_MIPS_MT_SMTC -DEFINE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device); - -static void smtc_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) -{ -} - -static void mips_broadcast(cpumask_t mask) -{ - unsigned int cpu; - - for_each_cpu_mask(cpu, mask) - smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0); -} - -static void setup_smtc_dummy_clockevent_device(void) -{ - //uint64_t mips_freq = mips_hpt_^frequency; - unsigned int cpu = smp_processor_id(); - struct clock_event_device *cd; - cd = &per_cpu(smtc_dummy_clockevent_device, cpu); - - cd->name = "SMTC"; - cd->features = CLOCK_EVT_FEAT_DUMMY; - - /* Calculate the min / max delta */ - cd->mult = 0; //div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32); - cd->shift = 0; //32; - cd->max_delta_ns = 0; //clockevent_delta2ns(0x7fffffff, cd); - cd->min_delta_ns = 0; //clockevent_delta2ns(0x30, cd); - - cd->rating = 200; - cd->irq = 17; //-1; -// if (cpu) -// cd->cpumask = CPU_MASK_ALL; // cpumask_of_cpu(cpu); -// else - cd->cpumask = cpumask_of_cpu(cpu); - - cd->set_mode = smtc_set_mode; - - cd->broadcast = mips_broadcast; - - clockevents_register_device(cd); -} -#endif - -static void mips_event_handler(struct clock_event_device *dev) +void mips_event_handler(struct clock_event_device *dev) { } @@ -177,7 +99,23 @@ static int c0_compare_int_pending(void) return (read_c0_cause() >> cp0_compare_irq) & 0x100; } -static int c0_compare_int_usable(void) +/* + * Compare interrupt can be routed and latched outside the core, + * so a single execution hazard barrier may not be enough to give + * it time to clear as seen in the Cause register. 4 time the + * pipeline depth seems reasonably conservative, and empirically + * works better in configurations with high CPU/bus clock ratios. + */ + +#define compare_change_hazard() \ + do { \ + irq_disable_hazard(); \ + irq_disable_hazard(); \ + irq_disable_hazard(); \ + irq_disable_hazard(); \ + } while (0) + +int c0_compare_int_usable(void) { unsigned int delta; unsigned int cnt; @@ -187,7 +125,7 @@ static int c0_compare_int_usable(void) */ if (c0_compare_int_pending()) { write_c0_compare(read_c0_count()); - irq_disable_hazard(); + compare_change_hazard(); if (c0_compare_int_pending()) return 0; } @@ -196,7 +134,7 @@ static int c0_compare_int_usable(void) cnt = read_c0_count(); cnt += delta; write_c0_compare(cnt); - irq_disable_hazard(); + compare_change_hazard(); if ((int)(read_c0_count() - cnt) < 0) break; /* increase delta if the timer was already expired */ @@ -205,11 +143,12 @@ static int c0_compare_int_usable(void) while ((int)(read_c0_count() - cnt) <= 0) ; /* Wait for expiry */ + compare_change_hazard(); if (!c0_compare_int_pending()) return 0; write_c0_compare(read_c0_count()); - irq_disable_hazard(); + compare_change_hazard(); if (c0_compare_int_pending()) return 0; @@ -219,6 +158,8 @@ static int c0_compare_int_usable(void) return 1; } +#ifndef CONFIG_MIPS_MT_SMTC + int __cpuinit mips_clockevent_init(void) { uint64_t mips_freq = mips_hpt_frequency; @@ -229,17 +170,6 @@ int __cpuinit mips_clockevent_init(void) if (!cpu_has_counter || !mips_hpt_frequency) return -ENXIO; -#ifdef CONFIG_MIPS_MT_SMTC - setup_smtc_dummy_clockevent_device(); - - /* - * On SMTC we only register VPE0's compare interrupt as clockevent - * device. - */ - if (cpu) - return 0; -#endif - if (!c0_compare_int_usable()) return -ENXIO; @@ -265,13 +195,9 @@ int __cpuinit mips_clockevent_init(void) cd->rating = 300; cd->irq = irq; -#ifdef CONFIG_MIPS_MT_SMTC - cd->cpumask = CPU_MASK_ALL; -#else cd->cpumask = cpumask_of_cpu(cpu); -#endif cd->set_next_event = mips_next_event; - cd->set_mode = mips_set_mode; + cd->set_mode = mips_set_clock_mode; cd->event_handler = mips_event_handler; clockevents_register_device(cd); @@ -281,12 +207,9 @@ int __cpuinit mips_clockevent_init(void) cp0_timer_irq_installed = 1; -#ifdef CONFIG_MIPS_MT_SMTC -#define CPUCTR_IMASKBIT (0x100 << cp0_compare_irq) - setup_irq_smtc(irq, &c0_compare_irqaction, CPUCTR_IMASKBIT); -#else setup_irq(irq, &c0_compare_irqaction); -#endif return 0; } + +#endif /* Not CONFIG_MIPS_MT_SMTC */ diff --git a/arch/mips/kernel/cevt-smtc.c b/arch/mips/kernel/cevt-smtc.c new file mode 100644 index 0000000..5162fe4 --- /dev/null +++ b/arch/mips/kernel/cevt-smtc.c @@ -0,0 +1,321 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2007 MIPS Technologies, Inc. + * Copyright (C) 2007 Ralf Baechle <ralf@linux-mips.org> + * Copyright (C) 2008 Kevin D. Kissell, Paralogos sarl + */ +#include <linux/clockchips.h> +#include <linux/interrupt.h> +#include <linux/percpu.h> + +#include <asm/smtc_ipi.h> +#include <asm/time.h> +#include <asm/cevt-r4k.h> + +/* + * Variant clock event timer support for SMTC on MIPS 34K, 1004K + * or other MIPS MT cores. + * + * Notes on SMTC Support: + * + * SMTC has multiple microthread TCs pretending to be Linux CPUs. + * But there's only one Count/Compare pair per VPE, and Compare + * interrupts are taken opportunisitically by available TCs + * bound to the VPE with the Count register. The new timer + * framework provides for global broadcasts, but we really + * want VPE-level multicasts for best behavior. So instead + * of invoking the high-level clock-event broadcast code, + * this version of SMTC support uses the historical SMTC + * multicast mechanisms "under the hood", appearing to the + * generic clock layer as if the interrupts are per-CPU. + * + * The approach taken here is to maintain a set of NR_CPUS + * virtual timers, and track which "CPU" needs to be alerted + * at each event. + * + * It's unlikely that we'll see a MIPS MT core with more than + * 2 VPEs, but we *know* that we won't need to handle more + * VPEs than we have "CPUs". So NCPUs arrays of NCPUs elements + * is always going to be overkill, but always going to be enough. + */ + +unsigned long smtc_nexttime[NR_CPUS][NR_CPUS]; +static int smtc_nextinvpe[NR_CPUS]; + +/* + * Timestamps stored are absolute values to be programmed + * into Count register. Valid timestamps will never be zero. + * If a Zero Count value is actually calculated, it is converted + * to be a 1, which will introduce 1 or two CPU cycles of error + * roughly once every four billion events, which at 1000 HZ means + * about once every 50 days. If that's actually a problem, one + * could alternate squashing 0 to 1 and to -1. + */ + +#define MAKEVALID(x) (((x) == 0L) ? 1L : (x)) +#define ISVALID(x) ((x) != 0L) + +/* + * Time comparison is subtle, as it's really truncated + * modular arithmetic. + */ + +#define IS_SOONER(a, b, reference) \ + (((a) - (unsigned long)(reference)) < ((b) - (unsigned long)(reference))) + +/* + * CATCHUP_INCREMENT, used when the function falls behind the counter. + * Could be an increasing function instead of a constant; + */ + +#define CATCHUP_INCREMENT 64 + +static int mips_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + unsigned long flags; + unsigned int mtflags; + unsigned long timestamp, reference, previous; + unsigned long nextcomp = 0L; + int vpe = current_cpu_data.vpe_id; + int cpu = smp_processor_id(); + local_irq_save(flags); + mtflags = dmt(); + + /* + * Maintain the per-TC virtual timer + * and program the per-VPE shared Count register + * as appropriate here... + */ + reference = (unsigned long)read_c0_count(); + timestamp = MAKEVALID(reference + delta); + /* + * To really model the clock, we have to catch the case + * where the current next-in-VPE timestamp is the old + * timestamp for the calling CPE, but the new value is + * in fact later. In that case, we have to do a full + * scan and discover the new next-in-VPE CPU id and + * timestamp. + */ + previous = smtc_nexttime[vpe][cpu]; + if (cpu == smtc_nextinvpe[vpe] && ISVALID(previous) + && IS_SOONER(previous, timestamp, reference)) { + int i; + int soonest = cpu; + + /* + * Update timestamp array here, so that new + * value gets considered along with those of + * other virtual CPUs on the VPE. + */ + smtc_nexttime[vpe][cpu] = timestamp; + for_each_online_cpu(i) { + if (ISVALID(smtc_nexttime[vpe][i]) + && IS_SOONER(smtc_nexttime[vpe][i], + smtc_nexttime[vpe][soonest], reference)) { + soonest = i; + } + } + smtc_nextinvpe[vpe] = soonest; + nextcomp = smtc_nexttime[vpe][soonest]; + /* + * Otherwise, we don't have to process the whole array rank, + * we just have to see if the event horizon has gotten closer. + */ + } else { + if (!ISVALID(smtc_nexttime[vpe][smtc_nextinvpe[vpe]]) || + IS_SOONER(timestamp, + smtc_nexttime[vpe][smtc_nextinvpe[vpe]], reference)) { + smtc_nextinvpe[vpe] = cpu; + nextcomp = timestamp; + } + /* + * Since next-in-VPE may me the same as the executing + * virtual CPU, we update the array *after* checking + * its value. + */ + smtc_nexttime[vpe][cpu] = timestamp; + } + + /* + * It may be that, in fact, we don't need to update Compare, + * but if we do, we want to make sure we didn't fall into + * a crack just behind Count. + */ + if (ISVALID(nextcomp)) { + write_c0_compare(nextcomp); + ehb(); + /* + * We never return an error, we just make sure + * that we trigger the handlers as quickly as + * we can if we fell behind. + */ + while ((nextcomp - (unsigned long)read_c0_count()) + > (unsigned long)LONG_MAX) { + nextcomp += CATCHUP_INCREMENT; + write_c0_compare(nextcomp); + ehb(); + } + } + emt(mtflags); + local_irq_restore(flags); + return 0; +} + + +void smtc_distribute_timer(int vpe) +{ + unsigned long flags; + unsigned int mtflags; + int cpu; + struct clock_event_device *cd; + unsigned long nextstamp = 0L; + unsigned long reference; + + +repeat: + for_each_online_cpu(cpu) { + /* + * Find virtual CPUs within the current VPE who have + * unserviced timer requests whose time is now past. + */ + local_irq_save(flags); + mtflags = dmt(); + if (cpu_data[cpu].vpe_id == vpe && + ISVALID(smtc_nexttime[vpe][cpu])) { + reference = (unsigned long)read_c0_count(); + if ((smtc_nexttime[vpe][cpu] - reference) + > (unsigned long)LONG_MAX) { + smtc_nexttime[vpe][cpu] = 0L; + emt(mtflags); + local_irq_restore(flags); + /* + * We don't send IPIs to ourself. + */ + if (cpu != smp_processor_id()) { + smtc_send_ipi(cpu, SMTC_CLOCK_TICK, 0); + } else { + cd = &per_cpu(mips_clockevent_device, cpu); + cd->event_handler(cd); + } + } else { + /* Local to VPE but Valid Time not yet reached. */ + if (!ISVALID(nextstamp) || + IS_SOONER(smtc_nexttime[vpe][cpu], nextstamp, + reference)) { + smtc_nextinvpe[vpe] = cpu; + nextstamp = smtc_nexttime[vpe][cpu]; + } + emt(mtflags); + local_irq_restore(flags); + } + } else { + emt(mtflags); + local_irq_restore(flags); + + } + } + /* Reprogram for interrupt at next soonest timestamp for VPE */ + if (ISVALID(nextstamp)) { + write_c0_compare(nextstamp); + ehb(); + if ((nextstamp - (unsigned long)read_c0_count()) + > (unsigned long)LONG_MAX) + goto repeat; + } +} + + +irqreturn_t c0_compare_interrupt(int irq, void *dev_id) +{ + int cpu = smp_processor_id(); + + /* If we're running SMTC, we've got MIPS MT and therefore MIPS32R2 */ + handle_perf_irq(1); + + if (read_c0_cause() & (1 << 30)) { + /* Clear Count/Compare Interrupt */ + write_c0_compare(read_c0_compare()); + smtc_distribute_timer(cpu_data[cpu].vpe_id); + } + return IRQ_HANDLED; +} + + +int __cpuinit mips_clockevent_init(void) +{ + uint64_t mips_freq = mips_hpt_frequency; + unsigned int cpu = smp_processor_id(); + struct clock_event_device *cd; + unsigned int irq; + int i; + int j; + + if (!cpu_has_counter || !mips_hpt_frequency) + return -ENXIO; + if (cpu == 0) { + for (i = 0; i < num_possible_cpus(); i++) { + smtc_nextinvpe[i] = 0; + for (j = 0; j < num_possible_cpus(); j++) + smtc_nexttime[i][j] = 0L; + } + /* + * SMTC also can't have the usablility test + * run by secondary TCs once Compare is in use. + */ + if (!c0_compare_int_usable()) + return -ENXIO; + } + + /* + * With vectored interrupts things are getting platform specific. + * get_c0_compare_int is a hook to allow a platform to return the + * interrupt number of it's liking. + */ + irq = MIPS_CPU_IRQ_BASE + cp0_compare_irq; + if (get_c0_compare_int) + irq = get_c0_compare_int(); + + cd = &per_cpu(mips_clockevent_device, cpu); + + cd->name = "MIPS"; + cd->features = CLOCK_EVT_FEAT_ONESHOT; + + /* Calculate the min / max delta */ + cd->mult = div_sc((unsigned long) mips_freq, NSEC_PER_SEC, 32); + cd->shift = 32; + cd->max_delta_ns = clockevent_delta2ns(0x7fffffff, cd); + cd->min_delta_ns = clockevent_delta2ns(0x300, cd); + + cd->rating = 300; + cd->irq = irq; + cd->cpumask = cpumask_of_cpu(cpu); + cd->set_next_event = mips_next_event; + cd->set_mode = mips_set_clock_mode; + cd->event_handler = mips_event_handler; + + clockevents_register_device(cd); + + /* + * On SMTC we only want to do the data structure + * initialization and IRQ setup once. + */ + if (cpu) + return 0; + /* + * And we need the hwmask associated with the c0_compare + * vector to be initialized. + */ + irq_hwmask[irq] = (0x100 << cp0_compare_irq); + if (cp0_timer_irq_installed) + return 0; + + cp0_timer_irq_installed = 1; + + setup_irq(irq, &c0_compare_irqaction); + + return 0; +} diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 11c92dc..e621fda 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -54,14 +54,18 @@ extern void r4k_wait(void); * interrupt is requested" restriction in the MIPS32/MIPS64 architecture makes * using this version a gamble. */ -static void r4k_wait_irqoff(void) +void r4k_wait_irqoff(void) { local_irq_disable(); if (!need_resched()) - __asm__(" .set mips3 \n" + __asm__(" .set push \n" + " .set mips3 \n" " wait \n" - " .set mips0 \n"); + " .set pop \n"); local_irq_enable(); + __asm__(" .globl __pastwait \n" + "__pastwait: \n"); + return; } /* diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index e29598a..ffa3310 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -79,11 +79,6 @@ FEXPORT(syscall_exit) FEXPORT(restore_all) # restore full frame #ifdef CONFIG_MIPS_MT_SMTC -/* Detect and execute deferred IPI "interrupts" */ - LONG_L s0, TI_REGS($28) - LONG_S sp, TI_REGS($28) - jal deferred_smtc_ipi - LONG_S s0, TI_REGS($28) #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP /* Re-arm any temporarily masked interrupts not explicitly "acked" */ mfc0 v0, CP0_TCSTATUS @@ -112,6 +107,11 @@ FEXPORT(restore_all) # restore full frame xor t0, t0, t3 mtc0 t0, CP0_TCCONTEXT #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */ +/* Detect and execute deferred IPI "interrupts" */ + LONG_L s0, TI_REGS($28) + LONG_S sp, TI_REGS($28) + jal deferred_smtc_ipi + LONG_S s0, TI_REGS($28) #endif /* CONFIG_MIPS_MT_SMTC */ .set noat RESTORE_TEMP diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index f886dd7..01dcbe3 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -282,8 +282,8 @@ NESTED(except_vec_vi_handler, 0, sp) and t0, a0, t1 #ifdef CONFIG_MIPS_MT_SMTC_IM_BACKSTOP mfc0 t2, CP0_TCCONTEXT - or t0, t0, t2 - mtc0 t0, CP0_TCCONTEXT + or t2, t0, t2 + mtc0 t2, CP0_TCCONTEXT #endif /* CONFIG_MIPS_MT_SMTC_IM_BACKSTOP */ xor t1, t1, t0 mtc0 t1, CP0_STATUS diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index 3613645..492a0a8 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -22,6 +22,7 @@ #include <asm/irqflags.h> #include <asm/regdef.h> #include <asm/page.h> +#include <asm/pgtable-bits.h> #include <asm/mipsregs.h> #include <asm/stackframe.h> diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index df4d3f2..dc9eb72 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -159,7 +159,7 @@ __setup("fpaff=", fpaff_thresh); /* * FPU Use Factor empirically derived from experiments on 34K */ -#define FPUSEFACTOR 333 +#define FPUSEFACTOR 2000 static __init int mt_fp_affinity_init(void) { diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index ce76843..22fc19b 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -55,7 +55,7 @@ void __noreturn cpu_idle(void) while (1) { tick_nohz_stop_sched_tick(1); while (!need_resched()) { -#ifdef CONFIG_SMTC_IDLE_HOOK_DEBUG +#ifdef CONFIG_MIPS_MT_SMTC extern void smtc_idle_loop_hook(void); smtc_idle_loop_hook(); @@ -145,19 +145,18 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, */ p->thread.cp0_status = read_c0_status() & ~(ST0_CU2|ST0_CU1); childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); + +#ifdef CONFIG_MIPS_MT_SMTC + /* + * SMTC restores TCStatus after Status, and the CU bits + * are aliased there. + */ + childregs->cp0_tcstatus &= ~(ST0_CU2|ST0_CU1); +#endif clear_tsk_thread_flag(p, TIF_USEDFPU); #ifdef CONFIG_MIPS_MT_FPAFF clear_tsk_thread_flag(p, TIF_FPUBOUND); - - /* - * FPU affinity support is cleaner if we track the - * user-visible CPU affinity from the very beginning. - * The generic cpus_allowed mask will already have - * been copied from the parent before copy_thread - * is invoked. - */ - p->thread.user_cpus_allowed = p->cpus_allowed; #endif /* CONFIG_MIPS_MT_FPAFF */ if (clone_flags & CLONE_SETTLS) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 35234b9..96ffc9c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -238,7 +238,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) case FPC_EIR: { /* implementation / version register */ unsigned int flags; #ifdef CONFIG_MIPS_MT_SMTC - unsigned int irqflags; + unsigned long irqflags; unsigned int mtflags; #endif /* CONFIG_MIPS_MT_SMTC */ diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c index a516286..897fb2b 100644 --- a/arch/mips/kernel/smtc.c +++ b/arch/mips/kernel/smtc.c @@ -1,4 +1,21 @@ -/* Copyright (C) 2004 Mips Technologies, Inc */ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2004 Mips Technologies, Inc + * Copyright (C) 2008 Kevin D. Kissell + */ #include <linux/clockchips.h> #include <linux/kernel.h> @@ -21,7 +38,6 @@ #include <asm/time.h> #include <asm/addrspace.h> #include <asm/smtc.h> -#include <asm/smtc_ipi.h> #include <asm/smtc_proc.h> /* @@ -58,11 +74,6 @@ unsigned long irq_hwmask[NR_IRQS]; asiduse smtc_live_asid[MAX_SMTC_TLBS][MAX_SMTC_ASIDS]; -/* - * Clock interrupt "latch" buffers, per "CPU" - */ - -static atomic_t ipi_timer_latch[NR_CPUS]; /* * Number of InterProcessor Interrupt (IPI) message buffers to allocate @@ -70,7 +81,7 @@ static atomic_t ipi_timer_latch[NR_CPUS]; #define IPIBUF_PER_CPU 4 -static struct smtc_ipi_q IPIQ[NR_CPUS]; +struct smtc_ipi_q IPIQ[NR_CPUS]; static struct smtc_ipi_q freeIPIq; @@ -282,7 +293,7 @@ static void smtc_configure_tlb(void) * phys_cpu_present_map and the logical/physical mappings. */ -int __init mipsmt_build_cpu_map(int start_cpu_slot) +int __init smtc_build_cpu_map(int start_cpu_slot) { int i, ntcs; @@ -325,7 +336,12 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) write_tc_c0_tcstatus((read_tc_c0_tcstatus() & ~(TCSTATUS_TKSU | TCSTATUS_DA | TCSTATUS_IXMT)) | TCSTATUS_A); - write_tc_c0_tccontext(0); + /* + * TCContext gets an offset from the base of the IPIQ array + * to be used in low-level code to detect the presence of + * an active IPI queue + */ + write_tc_c0_tccontext((sizeof(struct smtc_ipi_q) * cpu) << 16); /* Bind tc to vpe */ write_tc_c0_tcbind(vpe); /* In general, all TCs should have the same cpu_data indications */ @@ -336,10 +352,18 @@ static void smtc_tc_setup(int vpe, int tc, int cpu) cpu_data[cpu].options &= ~MIPS_CPU_FPU; cpu_data[cpu].vpe_id = vpe; cpu_data[cpu].tc_id = tc; + /* Multi-core SMTC hasn't been tested, but be prepared */ + cpu_data[cpu].core = (read_vpe_c0_ebase() >> 1) & 0xff; } +/* + * Tweak to get Count registes in as close a sync as possible. + * Value seems good for 34K-class cores. + */ + +#define CP0_SKEW 8 -void mipsmt_prepare_cpus(void) +void smtc_prepare_cpus(int cpus) { int i, vpe, tc, ntc, nvpe, tcpervpe[NR_CPUS], slop, cpu; unsigned long flags; @@ -363,13 +387,13 @@ void mipsmt_prepare_cpus(void) IPIQ[i].head = IPIQ[i].tail = NULL; spin_lock_init(&IPIQ[i].lock); IPIQ[i].depth = 0; - atomic_set(&ipi_timer_latch[i], 0); } /* cpu_data index starts at zero */ cpu = 0; cpu_data[cpu].vpe_id = 0; cpu_data[cpu].tc_id = 0; + cpu_data[cpu].core = (read_c0_ebase() >> 1) & 0xff; cpu++; /* Report on boot-time options */ @@ -484,7 +508,8 @@ void mipsmt_prepare_cpus(void) write_vpe_c0_compare(0); /* Propagate Config7 */ write_vpe_c0_config7(read_c0_config7()); - write_vpe_c0_count(read_c0_count()); + write_vpe_c0_count(read_c0_count() + CP0_SKEW); + ehb(); } /* enable multi-threading within VPE */ write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() | VPECONTROL_TE); @@ -556,7 +581,7 @@ void mipsmt_prepare_cpus(void) void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle) { extern u32 kernelsp[NR_CPUS]; - long flags; + unsigned long flags; int mtflags; LOCK_MT_PRA(); @@ -585,24 +610,22 @@ void __cpuinit smtc_boot_secondary(int cpu, struct task_struct *idle) void smtc_init_secondary(void) { - /* - * Start timer on secondary VPEs if necessary. - * plat_timer_setup has already have been invoked by init/main - * on "boot" TC. Like per_cpu_trap_init() hack, this assumes that - * SMTC init code assigns TCs consdecutively and in ascending order - * to across available VPEs. - */ - if (((read_c0_tcbind() & TCBIND_CURTC) != 0) && - ((read_c0_tcbind() & TCBIND_CURVPE) - != cpu_data[smp_processor_id() - 1].vpe_id)){ - write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); - } - local_irq_enable(); } void smtc_smp_finish(void) { + int cpu = smp_processor_id(); + + /* + * Lowest-numbered CPU per VPE starts a clock tick. + * Like per_cpu_trap_init() hack, this assumes that + * SMTC init code assigns TCs consdecutively and + * in ascending order across available VPEs. + */ + if (cpu > 0 && (cpu_data[cpu].vpe_id != cpu_data[cpu - 1].vpe_id)) + write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ); + printk("TC %d going on-line as CPU %d\n", cpu_data[smp_processor_id()].tc_id, smp_processor_id()); } @@ -753,8 +776,10 @@ void smtc_send_ipi(int cpu, int type, unsigned int action) { int tcstatus; struct smtc_ipi *pipi; - long flags; + unsigned long flags; int mtflags; + unsigned long tcrestart; + extern void r4k_wait_irqoff(void), __pastwait(void); if (cpu == smp_processor_id()) { printk("Cannot Send IPI to self!\n"); @@ -771,8 +796,6 @@ void smtc_send_ipi(int cpu, int type, unsigned int action) pipi->arg = (void *)action; pipi->dest = cpu; if (cpu_data[cpu].vpe_id != cpu_data[smp_processor_id()].vpe_id) { - if (type == SMTC_CLOCK_TICK) - atomic_inc(&ipi_timer_latch[cpu]); /* If not on same VPE, enqueue and send cross-VPE interrupt */ smtc_ipi_nq(&IPIQ[cpu], pipi); LOCK_CORE_PRA(); @@ -800,22 +823,29 @@ void smtc_send_ipi(int cpu, int type, unsigned int action) if ((tcstatus & TCSTATUS_IXMT) != 0) { /* - * Spin-waiting here can deadlock, - * so we queue the message for the target TC. + * If we're in the the irq-off version of the wait + * loop, we need to force exit from the wait and + * do a direct post of the IPI. + */ + if (cpu_wait == r4k_wait_irqoff) { + tcrestart = read_tc_c0_tcrestart(); + if (tcrestart >= (unsigned long)r4k_wait_irqoff + && tcrestart < (unsigned long)__pastwait) { + write_tc_c0_tcrestart(__pastwait); + tcstatus &= ~TCSTATUS_IXMT; + write_tc_c0_tcstatus(tcstatus); + goto postdirect; + } + } + /* + * Otherwise we queue the message for the target TC + * to pick up when he does a local_irq_restore() */ write_tc_c0_tchalt(0); UNLOCK_CORE_PRA(); - /* Try to reduce redundant timer interrupt messages */ - if (type == SMTC_CLOCK_TICK) { - if (atomic_postincrement(&ipi_timer_latch[cpu])!=0){ - smtc_ipi_nq(&freeIPIq, pipi); - return; - } - } smtc_ipi_nq(&IPIQ[cpu], pipi); } else { - if (type == SMTC_CLOCK_TICK) - atomic_inc(&ipi_timer_latch[cpu]); +postdirect: post_direct_ipi(cpu, pipi); write_tc_c0_tchalt(0); UNLOCK_CORE_PRA(); @@ -883,7 +913,7 @@ static void ipi_call_interrupt(void) smp_call_function_interrupt(); } -DECLARE_PER_CPU(struct clock_event_device, smtc_dummy_clockevent_device); +DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device); void ipi_decode(struct smtc_ipi *pipi) { @@ -891,20 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi) struct clock_event_device *cd; void *arg_copy = pipi->arg; int type_copy = pipi->type; - int ticks; - smtc_ipi_nq(&freeIPIq, pipi); switch (type_copy) { case SMTC_CLOCK_TICK: irq_enter(); kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++; - cd = &per_cpu(smtc_dummy_clockevent_device, cpu); - ticks = atomic_read(&ipi_timer_latch[cpu]); - atomic_sub(ticks, &ipi_timer_latch[cpu]); - while (ticks) { - cd->event_handler(cd); - ticks--; - } + cd = &per_cpu(mips_clockevent_device, cpu); + cd->event_handler(cd); irq_exit(); break; @@ -937,24 +960,48 @@ void ipi_decode(struct smtc_ipi *pipi) } } +/* + * Similar to smtc_ipi_replay(), but invoked from context restore, + * so it reuses the current exception frame rather than set up a + * new one with self_ipi. + */ + void deferred_smtc_ipi(void) { - struct smtc_ipi *pipi; - unsigned long flags; -/* DEBUG */ - int q = smp_processor_id(); + int cpu = smp_processor_id(); /* * Test is not atomic, but much faster than a dequeue, * and the vast majority of invocations will have a null queue. + * If irq_disabled when this was called, then any IPIs queued + * after we test last will be taken on the next irq_enable/restore. + * If interrupts were enabled, then any IPIs added after the + * last test will be taken directly. */ - if (IPIQ[q].head != NULL) { - while((pipi = smtc_ipi_dq(&IPIQ[q])) != NULL) { - /* ipi_decode() should be called with interrupts off */ - local_irq_save(flags); + + while (IPIQ[cpu].head != NULL) { + struct smtc_ipi_q *q = &IPIQ[cpu]; + struct smtc_ipi *pipi; + unsigned long flags; + + /* + * It may be possible we'll come in with interrupts + * already enabled. + */ + local_irq_save(flags); + + spin_lock(&q->lock); + pipi = __smtc_ipi_dq(q); + spin_unlock(&q->lock); + if (pipi != NULL) ipi_decode(pipi); - local_irq_restore(flags); - } + /* + * The use of the __raw_local restore isn't + * as obviously necessary here as in smtc_ipi_replay(), + * but it's more efficient, given that we're already + * running down the IPI queue. + */ + __raw_local_irq_restore(flags); } } @@ -975,7 +1022,7 @@ static irqreturn_t ipi_interrupt(int irq, void *dev_idm) struct smtc_ipi *pipi; unsigned long tcstatus; int sent; - long flags; + unsigned long flags; unsigned int mtflags; unsigned int vpflags; @@ -1066,55 +1113,53 @@ static void setup_cross_vpe_interrupts(unsigned int nvpe) /* * SMTC-specific hacks invoked from elsewhere in the kernel. - * - * smtc_ipi_replay is called from raw_local_irq_restore which is only ever - * called with interrupts disabled. We do rely on interrupts being disabled - * here because using spin_lock_irqsave()/spin_unlock_irqrestore() would - * result in a recursive call to raw_local_irq_restore(). */ -static void __smtc_ipi_replay(void) + /* + * smtc_ipi_replay is called from raw_local_irq_restore + */ + +void smtc_ipi_replay(void) { unsigned int cpu = smp_processor_id(); /* * To the extent that we've ever turned interrupts off, * we may have accumulated deferred IPIs. This is subtle. - * If we use the smtc_ipi_qdepth() macro, we'll get an - * exact number - but we'll also disable interrupts - * and create a window of failure where a new IPI gets - * queued after we test the depth but before we re-enable - * interrupts. So long as IXMT never gets set, however, * we should be OK: If we pick up something and dispatch * it here, that's great. If we see nothing, but concurrent * with this operation, another TC sends us an IPI, IXMT * is clear, and we'll handle it as a real pseudo-interrupt - * and not a pseudo-pseudo interrupt. + * and not a pseudo-pseudo interrupt. The important thing + * is to do the last check for queued message *after* the + * re-enabling of interrupts. */ - if (IPIQ[cpu].depth > 0) { - while (1) { - struct smtc_ipi_q *q = &IPIQ[cpu]; - struct smtc_ipi *pipi; - extern void self_ipi(struct smtc_ipi *); - - spin_lock(&q->lock); - pipi = __smtc_ipi_dq(q); - spin_unlock(&q->lock); - if (!pipi) - break; + while (IPIQ[cpu].head != NULL) { + struct smtc_ipi_q *q = &IPIQ[cpu]; + struct smtc_ipi *pipi; + unsigned long flags; + + /* + * It's just possible we'll come in with interrupts + * already enabled. + */ + local_irq_save(flags); + + spin_lock(&q->lock); + pipi = __smtc_ipi_dq(q); + spin_unlock(&q->lock); + /* + ** But use a raw restore here to avoid recursion. + */ + __raw_local_irq_restore(flags); + if (pipi) { self_ipi(pipi); smtc_cpu_stats[cpu].selfipis++; } } } -void smtc_ipi_replay(void) -{ - raw_local_irq_disable(); - __smtc_ipi_replay(); -} - EXPORT_SYMBOL(smtc_ipi_replay); void smtc_idle_loop_hook(void) @@ -1193,40 +1238,13 @@ void smtc_idle_loop_hook(void) } } - /* - * Now that we limit outstanding timer IPIs, check for hung TC - */ - for (tc = 0; tc < NR_CPUS; tc++) { - /* Don't check ourself - we'll dequeue IPIs just below */ - if ((tc != smp_processor_id()) && - atomic_read(&ipi_timer_latch[tc]) > timerq_limit) { - if (clock_hang_reported[tc] == 0) { - pdb_msg += sprintf(pdb_msg, - "TC %d looks hung with timer latch at %d\n", - tc, atomic_read(&ipi_timer_latch[tc])); - clock_hang_reported[tc]++; - } - } - } emt(mtflags); local_irq_restore(flags); if (pdb_msg != &id_ho_db_msg[0]) printk("CPU%d: %s", smp_processor_id(), id_ho_db_msg); #endif /* CONFIG_SMTC_IDLE_HOOK_DEBUG */ - /* - * Replay any accumulated deferred IPIs. If "Instant Replay" - * is in use, there should never be any. - */ -#ifndef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY - { - unsigned long flags; - - local_irq_save(flags); - __smtc_ipi_replay(); - local_irq_restore(flags); - } -#endif /* CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY */ + smtc_ipi_replay(); } void smtc_soft_dump(void) @@ -1242,10 +1260,6 @@ void smtc_soft_dump(void) printk("%d: %ld\n", i, smtc_cpu_stats[i].selfipis); } smtc_ipi_qdump(); - printk("Timer IPI Backlogs:\n"); - for (i=0; i < NR_CPUS; i++) { - printk("%d: %d\n", i, atomic_read(&ipi_timer_latch[i])); - } printk("%d Recoveries of \"stolen\" FPU\n", atomic_read(&smtc_fpu_recoveries)); } diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 5fd0cd0..b602ac6 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -825,8 +825,10 @@ static void mt_ase_fp_affinity(void) if (cpus_intersects(current->cpus_allowed, mt_fpu_cpumask)) { cpumask_t tmask; - cpus_and(tmask, current->thread.user_cpus_allowed, - mt_fpu_cpumask); + current->thread.user_cpus_allowed + = current->cpus_allowed; + cpus_and(tmask, current->cpus_allowed, + mt_fpu_cpumask); set_cpus_allowed(current, tmask); set_thread_flag(TIF_FPUBOUND); } diff --git a/arch/mips/mti-malta/Makefile b/arch/mips/mti-malta/Makefile index 3b7dd72..cef2db8 100644 --- a/arch/mips/mti-malta/Makefile +++ b/arch/mips/mti-malta/Makefile @@ -15,6 +15,6 @@ obj-$(CONFIG_EARLY_PRINTK) += malta-console.o obj-$(CONFIG_PCI) += malta-pci.o # FIXME FIXME FIXME -obj-$(CONFIG_MIPS_MT_SMTC) += malta_smtc.o +obj-$(CONFIG_MIPS_MT_SMTC) += malta-smtc.o EXTRA_CFLAGS += -Werror diff --git a/arch/mips/mti-malta/malta-smtc.c b/arch/mips/mti-malta/malta-smtc.c index 5ea705e..f84a46a 100644 --- a/arch/mips/mti-malta/malta-smtc.c +++ b/arch/mips/mti-malta/malta-smtc.c @@ -84,12 +84,17 @@ static void msmtc_cpus_done(void) static void __init msmtc_smp_setup(void) { - mipsmt_build_cpu_map(0); + /* + * we won't get the definitive value until + * we've run smtc_prepare_cpus later, but + * we would appear to need an upper bound now. + */ + smp_num_siblings = smtc_build_cpu_map(0); } static void __init msmtc_prepare_cpus(unsigned int max_cpus) { - mipsmt_prepare_cpus(); + smtc_prepare_cpus(max_cpus); } struct plat_smp_ops msmtc_smp_ops = { diff --git a/arch/mips/sibyte/swarm/Makefile b/arch/mips/sibyte/swarm/Makefile index f18ba92..7b45f19 100644 --- a/arch/mips/sibyte/swarm/Makefile +++ b/arch/mips/sibyte/swarm/Makefile @@ -1,3 +1,4 @@ -obj-y := setup.o rtc_xicor1241.o rtc_m41t81.o +obj-y := platform.o setup.o rtc_xicor1241.o \ + rtc_m41t81.o obj-$(CONFIG_I2C_BOARDINFO) += swarm-i2c.o diff --git a/arch/mips/sibyte/swarm/platform.c b/arch/mips/sibyte/swarm/platform.c new file mode 100644 index 0000000..54847fe --- /dev/null +++ b/arch/mips/sibyte/swarm/platform.c @@ -0,0 +1,85 @@ +#include <linux/err.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/ata_platform.h> + +#include <asm/sibyte/board.h> +#include <asm/sibyte/sb1250_genbus.h> +#include <asm/sibyte/sb1250_regs.h> + +#if defined(CONFIG_SIBYTE_SWARM) || defined(CONFIG_SIBYTE_LITTLESUR) + +#define DRV_NAME "pata-swarm" + +#define SWARM_IDE_SHIFT 5 +#define SWARM_IDE_BASE 0x1f0 +#define SWARM_IDE_CTRL 0x3f6 + +static struct resource swarm_pata_resource[] = { + { + .name = "Swarm GenBus IDE", + .flags = IORESOURCE_MEM, + }, { + .name = "Swarm GenBus IDE", + .flags = IORESOURCE_MEM, + }, { + .name = "Swarm GenBus IDE", + .flags = IORESOURCE_IRQ, + .start = K_INT_GB_IDE, + .end = K_INT_GB_IDE, + }, +}; + +static struct pata_platform_info pata_platform_data = { + .ioport_shift = SWARM_IDE_SHIFT, +}; + +static struct platform_device swarm_pata_device = { + .name = "pata_platform", + .id = -1, + .resource = swarm_pata_resource, + .num_resources = ARRAY_SIZE(swarm_pata_resource), + .dev = { + .platform_data = &pata_platform_data, + .coherent_dma_mask = ~0, /* grumble */ + }, +}; + +static int __init swarm_pata_init(void) +{ + u8 __iomem *base; + phys_t offset, size; + struct resource *r; + + if (!SIBYTE_HAVE_IDE) + return -ENODEV; + + base = ioremap(A_IO_EXT_BASE, 0x800); + offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS)); + size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS)); + iounmap(base); + + offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE; + size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE; + if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) { + pr_info(DRV_NAME ": PATA interface at GenBus disabled\n"); + + return -EBUSY; + } + + pr_info(DRV_NAME ": PATA interface at GenBus slot %i\n", IDE_CS); + + r = swarm_pata_resource; + r[0].start = offset + (SWARM_IDE_BASE << SWARM_IDE_SHIFT); + r[0].end = offset + ((SWARM_IDE_BASE + 8) << SWARM_IDE_SHIFT) - 1; + r[1].start = offset + (SWARM_IDE_CTRL << SWARM_IDE_SHIFT); + r[1].end = offset + ((SWARM_IDE_CTRL + 1) << SWARM_IDE_SHIFT) - 1; + + return platform_device_register(&swarm_pata_device); +} + +device_initcall(swarm_pata_init); + +#endif /* defined(CONFIG_SIBYTE_SWARM) || defined(CONFIG_SIBYTE_LITTLESUR) */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index ca114fe..06acb1a 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -169,6 +169,8 @@ void init_cpu_timer(void) static void clock_comparator_interrupt(__u16 code) { + if (S390_lowcore.clock_comparator == -1ULL) + set_clock_comparator(S390_lowcore.clock_comparator); } static void etr_timing_alert(struct etr_irq_parm *); diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c index fc6ab60..0953cee 100644 --- a/arch/s390/lib/delay.c +++ b/arch/s390/lib/delay.c @@ -1,14 +1,9 @@ /* - * arch/s390/lib/delay.c * Precise Delay Loops for S390 * - * S390 version - * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation - * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com), - * - * Derived from "arch/i386/lib/delay.c" - * Copyright (C) 1993 Linus Torvalds - * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> + * Copyright IBM Corp. 1999,2008 + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>, + * Heiko Carstens <heiko.carstens@de.ibm.com>, */ #include <linux/sched.h> @@ -29,30 +24,31 @@ void __delay(unsigned long loops) asm volatile("0: brct %0,0b" : : "d" ((loops/2) + 1)); } -/* - * Waits for 'usecs' microseconds using the TOD clock comparator. - */ -void __udelay(unsigned long usecs) +static void __udelay_disabled(unsigned long usecs) { - u64 end, time, old_cc = 0; - unsigned long flags, cr0, mask, dummy; - int irq_context; + unsigned long mask, cr0, cr0_saved; + u64 clock_saved; - irq_context = in_interrupt(); - if (!irq_context) - local_bh_disable(); - local_irq_save(flags); - if (raw_irqs_disabled_flags(flags)) { - old_cc = local_tick_disable(); - S390_lowcore.clock_comparator = -1ULL; - __ctl_store(cr0, 0, 0); - dummy = (cr0 & 0xffff00e0) | 0x00000800; - __ctl_load(dummy , 0, 0); - mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; - } else - mask = psw_kernel_bits | PSW_MASK_WAIT | - PSW_MASK_EXT | PSW_MASK_IO; + clock_saved = local_tick_disable(); + set_clock_comparator(get_clock() + ((u64) usecs << 12)); + __ctl_store(cr0_saved, 0, 0); + cr0 = (cr0_saved & 0xffff00e0) | 0x00000800; + __ctl_load(cr0 , 0, 0); + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT; + trace_hardirqs_on(); + __load_psw_mask(mask); + local_irq_disable(); + __ctl_load(cr0_saved, 0, 0); + local_tick_enable(clock_saved); + set_clock_comparator(S390_lowcore.clock_comparator); +} +static void __udelay_enabled(unsigned long usecs) +{ + unsigned long mask; + u64 end, time; + + mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_EXT | PSW_MASK_IO; end = get_clock() + ((u64) usecs << 12); do { time = end < S390_lowcore.clock_comparator ? @@ -62,13 +58,37 @@ void __udelay(unsigned long usecs) __load_psw_mask(mask); local_irq_disable(); } while (get_clock() < end); + set_clock_comparator(S390_lowcore.clock_comparator); +} - if (raw_irqs_disabled_flags(flags)) { - __ctl_load(cr0, 0, 0); - local_tick_enable(old_cc); +/* + * Waits for 'usecs' microseconds using the TOD clock comparator. + */ +void __udelay(unsigned long usecs) +{ + unsigned long flags; + + preempt_disable(); + local_irq_save(flags); + if (in_irq()) { + __udelay_disabled(usecs); + goto out; + } + if (in_softirq()) { + if (raw_irqs_disabled_flags(flags)) + __udelay_disabled(usecs); + else + __udelay_enabled(usecs); + goto out; } - if (!irq_context) + if (raw_irqs_disabled_flags(flags)) { + local_bh_disable(); + __udelay_disabled(usecs); _local_bh_enable(); - set_clock_comparator(S390_lowcore.clock_comparator); + goto out; + } + __udelay_enabled(usecs); +out: local_irq_restore(flags); + preempt_enable(); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ed92864..97f0d2b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -29,6 +29,7 @@ config X86 select HAVE_FTRACE select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER + select HAVE_ARCH_TRACEHOOK select HAVE_GENERIC_DMA_COHERENT if X86_32 select HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -1020,7 +1021,7 @@ config HAVE_ARCH_ALLOC_REMAP config ARCH_FLATMEM_ENABLE def_bool y - depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && X86_PC && !NUMA + depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA config ARCH_DISCONTIGMEM_ENABLE def_bool y @@ -1036,7 +1037,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SPARSEMEM_ENABLE def_bool y - depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) + depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH select SPARSEMEM_STATIC if X86_32 select SPARSEMEM_VMEMMAP_ENABLE if X86_64 @@ -1117,10 +1118,10 @@ config MTRR You can safely say Y even if your machine doesn't have MTRRs, you'll just add about 9 KB to your kernel. - See <file:Documentation/mtrr.txt> for more information. + See <file:Documentation/x86/mtrr.txt> for more information. config MTRR_SANITIZER - bool + def_bool y prompt "MTRR cleanup support" depends on MTRR help @@ -1131,7 +1132,7 @@ config MTRR_SANITIZER The largest mtrr entry size for a continous block can be set with mtrr_chunk_size. - If unsure, say N. + If unsure, say Y. config MTRR_SANITIZER_ENABLE_DEFAULT int "MTRR cleanup enable value (0-1)" @@ -1191,7 +1192,6 @@ config IRQBALANCE config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" - depends on PROC_FS help This kernel feature is useful for number crunching applications that may need to compute untrusted bytecode during their @@ -1199,7 +1199,7 @@ config SECCOMP the process as file descriptors supporting the read/write syscalls, it's possible to isolate those applications in their own address space using seccomp. Once seccomp is - enabled via /proc/<pid>/seccomp, it cannot be disabled + enabled via prctl(PR_SET_SECCOMP), it cannot be disabled and the task is only allowed to execute a few safe syscalls defined by each seccomp mode. @@ -1356,14 +1356,14 @@ config PHYSICAL_ALIGN Don't change this unless you know what you are doing. config HOTPLUG_CPU - bool "Support for suspend on SMP and hot-pluggable CPUs (EXPERIMENTAL)" - depends on SMP && HOTPLUG && EXPERIMENTAL && !X86_VOYAGER + bool "Support for hot-pluggable CPUs" + depends on SMP && HOTPLUG && !X86_VOYAGER ---help--- - Say Y here to experiment with turning CPUs off and on, and to - enable suspend on SMP systems. CPUs can be controlled through - /sys/devices/system/cpu. - Say N if you want to disable CPU hotplug and don't need to - suspend. + Say Y here to allow turning CPUs off and on. CPUs can be + controlled through /sys/devices/system/cpu. + ( Note: power management support will enable this option + automatically on SMP systems. ) + Say N if you want to disable CPU hotplug. config COMPAT_VDSO def_bool y @@ -1378,6 +1378,51 @@ config COMPAT_VDSO If unsure, say Y. +config CMDLINE_BOOL + bool "Built-in kernel command line" + default n + help + Allow for specifying boot arguments to the kernel at + build time. On some systems (e.g. embedded ones), it is + necessary or convenient to provide some or all of the + kernel boot arguments with the kernel itself (that is, + to not rely on the boot loader to provide them.) + + To compile command line arguments into the kernel, + set this option to 'Y', then fill in the + the boot arguments in CONFIG_CMDLINE. + + Systems with fully functional boot loaders (i.e. non-embedded) + should leave this option set to 'N'. + +config CMDLINE + string "Built-in kernel command string" + depends on CMDLINE_BOOL + default "" + help + Enter arguments here that should be compiled into the kernel + image and used at boot time. If the boot loader provides a + command line at boot time, it is appended to this string to + form the full kernel command line, when the system boots. + + However, you can use the CONFIG_CMDLINE_OVERRIDE option to + change this behavior. + + In most cases, the command line (whether built-in or provided + by the boot loader) should specify the device for the root + file system. + +config CMDLINE_OVERRIDE + bool "Built-in command line overrides boot loader arguments" + default n + depends on CMDLINE_BOOL + help + Set this option to 'Y' to have the kernel ignore the boot loader + command line, and use ONLY the built-in command line. + + This is used to work around broken boot loaders. This should + be set to 'N' under normal conditions. + endmenu config ARCH_ENABLE_MEMORY_HOTPLUG @@ -1773,7 +1818,7 @@ config COMPAT_FOR_U64_ALIGNMENT config SYSVIPC_COMPAT def_bool y - depends on X86_64 && COMPAT && SYSVIPC + depends on COMPAT && SYSVIPC endmenu diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index b225219..60a8576 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -418,3 +418,21 @@ config X86_MINIMUM_CPU_FAMILY config X86_DEBUGCTLMSR def_bool y depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) + +config X86_DS + bool "Debug Store support" + default y + help + Add support for Debug Store. + This allows the kernel to provide a memory buffer to the hardware + to store various profiling and tracing events. + +config X86_PTRACE_BTS + bool "ptrace interface to Branch Trace Store" + default y + depends on (X86_DS && X86_DEBUGCTLMSR) + help + Add a ptrace interface to allow collecting an execution trace + of the traced task. + This collects control flow changes in a (cyclic) buffer and allows + debuggers to fill in the gaps and show an execution trace of the debuggee. diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index ba7736c..29c5fbf 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -137,14 +137,15 @@ relocated: */ movl output_len(%ebx), %eax pushl %eax + # push arguments for decompress_kernel: pushl %ebp # output address movl input_len(%ebx), %eax pushl %eax # input_len leal input_data(%ebx), %eax pushl %eax # input_data leal boot_heap(%ebx), %eax - pushl %eax # heap area as third argument - pushl %esi # real mode pointer as second arg + pushl %eax # heap area + pushl %esi # real mode pointer call decompress_kernel addl $20, %esp popl %ecx diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 9fea737..5780d36 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -16,7 +16,7 @@ */ #undef CONFIG_PARAVIRT #ifdef CONFIG_X86_32 -#define _ASM_DESC_H_ 1 +#define ASM_X86__DESC_H 1 #endif #ifdef CONFIG_X86_64 @@ -27,7 +27,7 @@ #include <linux/linkage.h> #include <linux/screen_info.h> #include <linux/elf.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/page.h> #include <asm/boot.h> #include <asm/bootparam.h> @@ -251,7 +251,7 @@ static void __putstr(int error, const char *s) y--; } } else { - vidmem [(x + cols * y) * 2] = c; + vidmem[(x + cols * y) * 2] = c; if (++x >= cols) { x = 0; if (++y >= lines) { @@ -277,7 +277,8 @@ static void *memset(void *s, int c, unsigned n) int i; char *ss = s; - for (i = 0; i < n; i++) ss[i] = c; + for (i = 0; i < n; i++) + ss[i] = c; return s; } @@ -287,7 +288,8 @@ static void *memcpy(void *dest, const void *src, unsigned n) const char *s = src; char *d = dest; - for (i = 0; i < n; i++) d[i] = s[i]; + for (i = 0; i < n; i++) + d[i] = s[i]; return dest; } diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c index a1310c5..857e492 100644 --- a/arch/x86/boot/compressed/relocs.c +++ b/arch/x86/boot/compressed/relocs.c @@ -492,7 +492,7 @@ static void walk_relocs(void (*visit)(Elf32_Rel *rel, Elf32_Sym *sym)) continue; } sh_symtab = sec_symtab->symtab; - sym_strtab = sec->link->strtab; + sym_strtab = sec_symtab->link->strtab; for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) { Elf32_Rel *rel; Elf32_Sym *sym; diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index af86e43..b993062 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -30,7 +30,6 @@ SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */ SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */ /* to be loaded */ ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */ -SWAP_DEV = 0 /* SWAP_DEV is now written by "build" */ #ifndef SVGA_MODE #define SVGA_MODE ASK_VGA diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 104275e..ef9a520 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.27-rc4 -# Mon Aug 25 15:04:00 2008 +# Linux kernel version: 2.6.27-rc5 +# Wed Sep 3 17:23:09 2008 # # CONFIG_64BIT is not set CONFIG_X86_32=y @@ -202,7 +202,7 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y # CONFIG_M586 is not set # CONFIG_M586TSC is not set # CONFIG_M586MMX is not set -# CONFIG_M686 is not set +CONFIG_M686=y # CONFIG_MPENTIUMII is not set # CONFIG_MPENTIUMIII is not set # CONFIG_MPENTIUMM is not set @@ -221,13 +221,14 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y # CONFIG_MVIAC3_2 is not set # CONFIG_MVIAC7 is not set # CONFIG_MPSC is not set -CONFIG_MCORE2=y +# CONFIG_MCORE2 is not set # CONFIG_GENERIC_CPU is not set CONFIG_X86_GENERIC=y CONFIG_X86_CPU=y CONFIG_X86_CMPXCHG=y CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_X86_XADD=y +# CONFIG_X86_PPRO_FENCE is not set CONFIG_X86_WP_WORKS_OK=y CONFIG_X86_INVLPG=y CONFIG_X86_BSWAP=y @@ -235,14 +236,15 @@ CONFIG_X86_POPAD_OK=y CONFIG_X86_INTEL_USERCOPY=y CONFIG_X86_USE_PPRO_CHECKSUM=y CONFIG_X86_TSC=y +CONFIG_X86_CMOV=y CONFIG_X86_MINIMUM_CPU_FAMILY=4 CONFIG_X86_DEBUGCTLMSR=y CONFIG_HPET_TIMER=y CONFIG_HPET_EMULATE_RTC=y CONFIG_DMI=y # CONFIG_IOMMU_HELPER is not set -CONFIG_NR_CPUS=4 -# CONFIG_SCHED_SMT is not set +CONFIG_NR_CPUS=64 +CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y @@ -254,7 +256,8 @@ CONFIG_VM86=y # CONFIG_TOSHIBA is not set # CONFIG_I8K is not set CONFIG_X86_REBOOTFIXUPS=y -# CONFIG_MICROCODE is not set +CONFIG_MICROCODE=y +CONFIG_MICROCODE_OLD_INTERFACE=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y # CONFIG_NOHIGHMEM is not set @@ -2115,7 +2118,7 @@ CONFIG_IO_DELAY_0X80=y CONFIG_DEFAULT_IO_DELAY_TYPE=0 CONFIG_DEBUG_BOOT_PARAMS=y # CONFIG_CPA_DEBUG is not set -# CONFIG_OPTIMIZE_INLINING is not set +CONFIG_OPTIMIZE_INLINING=y # # Security options diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 678c8ac..e620ea6 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.27-rc4 -# Mon Aug 25 14:40:46 2008 +# Linux kernel version: 2.6.27-rc5 +# Wed Sep 3 17:13:39 2008 # CONFIG_64BIT=y # CONFIG_X86_32 is not set @@ -218,17 +218,14 @@ CONFIG_X86_PC=y # CONFIG_MVIAC3_2 is not set # CONFIG_MVIAC7 is not set # CONFIG_MPSC is not set -CONFIG_MCORE2=y -# CONFIG_GENERIC_CPU is not set +# CONFIG_MCORE2 is not set +CONFIG_GENERIC_CPU=y CONFIG_X86_CPU=y -CONFIG_X86_L1_CACHE_BYTES=64 -CONFIG_X86_INTERNODE_CACHE_BYTES=64 +CONFIG_X86_L1_CACHE_BYTES=128 +CONFIG_X86_INTERNODE_CACHE_BYTES=128 CONFIG_X86_CMPXCHG=y -CONFIG_X86_L1_CACHE_SHIFT=6 +CONFIG_X86_L1_CACHE_SHIFT=7 CONFIG_X86_WP_WORKS_OK=y -CONFIG_X86_INTEL_USERCOPY=y -CONFIG_X86_USE_PPRO_CHECKSUM=y -CONFIG_X86_P6_NOP=y CONFIG_X86_TSC=y CONFIG_X86_CMPXCHG64=y CONFIG_X86_CMOV=y @@ -243,9 +240,8 @@ CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y CONFIG_AMD_IOMMU=y CONFIG_SWIOTLB=y CONFIG_IOMMU_HELPER=y -# CONFIG_MAXSMP is not set -CONFIG_NR_CPUS=4 -# CONFIG_SCHED_SMT is not set +CONFIG_NR_CPUS=64 +CONFIG_SCHED_SMT=y CONFIG_SCHED_MC=y # CONFIG_PREEMPT_NONE is not set CONFIG_PREEMPT_VOLUNTARY=y @@ -254,7 +250,8 @@ CONFIG_X86_LOCAL_APIC=y CONFIG_X86_IO_APIC=y # CONFIG_X86_MCE is not set # CONFIG_I8K is not set -# CONFIG_MICROCODE is not set +CONFIG_MICROCODE=y +CONFIG_MICROCODE_OLD_INTERFACE=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y CONFIG_NUMA=y @@ -290,7 +287,7 @@ CONFIG_BOUNCE=y CONFIG_VIRT_TO_BUS=y CONFIG_MTRR=y # CONFIG_MTRR_SANITIZER is not set -# CONFIG_X86_PAT is not set +CONFIG_X86_PAT=y CONFIG_EFI=y CONFIG_SECCOMP=y # CONFIG_HZ_100 is not set @@ -2089,7 +2086,7 @@ CONFIG_IO_DELAY_0X80=y CONFIG_DEFAULT_IO_DELAY_TYPE=0 CONFIG_DEBUG_BOOT_PARAMS=y # CONFIG_CPA_DEBUG is not set -# CONFIG_OPTIMIZE_INLINING is not set +CONFIG_OPTIMIZE_INLINING=y # # Security options diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index a0e1dbe..127ec3f 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -85,8 +85,10 @@ static void dump_thread32(struct pt_regs *regs, struct user32 *dump) dump->regs.ax = regs->ax; dump->regs.ds = current->thread.ds; dump->regs.es = current->thread.es; - asm("movl %%fs,%0" : "=r" (fs)); dump->regs.fs = fs; - asm("movl %%gs,%0" : "=r" (gs)); dump->regs.gs = gs; + savesegment(fs, fs); + dump->regs.fs = fs; + savesegment(gs, gs); + dump->regs.gs = gs; dump->regs.orig_ax = regs->orig_ax; dump->regs.ip = regs->ip; dump->regs.cs = regs->cs; @@ -430,8 +432,9 @@ beyond_if: current->mm->start_stack = (unsigned long)create_aout_tables((char __user *)bprm->p, bprm); /* start thread */ - asm volatile("movl %0,%%fs" :: "r" (0)); \ - asm volatile("movl %0,%%es; movl %0,%%ds": :"r" (__USER32_DS)); + loadsegment(fs, 0); + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); load_gs_index(0); (regs)->ip = ex.a_entry; (regs)->sp = current->mm->start_stack; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 20af4c7..f1a2ac7 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -206,7 +206,7 @@ struct rt_sigframe { unsigned int cur; \ unsigned short pre; \ err |= __get_user(pre, &sc->seg); \ - asm volatile("movl %%" #seg ",%0" : "=r" (cur)); \ + savesegment(seg, cur); \ pre |= mask; \ if (pre != cur) loadsegment(seg, pre); } @@ -235,7 +235,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, */ err |= __get_user(gs, &sc->gs); gs |= 3; - asm("movl %%gs,%0" : "=r" (oldgs)); + savesegment(gs, oldgs); if (gs != oldgs) load_gs_index(gs); @@ -355,14 +355,13 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, { int tmp, err = 0; - tmp = 0; - __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); + savesegment(gs, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->gs); - __asm__("movl %%fs,%0" : "=r"(tmp): "0"(tmp)); + savesegment(fs, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->fs); - __asm__("movl %%ds,%0" : "=r"(tmp): "0"(tmp)); + savesegment(ds, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->ds); - __asm__("movl %%es,%0" : "=r"(tmp): "0"(tmp)); + savesegment(es, tmp); err |= __put_user(tmp, (unsigned int __user *)&sc->es); err |= __put_user((u32)regs->di, &sc->di); @@ -498,8 +497,8 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, regs->dx = 0; regs->cx = 0; - asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); - asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); regs->cs = __USER32_CS; regs->ss = __USER32_DS; @@ -591,8 +590,8 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->dx = (unsigned long) &frame->info; regs->cx = (unsigned long) &frame->uc; - asm volatile("movl %0,%%ds" :: "r" (__USER32_DS)); - asm volatile("movl %0,%%es" :: "r" (__USER32_DS)); + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); regs->cs = __USER32_CS; regs->ss = __USER32_DS; diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index d3c6408..beda423 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c @@ -556,15 +556,6 @@ asmlinkage long sys32_rt_sigqueueinfo(int pid, int sig, return ret; } -/* These are here just in case some old ia32 binary calls it. */ -asmlinkage long sys32_pause(void) -{ - current->state = TASK_INTERRUPTIBLE; - schedule(); - return -ERESTARTNOHAND; -} - - #ifdef CONFIG_SYSCTL_SYSCALL struct sysctl_ia32 { unsigned int name; diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index bfd10fd..7d40ef7 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -58,7 +58,6 @@ EXPORT_SYMBOL(acpi_disabled); #ifdef CONFIG_X86_64 #include <asm/proto.h> -#include <asm/genapic.h> #else /* X86 */ @@ -97,8 +96,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; #warning ACPI uses CMPXCHG, i486 and later hardware #endif -static int acpi_mcfg_64bit_base_addr __initdata = FALSE; - /* -------------------------------------------------------------------------- Boot-time Configuration -------------------------------------------------------------------------- */ @@ -160,6 +157,8 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) struct acpi_mcfg_allocation *pci_mmcfg_config; int pci_mmcfg_config_num; +static int acpi_mcfg_64bit_base_addr __initdata = FALSE; + static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) { if (!strcmp(mcfg->header.oem_id, "SGI")) @@ -1605,6 +1604,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { */ { .callback = dmi_ignore_irq0_timer_override, + .ident = "HP nx6115 laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6115"), + }, + }, + { + .callback = dmi_ignore_irq0_timer_override, .ident = "HP NX6125 laptop", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), @@ -1619,6 +1626,14 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nx6325"), }, }, + { + .callback = dmi_ignore_irq0_timer_override, + .ident = "HP 6715b laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"), + }, + }, {} }; diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 65a0c1b..fb04e49 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -231,25 +231,25 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) continue; if (*ptr > text_end) continue; - text_poke(*ptr, ((unsigned char []){0xf0}), 1); /* add lock prefix */ + /* turn DS segment override prefix into lock prefix */ + text_poke(*ptr, ((unsigned char []){0xf0}), 1); }; } static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) { u8 **ptr; - char insn[1]; if (noreplace_smp) return; - add_nops(insn, 1); for (ptr = start; ptr < end; ptr++) { if (*ptr < text) continue; if (*ptr > text_end) continue; - text_poke(*ptr, insn, 1); + /* turn lock prefix into DS segment override prefix */ + text_poke(*ptr, ((unsigned char []){0x3E}), 1); }; } diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 44e2182..9a32b37 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -455,11 +455,11 @@ out: force_iommu || valid_agp || fallback_aper_force) { - printk(KERN_ERR + printk(KERN_INFO "Your BIOS doesn't leave a aperture memory hole\n"); - printk(KERN_ERR + printk(KERN_INFO "Please enable the IOMMU option in the BIOS setup\n"); - printk(KERN_ERR + printk(KERN_INFO "This costs you %d MB of RAM\n", 32 << fallback_aper_order); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 732d1f4..5145a6e 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -228,7 +228,6 @@ #include <linux/suspend.h> #include <linux/kthread.h> #include <linux/jiffies.h> -#include <linux/smp_lock.h> #include <asm/system.h> #include <asm/uaccess.h> diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index aa89387..505543a 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -22,7 +22,7 @@ #define __NO_STUBS 1 #undef __SYSCALL -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #define __SYSCALL(nr, sym) [nr] = 1, static char syscalls[] = { #include <asm/unistd.h> diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index c639bd5..fdd585f 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c @@ -25,11 +25,11 @@ x86_bios_strerror(long status) { const char *str; switch (status) { - case 0: str = "Call completed without error"; break; - case -1: str = "Not implemented"; break; - case -2: str = "Invalid argument"; break; - case -3: str = "Call completed with error"; break; - default: str = "Unknown BIOS status code"; break; + case 0: str = "Call completed without error"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + default: str = "Unknown BIOS status code"; break; } return str; } diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index a11f5d4..305b465 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c @@ -430,6 +430,49 @@ static __init int setup_noclflush(char *arg) } __setup("noclflush", setup_noclflush); +struct msr_range { + unsigned min; + unsigned max; +}; + +static struct msr_range msr_range_array[] __cpuinitdata = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, + { 0xc0011000, 0xc001103b}, +}; + +static void __cpuinit print_cpu_msr(void) +{ + unsigned index; + u64 val; + int i; + unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; + printk(KERN_INFO " MSR%08x: %016llx\n", index, val); + } + } +} + +static int show_msr __cpuinitdata; +static __init int setup_show_msr(char *arg) +{ + int num; + + get_option(&arg, &num); + + if (num > 0) + show_msr = num; + return 1; +} +__setup("show_msr=", setup_show_msr); + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { if (c->x86_model_id[0]) @@ -439,6 +482,14 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) printk(KERN_CONT " stepping %02x\n", c->x86_mask); else printk(KERN_CONT "\n"); + +#ifdef CONFIG_SMP + if (c->cpu_index < show_msr) + print_cpu_msr(); +#else + if (show_msr) + print_cpu_msr(); +#endif } static __init int setup_disablecpuid(char *arg) diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index f1685fb..b8e05ee 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -171,7 +171,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) } if (c->x86 != 0xF) { - printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@lists.linux.org.uk>\n"); + printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); return 0; } diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 15e13c0..3b5f064 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c @@ -26,7 +26,7 @@ #include <asm/cpufeature.h> #define PFX "speedstep-centrino: " -#define MAINTAINER "cpufreq@lists.linux.org.uk" +#define MAINTAINER "cpufreq@vger.kernel.org" #define dprintk(msg...) \ cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index b75f256..f113ef4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -222,10 +222,11 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); + ds_init_intel(c); } if (cpu_has_bts) - ds_init_intel(c); + ptrace_bts_init_intel(c); /* * See if we have a good local APIC by checking for buggy Pentia, diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index cb7d3b6..4e8d77f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -401,12 +401,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1<<(hi - 1)) - 1); if (tmp != mask_lo) { - static int once = 1; - - if (once) { - printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); - once = 0; - } + WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); mask_lo = tmp; } } diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 84c480b..4c42146 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -405,9 +405,9 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) } /* RED-PEN: base can be > 32bit */ len += seq_printf(seq, - "reg%02i: base=0x%05lx000 (%4luMB), size=%4lu%cB: %s, count=%d\n", + "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", i, base, base >> (20 - PAGE_SHIFT), size, factor, - mtrr_attrib_to_str(type), mtrr_usage_table[i]); + mtrr_usage_table[i], mtrr_attrib_to_str(type)); } } return 0; diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 885c826..c78c048 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -729,7 +729,7 @@ struct var_mtrr_range_state { mtrr_type type; }; -struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; static int __initdata debug_print; static int __init @@ -759,7 +759,8 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, /* take out UC ranges */ for (i = 0; i < num_var_ranges; i++) { type = range_state[i].type; - if (type != MTRR_TYPE_UNCACHABLE) + if (type != MTRR_TYPE_UNCACHABLE && + type != MTRR_TYPE_WRPROT) continue; size = range_state[i].size_pfn; if (!size) @@ -836,6 +837,13 @@ static int __init enable_mtrr_cleanup_setup(char *str) } early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); +static int __init mtrr_cleanup_debug_setup(char *str) +{ + debug_print = 1; + return 0; +} +early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); + struct var_mtrr_state { unsigned long range_startk; unsigned long range_sizek; @@ -898,6 +906,27 @@ set_var_mtrr_all(unsigned int address_bits) } } +static unsigned long to_size_factor(unsigned long sizek, char *factorp) +{ + char factor; + unsigned long base = sizek; + + if (base & ((1<<10) - 1)) { + /* not MB alignment */ + factor = 'K'; + } else if (base & ((1<<20) - 1)){ + factor = 'M'; + base >>= 10; + } else { + factor = 'G'; + base >>= 20; + } + + *factorp = factor; + + return base; +} + static unsigned int __init range_to_mtrr(unsigned int reg, unsigned long range_startk, unsigned long range_sizek, unsigned char type) @@ -919,13 +948,21 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, align = max_align; sizek = 1 << align; - if (debug_print) + if (debug_print) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + start_base = to_size_factor(range_startk, &start_factor), + size_base = to_size_factor(sizek, &size_factor), + printk(KERN_DEBUG "Setting variable MTRR %d, " - "base: %ldMB, range: %ldMB, type %s\n", - reg, range_startk >> 10, sizek >> 10, + "base: %ld%cB, range: %ld%cB, type %s\n", + reg, start_base, start_factor, + size_base, size_factor, (type == MTRR_TYPE_UNCACHABLE)?"UC": ((type == MTRR_TYPE_WRBACK)?"WB":"Other") ); + } save_var_mtrr(reg++, range_startk, sizek, type); range_startk += sizek; range_sizek -= sizek; @@ -970,6 +1007,8 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, /* try to append some small hole */ range0_basek = state->range_startk; range0_sizek = ALIGN(state->range_sizek, chunk_sizek); + + /* no increase */ if (range0_sizek == state->range_sizek) { if (debug_print) printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", @@ -980,13 +1019,40 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, return 0; } - range0_sizek -= chunk_sizek; - if (range0_sizek && sizek) { - while (range0_basek + range0_sizek > (basek + sizek)) { - range0_sizek -= chunk_sizek; - if (!range0_sizek) - break; - } + /* only cut back, when it is not the last */ + if (sizek) { + while (range0_basek + range0_sizek > (basek + sizek)) { + if (range0_sizek >= chunk_sizek) + range0_sizek -= chunk_sizek; + else + range0_sizek = 0; + + if (!range0_sizek) + break; + } + } + +second_try: + range_basek = range0_basek + range0_sizek; + + /* one hole in the middle */ + if (range_basek > basek && range_basek <= (basek + sizek)) + second_sizek = range_basek - basek; + + if (range0_sizek > state->range_sizek) { + + /* one hole in middle or at end */ + hole_sizek = range0_sizek - state->range_sizek - second_sizek; + + /* hole size should be less than half of range0 size */ + if (hole_sizek >= (range0_sizek >> 1) && + range0_sizek >= chunk_sizek) { + range0_sizek -= chunk_sizek; + second_sizek = 0; + hole_sizek = 0; + + goto second_try; + } } if (range0_sizek) { @@ -996,50 +1062,28 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, (range0_basek + range0_sizek)<<10); state->reg = range_to_mtrr(state->reg, range0_basek, range0_sizek, MTRR_TYPE_WRBACK); - - } - - range_basek = range0_basek + range0_sizek; - range_sizek = chunk_sizek; - - if (range_basek + range_sizek > basek && - range_basek + range_sizek <= (basek + sizek)) { - /* one hole */ - second_basek = basek; - second_sizek = range_basek + range_sizek - basek; } - /* if last piece, only could one hole near end */ - if ((second_basek || !basek) && - range_sizek - (state->range_sizek - range0_sizek) - second_sizek < - (chunk_sizek >> 1)) { - /* - * one hole in middle (second_sizek is 0) or at end - * (second_sizek is 0 ) - */ - hole_sizek = range_sizek - (state->range_sizek - range0_sizek) - - second_sizek; - hole_basek = range_basek + range_sizek - hole_sizek - - second_sizek; - } else { - /* fallback for big hole, or several holes */ + if (range0_sizek < state->range_sizek) { + /* need to handle left over */ range_sizek = state->range_sizek - range0_sizek; - second_basek = 0; - second_sizek = 0; + + if (debug_print) + printk(KERN_DEBUG "range: %016lx - %016lx\n", + range_basek<<10, + (range_basek + range_sizek)<<10); + state->reg = range_to_mtrr(state->reg, range_basek, + range_sizek, MTRR_TYPE_WRBACK); } - if (debug_print) - printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10, - (range_basek + range_sizek)<<10); - state->reg = range_to_mtrr(state->reg, range_basek, range_sizek, - MTRR_TYPE_WRBACK); if (hole_sizek) { + hole_basek = range_basek - hole_sizek - second_sizek; if (debug_print) printk(KERN_DEBUG "hole: %016lx - %016lx\n", - hole_basek<<10, (hole_basek + hole_sizek)<<10); - state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek, - MTRR_TYPE_UNCACHABLE); - + hole_basek<<10, + (hole_basek + hole_sizek)<<10); + state->reg = range_to_mtrr(state->reg, hole_basek, + hole_sizek, MTRR_TYPE_UNCACHABLE); } return second_sizek; @@ -1154,11 +1198,11 @@ struct mtrr_cleanup_result { }; /* - * gran_size: 1M, 2M, ..., 2G - * chunk size: gran_size, ..., 4G - * so we need (2+13)*6 + * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G + * chunk size: gran_size, ..., 2G + * so we need (1+16)*8 */ -#define NUM_RESULT 90 +#define NUM_RESULT 136 #define PSHIFT (PAGE_SHIFT - 10) static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; @@ -1168,13 +1212,14 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; static int __init mtrr_cleanup(unsigned address_bits) { unsigned long extra_remove_base, extra_remove_size; - unsigned long i, base, size, def, dummy; + unsigned long base, size, def, dummy; mtrr_type type; int nr_range, nr_range_new; u64 chunk_size, gran_size; unsigned long range_sums, range_sums_new; int index_good; int num_reg_good; + int i; /* extra one for all 0 */ int num[MTRR_NUM_TYPES + 1]; @@ -1204,6 +1249,8 @@ static int __init mtrr_cleanup(unsigned address_bits) continue; if (!size) type = MTRR_NUM_TYPES; + if (type == MTRR_TYPE_WRPROT) + type = MTRR_TYPE_UNCACHABLE; num[type]++; } @@ -1216,23 +1263,57 @@ static int __init mtrr_cleanup(unsigned address_bits) num_var_ranges - num[MTRR_NUM_TYPES]) return 0; + /* print original var MTRRs at first, for debugging: */ + printk(KERN_DEBUG "original variable MTRRs\n"); + for (i = 0; i < num_var_ranges; i++) { + char start_factor = 'K', size_factor = 'K'; + unsigned long start_base, size_base; + + size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); + if (!size_base) + continue; + + size_base = to_size_factor(size_base, &size_factor), + start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); + start_base = to_size_factor(start_base, &start_factor), + type = range_state[i].type; + + printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", + i, start_base, start_factor, + size_base, size_factor, + (type == MTRR_TYPE_UNCACHABLE) ? "UC" : + ((type == MTRR_TYPE_WRPROT) ? "WP" : + ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) + ); + } + memset(range, 0, sizeof(range)); extra_remove_size = 0; - if (mtrr_tom2) { - extra_remove_base = 1 << (32 - PAGE_SHIFT); + extra_remove_base = 1 << (32 - PAGE_SHIFT); + if (mtrr_tom2) extra_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; - } nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, extra_remove_size); + /* + * [0, 1M) should always be coverred by var mtrr with WB + * and fixed mtrrs should take effective before var mtrr for it + */ + nr_range = add_range_with_merge(range, nr_range, 0, + (1ULL<<(20 - PAGE_SHIFT)) - 1); + /* sort the ranges */ + sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); + range_sums = sum_ranges(range, nr_range); printk(KERN_INFO "total RAM coverred: %ldM\n", range_sums >> (20 - PAGE_SHIFT)); if (mtrr_chunk_size && mtrr_gran_size) { int num_reg; + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; - debug_print = 1; + debug_print++; /* convert ranges to var ranges state */ num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, mtrr_gran_size); @@ -1256,34 +1337,48 @@ static int __init mtrr_cleanup(unsigned address_bits) result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ldM \n", + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad?"*BAD*":" ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", result[i].num_reg, result[i].bad?"-":"", - result[i].lose_cover_sizek >> 10); + lose_base, lose_factor); if (!result[i].bad) { set_var_mtrr_all(address_bits); return 1; } printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " "will find optimal one\n"); - debug_print = 0; + debug_print--; memset(result, 0, sizeof(result[0])); } i = 0; memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); memset(result, 0, sizeof(result)); - for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) { - for (chunk_size = gran_size; chunk_size < (1ULL<<33); + for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { + char gran_factor; + unsigned long gran_base; + + if (debug_print) + gran_base = to_size_factor(gran_size >> 10, &gran_factor); + + for (chunk_size = gran_size; chunk_size < (1ULL<<32); chunk_size <<= 1) { int num_reg; - if (debug_print) - printk(KERN_INFO - "\ngran_size: %lldM chunk_size_size: %lldM\n", - gran_size >> 20, chunk_size >> 20); + if (debug_print) { + char chunk_factor; + unsigned long chunk_base; + + chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), + printk(KERN_INFO "\n"); + printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", + gran_base, gran_factor, chunk_base, chunk_factor); + } if (i >= NUM_RESULT) continue; @@ -1326,12 +1421,18 @@ static int __init mtrr_cleanup(unsigned address_bits) /* print out all */ for (i = 0; i < NUM_RESULT; i++) { - printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t", - result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n", - result[i].num_reg, result[i].bad?"-":"", - result[i].lose_cover_sizek >> 10); + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", + result[i].bad?"*BAD*":" ", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", + result[i].num_reg, result[i].bad?"-":"", + lose_base, lose_factor); } /* try to find the optimal index */ @@ -1339,10 +1440,8 @@ static int __init mtrr_cleanup(unsigned address_bits) nr_mtrr_spare_reg = num_var_ranges - 1; num_reg_good = -1; for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { - if (!min_loss_pfn[i]) { + if (!min_loss_pfn[i]) num_reg_good = i; - break; - } } index_good = -1; @@ -1358,21 +1457,26 @@ static int __init mtrr_cleanup(unsigned address_bits) } if (index_good != -1) { + char gran_factor, chunk_factor, lose_factor; + unsigned long gran_base, chunk_base, lose_base; + printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); i = index_good; - printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t", - result[i].gran_sizek >> 10, - result[i].chunk_sizek >> 10); - printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n", - result[i].num_reg, - result[i].lose_cover_sizek >> 10); + gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), + chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), + lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), + printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", + gran_base, gran_factor, chunk_base, chunk_factor); + printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", + result[i].num_reg, lose_base, lose_factor); /* convert ranges to var ranges state */ chunk_size = result[i].chunk_sizek; chunk_size <<= 10; gran_size = result[i].gran_sizek; gran_size <<= 10; - debug_print = 1; + debug_print++; x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); + debug_print--; set_var_mtrr_all(address_bits); return 1; } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 05cc22d..6bff382 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -295,13 +295,19 @@ static int setup_k7_watchdog(unsigned nmi_hz) /* setup the timer */ wrmsr(evntsel_msr, evntsel, 0); write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= K7_EVNTSEL_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); + /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= K7_EVNTSEL_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + return 1; } @@ -379,13 +385,19 @@ static int setup_p6_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= P6_EVNTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); + /* initialize the wd struct before enabling */ wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= P6_EVNTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); + return 1; } @@ -432,6 +444,27 @@ static const struct wd_ops p6_wd_ops = { #define P4_CCCR_ENABLE (1 << 12) #define P4_CCCR_OVF (1 << 31) +#define P4_CONTROLS 18 +static unsigned int p4_controls[18] = { + MSR_P4_BPU_CCCR0, + MSR_P4_BPU_CCCR1, + MSR_P4_BPU_CCCR2, + MSR_P4_BPU_CCCR3, + MSR_P4_MS_CCCR0, + MSR_P4_MS_CCCR1, + MSR_P4_MS_CCCR2, + MSR_P4_MS_CCCR3, + MSR_P4_FLAME_CCCR0, + MSR_P4_FLAME_CCCR1, + MSR_P4_FLAME_CCCR2, + MSR_P4_FLAME_CCCR3, + MSR_P4_IQ_CCCR0, + MSR_P4_IQ_CCCR1, + MSR_P4_IQ_CCCR2, + MSR_P4_IQ_CCCR3, + MSR_P4_IQ_CCCR4, + MSR_P4_IQ_CCCR5, +}; /* * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter * CRU_ESCR0 (with any non-null event selector) through a complemented @@ -473,6 +506,26 @@ static int setup_p4_watchdog(unsigned nmi_hz) evntsel_msr = MSR_P4_CRU_ESCR0; cccr_msr = MSR_P4_IQ_CCCR0; cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); + + /* + * If we're on the kdump kernel or other situation, we may + * still have other performance counter registers set to + * interrupt and they'll keep interrupting forever because + * of the P4_CCCR_OVF quirk. So we need to ACK all the + * pending interrupts and disable all the registers here, + * before reenabling the NMI delivery. Refer to p4_rearm() + * about the P4_CCCR_OVF quirk. + */ + if (reset_devices) { + unsigned int low, high; + int i; + + for (i = 0; i < P4_CONTROLS; i++) { + rdmsr(p4_controls[i], low, high); + low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF); + wrmsr(p4_controls[i], low, high); + } + } } else { /* logical cpu 1 */ perfctr_msr = MSR_P4_IQ_PERFCTR1; @@ -499,12 +552,17 @@ static int setup_p4_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); wrmsr(cccr_msr, cccr_val, 0); write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - cccr_val |= P4_CCCR_ENABLE; - wrmsr(cccr_msr, cccr_val, 0); + wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = cccr_msr; + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + cccr_val |= P4_CCCR_ENABLE; + wrmsr(cccr_msr, cccr_val, 0); return 1; } @@ -620,13 +678,17 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) wrmsr(evntsel_msr, evntsel, 0); nmi_hz = adjust_for_32bit_ctr(nmi_hz); write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); - apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsr(evntsel_msr, evntsel, 0); wd->perfctr_msr = perfctr_msr; wd->evntsel_msr = evntsel_msr; wd->cccr_msr = 0; /* unused */ + + /* ok, everything is initialized, announce that we're set */ + cpu_nmi_set_wd_enabled(); + + apic_write(APIC_LVTPC, APIC_DM_NMI); + evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsr(evntsel_msr, evntsel, 0); intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; } diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 8e9cd6a..6a44d64 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c @@ -36,7 +36,6 @@ #include <linux/smp_lock.h> #include <linux/major.h> #include <linux/fs.h> -#include <linux/smp_lock.h> #include <linux/device.h> #include <linux/cpu.h> #include <linux/notifier.h> diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index 15e6c6b..e90a60e 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -7,9 +7,8 @@ #include <linux/errno.h> #include <linux/crash_dump.h> - -#include <asm/uaccess.h> -#include <asm/io.h> +#include <linux/uaccess.h> +#include <linux/io.h> /** * copy_oldmem_page - copy one page from "oldmem" @@ -25,7 +24,7 @@ * in the current kernel. We stitch up a pte, similar to kmap_atomic. */ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, - size_t csize, unsigned long offset, int userbuf) + size_t csize, unsigned long offset, int userbuf) { void *vaddr; @@ -33,14 +32,16 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, return 0; vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + if (!vaddr) + return -ENOMEM; if (userbuf) { - if (copy_to_user(buf, (vaddr + offset), csize)) { + if (copy_to_user(buf, vaddr + offset, csize)) { iounmap(vaddr); return -EFAULT; } } else - memcpy(buf, (vaddr + offset), csize); + memcpy(buf, vaddr + offset, csize); iounmap(vaddr); return csize; diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 11c11b8..2b69994 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -2,26 +2,49 @@ * Debug Store support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 */ + +#ifdef CONFIG_X86_DS + #include <asm/ds.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/slab.h> +#include <linux/sched.h> +#include <linux/mm.h> + + +/* + * The configuration for a particular DS hardware implementation. + */ +struct ds_configuration { + /* the size of the DS structure in bytes */ + unsigned char sizeof_ds; + /* the size of one pointer-typed field in the DS structure in bytes; + this covers the first 8 fields related to buffer management. */ + unsigned char sizeof_field; + /* the size of a BTS/PEBS record in bytes */ + unsigned char sizeof_rec[2]; +}; +static struct ds_configuration ds_cfg; /* @@ -44,378 +67,747 @@ * (interrupt occurs when write pointer passes interrupt pointer) * - value to which counter is reset following counter overflow * - * On later architectures, the last branch recording hardware uses - * 64bit pointers even in 32bit mode. - * - * - * Branch Trace Store (BTS) records store information about control - * flow changes. They at least provide the following information: - * - source linear address - * - destination linear address + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. * - * Netburst supported a predicated bit that had been dropped in later - * architectures. We do not suppor it. * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * - an offset giving the start of the respective region * - * In order to abstract from the actual DS and BTS layout, we describe - * the access to the relevant fields. - * Thanks to Andi Kleen for proposing this design. + * This offset is further used to index various arrays holding + * information for BTS and PEBS at the respective index. * - * The implementation, however, is not as general as it might seem. In - * order to stay somewhat simple and efficient, we assume an - * underlying unsigned type (mostly a pointer type) and we expect the - * field to be at least as big as that type. + * On later 32bit processors, we only access the lower 32bit of the + * 64bit pointer fields. The upper halves will be zeroed out. */ -/* - * A special from_ip address to indicate that the BTS record is an - * info record that needs to be interpreted or skipped. - */ -#define BTS_ESCAPE_ADDRESS (-1) +enum ds_field { + ds_buffer_base = 0, + ds_index, + ds_absolute_maximum, + ds_interrupt_threshold, +}; -/* - * A field access descriptor - */ -struct access_desc { - unsigned char offset; - unsigned char size; +enum ds_qualifier { + ds_bts = 0, + ds_pebs }; +static inline unsigned long ds_get(const unsigned char *base, + enum ds_qualifier qual, enum ds_field field) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + return *(unsigned long *)base; +} + +static inline void ds_set(unsigned char *base, enum ds_qualifier qual, + enum ds_field field, unsigned long value) +{ + base += (ds_cfg.sizeof_field * (field + (4 * qual))); + (*(unsigned long *)base) = value; +} + + /* - * The configuration for a particular DS/BTS hardware implementation. + * Locking is done only for allocating BTS or PEBS resources and for + * guarding context and buffer memory allocation. + * + * Most functions require the current task to own the ds context part + * they are going to access. All the locking is done when validating + * access to the context. */ -struct ds_configuration { - /* the DS configuration */ - unsigned char sizeof_ds; - struct access_desc bts_buffer_base; - struct access_desc bts_index; - struct access_desc bts_absolute_maximum; - struct access_desc bts_interrupt_threshold; - /* the BTS configuration */ - unsigned char sizeof_bts; - struct access_desc from_ip; - struct access_desc to_ip; - /* BTS variants used to store additional information like - timestamps */ - struct access_desc info_type; - struct access_desc info_data; - unsigned long debugctl_mask; -}; +static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); /* - * The global configuration used by the below accessor functions + * Validate that the current task is allowed to access the BTS/PEBS + * buffer of the parameter task. + * + * Returns 0, if access is granted; -Eerrno, otherwise. */ -static struct ds_configuration ds_cfg; +static inline int ds_validate_access(struct ds_context *context, + enum ds_qualifier qual) +{ + if (!context) + return -EPERM; + + if (context->owner[qual] == current) + return 0; + + return -EPERM; +} + /* - * Accessor functions for some DS and BTS fields using the above - * global ptrace_bts_cfg. + * We either support (system-wide) per-cpu or per-thread allocation. + * We distinguish the two based on the task_struct pointer, where a + * NULL pointer indicates per-cpu allocation for the current cpu. + * + * Allocations are use-counted. As soon as resources are allocated, + * further allocations must be of the same type (per-cpu or + * per-thread). We model this by counting allocations (i.e. the number + * of tracers of a certain type) for one type negatively: + * =0 no tracers + * >0 number of per-thread tracers + * <0 number of per-cpu tracers + * + * The below functions to get and put tracers and to check the + * allocation type require the ds_lock to be held by the caller. + * + * Tracers essentially gives the number of ds contexts for a certain + * type of allocation. */ -static inline unsigned long get_bts_buffer_base(char *base) +static long tracers; + +static inline void get_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_buffer_base.offset); + tracers += (task ? 1 : -1); } -static inline void set_bts_buffer_base(char *base, unsigned long value) + +static inline void put_tracer(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_buffer_base.offset)) = value; + tracers -= (task ? 1 : -1); } -static inline unsigned long get_bts_index(char *base) + +static inline int check_tracer(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_index.offset); + return (task ? (tracers >= 0) : (tracers <= 0)); } -static inline void set_bts_index(char *base, unsigned long value) + + +/* + * The DS context is either attached to a thread or to a cpu: + * - in the former case, the thread_struct contains a pointer to the + * attached context. + * - in the latter case, we use a static array of per-cpu context + * pointers. + * + * Contexts are use-counted. They are allocated on first access and + * deallocated when the last user puts the context. + * + * We distinguish between an allocating and a non-allocating get of a + * context: + * - the allocating get is used for requesting BTS/PEBS resources. It + * requires the caller to hold the global ds_lock. + * - the non-allocating get is used for all other cases. A + * non-existing context indicates an error. It acquires and releases + * the ds_lock itself for obtaining the context. + * + * A context and its DS configuration are allocated and deallocated + * together. A context always has a DS configuration of the + * appropriate size. + */ +static DEFINE_PER_CPU(struct ds_context *, system_context); + +#define this_system_context per_cpu(system_context, smp_processor_id()) + +/* + * Returns the pointer to the parameter task's context or to the + * system-wide context, if task is NULL. + * + * Increases the use count of the returned context, if not NULL. + */ +static inline struct ds_context *ds_get_context(struct task_struct *task) { - (*(unsigned long *)(base + ds_cfg.bts_index.offset)) = value; + struct ds_context *context; + + spin_lock(&ds_lock); + + context = (task ? task->thread.ds_ctx : this_system_context); + if (context) + context->count++; + + spin_unlock(&ds_lock); + + return context; } -static inline unsigned long get_bts_absolute_maximum(char *base) + +/* + * Same as ds_get_context, but allocates the context and it's DS + * structure, if necessary; returns NULL; if out of memory. + * + * pre: requires ds_lock to be held + */ +static inline struct ds_context *ds_alloc_context(struct task_struct *task) { - return *(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset); + struct ds_context **p_context = + (task ? &task->thread.ds_ctx : &this_system_context); + struct ds_context *context = *p_context; + + if (!context) { + context = kzalloc(sizeof(*context), GFP_KERNEL); + + if (!context) + return NULL; + + context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + if (!context->ds) { + kfree(context); + return NULL; + } + + *p_context = context; + + context->this = p_context; + context->task = task; + + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + + if (!task || (task == current)) + wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + + get_tracer(task); + } + + context->count++; + + return context; } -static inline void set_bts_absolute_maximum(char *base, unsigned long value) + +/* + * Decreases the use count of the parameter context, if not NULL. + * Deallocates the context, if the use count reaches zero. + */ +static inline void ds_put_context(struct ds_context *context) { - (*(unsigned long *)(base + ds_cfg.bts_absolute_maximum.offset)) = value; + if (!context) + return; + + spin_lock(&ds_lock); + + if (--context->count) + goto out; + + *(context->this) = NULL; + + if (context->task) + clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); + + if (!context->task || (context->task == current)) + wrmsrl(MSR_IA32_DS_AREA, 0); + + put_tracer(context->task); + + /* free any leftover buffers from tracers that did not + * deallocate them properly. */ + kfree(context->buffer[ds_bts]); + kfree(context->buffer[ds_pebs]); + kfree(context->ds); + kfree(context); + out: + spin_unlock(&ds_lock); } -static inline unsigned long get_bts_interrupt_threshold(char *base) + + +/* + * Handle a buffer overflow + * + * task: the task whose buffers are overflowing; + * NULL for a buffer overflow on the current cpu + * context: the ds context + * qual: the buffer type + */ +static void ds_overflow(struct task_struct *task, struct ds_context *context, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset); + if (!context) + return; + + if (context->callback[qual]) + (*context->callback[qual])(task); + + /* todo: do some more overflow handling */ } -static inline void set_bts_interrupt_threshold(char *base, unsigned long value) + + +/* + * Allocate a non-pageable buffer of the parameter size. + * Checks the memory and the locked memory rlimit. + * + * Returns the buffer, if successful; + * NULL, if out of memory or rlimit exceeded. + * + * size: the requested buffer size in bytes + * pages (out): if not NULL, contains the number of pages reserved + */ +static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) { - (*(unsigned long *)(base + ds_cfg.bts_interrupt_threshold.offset)) = value; + unsigned long rlim, vm, pgsz; + void *buffer; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + return NULL; + + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + return NULL; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + return NULL; + + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + if (pages) + *pages = pgsz; + + return buffer; } -static inline unsigned long get_from_ip(char *base) + +static int ds_request(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl, enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.from_ip.offset); + struct ds_context *context; + unsigned long buffer, adj; + const unsigned long alignment = (1 << 3); + int error = 0; + + if (!ds_cfg.sizeof_ds) + return -EOPNOTSUPP; + + /* we require some space to do alignment adjustments below */ + if (size < (alignment + ds_cfg.sizeof_rec[qual])) + return -EINVAL; + + /* buffer overflow notification is not yet implemented */ + if (ovfl) + return -EOPNOTSUPP; + + + spin_lock(&ds_lock); + + if (!check_tracer(task)) + return -EPERM; + + error = -ENOMEM; + context = ds_alloc_context(task); + if (!context) + goto out_unlock; + + error = -EALREADY; + if (context->owner[qual] == current) + goto out_unlock; + error = -EPERM; + if (context->owner[qual] != NULL) + goto out_unlock; + context->owner[qual] = current; + + spin_unlock(&ds_lock); + + + error = -ENOMEM; + if (!base) { + base = ds_allocate_buffer(size, &context->pages[qual]); + if (!base) + goto out_release; + + context->buffer[qual] = base; + } + error = 0; + + context->callback[qual] = ovfl; + + /* adjust the buffer address and size to meet alignment + * constraints: + * - buffer is double-word aligned + * - size is multiple of record size + * + * We checked the size at the very beginning; we have enough + * space to do the adjustment. + */ + buffer = (unsigned long)base; + + adj = ALIGN(buffer, alignment) - buffer; + buffer += adj; + size -= adj; + + size /= ds_cfg.sizeof_rec[qual]; + size *= ds_cfg.sizeof_rec[qual]; + + ds_set(context->ds, qual, ds_buffer_base, buffer); + ds_set(context->ds, qual, ds_index, buffer); + ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); + + if (ovfl) { + /* todo: select a suitable interrupt threshold */ + } else + ds_set(context->ds, qual, + ds_interrupt_threshold, buffer + size + 1); + + /* we keep the context until ds_release */ + return error; + + out_release: + context->owner[qual] = NULL; + ds_put_context(context); + return error; + + out_unlock: + spin_unlock(&ds_lock); + ds_put_context(context); + return error; } -static inline void set_from_ip(char *base, unsigned long value) + +int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - (*(unsigned long *)(base + ds_cfg.from_ip.offset)) = value; + return ds_request(task, base, size, ovfl, ds_bts); } -static inline unsigned long get_to_ip(char *base) + +int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl) { - return *(unsigned long *)(base + ds_cfg.to_ip.offset); + return ds_request(task, base, size, ovfl, ds_pebs); } -static inline void set_to_ip(char *base, unsigned long value) + +static int ds_release(struct task_struct *task, enum ds_qualifier qual) { - (*(unsigned long *)(base + ds_cfg.to_ip.offset)) = value; + struct ds_context *context; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + kfree(context->buffer[qual]); + context->buffer[qual] = NULL; + + current->mm->total_vm -= context->pages[qual]; + current->mm->locked_vm -= context->pages[qual]; + context->pages[qual] = 0; + context->owner[qual] = NULL; + + /* + * we put the context twice: + * once for the ds_get_context + * once for the corresponding ds_request + */ + ds_put_context(context); + out: + ds_put_context(context); + return error; } -static inline unsigned char get_info_type(char *base) + +int ds_release_bts(struct task_struct *task) { - return *(unsigned char *)(base + ds_cfg.info_type.offset); + return ds_release(task, ds_bts); } -static inline void set_info_type(char *base, unsigned char value) + +int ds_release_pebs(struct task_struct *task) { - (*(unsigned char *)(base + ds_cfg.info_type.offset)) = value; + return ds_release(task, ds_pebs); } -static inline unsigned long get_info_data(char *base) + +static int ds_get_index(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - return *(unsigned long *)(base + ds_cfg.info_data.offset); + struct ds_context *context; + unsigned long base, index; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + + error = ((index - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; } -static inline void set_info_data(char *base, unsigned long value) + +int ds_get_bts_index(struct task_struct *task, size_t *pos) { - (*(unsigned long *)(base + ds_cfg.info_data.offset)) = value; + return ds_get_index(task, pos, ds_bts); } +int ds_get_pebs_index(struct task_struct *task, size_t *pos) +{ + return ds_get_index(task, pos, ds_pebs); +} -int ds_allocate(void **dsp, size_t bts_size_in_bytes) +static int ds_get_end(struct task_struct *task, size_t *pos, + enum ds_qualifier qual) { - size_t bts_size_in_records; - unsigned long bts; - void *ds; + struct ds_context *context; + unsigned long base, end; + int error; + + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); + + error = ((end - base) / ds_cfg.sizeof_rec[qual]); + if (pos) + *pos = error; + out: + ds_put_context(context); + return error; +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +int ds_get_bts_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_bts); +} - if (bts_size_in_bytes < 0) - return -EINVAL; +int ds_get_pebs_end(struct task_struct *task, size_t *pos) +{ + return ds_get_end(task, pos, ds_pebs); +} - bts_size_in_records = - bts_size_in_bytes / ds_cfg.sizeof_bts; - bts_size_in_bytes = - bts_size_in_records * ds_cfg.sizeof_bts; +static int ds_access(struct task_struct *task, size_t index, + const void **record, enum ds_qualifier qual) +{ + struct ds_context *context; + unsigned long base, idx; + int error; - if (bts_size_in_bytes <= 0) + if (!record) return -EINVAL; - bts = (unsigned long)kzalloc(bts_size_in_bytes, GFP_KERNEL); - - if (!bts) - return -ENOMEM; + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); + base = ds_get(context->ds, qual, ds_buffer_base); + idx = base + (index * ds_cfg.sizeof_rec[qual]); - if (!ds) { - kfree((void *)bts); - return -ENOMEM; - } - - set_bts_buffer_base(ds, bts); - set_bts_index(ds, bts); - set_bts_absolute_maximum(ds, bts + bts_size_in_bytes); - set_bts_interrupt_threshold(ds, bts + bts_size_in_bytes + 1); + error = -EINVAL; + if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) + goto out; - *dsp = ds; - return 0; + *record = (const void *)idx; + error = ds_cfg.sizeof_rec[qual]; + out: + ds_put_context(context); + return error; } -int ds_free(void **dsp) +int ds_access_bts(struct task_struct *task, size_t index, const void **record) { - if (*dsp) { - kfree((void *)get_bts_buffer_base(*dsp)); - kfree(*dsp); - *dsp = NULL; - } - return 0; + return ds_access(task, index, record, ds_bts); } -int ds_get_bts_size(void *ds) +int ds_access_pebs(struct task_struct *task, size_t index, const void **record) { - int size_in_bytes; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (!ds) - return 0; - - size_in_bytes = - get_bts_absolute_maximum(ds) - - get_bts_buffer_base(ds); - return size_in_bytes; + return ds_access(task, index, record, ds_pebs); } -int ds_get_bts_end(void *ds) +static int ds_write(struct task_struct *task, const void *record, size_t size, + enum ds_qualifier qual, int force) { - int size_in_bytes = ds_get_bts_size(ds); - - if (size_in_bytes <= 0) - return size_in_bytes; + struct ds_context *context; + int error; - return size_in_bytes / ds_cfg.sizeof_bts; -} + if (!record) + return -EINVAL; -int ds_get_bts_index(void *ds) -{ - int index_offset_in_bytes; + error = -EPERM; + context = ds_get_context(task); + if (!context) + goto out; - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; + if (!force) { + error = ds_validate_access(context, qual); + if (error < 0) + goto out; + } - index_offset_in_bytes = - get_bts_index(ds) - - get_bts_buffer_base(ds); + error = 0; + while (size) { + unsigned long base, index, end, write_end, int_th; + unsigned long write_size, adj_write_size; + + /* + * write as much as possible without producing an + * overflow interrupt. + * + * interrupt_threshold must either be + * - bigger than absolute_maximum or + * - point to a record between buffer_base and absolute_maximum + * + * index points to a valid record. + */ + base = ds_get(context->ds, qual, ds_buffer_base); + index = ds_get(context->ds, qual, ds_index); + end = ds_get(context->ds, qual, ds_absolute_maximum); + int_th = ds_get(context->ds, qual, ds_interrupt_threshold); + + write_end = min(end, int_th); + + /* if we are already beyond the interrupt threshold, + * we fill the entire buffer */ + if (write_end <= index) + write_end = end; + + if (write_end <= index) + goto out; + + write_size = min((unsigned long) size, write_end - index); + memcpy((void *)index, record, write_size); + + record = (const char *)record + write_size; + size -= write_size; + error += write_size; + + adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; + adj_write_size *= ds_cfg.sizeof_rec[qual]; + + /* zero out trailing bytes */ + memset((char *)index + write_size, 0, + adj_write_size - write_size); + index += adj_write_size; + + if (index >= end) + index = base; + ds_set(context->ds, qual, ds_index, index); + + if (index >= int_th) + ds_overflow(task, context, qual); + } - return index_offset_in_bytes / ds_cfg.sizeof_bts; + out: + ds_put_context(context); + return error; } -int ds_set_overflow(void *ds, int method) +int ds_write_bts(struct task_struct *task, const void *record, size_t size) { - switch (method) { - case DS_O_SIGNAL: - return -EOPNOTSUPP; - case DS_O_WRAP: - return 0; - default: - return -EINVAL; - } + return ds_write(task, record, size, ds_bts, /* force = */ 0); } -int ds_get_overflow(void *ds) +int ds_write_pebs(struct task_struct *task, const void *record, size_t size) { - return DS_O_WRAP; + return ds_write(task, record, size, ds_pebs, /* force = */ 0); } -int ds_clear(void *ds) +int ds_unchecked_write_bts(struct task_struct *task, + const void *record, size_t size) { - int bts_size = ds_get_bts_size(ds); - unsigned long bts_base; - - if (bts_size <= 0) - return bts_size; - - bts_base = get_bts_buffer_base(ds); - memset((void *)bts_base, 0, bts_size); - - set_bts_index(ds, bts_base); - return 0; + return ds_write(task, record, size, ds_bts, /* force = */ 1); } -int ds_read_bts(void *ds, int index, struct bts_struct *out) +int ds_unchecked_write_pebs(struct task_struct *task, + const void *record, size_t size) { - void *bts; + return ds_write(task, record, size, ds_pebs, /* force = */ 1); +} - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; +static int ds_reset_or_clear(struct task_struct *task, + enum ds_qualifier qual, int clear) +{ + struct ds_context *context; + unsigned long base, end; + int error; - if (index < 0) - return -EINVAL; + context = ds_get_context(task); + error = ds_validate_access(context, qual); + if (error < 0) + goto out; - if (index >= ds_get_bts_size(ds)) - return -EINVAL; + base = ds_get(context->ds, qual, ds_buffer_base); + end = ds_get(context->ds, qual, ds_absolute_maximum); - bts = (void *)(get_bts_buffer_base(ds) + (index * ds_cfg.sizeof_bts)); + if (clear) + memset((void *)base, 0, end - base); - memset(out, 0, sizeof(*out)); - if (get_from_ip(bts) == BTS_ESCAPE_ADDRESS) { - out->qualifier = get_info_type(bts); - out->variant.jiffies = get_info_data(bts); - } else { - out->qualifier = BTS_BRANCH; - out->variant.lbr.from_ip = get_from_ip(bts); - out->variant.lbr.to_ip = get_to_ip(bts); - } + ds_set(context->ds, qual, ds_index, base); - return sizeof(*out);; + error = 0; + out: + ds_put_context(context); + return error; } -int ds_write_bts(void *ds, const struct bts_struct *in) +int ds_reset_bts(struct task_struct *task) { - unsigned long bts; - - if (!ds_cfg.sizeof_ds || !ds_cfg.sizeof_bts) - return -EOPNOTSUPP; - - if (ds_get_bts_size(ds) <= 0) - return -ENXIO; + return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); +} - bts = get_bts_index(ds); +int ds_reset_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); +} - memset((void *)bts, 0, ds_cfg.sizeof_bts); - switch (in->qualifier) { - case BTS_INVALID: - break; +int ds_clear_bts(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); +} - case BTS_BRANCH: - set_from_ip((void *)bts, in->variant.lbr.from_ip); - set_to_ip((void *)bts, in->variant.lbr.to_ip); - break; +int ds_clear_pebs(struct task_struct *task) +{ + return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); +} - case BTS_TASK_ARRIVES: - case BTS_TASK_DEPARTS: - set_from_ip((void *)bts, BTS_ESCAPE_ADDRESS); - set_info_type((void *)bts, in->qualifier); - set_info_data((void *)bts, in->variant.jiffies); - break; +int ds_get_pebs_reset(struct task_struct *task, u64 *value) +{ + struct ds_context *context; + int error; - default: + if (!value) return -EINVAL; - } - bts = bts + ds_cfg.sizeof_bts; - if (bts >= get_bts_absolute_maximum(ds)) - bts = get_bts_buffer_base(ds); - set_bts_index(ds, bts); + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; - return ds_cfg.sizeof_bts; + *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); + + error = 0; + out: + ds_put_context(context); + return error; } -unsigned long ds_debugctl_mask(void) +int ds_set_pebs_reset(struct task_struct *task, u64 value) { - return ds_cfg.debugctl_mask; -} + struct ds_context *context; + int error; -#ifdef __i386__ -static const struct ds_configuration ds_cfg_netburst = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<2)|(1<<3) -}; + context = ds_get_context(task); + error = ds_validate_access(context, ds_pebs); + if (error < 0) + goto out; -static const struct ds_configuration ds_cfg_pentium_m = { - .sizeof_ds = 9 * 4, - .bts_buffer_base = { 0, 4 }, - .bts_index = { 4, 4 }, - .bts_absolute_maximum = { 8, 4 }, - .bts_interrupt_threshold = { 12, 4 }, - .sizeof_bts = 3 * 4, - .from_ip = { 0, 4 }, - .to_ip = { 4, 4 }, - .info_type = { 4, 1 }, - .info_data = { 8, 4 }, - .debugctl_mask = (1<<6)|(1<<7) + *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; + + error = 0; + out: + ds_put_context(context); + return error; +} + +static const struct ds_configuration ds_cfg_var = { + .sizeof_ds = sizeof(long) * 12, + .sizeof_field = sizeof(long), + .sizeof_rec[ds_bts] = sizeof(long) * 3, + .sizeof_rec[ds_pebs] = sizeof(long) * 10 }; -#endif /* _i386_ */ - -static const struct ds_configuration ds_cfg_core2 = { - .sizeof_ds = 9 * 8, - .bts_buffer_base = { 0, 8 }, - .bts_index = { 8, 8 }, - .bts_absolute_maximum = { 16, 8 }, - .bts_interrupt_threshold = { 24, 8 }, - .sizeof_bts = 3 * 8, - .from_ip = { 0, 8 }, - .to_ip = { 8, 8 }, - .info_type = { 8, 1 }, - .info_data = { 16, 8 }, - .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +static const struct ds_configuration ds_cfg_64 = { + .sizeof_ds = 8 * 12, + .sizeof_field = 8, + .sizeof_rec[ds_bts] = 8 * 3, + .sizeof_rec[ds_pebs] = 8 * 10 }; static inline void @@ -429,14 +821,13 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) switch (c->x86) { case 0x6: switch (c->x86_model) { -#ifdef __i386__ case 0xD: case 0xE: /* Pentium M */ - ds_configure(&ds_cfg_pentium_m); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ case 0xF: /* Core2 */ - ds_configure(&ds_cfg_core2); + case 0x1C: /* Atom */ + ds_configure(&ds_cfg_64); break; default: /* sorry, don't know about them */ @@ -445,13 +836,11 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; case 0xF: switch (c->x86_model) { -#ifdef __i386__ case 0x0: case 0x1: case 0x2: /* Netburst */ - ds_configure(&ds_cfg_netburst); + ds_configure(&ds_cfg_var); break; -#endif /* _i386_ */ default: /* sorry, don't know about them */ break; @@ -462,3 +851,14 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) break; } } + +void ds_free(struct ds_context *context) +{ + /* This is called when the task owning the parameter context + * is dying. There should not be any user of that context left + * to disturb us, anymore. */ + unsigned long leftovers = context->count; + while (leftovers--) + ds_put_context(context); +} +#endif /* CONFIG_X86_DS */ diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 06cc8d4..945a31c 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -414,9 +414,11 @@ void __init efi_init(void) if (memmap.map == NULL) printk(KERN_ERR "Could not map the EFI memory map!\n"); memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + if (memmap.desc_size != sizeof(efi_memory_desc_t)) - printk(KERN_WARNING "Kernel-defined memdesc" - "doesn't match the one from EFI!\n"); + printk(KERN_WARNING + "Kernel-defined memdesc doesn't match the one from EFI!\n"); + if (add_efi_memmap) do_add_efi_memmap(); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 89434d4..cf3a0b2 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -275,9 +275,9 @@ ENTRY(native_usergs_sysret64) ENTRY(ret_from_fork) CFI_DEFAULT_STACK push kernel_eflags(%rip) - CFI_ADJUST_CFA_OFFSET 4 + CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags - CFI_ADJUST_CFA_OFFSET -4 + CFI_ADJUST_CFA_OFFSET -8 call schedule_tail GET_THREAD_INFO(%rcx) testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 9bfc4d7..d16084f 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -108,12 +108,11 @@ void __init x86_64_start_kernel(char * real_mode_data) } load_idt((const struct desc_ptr *)&idt_descr); - early_printk("Kernel alive\n"); + if (console_loglevel == 10) + early_printk("Kernel alive\n"); x86_64_init_pda(); - early_printk("Kernel really alive\n"); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 50e5e4a..1919143 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/thread_info.h> #include <linux/syscalls.h> +#include <asm/syscalls.h> /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ static void set_bitmap(unsigned long *bitmap, unsigned int base, diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 3f7537b..f1c688e 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c @@ -20,6 +20,8 @@ #ifdef CONFIG_X86_32 #include <mach_apic.h> +#include <mach_ipi.h> + /* * the following functions deal with sending IPIs between CPUs. * @@ -147,7 +149,6 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) } /* must come after the send_IPI functions above for inlining */ -#include <mach_ipi.h> static int convert_apicid_to_cpu(int apic_id) { int i; diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1cf8c1f..b71e02d 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -325,7 +325,7 @@ skip: for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).irq_call_count); - seq_printf(p, " function call interrupts\n"); + seq_printf(p, " Function call interrupts\n"); seq_printf(p, "TLB: "); for_each_online_cpu(j) seq_printf(p, "%10u ", diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1f78b23..f065fe9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -129,7 +129,7 @@ skip: seq_printf(p, "CAL: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_call_count); - seq_printf(p, " function call interrupts\n"); + seq_printf(p, " Function call interrupts\n"); seq_printf(p, "TLB: "); for_each_online_cpu(j) seq_printf(p, "%10u ", cpu_pda(j)->irq_tlb_count); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8282a21..10435a1 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -455,12 +455,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) return NOTIFY_DONE; case DIE_NMI_IPI: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - was_in_debug_nmi[raw_smp_processor_id()] = 1; - touch_nmi_watchdog(); - } + /* Just ignore, we will handle the roundup on DIE_NMI. */ return NOTIFY_DONE; case DIE_NMIUNKNOWN: diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8b7a3cf..478bca9 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -178,7 +178,7 @@ static void kvm_flush_tlb(void) kvm_deferred_mmu_op(&ftlb, sizeof ftlb); } -static void kvm_release_pt(u32 pfn) +static void kvm_release_pt(unsigned long pfn) { struct kvm_mmu_op_release_pt rpt = { .header.op = KVM_MMU_OP_RELEASE_PT, diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index b68e21f..0ed5f93 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -18,6 +18,7 @@ #include <asm/ldt.h> #include <asm/desc.h> #include <asm/mmu_context.h> +#include <asm/syscalls.h> #ifdef CONFIG_SMP static void flush_ldt(void *current_mm) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index abb78a2..2c97f07 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -299,6 +299,15 @@ void acpi_nmi_disable(void) on_each_cpu(__acpi_nmi_disable, NULL, 1); } +/* + * This function is called as soon the LAPIC NMI watchdog driver has everything + * in place and it's ready to check if the NMIs belong to the NMI watchdog + */ +void cpu_nmi_set_wd_enabled(void) +{ + __get_cpu_var(wd_enabled) = 1; +} + void setup_apic_nmi_watchdog(void *unused) { if (__get_cpu_var(wd_enabled)) @@ -311,8 +320,6 @@ void setup_apic_nmi_watchdog(void *unused) switch (nmi_watchdog) { case NMI_LOCAL_APIC: - /* enable it before to avoid race with handler */ - __get_cpu_var(wd_enabled) = 1; if (lapic_watchdog_init(nmi_hz) < 0) { __get_cpu_var(wd_enabled) = 0; return; diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 3e66722..7a13fac 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c @@ -190,12 +190,12 @@ EXPORT_SYMBOL_GPL(olpc_ec_cmd); static void __init platform_detect(void) { size_t propsize; - u32 rev; + __be32 rev; if (ofw("getprop", 4, 1, NULL, "board-revision-int", &rev, 4, &propsize) || propsize != 4) { printk(KERN_ERR "ofw: getprop call failed!\n"); - rev = 0; + rev = cpu_to_be32(0); } olpc_platform_info.boardrev = be32_to_cpu(rev); } @@ -203,7 +203,7 @@ static void __init platform_detect(void) static void __init platform_detect(void) { /* stopgap until OFW support is added to the kernel */ - olpc_platform_info.boardrev = be32_to_cpu(0xc2); + olpc_platform_info.boardrev = 0xc2; } #endif diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 300da17..e2f4376 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -330,6 +330,7 @@ struct pv_cpu_ops pv_cpu_ops = { #endif .wbinvd = native_wbinvd, .read_msr = native_read_msr_safe, + .read_msr_amd = native_read_msr_amd_safe, .write_msr = native_write_msr_safe, .read_tsc = native_read_tsc, .read_pmc = native_read_pmc, diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 5826221..9fe644f 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -23,7 +23,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf, start = start_##ops##_##x; \ end = end_##ops##_##x; \ goto patch_site - switch(type) { + switch (type) { PATCH_SITE(pv_irq_ops, irq_disable); PATCH_SITE(pv_irq_ops, irq_enable); PATCH_SITE(pv_irq_ops, restore_fl); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 87d4d69..f704cb5 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -82,7 +82,7 @@ void __init dma32_reserve_bootmem(void) * using 512M as goal */ align = 64ULL<<20; - size = round_up(dma32_bootmem_size, align); + size = roundup(dma32_bootmem_size, align); dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, 512ULL<<20); if (dma32_bootmem_ptr) diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 49285f8..1a895a5 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -82,7 +82,8 @@ AGPEXTERN __u32 *agp_gatt_table; static unsigned long next_bit; /* protected by iommu_bitmap_lock */ static int need_flush; /* global flush state. set for each gart wrap */ -static unsigned long alloc_iommu(struct device *dev, int size) +static unsigned long alloc_iommu(struct device *dev, int size, + unsigned long align_mask) { unsigned long offset, flags; unsigned long boundary_size; @@ -90,16 +91,17 @@ static unsigned long alloc_iommu(struct device *dev, int size) base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; spin_lock_irqsave(&iommu_bitmap_lock, flags); offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, align_mask); if (offset == -1) { need_flush = 1; offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0, - size, base_index, boundary_size, 0); + size, base_index, boundary_size, + align_mask); } if (offset != -1) { next_bit = offset+size; @@ -236,10 +238,10 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) * Caller needs to check if the iommu is needed and flush. */ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, - size_t size, int dir) + size_t size, int dir, unsigned long align_mask) { unsigned long npages = iommu_num_pages(phys_mem, size); - unsigned long iommu_page = alloc_iommu(dev, npages); + unsigned long iommu_page = alloc_iommu(dev, npages, align_mask); int i; if (iommu_page == -1) { @@ -262,7 +264,11 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, static dma_addr_t gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, paddr, size, dir); + dma_addr_t map; + unsigned long align_mask; + + align_mask = (1UL << get_order(size)) - 1; + map = dma_map_area(dev, paddr, size, dir, align_mask); flush_gart(); @@ -281,7 +287,8 @@ gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) if (!need_iommu(dev, paddr, size)) return paddr; - bus = gart_map_simple(dev, paddr, size, dir); + bus = dma_map_area(dev, paddr, size, dir, 0); + flush_gart(); return bus; } @@ -340,7 +347,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, unsigned long addr = sg_phys(s); if (nonforced_iommu(dev, addr, s->length)) { - addr = dma_map_area(dev, addr, s->length, dir); + addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) gart_unmap_sg(dev, sg, i, dir); @@ -362,7 +369,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, int nelems, struct scatterlist *sout, unsigned long pages) { - unsigned long iommu_start = alloc_iommu(dev, pages); + unsigned long iommu_start = alloc_iommu(dev, pages, 0); unsigned long iommu_page = iommu_start; struct scatterlist *s; int i; @@ -626,7 +633,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) struct pci_dev *dev; void *gatt; int i, error; - unsigned long start_pfn, end_pfn; printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; @@ -672,12 +678,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", aper_base, aper_size>>10); - /* need to map that range */ - end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); - if (end_pfn > max_low_pfn_mapped) { - start_pfn = (aper_base>>PAGE_SHIFT); - init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); - } return 0; nommu: @@ -727,7 +727,8 @@ void __init gart_iommu_init(void) { struct agp_kern_info info; unsigned long iommu_start; - unsigned long aper_size; + unsigned long aper_base, aper_size; + unsigned long start_pfn, end_pfn; unsigned long scratch; long i; @@ -765,8 +766,16 @@ void __init gart_iommu_init(void) return; } + /* need to map that range */ + aper_size = info.aper_size << 20; + aper_base = info.aper_base; + end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); + if (end_pfn > max_low_pfn_mapped) { + start_pfn = (aper_base>>PAGE_SHIFT); + init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); + } + printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); - aper_size = info.aper_size * 1024 * 1024; iommu_size = check_iommu_size(info.aper_base, aper_size); iommu_pages = iommu_size >> PAGE_SHIFT; diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c index bc1f2d3..a311ffc 100644 --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c @@ -1,20 +1,13 @@ #include <linux/platform_device.h> -#include <linux/errno.h> +#include <linux/err.h> #include <linux/init.h> static __init int add_pcspkr(void) { struct platform_device *pd; - int ret; - pd = platform_device_alloc("pcspkr", -1); - if (!pd) - return -ENOMEM; + pd = platform_device_register_simple("pcspkr", -1, NULL, 0); - ret = platform_device_add(pd); - if (ret) - platform_device_put(pd); - - return ret; + return IS_ERR(pd) ? PTR_ERR(pd) : 0; } device_initcall(add_pcspkr); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 876e918..ec7a2ba 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -185,7 +185,8 @@ static void mwait_idle(void) static void poll_idle(void) { local_irq_enable(); - cpu_relax(); + while (!need_resched()) + cpu_relax(); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 31f40b2..205188d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -37,6 +37,7 @@ #include <linux/tick.h> #include <linux/percpu.h> #include <linux/prctl.h> +#include <linux/dmi.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -56,6 +57,8 @@ #include <asm/cpu.h> #include <asm/kdebug.h> #include <asm/idle.h> +#include <asm/syscalls.h> +#include <asm/smp.h> asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -161,6 +164,7 @@ void __show_registers(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned long sp; unsigned short ss, gs; + const char *board; if (user_mode_vm(regs)) { sp = regs->sp; @@ -173,11 +177,15 @@ void __show_registers(struct pt_regs *regs, int all) } printk("\n"); - printk("Pid: %d, comm: %s %s (%s %.*s)\n", + + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", task_pid_nr(current), current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", (u16)regs->cs, regs->ip, regs->flags, @@ -277,6 +285,14 @@ void exit_thread(void) tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(current->thread.ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(current->thread.ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -438,6 +454,35 @@ int set_tsc_mode(unsigned int val) return 0; } +#ifdef CONFIG_X86_DS +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) +{ + unsigned long ds_prev = 0; + unsigned long ds_next = 0; + + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { + /* we clear debugctl to make sure DS + * is not in use when we change it */ + debugctl = 0; + update_debugctlmsr(0); + wrmsr(MSR_IA32_DS_AREA, ds_next, 0); + } + return debugctl; +} +#else +static int update_debugctl(struct thread_struct *prev, + struct thread_struct *next, unsigned long debugctl) +{ + return debugctl; +} +#endif /* CONFIG_X86_DS */ + static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) @@ -448,14 +493,7 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, prev = &prev_p->thread; next = &next_p->thread; - debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { - /* we clear debugctl to make sure DS - * is not in use when we change it */ - debugctl = 0; - update_debugctlmsr(0); - wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0); - } + debugctl = update_debugctl(prev, next, prev->debugctlmsr); if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -479,13 +517,13 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, hard_enable_TSC(); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e12e0e4..2a8ccb9 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -37,11 +37,11 @@ #include <linux/kdebug.h> #include <linux/tick.h> #include <linux/prctl.h> +#include <linux/uaccess.h> +#include <linux/io.h> -#include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/system.h> -#include <asm/io.h> #include <asm/processor.h> #include <asm/i387.h> #include <asm/mmu_context.h> @@ -51,6 +51,7 @@ #include <asm/proto.h> #include <asm/ia32.h> #include <asm/idle.h> +#include <asm/syscalls.h> asmlinkage extern void ret_from_fork(void); @@ -88,7 +89,7 @@ void exit_idle(void) #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); -#include <asm/nmi.h> +#include <linux/nmi.h> /* We halt the CPU with physical CPU hotplug */ static inline void play_dead(void) { @@ -153,7 +154,7 @@ void cpu_idle(void) } /* Prints also some state that isn't saved in the pt_regs */ -void __show_regs(struct pt_regs * regs) +void __show_regs(struct pt_regs *regs) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs; unsigned long d0, d1, d2, d3, d6, d7; @@ -162,59 +163,61 @@ void __show_regs(struct pt_regs * regs) printk("\n"); print_modules(); - printk("Pid: %d, comm: %.20s %s %s %.*s\n", + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); + printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); - printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp, - regs->flags); - printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", + printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, + regs->sp, regs->flags); + printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", regs->ax, regs->bx, regs->cx); - printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", + printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", regs->dx, regs->si, regs->di); - printk("RBP: %016lx R08: %016lx R09: %016lx\n", + printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", regs->bp, regs->r8, regs->r9); - printk("R10: %016lx R11: %016lx R12: %016lx\n", - regs->r10, regs->r11, regs->r12); - printk("R13: %016lx R14: %016lx R15: %016lx\n", - regs->r13, regs->r14, regs->r15); - - asm("movl %%ds,%0" : "=r" (ds)); - asm("movl %%cs,%0" : "=r" (cs)); - asm("movl %%es,%0" : "=r" (es)); + printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", + regs->r10, regs->r11, regs->r12); + printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", + regs->r13, regs->r14, regs->r15); + + asm("movl %%ds,%0" : "=r" (ds)); + asm("movl %%cs,%0" : "=r" (cs)); + asm("movl %%es,%0" : "=r" (es)); asm("movl %%fs,%0" : "=r" (fsindex)); asm("movl %%gs,%0" : "=r" (gsindex)); rdmsrl(MSR_FS_BASE, fs); - rdmsrl(MSR_GS_BASE, gs); - rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); + rdmsrl(MSR_GS_BASE, gs); + rdmsrl(MSR_KERNEL_GS_BASE, shadowgs); cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4(); - printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", - fs,fsindex,gs,gsindex,shadowgs); - printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0); - printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4); + printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", + fs, fsindex, gs, gsindex, shadowgs); + printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, + es, cr0); + printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, + cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); get_debugreg(d2, 2); - printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); + printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); get_debugreg(d3, 3); get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); + printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); } void show_regs(struct pt_regs *regs) { - printk("CPU %d:", smp_processor_id()); + printk(KERN_INFO "CPU %d:", smp_processor_id()); __show_regs(regs); show_trace(NULL, regs, (void *)(regs + 1), regs->bp); } @@ -240,6 +243,14 @@ void exit_thread(void) t->io_bitmap_max = 0; put_cpu(); } +#ifdef CONFIG_X86_DS + /* Free any DS contexts that have not been properly released. */ + if (unlikely(t->ds_ctx)) { + /* we clear debugctl to make sure DS is not used. */ + update_debugctlmsr(0); + ds_free(t->ds_ctx); + } +#endif /* CONFIG_X86_DS */ } void flush_thread(void) @@ -315,10 +326,10 @@ void prepare_to_copy(struct task_struct *tsk) int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, unsigned long unused, - struct task_struct * p, struct pt_regs * regs) + struct task_struct *p, struct pt_regs *regs) { int err; - struct pt_regs * childregs; + struct pt_regs *childregs; struct task_struct *me = current; childregs = ((struct pt_regs *) @@ -363,10 +374,10 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, if (test_thread_flag(TIF_IA32)) err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); - else -#endif - err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); - if (err) + else +#endif + err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8); + if (err) goto out; } err = 0; @@ -473,13 +484,27 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, next = &next_p->thread; debugctl = prev->debugctlmsr; - if (next->ds_area_msr != prev->ds_area_msr) { - /* we clear debugctl to make sure DS - * is not in use when we change it */ - debugctl = 0; - update_debugctlmsr(0); - wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr); + +#ifdef CONFIG_X86_DS + { + unsigned long ds_prev = 0, ds_next = 0; + + if (prev->ds_ctx) + ds_prev = (unsigned long)prev->ds_ctx->ds; + if (next->ds_ctx) + ds_next = (unsigned long)next->ds_ctx->ds; + + if (ds_next != ds_prev) { + /* + * We clear debugctl to make sure DS + * is not in use when we change it: + */ + debugctl = 0; + update_debugctlmsr(0); + wrmsrl(MSR_IA32_DS_AREA, ds_next); + } } +#endif /* CONFIG_X86_DS */ if (next->debugctlmsr != debugctl) update_debugctlmsr(next->debugctlmsr); @@ -517,13 +542,13 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ } /* @@ -545,7 +570,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) unsigned fsindex, gsindex; /* we're going to use this soon, after a few expensive things */ - if (next_p->fpu_counter>5) + if (next_p->fpu_counter > 5) prefetch(next->xstate); /* @@ -553,13 +578,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ load_sp0(tss, next); - /* + /* * Switch DS and ES. * This won't pick up thread selector changes, but I guess that is ok. */ savesegment(es, prev->es); if (unlikely(next->es | prev->es)) - loadsegment(es, next->es); + loadsegment(es, next->es); savesegment(ds, prev->ds); if (unlikely(next->ds | prev->ds)) @@ -585,7 +610,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ arch_leave_lazy_cpu_mode(); - /* + /* * Switch FS and GS. * * Segment register != 0 always requires a reload. Also @@ -594,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (unlikely(fsindex | next->fsindex | prev->fs)) { loadsegment(fs, next->fsindex); - /* + /* * Check if the user used a selector != 0; if yes * clear 64bit base, since overloaded base is always * mapped to the Null selector */ if (fsindex) - prev->fs = 0; + prev->fs = 0; } /* when next process has a 64bit base use it */ if (next->fs) @@ -610,7 +635,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (unlikely(gsindex | next->gsindex | prev->gs)) { load_gs_index(next->gsindex); if (gsindex) - prev->gs = 0; + prev->gs = 0; } if (next->gs) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); @@ -619,12 +644,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Must be after DS reload */ unlazy_fpu(prev_p); - /* + /* * Switch the PDA and FPU contexts. */ prev->usersp = read_pda(oldrsp); write_pda(oldrsp, next->usersp); - write_pda(pcurrent, next_p); + write_pda(pcurrent, next_p); write_pda(kernelstack, (unsigned long)task_stack_page(next_p) + @@ -665,7 +690,7 @@ long sys_execve(char __user *name, char __user * __user *argv, char __user * __user *envp, struct pt_regs *regs) { long error; - char * filename; + char *filename; filename = getname(name); error = PTR_ERR(filename); @@ -723,55 +748,55 @@ asmlinkage long sys_vfork(struct pt_regs *regs) unsigned long get_wchan(struct task_struct *p) { unsigned long stack; - u64 fp,ip; + u64 fp, ip; int count = 0; - if (!p || p == current || p->state==TASK_RUNNING) - return 0; + if (!p || p == current || p->state == TASK_RUNNING) + return 0; stack = (unsigned long)task_stack_page(p); if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE) return 0; fp = *(u64 *)(p->thread.sp); - do { + do { if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) - return 0; + return 0; ip = *(u64 *)(fp+8); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = *(u64 *)fp; + } while (count++ < 16); return 0; } long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) -{ - int ret = 0; +{ + int ret = 0; int doit = task == current; int cpu; - switch (code) { + switch (code) { case ARCH_SET_GS: if (addr >= TASK_SIZE_OF(task)) - return -EPERM; + return -EPERM; cpu = get_cpu(); - /* handle small bases via the GDT because that's faster to + /* handle small bases via the GDT because that's faster to switch. */ - if (addr <= 0xffffffff) { - set_32bit_tls(task, GS_TLS, addr); - if (doit) { + if (addr <= 0xffffffff) { + set_32bit_tls(task, GS_TLS, addr); + if (doit) { load_TLS(&task->thread, cpu); - load_gs_index(GS_TLS_SEL); + load_gs_index(GS_TLS_SEL); } - task->thread.gsindex = GS_TLS_SEL; + task->thread.gsindex = GS_TLS_SEL; task->thread.gs = 0; - } else { + } else { task->thread.gsindex = 0; task->thread.gs = addr; if (doit) { load_gs_index(0); ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); - } + } } put_cpu(); break; @@ -825,8 +850,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) rdmsrl(MSR_KERNEL_GS_BASE, base); else base = task->thread.gs; - } - else + } else base = task->thread.gs; ret = put_user(base, (unsigned long __user *)addr); break; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e37dccc..e375b65 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -14,6 +14,7 @@ #include <linux/errno.h> #include <linux/ptrace.h> #include <linux/regset.h> +#include <linux/tracehook.h> #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> @@ -69,7 +70,7 @@ static inline bool invalid_selector(u16 value) #define FLAG_MASK FLAG_MASK_32 -static long *pt_regs_access(struct pt_regs *regs, unsigned long regno) +static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) { BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); regno >>= 2; @@ -554,45 +555,115 @@ static int ptrace_set_debugreg(struct task_struct *child, return 0; } -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS +/* + * The configuration for a particular BTS hardware implementation. + */ +struct bts_configuration { + /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ + unsigned char sizeof_bts; + /* the size of a field in the BTS record in bytes */ + unsigned char sizeof_field; + /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ + unsigned long debugctl_mask; +}; +static struct bts_configuration bts_cfg; + +#define BTS_MAX_RECORD_SIZE (8 * 3) + + +/* + * Branch Trace Store (BTS) uses the following format. Different + * architectures vary in the size of those fields. + * - source linear address + * - destination linear address + * - flags + * + * Later architectures use 64bit pointers throughout, whereas earlier + * architectures use 32bit pointers in 32bit mode. + * + * We compute the base address for the first 8 fields based on: + * - the field size stored in the DS configuration + * - the relative field position + * + * In order to store additional information in the BTS buffer, we use + * a special source address to indicate that the record requires + * special interpretation. + * + * Netburst indicated via a bit in the flags field whether the branch + * was predicted; this is ignored. + */ + +enum bts_field { + bts_from = 0, + bts_to, + bts_flags, + + bts_escape = (unsigned long)-1, + bts_qual = bts_to, + bts_jiffies = bts_flags +}; + +static inline unsigned long bts_get(const char *base, enum bts_field field) +{ + base += (bts_cfg.sizeof_field * field); + return *(unsigned long *)base; +} -static int ptrace_bts_get_size(struct task_struct *child) +static inline void bts_set(char *base, enum bts_field field, unsigned long val) { - if (!child->thread.ds_area_msr) - return -ENXIO; + base += (bts_cfg.sizeof_field * field);; + (*(unsigned long *)base) = val; +} - return ds_get_bts_index((void *)child->thread.ds_area_msr); +/* + * Translate a BTS record from the raw format into the bts_struct format + * + * out (out): bts_struct interpretation + * raw: raw BTS record + */ +static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) +{ + memset(out, 0, sizeof(*out)); + if (bts_get(raw, bts_from) == bts_escape) { + out->qualifier = bts_get(raw, bts_qual); + out->variant.jiffies = bts_get(raw, bts_jiffies); + } else { + out->qualifier = BTS_BRANCH; + out->variant.lbr.from_ip = bts_get(raw, bts_from); + out->variant.lbr.to_ip = bts_get(raw, bts_to); + } } -static int ptrace_bts_read_record(struct task_struct *child, - long index, +static int ptrace_bts_read_record(struct task_struct *child, size_t index, struct bts_struct __user *out) { struct bts_struct ret; - int retval; - int bts_end; - int bts_index; - - if (!child->thread.ds_area_msr) - return -ENXIO; + const void *bts_record; + size_t bts_index, bts_end; + int error; - if (index < 0) - return -EINVAL; + error = ds_get_bts_end(child, &bts_end); + if (error < 0) + return error; - bts_end = ds_get_bts_end((void *)child->thread.ds_area_msr); if (bts_end <= index) return -EINVAL; + error = ds_get_bts_index(child, &bts_index); + if (error < 0) + return error; + /* translate the ptrace bts index into the ds bts index */ - bts_index = ds_get_bts_index((void *)child->thread.ds_area_msr); - bts_index -= (index + 1); - if (bts_index < 0) - bts_index += bts_end; + bts_index += bts_end - (index + 1); + if (bts_end <= bts_index) + bts_index -= bts_end; - retval = ds_read_bts((void *)child->thread.ds_area_msr, - bts_index, &ret); - if (retval < 0) - return retval; + error = ds_access_bts(child, bts_index, &bts_record); + if (error < 0) + return error; + + ptrace_bts_translate_record(&ret, bts_record); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; @@ -600,101 +671,106 @@ static int ptrace_bts_read_record(struct task_struct *child, return sizeof(ret); } -static int ptrace_bts_clear(struct task_struct *child) -{ - if (!child->thread.ds_area_msr) - return -ENXIO; - - return ds_clear((void *)child->thread.ds_area_msr); -} - static int ptrace_bts_drain(struct task_struct *child, long size, struct bts_struct __user *out) { - int end, i; - void *ds = (void *)child->thread.ds_area_msr; - - if (!ds) - return -ENXIO; + struct bts_struct ret; + const unsigned char *raw; + size_t end, i; + int error; - end = ds_get_bts_index(ds); - if (end <= 0) - return end; + error = ds_get_bts_index(child, &end); + if (error < 0) + return error; if (size < (end * sizeof(struct bts_struct))) return -EIO; - for (i = 0; i < end; i++, out++) { - struct bts_struct ret; - int retval; + error = ds_access_bts(child, 0, (const void **)&raw); + if (error < 0) + return error; - retval = ds_read_bts(ds, i, &ret); - if (retval < 0) - return retval; + for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { + ptrace_bts_translate_record(&ret, raw); if (copy_to_user(out, &ret, sizeof(ret))) return -EFAULT; } - ds_clear(ds); + error = ds_clear_bts(child); + if (error < 0) + return error; return end; } +static void ptrace_bts_ovfl(struct task_struct *child) +{ + send_sig(child->thread.bts_ovfl_signal, child, 0); +} + static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) { struct ptrace_bts_config cfg; - int bts_size, ret = 0; - void *ds; + int error = 0; + + error = -EOPNOTSUPP; + if (!bts_cfg.sizeof_bts) + goto errout; + error = -EIO; if (cfg_size < sizeof(cfg)) - return -EIO; + goto errout; + error = -EFAULT; if (copy_from_user(&cfg, ucfg, sizeof(cfg))) - return -EFAULT; + goto errout; - if ((int)cfg.size < 0) - return -EINVAL; + error = -EINVAL; + if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && + !(cfg.flags & PTRACE_BTS_O_ALLOC)) + goto errout; - bts_size = 0; - ds = (void *)child->thread.ds_area_msr; - if (ds) { - bts_size = ds_get_bts_size(ds); - if (bts_size < 0) - return bts_size; - } - cfg.size = PAGE_ALIGN(cfg.size); + if (cfg.flags & PTRACE_BTS_O_ALLOC) { + ds_ovfl_callback_t ovfl = NULL; + unsigned int sig = 0; + + /* we ignore the error in case we were not tracing child */ + (void)ds_release_bts(child); - if (bts_size != cfg.size) { - ret = ptrace_bts_realloc(child, cfg.size, - cfg.flags & PTRACE_BTS_O_CUT_SIZE); - if (ret < 0) + if (cfg.flags & PTRACE_BTS_O_SIGNAL) { + if (!cfg.signal) + goto errout; + + sig = cfg.signal; + ovfl = ptrace_bts_ovfl; + } + + error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); + if (error < 0) goto errout; - ds = (void *)child->thread.ds_area_msr; + child->thread.bts_ovfl_signal = sig; } - if (cfg.flags & PTRACE_BTS_O_SIGNAL) - ret = ds_set_overflow(ds, DS_O_SIGNAL); - else - ret = ds_set_overflow(ds, DS_O_WRAP); - if (ret < 0) + error = -EINVAL; + if (!child->thread.ds_ctx && cfg.flags) goto errout; if (cfg.flags & PTRACE_BTS_O_TRACE) - child->thread.debugctlmsr |= ds_debugctl_mask(); + child->thread.debugctlmsr |= bts_cfg.debugctl_mask; else - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; if (cfg.flags & PTRACE_BTS_O_SCHED) set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); else clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - ret = sizeof(cfg); + error = sizeof(cfg); out: if (child->thread.debugctlmsr) @@ -702,10 +778,10 @@ out: else clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - return ret; + return error; errout: - child->thread.debugctlmsr &= ~ds_debugctl_mask(); + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); goto out; } @@ -714,29 +790,40 @@ static int ptrace_bts_status(struct task_struct *child, long cfg_size, struct ptrace_bts_config __user *ucfg) { - void *ds = (void *)child->thread.ds_area_msr; struct ptrace_bts_config cfg; + size_t end; + const void *base, *max; + int error; if (cfg_size < sizeof(cfg)) return -EIO; - memset(&cfg, 0, sizeof(cfg)); + error = ds_get_bts_end(child, &end); + if (error < 0) + return error; - if (ds) { - cfg.size = ds_get_bts_size(ds); + error = ds_access_bts(child, /* index = */ 0, &base); + if (error < 0) + return error; - if (ds_get_overflow(ds) == DS_O_SIGNAL) - cfg.flags |= PTRACE_BTS_O_SIGNAL; + error = ds_access_bts(child, /* index = */ end, &max); + if (error < 0) + return error; - if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && - child->thread.debugctlmsr & ds_debugctl_mask()) - cfg.flags |= PTRACE_BTS_O_TRACE; + memset(&cfg, 0, sizeof(cfg)); + cfg.size = (max - base); + cfg.signal = child->thread.bts_ovfl_signal; + cfg.bts_size = sizeof(struct bts_struct); - if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) - cfg.flags |= PTRACE_BTS_O_SCHED; - } + if (cfg.signal) + cfg.flags |= PTRACE_BTS_O_SIGNAL; - cfg.bts_size = sizeof(struct bts_struct); + if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && + child->thread.debugctlmsr & bts_cfg.debugctl_mask) + cfg.flags |= PTRACE_BTS_O_TRACE; + + if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) + cfg.flags |= PTRACE_BTS_O_SCHED; if (copy_to_user(ucfg, &cfg, sizeof(cfg))) return -EFAULT; @@ -744,89 +831,38 @@ static int ptrace_bts_status(struct task_struct *child, return sizeof(cfg); } - static int ptrace_bts_write_record(struct task_struct *child, const struct bts_struct *in) { - int retval; + unsigned char bts_record[BTS_MAX_RECORD_SIZE]; - if (!child->thread.ds_area_msr) - return -ENXIO; + BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); - retval = ds_write_bts((void *)child->thread.ds_area_msr, in); - if (retval) - return retval; + memset(bts_record, 0, bts_cfg.sizeof_bts); + switch (in->qualifier) { + case BTS_INVALID: + break; - return sizeof(*in); -} + case BTS_BRANCH: + bts_set(bts_record, bts_from, in->variant.lbr.from_ip); + bts_set(bts_record, bts_to, in->variant.lbr.to_ip); + break; -static int ptrace_bts_realloc(struct task_struct *child, - int size, int reduce_size) -{ - unsigned long rlim, vm; - int ret, old_size; + case BTS_TASK_ARRIVES: + case BTS_TASK_DEPARTS: + bts_set(bts_record, bts_from, bts_escape); + bts_set(bts_record, bts_qual, in->qualifier); + bts_set(bts_record, bts_jiffies, in->variant.jiffies); + break; - if (size < 0) + default: return -EINVAL; - - old_size = ds_get_bts_size((void *)child->thread.ds_area_msr); - if (old_size < 0) - return old_size; - - ret = ds_free((void **)&child->thread.ds_area_msr); - if (ret < 0) - goto out; - - size >>= PAGE_SHIFT; - old_size >>= PAGE_SHIFT; - - current->mm->total_vm -= old_size; - current->mm->locked_vm -= old_size; - - if (size == 0) - goto out; - - rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; - vm = current->mm->total_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->total_vm; - if (size <= 0) - goto out; - } - - rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; - vm = current->mm->locked_vm + size; - if (rlim < vm) { - ret = -ENOMEM; - - if (!reduce_size) - goto out; - - size = rlim - current->mm->locked_vm; - if (size <= 0) - goto out; } - ret = ds_allocate((void **)&child->thread.ds_area_msr, - size << PAGE_SHIFT); - if (ret < 0) - goto out; - - current->mm->total_vm += size; - current->mm->locked_vm += size; - -out: - if (child->thread.ds_area_msr) - set_tsk_thread_flag(child, TIF_DS_AREA_MSR); - else - clear_tsk_thread_flag(child, TIF_DS_AREA_MSR); - - return ret; + /* The writing task will be the switched-to task on a context + * switch. It needs to write into the switched-from task's BTS + * buffer. */ + return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); } void ptrace_bts_take_timestamp(struct task_struct *tsk, @@ -839,7 +875,66 @@ void ptrace_bts_take_timestamp(struct task_struct *tsk, ptrace_bts_write_record(tsk, &rec); } -#endif /* X86_BTS */ + +static const struct bts_configuration bts_cfg_netburst = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<2)|(1<<3)|(1<<5) +}; + +static const struct bts_configuration bts_cfg_pentium_m = { + .sizeof_bts = sizeof(long) * 3, + .sizeof_field = sizeof(long), + .debugctl_mask = (1<<6)|(1<<7) +}; + +static const struct bts_configuration bts_cfg_core2 = { + .sizeof_bts = 8 * 3, + .sizeof_field = 8, + .debugctl_mask = (1<<6)|(1<<7)|(1<<9) +}; + +static inline void bts_configure(const struct bts_configuration *cfg) +{ + bts_cfg = *cfg; +} + +void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) +{ + switch (c->x86) { + case 0x6: + switch (c->x86_model) { + case 0xD: + case 0xE: /* Pentium M */ + bts_configure(&bts_cfg_pentium_m); + break; + case 0xF: /* Core2 */ + case 0x1C: /* Atom */ + bts_configure(&bts_cfg_core2); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + case 0xF: + switch (c->x86_model) { + case 0x0: + case 0x1: + case 0x2: /* Netburst */ + bts_configure(&bts_cfg_netburst); + break; + default: + /* sorry, don't know about them */ + break; + } + break; + default: + /* sorry, don't know about them */ + break; + } +} +#endif /* CONFIG_X86_PTRACE_BTS */ /* * Called by kernel/ptrace.c when detaching.. @@ -852,15 +947,15 @@ void ptrace_disable(struct task_struct *child) #ifdef TIF_SYSCALL_EMU clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); #endif - if (child->thread.ds_area_msr) { -#ifdef X86_BTS - ptrace_bts_realloc(child, 0, 0); -#endif - child->thread.debugctlmsr &= ~ds_debugctl_mask(); - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); - } +#ifdef CONFIG_X86_PTRACE_BTS + (void)ds_release_bts(child); + + child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; + if (!child->thread.debugctlmsr) + clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + + clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); +#endif /* CONFIG_X86_PTRACE_BTS */ } #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION @@ -980,7 +1075,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) /* * These bits need more cooking - not enabled yet: */ -#ifdef X86_BTS +#ifdef CONFIG_X86_PTRACE_BTS case PTRACE_BTS_CONFIG: ret = ptrace_bts_config (child, data, (struct ptrace_bts_config __user *)addr); @@ -992,7 +1087,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_SIZE: - ret = ptrace_bts_get_size(child); + ret = ds_get_bts_index(child, /* pos = */ NULL); break; case PTRACE_BTS_GET: @@ -1001,14 +1096,14 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; case PTRACE_BTS_CLEAR: - ret = ptrace_bts_clear(child); + ret = ds_clear_bts(child); break; case PTRACE_BTS_DRAIN: ret = ptrace_bts_drain (child, data, (struct bts_struct __user *) addr); break; -#endif +#endif /* CONFIG_X86_PTRACE_BTS */ default: ret = ptrace_request(child, request, addr, data); @@ -1375,30 +1470,6 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) force_sig_info(SIGTRAP, &info, tsk); } -static void syscall_trace(struct pt_regs *regs) -{ - if (!(current->ptrace & PT_PTRACED)) - return; - -#if 0 - printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", - current->comm, - regs->ip, regs->sp, regs->ax, regs->orig_ax, __builtin_return_address(0), - current_thread_info()->flags, current->ptrace); -#endif - - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); - /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl - */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } -} #ifdef CONFIG_X86_32 # define IS_IA32 1 @@ -1432,8 +1503,9 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) ret = -1L; - if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) - syscall_trace(regs); + if ((ret || test_thread_flag(TIF_SYSCALL_TRACE)) && + tracehook_report_syscall_entry(regs)) + ret = -1L; if (unlikely(current->audit_context)) { if (IS_IA32) @@ -1459,7 +1531,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); if (test_thread_flag(TIF_SYSCALL_TRACE)) - syscall_trace(regs); + tracehook_report_syscall_exit(regs, 0); /* * If TIF_SYSCALL_EMU is set, we only get here because of @@ -1475,6 +1547,6 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) * system call instruction. */ if (test_thread_flag(TIF_SINGLESTEP) && - (current->ptrace & PT_PTRACED)) + tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL)) send_sigtrap(current, regs, 0); } diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 724adfc..f4c93f1 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,7 +29,11 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -enum reboot_type reboot_type = BOOT_KBD; +/* + * Keyboard reset and triple fault may result in INIT, not RESET, which + * doesn't work when we're in vmx root mode. Try ACPI first. + */ +enum reboot_type reboot_type = BOOT_ACPI; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9838f25..141efab 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -223,6 +223,9 @@ unsigned long saved_video_mode; #define RAMDISK_LOAD_FLAG 0x4000 static char __initdata command_line[COMMAND_LINE_SIZE]; +#ifdef CONFIG_CMDLINE_BOOL +static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +#endif #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) struct edd edd; @@ -665,6 +668,19 @@ void __init setup_arch(char **cmdline_p) bss_resource.start = virt_to_phys(&__bss_start); bss_resource.end = virt_to_phys(&__bss_stop)-1; +#ifdef CONFIG_CMDLINE_BOOL +#ifdef CONFIG_CMDLINE_OVERRIDE + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); +#else + if (builtin_cmdline[0]) { + /* append boot loader cmdline to builtin */ + strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE); + strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE); + strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE); + } +#endif +#endif + strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 76e305e..0e67f72 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -162,9 +162,16 @@ void __init setup_per_cpu_areas(void) printk(KERN_INFO "cpu %d has no node %d or node-local memory\n", cpu, node); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", + cpu, __pa(ptr)); } - else + else { ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); + if (ptr) + printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", + cpu, node, __pa(ptr)); + } #endif per_cpu_offset(cpu) = ptr - __per_cpu_start; memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h index 72bbb51..8b4956e 100644 --- a/arch/x86/kernel/sigframe.h +++ b/arch/x86/kernel/sigframe.h @@ -24,4 +24,9 @@ struct rt_sigframe { struct ucontext uc; struct siginfo info; }; + +int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct k_sigaction *ka, + sigset_t *set, struct pt_regs *regs); #endif diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index 6fb5bcd..2a2435d 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c @@ -17,6 +17,7 @@ #include <linux/errno.h> #include <linux/sched.h> #include <linux/wait.h> +#include <linux/tracehook.h> #include <linux/elf.h> #include <linux/smp.h> #include <linux/mm.h> @@ -26,6 +27,7 @@ #include <asm/uaccess.h> #include <asm/i387.h> #include <asm/vdso.h> +#include <asm/syscalls.h> #include "sigframe.h" @@ -558,8 +560,6 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * handler too. */ regs->flags &= ~X86_EFLAGS_TF; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); @@ -568,6 +568,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); + return 0; } @@ -661,5 +664,10 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } + clear_thread_flag(TIF_IRET); } diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index ca316b5..694aa88 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -15,17 +15,21 @@ #include <linux/errno.h> #include <linux/wait.h> #include <linux/ptrace.h> +#include <linux/tracehook.h> #include <linux/unistd.h> #include <linux/stddef.h> #include <linux/personality.h> #include <linux/compiler.h> +#include <linux/uaccess.h> + #include <asm/processor.h> #include <asm/ucontext.h> -#include <asm/uaccess.h> #include <asm/i387.h> #include <asm/proto.h> #include <asm/ia32_unistd.h> #include <asm/mce.h> +#include <asm/syscall.h> +#include <asm/syscalls.h> #include "sigframe.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -41,11 +45,6 @@ # define FIX_EFLAGS __FIX_EFLAGS #endif -int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs); -int ia32_setup_frame(int sig, struct k_sigaction *ka, - sigset_t *set, struct pt_regs * regs); - asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, struct pt_regs *regs) @@ -128,7 +127,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, /* Always make any pending restarted system calls return -EINTR */ current_thread_info()->restart_block.fn = do_no_restart_syscall; -#define COPY(x) err |= __get_user(regs->x, &sc->x) +#define COPY(x) (err |= __get_user(regs->x, &sc->x)) COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); @@ -158,7 +157,7 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, } { - struct _fpstate __user * buf; + struct _fpstate __user *buf; err |= __get_user(buf, &sc->fpstate); if (buf) { @@ -198,7 +197,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) current->blocked = set; recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); - + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) goto badframe; @@ -208,16 +207,17 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) return ax; badframe: - signal_fault(regs,frame,"sigreturn"); + signal_fault(regs, frame, "sigreturn"); return 0; -} +} /* * Set up a signal frame. */ static inline int -setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned long mask, struct task_struct *me) +setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, + unsigned long mask, struct task_struct *me) { int err = 0; @@ -273,35 +273,35 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) } static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs * regs) + sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - struct _fpstate __user *fp = NULL; + struct _fpstate __user *fp = NULL; int err = 0; struct task_struct *me = current; if (used_math()) { - fp = get_stack(ka, regs, sizeof(struct _fpstate)); + fp = get_stack(ka, regs, sizeof(struct _fpstate)); frame = (void __user *)round_down( (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) goto give_sigsegv; - if (save_i387(fp) < 0) - err |= -1; + if (save_i387(fp) < 0) + err |= -1; } else frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) goto give_sigsegv; - if (ka->sa.sa_flags & SA_SIGINFO) { + if (ka->sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, info); if (err) goto give_sigsegv; } - + /* Create the ucontext. */ err |= __put_user(0, &frame->uc.uc_flags); err |= __put_user(0, &frame->uc.uc_link); @@ -311,9 +311,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); - if (sizeof(*set) == 16) { + if (sizeof(*set) == 16) { __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); - __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); + __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); } else err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -324,7 +324,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); } else { /* could use a vstub here */ - goto give_sigsegv; + goto give_sigsegv; } if (err) @@ -332,7 +332,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, /* Set up registers for signal handler */ regs->di = sig; - /* In case the signal handler was declared without prototypes */ + /* In case the signal handler was declared without prototypes */ regs->ax = 0; /* This also works for non SA_SIGINFO handlers because they expect the @@ -355,37 +355,8 @@ give_sigsegv: } /* - * Return -1L or the syscall number that @regs is executing. - */ -static long current_syscall(struct pt_regs *regs) -{ - /* - * We always sign-extend a -1 value being set here, - * so this is always either -1L or a syscall number. - */ - return regs->orig_ax; -} - -/* - * Return a value that is -EFOO if the system call in @regs->orig_ax - * returned an error. This only works for @regs from @current. - */ -static long current_syscall_ret(struct pt_regs *regs) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_thread_flag(TIF_IA32)) - /* - * Sign-extend the value so (int)-EFOO becomes (long)-EFOO - * and will match correctly in comparisons. - */ - return (int) regs->ax; -#endif - return regs->ax; -} - -/* * OK, we're invoking a handler - */ + */ static int handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, @@ -394,9 +365,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, int ret; /* Are we from a system call? */ - if (current_syscall(regs) >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting.. */ - switch (current_syscall_ret(regs)) { + switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: regs->ax = -EINTR; @@ -429,7 +400,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs); else ret = ia32_setup_frame(sig, ka, oldset, regs); - } else + } else #endif ret = setup_rt_frame(sig, ka, info, oldset, regs); @@ -453,15 +424,16 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, * handler too. */ regs->flags &= ~X86_EFLAGS_TF; - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); spin_lock_irq(¤t->sighand->siglock); - sigorsets(¤t->blocked,¤t->blocked,&ka->sa.sa_mask); + sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(¤t->blocked,sig); + sigaddset(¤t->blocked, sig); recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); + + tracehook_signal_handler(sig, info, ka, regs, + test_thread_flag(TIF_SINGLESTEP)); } return ret; @@ -518,9 +490,9 @@ static void do_signal(struct pt_regs *regs) } /* Did we come from a system call? */ - if (current_syscall(regs) >= 0) { + if (syscall_get_nr(current, regs) >= 0) { /* Restart the system call - no handlers present */ - switch (current_syscall_ret(regs)) { + switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: @@ -558,17 +530,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused, /* deal with pending signal delivery */ if (thread_info_flags & _TIF_SIGPENDING) do_signal(regs); + + if (thread_info_flags & _TIF_NOTIFY_RESUME) { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } } void signal_fault(struct pt_regs *regs, void __user *frame, char *where) -{ - struct task_struct *me = current; +{ + struct task_struct *me = current; if (show_unhandled_signals && printk_ratelimit()) { printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", - me->comm,me->pid,where,frame,regs->ip,regs->sp,regs->orig_ax); + me->comm, me->pid, where, frame, regs->ip, + regs->sp, regs->orig_ax); print_vma_addr(" in ", regs->ip); printk("\n"); } - force_sig(SIGSEGV, me); -} + force_sig(SIGSEGV, me); +} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7985c5b..45531e3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -88,7 +88,7 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) #else -struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; +static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define get_idle_for_cpu(x) (idle_thread_array[(x)]) #define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p)) #endif @@ -129,7 +129,7 @@ static int boot_cpu_logical_apicid; static cpumask_t cpu_sibling_setup_map; /* Set if we find a B stepping CPU */ -int __cpuinitdata smp_b_stepping; +static int __cpuinitdata smp_b_stepping; #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) @@ -1313,16 +1313,13 @@ __init void prefill_possible_map(void) if (!num_processors) num_processors = 1; -#ifdef CONFIG_HOTPLUG_CPU if (additional_cpus == -1) { if (disabled_cpus > 0) additional_cpus = disabled_cpus; else additional_cpus = 0; } -#else - additional_cpus = 0; -#endif + possible = num_processors + additional_cpus; if (possible > NR_CPUS) possible = NR_CPUS; diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 7066cb8..1884a8d 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -22,6 +22,8 @@ #include <linux/uaccess.h> #include <linux/unistd.h> +#include <asm/syscalls.h> + asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 3b360ef..6bc211a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -13,15 +13,17 @@ #include <linux/utsname.h> #include <linux/personality.h> #include <linux/random.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> #include <asm/ia32.h> +#include <asm/syscalls.h> -asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long off) +asmlinkage long sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long off) { long error; - struct file * file; + struct file *file; error = -EINVAL; if (off & ~PAGE_MASK) @@ -56,9 +58,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, unmapped base down for this case. This can give conflicts with the heap, but we assume that glibc malloc knows how to fall back to mmap. Give it 1GB - of playground for now. -AK */ - *begin = 0x40000000; - *end = 0x80000000; + of playground for now. -AK */ + *begin = 0x40000000; + *end = 0x80000000; if (current->flags & PF_RANDOMIZE) { new_begin = randomize_range(*begin, *begin + 0x02000000, 0); if (new_begin) @@ -66,9 +68,9 @@ static void find_start_end(unsigned long flags, unsigned long *begin, } } else { *begin = TASK_UNMAPPED_BASE; - *end = TASK_SIZE; + *end = TASK_SIZE; } -} +} unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, @@ -78,11 +80,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; unsigned long start_addr; unsigned long begin, end; - + if (flags & MAP_FIXED) return addr; - find_start_end(flags, &begin, &end); + find_start_end(flags, &begin, &end); if (len > end) return -ENOMEM; @@ -96,12 +98,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, } if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) && len <= mm->cached_hole_size) { - mm->cached_hole_size = 0; + mm->cached_hole_size = 0; mm->free_area_cache = begin; } addr = mm->free_area_cache; - if (addr < begin) - addr = begin; + if (addr < begin) + addr = begin; start_addr = addr; full_search: @@ -127,7 +129,7 @@ full_search: return addr; } if (addr + mm->cached_hole_size < vma->vm_start) - mm->cached_hole_size = vma->vm_start - addr; + mm->cached_hole_size = vma->vm_start - addr; addr = vma->vm_end; } @@ -177,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr-len); if (!vma || addr <= vma->vm_start) /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr-len); + return mm->free_area_cache = addr-len; } if (mm->mmap_base < len) @@ -194,7 +196,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, vma = find_vma(mm, addr); if (!vma || addr+len <= vma->vm_start) /* remember the address as a hint for next time */ - return (mm->free_area_cache = addr); + return mm->free_area_cache = addr; /* remember the largest hole we saw so far */ if (addr + mm->cached_hole_size < vma->vm_start) @@ -224,13 +226,13 @@ bottomup: } -asmlinkage long sys_uname(struct new_utsname __user * name) +asmlinkage long sys_uname(struct new_utsname __user *name) { int err; down_read(&uts_sem); - err = copy_to_user(name, utsname(), sizeof (*name)); + err = copy_to_user(name, utsname(), sizeof(*name)); up_read(&uts_sem); - if (personality(current->personality) == PER_LINUX32) - err |= copy_to_user(&name->machine, "i686", 5); + if (personality(current->personality) == PER_LINUX32) + err |= copy_to_user(&name->machine, "i686", 5); return err ? -EFAULT : 0; } diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c index 170d43c..3d1be4f 100644 --- a/arch/x86/kernel/syscall_64.c +++ b/arch/x86/kernel/syscall_64.c @@ -8,12 +8,12 @@ #define __NO_STUBS #define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H #include <asm/unistd_64.h> #undef __SYSCALL #define __SYSCALL(nr, sym) [nr] = sym, -#undef _ASM_X86_64_UNISTD_H_ +#undef ASM_X86__UNISTD_64_H typedef void (*sys_call_ptr_t)(void); diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index ffe3c66..bbecf8b 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -36,6 +36,7 @@ #include <asm/arch_hooks.h> #include <asm/hpet.h> #include <asm/time.h> +#include <asm/timer.h> #include "do_timer.h" diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index ab6bf37..6bb7b85 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -10,6 +10,7 @@ #include <asm/ldt.h> #include <asm/processor.h> #include <asm/proto.h> +#include <asm/syscalls.h> #include "tls.h" diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 513caac..7a31f10 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -32,6 +32,8 @@ #include <linux/bug.h> #include <linux/nmi.h> #include <linux/mm.h> +#include <linux/smp.h> +#include <linux/io.h> #if defined(CONFIG_EDAC) #include <linux/edac.h> @@ -45,9 +47,6 @@ #include <asm/unwind.h> #include <asm/desc.h> #include <asm/i387.h> -#include <asm/nmi.h> -#include <asm/smp.h> -#include <asm/io.h> #include <asm/pgalloc.h> #include <asm/proto.h> #include <asm/pda.h> @@ -85,7 +84,8 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) void printk_address(unsigned long address, int reliable) { - printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); + printk(" [<%016lx>] %s%pS\n", + address, reliable ? "" : "? ", (void *) address); } static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, @@ -98,7 +98,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, [STACKFAULT_STACK - 1] = "#SS", [MCE_STACK - 1] = "#MC", #if DEBUG_STKSZ > EXCEPTION_STKSZ - [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" + [N_EXCEPTION_STACKS ... + N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" #endif }; unsigned k; @@ -163,7 +164,7 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, } /* - * x86-64 can have up to three kernel stacks: + * x86-64 can have up to three kernel stacks: * process stack * interrupt stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack @@ -219,7 +220,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); - unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; + unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; unsigned used = 0; struct thread_info *tinfo; @@ -237,7 +238,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, if (!bp) { if (task == current) { /* Grab bp right from our regs */ - asm("movq %%rbp, %0" : "=r" (bp) :); + asm("movq %%rbp, %0" : "=r" (bp) : ); } else { /* bp is the last reg pushed by switch_to */ bp = *(unsigned long *) task->thread.sp; @@ -339,9 +340,8 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, char *log_lvl) { - printk("\nCall Trace:\n"); + printk("Call Trace:\n"); dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); - printk("\n"); } void show_trace(struct task_struct *task, struct pt_regs *regs, @@ -357,11 +357,15 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, unsigned long *stack; int i; const int cpu = smp_processor_id(); - unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); - unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); + unsigned long *irqstack_end = + (unsigned long *) (cpu_pda(cpu)->irqstackptr); + unsigned long *irqstack = + (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); - // debugging aid: "show_stack(NULL, NULL);" prints the - // back trace for this cpu. + /* + * debugging aid: "show_stack(NULL, NULL);" prints the + * back trace for this cpu. + */ if (sp == NULL) { if (task) @@ -386,6 +390,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, printk(" %016lx", *stack++); touch_nmi_watchdog(); } + printk("\n"); show_trace_log_lvl(task, regs, sp, bp, log_lvl); } @@ -404,7 +409,7 @@ void dump_stack(void) #ifdef CONFIG_FRAME_POINTER if (!bp) - asm("movq %%rbp, %0" : "=r" (bp):); + asm("movq %%rbp, %0" : "=r" (bp) : ); #endif printk("Pid: %d, comm: %.20s %s %s %.*s\n", @@ -414,7 +419,6 @@ void dump_stack(void) init_utsname()->version); show_trace(NULL, NULL, &stack, bp); } - EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) @@ -443,7 +447,6 @@ void show_registers(struct pt_regs *regs) printk("Stack: "); show_stack_log_lvl(NULL, regs, (unsigned long *)sp, regs->bp, ""); - printk("\n"); printk(KERN_EMERG "Code: "); @@ -493,7 +496,7 @@ unsigned __kprobes long oops_begin(void) raw_local_irq_save(flags); cpu = smp_processor_id(); if (!__raw_spin_trylock(&die_lock)) { - if (cpu == die_owner) + if (cpu == die_owner) /* nested oops. should stop eventually */; else __raw_spin_lock(&die_lock); @@ -638,7 +641,7 @@ kernel_trap: } #define DO_ERROR(trapnr, signr, str, name) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ @@ -648,7 +651,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ } #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ -asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ +asmlinkage void do_##name(struct pt_regs *regs, long error_code) \ { \ siginfo_t info; \ info.si_signo = signr; \ @@ -683,7 +686,7 @@ asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code) preempt_conditional_cli(regs); } -asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) +asmlinkage void do_double_fault(struct pt_regs *regs, long error_code) { static const char str[] = "double fault"; struct task_struct *tsk = current; @@ -778,9 +781,10 @@ io_check_error(unsigned char reason, struct pt_regs *regs) } static notrace __kprobes void -unknown_nmi_error(unsigned char reason, struct pt_regs * regs) +unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { - if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == + NOTIFY_STOP) return; printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", reason); @@ -882,7 +886,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) else if (user_mode(eregs)) regs = task_pt_regs(current); /* Exception from kernel and interrupts are enabled. Move to - kernel process stack. */ + kernel process stack. */ else if (eregs->flags & X86_EFLAGS_IF) regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); if (eregs != regs) @@ -891,7 +895,7 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) } /* runs on IST stack. */ -asmlinkage void __kprobes do_debug(struct pt_regs * regs, +asmlinkage void __kprobes do_debug(struct pt_regs *regs, unsigned long error_code) { struct task_struct *tsk = current; @@ -1035,7 +1039,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) asmlinkage void bad_intr(void) { - printk("bad interrupt"); + printk("bad interrupt"); } asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) @@ -1047,7 +1051,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) conditional_sti(regs); if (!user_mode(regs) && - kernel_math_error(regs, "kernel simd math error", 19)) + kernel_math_error(regs, "kernel simd math error", 19)) return; /* @@ -1092,7 +1096,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) force_sig_info(SIGFPE, &info, task); } -asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs) +asmlinkage void do_spurious_interrupt_bug(struct pt_regs *regs) { } @@ -1149,8 +1153,10 @@ void __init trap_init(void) set_intr_gate(0, ÷_error); set_intr_gate_ist(1, &debug, DEBUG_STACK); set_intr_gate_ist(2, &nmi, NMI_STACK); - set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ - set_system_gate(4, &overflow); /* int4 can be called from all */ + /* int3 can be called from all */ + set_system_gate_ist(3, &int3, DEBUG_STACK); + /* int4 can be called from all */ + set_system_gate(4, &overflow); set_intr_gate(5, &bounds); set_intr_gate(6, &invalid_op); set_intr_gate(7, &device_not_available); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 8f98e9d..161bb85 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); /* * Read TSC and the reference counters. Take care of SMI disturbance */ -static u64 tsc_read_refs(u64 *pm, u64 *hpet) +static u64 tsc_read_refs(u64 *p, int hpet) { u64 t1, t2; int i; @@ -112,9 +112,9 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) for (i = 0; i < MAX_RETRIES; i++) { t1 = get_cycles(); if (hpet) - *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; + *p = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; else - *pm = acpi_pm_read_early(); + *p = acpi_pm_read_early(); t2 = get_cycles(); if ((t2 - t1) < SMI_TRESHOLD) return t2; @@ -123,13 +123,59 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet) } /* + * Calculate the TSC frequency from HPET reference + */ +static unsigned long calc_hpet_ref(u64 deltatsc, u64 hpet1, u64 hpet2) +{ + u64 tmp; + + if (hpet2 < hpet1) + hpet2 += 0x100000000ULL; + hpet2 -= hpet1; + tmp = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); + do_div(tmp, 1000000); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + +/* + * Calculate the TSC frequency from PMTimer reference + */ +static unsigned long calc_pmtimer_ref(u64 deltatsc, u64 pm1, u64 pm2) +{ + u64 tmp; + + if (!pm1 && !pm2) + return ULONG_MAX; + + if (pm2 < pm1) + pm2 += (u64)ACPI_PM_OVRRUN; + pm2 -= pm1; + tmp = pm2 * 1000000000LL; + do_div(tmp, PMTMR_TICKS_PER_SEC); + do_div(deltatsc, tmp); + + return (unsigned long) deltatsc; +} + +#define CAL_MS 10 +#define CAL_LATCH (CLOCK_TICK_RATE / (1000 / CAL_MS)) +#define CAL_PIT_LOOPS 1000 + +#define CAL2_MS 50 +#define CAL2_LATCH (CLOCK_TICK_RATE / (1000 / CAL2_MS)) +#define CAL2_PIT_LOOPS 5000 + + +/* * Try to calibrate the TSC against the Programmable * Interrupt Timer and return the frequency of the TSC * in kHz. * * Return ULONG_MAX on failure to calibrate. */ -static unsigned long pit_calibrate_tsc(void) +static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin) { u64 tsc, t1, t2, delta; unsigned long tscmin, tscmax; @@ -144,8 +190,8 @@ static unsigned long pit_calibrate_tsc(void) * (LSB then MSB) to begin countdown. */ outb(0xb0, 0x43); - outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); - outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); + outb(latch & 0xff, 0x42); + outb(latch >> 8, 0x42); tsc = t1 = t2 = get_cycles(); @@ -166,31 +212,154 @@ static unsigned long pit_calibrate_tsc(void) /* * Sanity checks: * - * If we were not able to read the PIT more than 5000 + * If we were not able to read the PIT more than loopmin * times, then we have been hit by a massive SMI * * If the maximum is 10 times larger than the minimum, * then we got hit by an SMI as well. */ - if (pitcnt < 5000 || tscmax > 10 * tscmin) + if (pitcnt < loopmin || tscmax > 10 * tscmin) return ULONG_MAX; /* Calculate the PIT value */ delta = t2 - t1; - do_div(delta, 50); + do_div(delta, ms); return delta; } +/* + * This reads the current MSB of the PIT counter, and + * checks if we are running on sufficiently fast and + * non-virtualized hardware. + * + * Our expectations are: + * + * - the PIT is running at roughly 1.19MHz + * + * - each IO is going to take about 1us on real hardware, + * but we allow it to be much faster (by a factor of 10) or + * _slightly_ slower (ie we allow up to a 2us read+counter + * update - anything else implies a unacceptably slow CPU + * or PIT for the fast calibration to work. + * + * - with 256 PIT ticks to read the value, we have 214us to + * see the same MSB (and overhead like doing a single TSC + * read per MSB value etc). + * + * - We're doing 2 reads per loop (LSB, MSB), and we expect + * them each to take about a microsecond on real hardware. + * So we expect a count value of around 100. But we'll be + * generous, and accept anything over 50. + * + * - if the PIT is stuck, and we see *many* more reads, we + * return early (and the next caller of pit_expect_msb() + * then consider it a failure when they don't see the + * next expected value). + * + * These expectations mean that we know that we have seen the + * transition from one expected value to another with a fairly + * high accuracy, and we didn't miss any events. We can thus + * use the TSC value at the transitions to calculate a pretty + * good value for the TSC frequencty. + */ +static inline int pit_expect_msb(unsigned char val) +{ + int count = 0; + + for (count = 0; count < 50000; count++) { + /* Ignore LSB */ + inb(0x42); + if (inb(0x42) != val) + break; + } + return count > 50; +} + +/* + * How many MSB values do we want to see? We aim for a + * 15ms calibration, which assuming a 2us counter read + * error should give us roughly 150 ppm precision for + * the calibration. + */ +#define QUICK_PIT_MS 15 +#define QUICK_PIT_ITERATIONS (QUICK_PIT_MS * PIT_TICK_RATE / 1000 / 256) + +static unsigned long quick_pit_calibrate(void) +{ + /* Set the Gate high, disable speaker */ + outb((inb(0x61) & ~0x02) | 0x01, 0x61); + + /* + * Counter 2, mode 0 (one-shot), binary count + * + * NOTE! Mode 2 decrements by two (and then the + * output is flipped each time, giving the same + * final output frequency as a decrement-by-one), + * so mode 0 is much better when looking at the + * individual counts. + */ + outb(0xb0, 0x43); + + /* Start at 0xffff */ + outb(0xff, 0x42); + outb(0xff, 0x42); + + if (pit_expect_msb(0xff)) { + int i; + u64 t1, t2, delta; + unsigned char expect = 0xfe; + + t1 = get_cycles(); + for (i = 0; i < QUICK_PIT_ITERATIONS; i++, expect--) { + if (!pit_expect_msb(expect)) + goto failed; + } + t2 = get_cycles(); + + /* + * Make sure we can rely on the second TSC timestamp: + */ + if (!pit_expect_msb(expect)) + goto failed; + + /* + * Ok, if we get here, then we've seen the + * MSB of the PIT decrement QUICK_PIT_ITERATIONS + * times, and each MSB had many hits, so we never + * had any sudden jumps. + * + * As a result, we can depend on there not being + * any odd delays anywhere, and the TSC reads are + * reliable. + * + * kHz = ticks / time-in-seconds / 1000; + * kHz = (t2 - t1) / (QPI * 256 / PIT_TICK_RATE) / 1000 + * kHz = ((t2 - t1) * PIT_TICK_RATE) / (QPI * 256 * 1000) + */ + delta = (t2 - t1)*PIT_TICK_RATE; + do_div(delta, QUICK_PIT_ITERATIONS*256*1000); + printk("Fast TSC calibration using PIT\n"); + return delta; + } +failed: + return 0; +} /** * native_calibrate_tsc - calibrate the tsc on boot */ unsigned long native_calibrate_tsc(void) { - u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; + u64 tsc1, tsc2, delta, ref1, ref2; unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; - unsigned long flags; - int hpet = is_hpet_enabled(), i; + unsigned long flags, latch, ms, fast_calibrate; + int hpet = is_hpet_enabled(), i, loopmin; + + local_irq_save(flags); + fast_calibrate = quick_pit_calibrate(); + local_irq_restore(flags); + if (fast_calibrate) + return fast_calibrate; /* * Run 5 calibration loops to get the lowest frequency value @@ -216,7 +385,13 @@ unsigned long native_calibrate_tsc(void) * calibration delay loop as we have to wait for a certain * amount of time anyway. */ - for (i = 0; i < 5; i++) { + + /* Preset PIT loop values */ + latch = CAL_LATCH; + ms = CAL_MS; + loopmin = CAL_PIT_LOOPS; + + for (i = 0; i < 3; i++) { unsigned long tsc_pit_khz; /* @@ -226,16 +401,16 @@ unsigned long native_calibrate_tsc(void) * read the end value. */ local_irq_save(flags); - tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); - tsc_pit_khz = pit_calibrate_tsc(); - tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); + tsc1 = tsc_read_refs(&ref1, hpet); + tsc_pit_khz = pit_calibrate_tsc(latch, ms, loopmin); + tsc2 = tsc_read_refs(&ref2, hpet); local_irq_restore(flags); /* Pick the lowest PIT TSC calibration so far */ tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); /* hpet or pmtimer available ? */ - if (!hpet && !pm1 && !pm2) + if (!hpet && !ref1 && !ref2) continue; /* Check, whether the sampling was disturbed by an SMI */ @@ -243,23 +418,41 @@ unsigned long native_calibrate_tsc(void) continue; tsc2 = (tsc2 - tsc1) * 1000000LL; + if (hpet) + tsc2 = calc_hpet_ref(tsc2, ref1, ref2); + else + tsc2 = calc_pmtimer_ref(tsc2, ref1, ref2); - if (hpet) { - if (hpet2 < hpet1) - hpet2 += 0x100000000ULL; - hpet2 -= hpet1; - tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); - do_div(tsc1, 1000000); - } else { - if (pm2 < pm1) - pm2 += (u64)ACPI_PM_OVRRUN; - pm2 -= pm1; - tsc1 = pm2 * 1000000000LL; - do_div(tsc1, PMTMR_TICKS_PER_SEC); + tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + + /* Check the reference deviation */ + delta = ((u64) tsc_pit_min) * 100; + do_div(delta, tsc_ref_min); + + /* + * If both calibration results are inside a 10% window + * then we can be sure, that the calibration + * succeeded. We break out of the loop right away. We + * use the reference value, as it is more precise. + */ + if (delta >= 90 && delta <= 110) { + printk(KERN_INFO + "TSC: PIT calibration matches %s. %d loops\n", + hpet ? "HPET" : "PMTIMER", i + 1); + return tsc_ref_min; } - do_div(tsc2, tsc1); - tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); + /* + * Check whether PIT failed more than once. This + * happens in virtualized environments. We need to + * give the virtual PC a slightly longer timeframe for + * the HPET/PMTIMER to make the result precise. + */ + if (i == 1 && tsc_pit_min == ULONG_MAX) { + latch = CAL2_LATCH; + ms = CAL2_MS; + loopmin = CAL2_PIT_LOOPS; + } } /* @@ -270,7 +463,7 @@ unsigned long native_calibrate_tsc(void) printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); /* We don't have an alternative source, disable TSC */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk("TSC: No reference (HPET/PMTIMER) available\n"); return 0; } @@ -278,7 +471,7 @@ unsigned long native_calibrate_tsc(void) /* The alternative source failed as well, disable TSC */ if (tsc_ref_min == ULONG_MAX) { printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " - "failed due to SMI disturbance.\n"); + "failed.\n"); return 0; } @@ -290,44 +483,25 @@ unsigned long native_calibrate_tsc(void) } /* We don't have an alternative source, use the PIT calibration value */ - if (!hpet && !pm1 && !pm2) { + if (!hpet && !ref1 && !ref2) { printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } /* The alternative source failed, use the PIT calibration value */ if (tsc_ref_min == ULONG_MAX) { - printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " - "to SMI disturbance. Using PIT calibration\n"); + printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed. " + "Using PIT calibration\n"); return tsc_pit_min; } - /* Check the reference deviation */ - delta = ((u64) tsc_pit_min) * 100; - do_div(delta, tsc_ref_min); - - /* - * If both calibration results are inside a 5% window, the we - * use the lower frequency of those as it is probably the - * closest estimate. - */ - if (delta >= 95 && delta <= 105) { - printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", - hpet ? "HPET" : "PMTIMER"); - printk(KERN_INFO "TSC: using %s calibration value\n", - tsc_pit_min <= tsc_ref_min ? "PIT" : - hpet ? "HPET" : "PMTIMER"); - return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; - } - - printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", - hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); - /* * The calibration values differ too much. In doubt, we use * the PIT value as we know that there are PMTIMERs around - * running at double speed. + * running at double speed. At least we let the user know: */ + printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", + hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); printk(KERN_INFO "TSC: Using PIT calibration value\n"); return tsc_pit_min; } diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 594ef47..61a97e6 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -25,45 +25,31 @@ #include <asm/visws/cobalt.h> #include <asm/visws/piix4.h> #include <asm/arch_hooks.h> +#include <asm/io_apic.h> #include <asm/fixmap.h> #include <asm/reboot.h> #include <asm/setup.h> #include <asm/e820.h> -#include <asm/smp.h> #include <asm/io.h> #include <mach_ipi.h> #include "mach_apic.h" -#include <linux/init.h> -#include <linux/smp.h> - #include <linux/kernel_stat.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <asm/io.h> -#include <asm/apic.h> #include <asm/i8259.h> #include <asm/irq_vectors.h> -#include <asm/visws/cobalt.h> #include <asm/visws/lithium.h> -#include <asm/visws/piix4.h> #include <linux/sched.h> #include <linux/kernel.h> -#include <linux/init.h> #include <linux/pci.h> #include <linux/pci_ids.h> extern int no_broadcast; -#include <asm/io.h> #include <asm/apic.h> -#include <asm/arch_hooks.h> -#include <asm/visws/cobalt.h> -#include <asm/visws/lithium.h> char visws_board_type = -1; char visws_board_rev = -1; diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 38f566f..4eeb5cf 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -46,6 +46,7 @@ #include <asm/io.h> #include <asm/tlbflush.h> #include <asm/irq.h> +#include <asm/syscalls.h> /* * Known problems: diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index edfb09f..8c9ad02 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -393,13 +393,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) } #endif -static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) { vmi_set_page_type(pfn, VMI_PAGE_L1); vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); } -static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) +static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) { /* * This call comes in very early, before mem_map is setup. @@ -410,20 +410,20 @@ static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn) vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); } -static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) +static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) { vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); vmi_check_page_type(clonepfn, VMI_PAGE_L2); vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); } -static void vmi_release_pte(u32 pfn) +static void vmi_release_pte(unsigned long pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L1); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); } -static void vmi_release_pmd(u32 pfn) +static void vmi_release_pmd(unsigned long pfn) { vmi_ops.release_page(pfn, VMI_PAGE_L2); vmi_set_page_type(pfn, VMI_PAGE_NORMAL); diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c index 01b868b..321cf72 100644 --- a/arch/x86/lib/msr-on-cpu.c +++ b/arch/x86/lib/msr-on-cpu.c @@ -16,37 +16,46 @@ static void __rdmsr_on_cpu(void *info) rdmsr(rv->msr_no, rv->l, rv->h); } -static void __rdmsr_safe_on_cpu(void *info) +static void __wrmsr_on_cpu(void *info) { struct msr_info *rv = info; - rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h); + wrmsr(rv->msr_no, rv->l, rv->h); } -static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) +int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { - int err = 0; + int err; struct msr_info rv; rv.msr_no = msr_no; - if (safe) { - err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, - &rv, 1); - err = err ? err : rv.err; - } else { - err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); - } + err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); *l = rv.l; *h = rv.h; return err; } -static void __wrmsr_on_cpu(void *info) +int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) +{ + int err; + struct msr_info rv; + + rv.msr_no = msr_no; + rv.l = l; + rv.h = h; + err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); + + return err; +} + +/* These "safe" variants are slower and should be used when the target MSR + may not actually exist. */ +static void __rdmsr_safe_on_cpu(void *info) { struct msr_info *rv = info; - wrmsr(rv->msr_no, rv->l, rv->h); + rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h); } static void __wrmsr_safe_on_cpu(void *info) @@ -56,45 +65,30 @@ static void __wrmsr_safe_on_cpu(void *info) rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h); } -static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) +int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) { - int err = 0; + int err; struct msr_info rv; rv.msr_no = msr_no; - rv.l = l; - rv.h = h; - if (safe) { - err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, - &rv, 1); - err = err ? err : rv.err; - } else { - err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); - } - - return err; -} + err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); + *l = rv.l; + *h = rv.h; -int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) -{ - return _wrmsr_on_cpu(cpu, msr_no, l, h, 0); + return err ? err : rv.err; } -int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - return _rdmsr_on_cpu(cpu, msr_no, l, h, 0); -} - -/* These "safe" variants are slower and should be used when the target MSR - may not actually exist. */ int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) { - return _wrmsr_on_cpu(cpu, msr_no, l, h, 1); -} + int err; + struct msr_info rv; -int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) -{ - return _rdmsr_on_cpu(cpu, msr_no, l, h, 1); + rv.msr_no = msr_no; + rv.l = l; + rv.h = h; + err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); + + return err ? err : rv.err; } EXPORT_SYMBOL(rdmsr_on_cpu); diff --git a/arch/x86/lib/string_32.c b/arch/x86/lib/string_32.c index 94972e7..82004d2 100644 --- a/arch/x86/lib/string_32.c +++ b/arch/x86/lib/string_32.c @@ -22,7 +22,7 @@ char *strcpy(char *dest, const char *src) "testb %%al,%%al\n\t" "jne 1b" : "=&S" (d0), "=&D" (d1), "=&a" (d2) - :"0" (src), "1" (dest) : "memory"); + : "0" (src), "1" (dest) : "memory"); return dest; } EXPORT_SYMBOL(strcpy); @@ -42,7 +42,7 @@ char *strncpy(char *dest, const char *src, size_t count) "stosb\n" "2:" : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3) - :"0" (src), "1" (dest), "2" (count) : "memory"); + : "0" (src), "1" (dest), "2" (count) : "memory"); return dest; } EXPORT_SYMBOL(strncpy); @@ -60,7 +60,7 @@ char *strcat(char *dest, const char *src) "testb %%al,%%al\n\t" "jne 1b" : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3) - : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory"); + : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu) : "memory"); return dest; } EXPORT_SYMBOL(strcat); @@ -105,9 +105,9 @@ int strcmp(const char *cs, const char *ct) "2:\tsbbl %%eax,%%eax\n\t" "orb $1,%%al\n" "3:" - :"=a" (res), "=&S" (d0), "=&D" (d1) - :"1" (cs), "2" (ct) - :"memory"); + : "=a" (res), "=&S" (d0), "=&D" (d1) + : "1" (cs), "2" (ct) + : "memory"); return res; } EXPORT_SYMBOL(strcmp); @@ -130,9 +130,9 @@ int strncmp(const char *cs, const char *ct, size_t count) "3:\tsbbl %%eax,%%eax\n\t" "orb $1,%%al\n" "4:" - :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) - :"1" (cs), "2" (ct), "3" (count) - :"memory"); + : "=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2) + : "1" (cs), "2" (ct), "3" (count) + : "memory"); return res; } EXPORT_SYMBOL(strncmp); @@ -152,9 +152,9 @@ char *strchr(const char *s, int c) "movl $1,%1\n" "2:\tmovl %1,%0\n\t" "decl %0" - :"=a" (res), "=&S" (d0) - :"1" (s), "0" (c) - :"memory"); + : "=a" (res), "=&S" (d0) + : "1" (s), "0" (c) + : "memory"); return res; } EXPORT_SYMBOL(strchr); @@ -169,9 +169,9 @@ size_t strlen(const char *s) "scasb\n\t" "notl %0\n\t" "decl %0" - :"=c" (res), "=&D" (d0) - :"1" (s), "a" (0), "0" (0xffffffffu) - :"memory"); + : "=c" (res), "=&D" (d0) + : "1" (s), "a" (0), "0" (0xffffffffu) + : "memory"); return res; } EXPORT_SYMBOL(strlen); @@ -189,9 +189,9 @@ void *memchr(const void *cs, int c, size_t count) "je 1f\n\t" "movl $1,%0\n" "1:\tdecl %0" - :"=D" (res), "=&c" (d0) - :"a" (c), "0" (cs), "1" (count) - :"memory"); + : "=D" (res), "=&c" (d0) + : "a" (c), "0" (cs), "1" (count) + : "memory"); return res; } EXPORT_SYMBOL(memchr); @@ -228,9 +228,9 @@ size_t strnlen(const char *s, size_t count) "cmpl $-1,%1\n\t" "jne 1b\n" "3:\tsubl %2,%0" - :"=a" (res), "=&d" (d0) - :"c" (s), "1" (count) - :"memory"); + : "=a" (res), "=&d" (d0) + : "c" (s), "1" (count) + : "memory"); return res; } EXPORT_SYMBOL(strnlen); diff --git a/arch/x86/lib/strstr_32.c b/arch/x86/lib/strstr_32.c index 42e8a50..8e2d55f 100644 --- a/arch/x86/lib/strstr_32.c +++ b/arch/x86/lib/strstr_32.c @@ -23,9 +23,9 @@ __asm__ __volatile__( "jne 1b\n\t" "xorl %%eax,%%eax\n\t" "2:" - :"=a" (__res), "=&c" (d0), "=&S" (d1) - :"0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) - :"dx", "di"); + : "=a" (__res), "=&c" (d0), "=&S" (d1) + : "0" (0), "1" (0xffffffff), "2" (cs), "g" (ct) + : "dx", "di"); return __res; } diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 3d31783..3f2cf11 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c @@ -10,13 +10,15 @@ #include <asm/e820.h> #include <asm/setup.h> +#include <mach_ipi.h> + #ifdef CONFIG_HOTPLUG_CPU #define DEFAULT_SEND_IPI (1) #else #define DEFAULT_SEND_IPI (0) #endif -int no_broadcast=DEFAULT_SEND_IPI; +int no_broadcast = DEFAULT_SEND_IPI; /** * pre_intr_init_hook - initialisation prior to setting up interrupt vectors diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 62fa440..847c164 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -328,7 +328,7 @@ void __init initmem_init(unsigned long start_pfn, get_memcfg_numa(); - kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE); + kva_pages = roundup(calculate_numa_remap_pages(), PTRS_PER_PTE); kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE); do { diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index a20d1fa..e7277cb 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, * we have now. "break" is either changing perms, levels or * address space marker. */ - prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK); - cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK); + prot = pgprot_val(new_prot) & PTE_FLAGS_MASK; + cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK; if (!st->level) { /* First entry */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 455f3fe..8f92cac 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -35,6 +35,7 @@ #include <asm/tlbflush.h> #include <asm/proto.h> #include <asm-generic/sections.h> +#include <asm/traps.h> /* * Page fault error code bits @@ -357,8 +358,6 @@ static int is_errata100(struct pt_regs *regs, unsigned long address) return 0; } -void do_invalid_op(struct pt_regs *, unsigned long); - static int is_f00f_bug(struct pt_regs *regs, unsigned long address) { #ifdef CONFIG_X86_F00F_BUG diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 60ec1d0..6b9a935 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -47,6 +47,7 @@ #include <asm/paravirt.h> #include <asm/setup.h> #include <asm/cacheflush.h> +#include <asm/smp.h> unsigned int __VMALLOC_RESERVE = 128 << 20; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index d3746ef..770536e 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -225,7 +225,7 @@ void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) void __init cleanup_highmap(void) { unsigned long vaddr = __START_KERNEL_map; - unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1; + unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1; pmd_t *pmd = level2_kernel_pgt; pmd_t *last_pmd = pmd + PTRS_PER_PMD; @@ -451,14 +451,14 @@ static void __init find_early_table_space(unsigned long end) unsigned long puds, pmds, ptes, tables, start; puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; - tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); + tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); if (direct_gbpages) { unsigned long extra; extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; } else pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; - tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); + tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); if (cpu_has_pse) { unsigned long extra; @@ -466,7 +466,7 @@ static void __init find_early_table_space(unsigned long end) ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; - tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); + tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); /* * RED-PEN putting page tables only on node 0 could diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index d4b6e6a..cac6da5 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -421,7 +421,7 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr) return; } -int __initdata early_ioremap_debug; +static int __initdata early_ioremap_debug; static int __init early_ioremap_debug_setup(char *str) { @@ -547,7 +547,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) } -int __initdata early_ioremap_nested; +static int __initdata early_ioremap_nested; static int __init check_early_ioremap_leak(void) { diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index a4dd793..cebcbf1 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -79,7 +79,7 @@ static int __init allocate_cachealigned_memnodemap(void) return 0; addr = 0x8000; - nodemap_size = round_up(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); + nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES); nodemap_addr = find_e820_area(addr, max_pfn<<PAGE_SHIFT, nodemap_size, L1_CACHE_BYTES); if (nodemap_addr == -1UL) { @@ -176,10 +176,10 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; unsigned long bootmap_start, nodedata_phys; void *bootmap; - const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); + const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); int nid; - start = round_up(start, ZONE_ALIGN); + start = roundup(start, ZONE_ALIGN); printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end); @@ -210,9 +210,9 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); nid = phys_to_nid(nodedata_phys); if (nid == nodeid) - bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); else - bootmap_start = round_up(start, PAGE_SIZE); + bootmap_start = roundup(start, PAGE_SIZE); /* * SMP_CACHE_BYTES could be enough, but init_bootmem_node like * to use that to align to PAGE_SIZE diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 43e2f84..898fad6 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -84,7 +84,7 @@ static inline unsigned long highmap_start_pfn(void) static inline unsigned long highmap_end_pfn(void) { - return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; + return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; } #endif @@ -906,11 +906,13 @@ int set_memory_ro(unsigned long addr, int numpages) { return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW)); } +EXPORT_SYMBOL_GPL(set_memory_ro); int set_memory_rw(unsigned long addr, int numpages) { return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW)); } +EXPORT_SYMBOL_GPL(set_memory_rw); int set_memory_np(unsigned long addr, int numpages) { diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index d503027..86f2ffc 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -63,10 +63,8 @@ static inline void pgd_list_del(pgd_t *pgd) #define UNSHARED_PTRS_PER_PGD \ (SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD) -static void pgd_ctor(void *p) +static void pgd_ctor(pgd_t *pgd) { - pgd_t *pgd = p; - /* If the pgd points to a shared pagetable level (either the ptes in non-PAE, or shared PMD in PAE), then just copy the references from swapper_pg_dir. */ @@ -87,7 +85,7 @@ static void pgd_ctor(void *p) pgd_list_add(pgd); } -static void pgd_dtor(void *pgd) +static void pgd_dtor(pgd_t *pgd) { unsigned long flags; /* can be called from interrupt context */ diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index cab0abb..0951db9 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -123,7 +123,8 @@ static int __init parse_vmalloc(char *arg) if (!arg) return -EINVAL; - __VMALLOC_RESERVE = memparse(arg, &arg); + /* Add VMALLOC_OFFSET to the parsed value due to vm area guard hole*/ + __VMALLOC_RESERVE = memparse(arg, &arg) + VMALLOC_OFFSET; return 0; } early_param("vmalloc", parse_vmalloc); diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 56b4757..43ac5af 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -10,11 +10,12 @@ #include <linux/oprofile.h> #include <linux/smp.h> +#include <linux/ptrace.h> +#include <linux/nmi.h> #include <asm/msr.h> -#include <asm/ptrace.h> #include <asm/fixmap.h> #include <asm/apic.h> -#include <asm/nmi.h> + #include "op_x86_model.h" #include "op_counter.h" @@ -40,7 +41,7 @@ static unsigned int num_controls = NUM_CONTROLS_NON_HT; static inline void setup_num_counters(void) { #ifdef CONFIG_SMP - if (smp_num_siblings == 2){ + if (smp_num_siblings == 2) { num_counters = NUM_COUNTERS_HT2; num_controls = NUM_CONTROLS_HT2; } @@ -86,7 +87,7 @@ struct p4_event_binding { #define CTR_FLAME_2 (1 << 6) #define CTR_IQ_5 (1 << 7) -static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { +static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = { { CTR_BPU_0, MSR_P4_BPU_PERFCTR0, MSR_P4_BPU_CCCR0 }, { CTR_MS_0, MSR_P4_MS_PERFCTR0, MSR_P4_MS_CCCR0 }, { CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 }, @@ -97,32 +98,32 @@ static struct p4_counter_binding p4_counters [NUM_COUNTERS_NON_HT] = { { CTR_IQ_5, MSR_P4_IQ_PERFCTR5, MSR_P4_IQ_CCCR5 } }; -#define NUM_UNUSED_CCCRS NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT +#define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT) /* p4 event codes in libop/op_event.h are indices into this table. */ static struct p4_event_binding p4_events[NUM_EVENTS] = { - + { /* BRANCH_RETIRED */ - 0x05, 0x06, + 0x05, 0x06, { {CTR_IQ_4, MSR_P4_CRU_ESCR2}, {CTR_IQ_5, MSR_P4_CRU_ESCR3} } }, - + { /* MISPRED_BRANCH_RETIRED */ - 0x04, 0x03, + 0x04, 0x03, { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, { CTR_IQ_5, MSR_P4_CRU_ESCR1} } }, - + { /* TC_DELIVER_MODE */ 0x01, 0x01, - { { CTR_MS_0, MSR_P4_TC_ESCR0}, + { { CTR_MS_0, MSR_P4_TC_ESCR0}, { CTR_MS_2, MSR_P4_TC_ESCR1} } }, - + { /* BPU_FETCH_REQUEST */ - 0x00, 0x03, + 0x00, 0x03, { { CTR_BPU_0, MSR_P4_BPU_ESCR0}, { CTR_BPU_2, MSR_P4_BPU_ESCR1} } }, @@ -146,7 +147,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { }, { /* LOAD_PORT_REPLAY */ - 0x02, 0x04, + 0x02, 0x04, { { CTR_FLAME_0, MSR_P4_SAAT_ESCR0}, { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} } }, @@ -170,43 +171,43 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { }, { /* BSQ_CACHE_REFERENCE */ - 0x07, 0x0c, + 0x07, 0x0c, { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, { CTR_BPU_2, MSR_P4_BSU_ESCR1} } }, { /* IOQ_ALLOCATION */ - 0x06, 0x03, + 0x06, 0x03, { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, { 0, 0 } } }, { /* IOQ_ACTIVE_ENTRIES */ - 0x06, 0x1a, + 0x06, 0x1a, { { CTR_BPU_2, MSR_P4_FSB_ESCR1}, { 0, 0 } } }, { /* FSB_DATA_ACTIVITY */ - 0x06, 0x17, + 0x06, 0x17, { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, { CTR_BPU_2, MSR_P4_FSB_ESCR1} } }, { /* BSQ_ALLOCATION */ - 0x07, 0x05, + 0x07, 0x05, { { CTR_BPU_0, MSR_P4_BSU_ESCR0}, { 0, 0 } } }, { /* BSQ_ACTIVE_ENTRIES */ 0x07, 0x06, - { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, + { { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */}, { 0, 0 } } }, { /* X87_ASSIST */ - 0x05, 0x03, + 0x05, 0x03, { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, { CTR_IQ_5, MSR_P4_CRU_ESCR3} } }, @@ -216,21 +217,21 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* PACKED_SP_UOP */ - 0x01, 0x08, + 0x01, 0x08, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* PACKED_DP_UOP */ - 0x01, 0x0c, + 0x01, 0x0c, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, { /* SCALAR_SP_UOP */ - 0x01, 0x0a, + 0x01, 0x0a, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, @@ -242,31 +243,31 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { }, { /* 64BIT_MMX_UOP */ - 0x01, 0x02, + 0x01, 0x02, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* 128BIT_MMX_UOP */ - 0x01, 0x1a, + 0x01, 0x1a, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, { /* X87_FP_UOP */ - 0x01, 0x04, + 0x01, 0x04, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* X87_SIMD_MOVES_UOP */ - 0x01, 0x2e, + 0x01, 0x2e, { { CTR_FLAME_0, MSR_P4_FIRM_ESCR0}, { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} } }, - + { /* MACHINE_CLEAR */ - 0x05, 0x02, + 0x05, 0x02, { { CTR_IQ_4, MSR_P4_CRU_ESCR2}, { CTR_IQ_5, MSR_P4_CRU_ESCR3} } }, @@ -276,9 +277,9 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { { { CTR_BPU_0, MSR_P4_FSB_ESCR0}, { CTR_BPU_2, MSR_P4_FSB_ESCR1} } }, - + { /* TC_MS_XFER */ - 0x00, 0x05, + 0x00, 0x05, { { CTR_MS_0, MSR_P4_MS_ESCR0}, { CTR_MS_2, MSR_P4_MS_ESCR1} } }, @@ -308,7 +309,7 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { }, { /* INSTR_RETIRED */ - 0x04, 0x02, + 0x04, 0x02, { { CTR_IQ_4, MSR_P4_CRU_ESCR0}, { CTR_IQ_5, MSR_P4_CRU_ESCR1} } }, @@ -319,14 +320,14 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { { CTR_IQ_5, MSR_P4_CRU_ESCR1} } }, - { /* UOP_TYPE */ - 0x02, 0x02, + { /* UOP_TYPE */ + 0x02, 0x02, { { CTR_IQ_4, MSR_P4_RAT_ESCR0}, { CTR_IQ_5, MSR_P4_RAT_ESCR1} } }, { /* RETIRED_MISPRED_BRANCH_TYPE */ - 0x02, 0x05, + 0x02, 0x05, { { CTR_MS_0, MSR_P4_TBPU_ESCR0}, { CTR_MS_2, MSR_P4_TBPU_ESCR1} } }, @@ -349,8 +350,8 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define ESCR_SET_OS_1(escr, os) ((escr) |= (((os) & 1) << 1)) #define ESCR_SET_EVENT_SELECT(escr, sel) ((escr) |= (((sel) & 0x3f) << 25)) #define ESCR_SET_EVENT_MASK(escr, mask) ((escr) |= (((mask) & 0xffff) << 9)) -#define ESCR_READ(escr,high,ev,i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) -#define ESCR_WRITE(escr,high,ev,i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high));} while (0) +#define ESCR_READ(escr, high, ev, i) do {rdmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) +#define ESCR_WRITE(escr, high, ev, i) do {wrmsr(ev->bindings[(i)].escr_address, (escr), (high)); } while (0) #define CCCR_RESERVED_BITS 0x38030FFF #define CCCR_CLEAR(cccr) ((cccr) &= CCCR_RESERVED_BITS) @@ -360,15 +361,15 @@ static struct p4_event_binding p4_events[NUM_EVENTS] = { #define CCCR_SET_PMI_OVF_1(cccr) ((cccr) |= (1<<27)) #define CCCR_SET_ENABLE(cccr) ((cccr) |= (1<<12)) #define CCCR_SET_DISABLE(cccr) ((cccr) &= ~(1<<12)) -#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) -#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high));} while (0) +#define CCCR_READ(low, high, i) do {rdmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) +#define CCCR_WRITE(low, high, i) do {wrmsr(p4_counters[(i)].cccr_address, (low), (high)); } while (0) #define CCCR_OVF_P(cccr) ((cccr) & (1U<<31)) #define CCCR_CLEAR_OVF(cccr) ((cccr) &= (~(1U<<31))) -#define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) -#define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l,h,i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h));} while (0) -#define CTR_WRITE(l,i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1);} while (0) +#define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) +#define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) +#define CTR_READ(l, h, i) do {rdmsr(p4_counters[(i)].counter_address, (l), (h)); } while (0) +#define CTR_WRITE(l, i) do {wrmsr(p4_counters[(i)].counter_address, -(u32)(l), -1); } while (0) #define CTR_OVERFLOW_P(ctr) (!((ctr) & 0x80000000)) @@ -380,7 +381,7 @@ static unsigned int get_stagger(void) #ifdef CONFIG_SMP int cpu = smp_processor_id(); return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); -#endif +#endif return 0; } @@ -395,25 +396,23 @@ static unsigned long reset_value[NUM_COUNTERS_NON_HT]; static void p4_fill_in_addresses(struct op_msrs * const msrs) { - unsigned int i; + unsigned int i; unsigned int addr, cccraddr, stag; setup_num_counters(); stag = get_stagger(); /* initialize some registers */ - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) msrs->counters[i].addr = 0; - } - for (i = 0; i < num_controls; ++i) { + for (i = 0; i < num_controls; ++i) msrs->controls[i].addr = 0; - } - + /* the counter & cccr registers we pay attention to */ for (i = 0; i < num_counters; ++i) { addr = p4_counters[VIRT_CTR(stag, i)].counter_address; cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address; - if (reserve_perfctr_nmi(addr)){ + if (reserve_perfctr_nmi(addr)) { msrs->counters[i].addr = addr; msrs->controls[i].addr = cccraddr; } @@ -447,22 +446,22 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) if (reserve_evntsel_nmi(addr)) msrs->controls[i].addr = addr; } - + for (addr = MSR_P4_MS_ESCR0 + stag; - addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { + addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) { if (reserve_evntsel_nmi(addr)) msrs->controls[i].addr = addr; } - + for (addr = MSR_P4_IX_ESCR0 + stag; - addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { + addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) { if (reserve_evntsel_nmi(addr)) msrs->controls[i].addr = addr; } /* there are 2 remaining non-contiguously located ESCRs */ - if (num_counters == NUM_COUNTERS_NON_HT) { + if (num_counters == NUM_COUNTERS_NON_HT) { /* standard non-HT CPUs handle both remaining ESCRs*/ if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) msrs->controls[i++].addr = MSR_P4_CRU_ESCR5; @@ -498,20 +497,20 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) unsigned int stag; stag = get_stagger(); - + /* convert from counter *number* to counter *bit* */ counter_bit = 1 << VIRT_CTR(stag, ctr); - + /* find our event binding structure. */ if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) { - printk(KERN_ERR - "oprofile: P4 event code 0x%lx out of range\n", + printk(KERN_ERR + "oprofile: P4 event code 0x%lx out of range\n", counter_config[ctr].event); return; } - + ev = &(p4_events[counter_config[ctr].event - 1]); - + for (i = 0; i < maxbind; i++) { if (ev->bindings[i].virt_counter & counter_bit) { @@ -526,25 +525,24 @@ static void pmc_setup_one_p4_counter(unsigned int ctr) ESCR_SET_OS_1(escr, counter_config[ctr].kernel); } ESCR_SET_EVENT_SELECT(escr, ev->event_select); - ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); + ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask); ESCR_WRITE(escr, high, ev, i); - + /* modify CCCR */ CCCR_READ(cccr, high, VIRT_CTR(stag, ctr)); CCCR_CLEAR(cccr); CCCR_SET_REQUIRED_BITS(cccr); CCCR_SET_ESCR_SELECT(cccr, ev->escr_select); - if (stag == 0) { + if (stag == 0) CCCR_SET_PMI_OVF_0(cccr); - } else { + else CCCR_SET_PMI_OVF_1(cccr); - } CCCR_WRITE(cccr, high, VIRT_CTR(stag, ctr)); return; } } - printk(KERN_ERR + printk(KERN_ERR "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n", counter_config[ctr].event, stag, ctr); } @@ -559,14 +557,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) stag = get_stagger(); rdmsr(MSR_IA32_MISC_ENABLE, low, high); - if (! MISC_PMC_ENABLED_P(low)) { + if (!MISC_PMC_ENABLED_P(low)) { printk(KERN_ERR "oprofile: P4 PMC not available\n"); return; } /* clear the cccrs we will use */ for (i = 0 ; i < num_counters ; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high); CCCR_CLEAR(low); @@ -576,14 +574,14 @@ static void p4_setup_ctrs(struct op_msrs const * const msrs) /* clear all escrs (including those outside our concern) */ for (i = num_counters; i < num_controls; i++) { - if (unlikely(!CTRL_IS_RESERVED(msrs,i))) + if (unlikely(!CTRL_IS_RESERVED(msrs, i))) continue; wrmsr(msrs->controls[i].addr, 0, 0); } /* setup all counters */ for (i = 0 ; i < num_counters ; ++i) { - if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs,i))) { + if ((counter_config[i].enabled) && (CTRL_IS_RESERVED(msrs, i))) { reset_value[i] = counter_config[i].count; pmc_setup_one_p4_counter(i); CTR_WRITE(counter_config[i].count, VIRT_CTR(stag, i)); @@ -603,11 +601,11 @@ static int p4_check_ctrs(struct pt_regs * const regs, stag = get_stagger(); for (i = 0; i < num_counters; ++i) { - - if (!reset_value[i]) + + if (!reset_value[i]) continue; - /* + /* * there is some eccentricity in the hardware which * requires that we perform 2 extra corrections: * @@ -616,24 +614,24 @@ static int p4_check_ctrs(struct pt_regs * const regs, * * - write the counter back twice to ensure it gets * updated properly. - * + * * the former seems to be related to extra NMIs happening * during the current NMI; the latter is reported as errata * N15 in intel doc 249199-029, pentium 4 specification * update, though their suggested work-around does not * appear to solve the problem. */ - + real = VIRT_CTR(stag, i); CCCR_READ(low, high, real); - CTR_READ(ctr, high, real); + CTR_READ(ctr, high, real); if (CCCR_OVF_P(low) || CTR_OVERFLOW_P(ctr)) { oprofile_add_sample(regs, i); - CTR_WRITE(reset_value[i], real); + CTR_WRITE(reset_value[i], real); CCCR_CLEAR_OVF(low); CCCR_WRITE(low, high, real); - CTR_WRITE(reset_value[i], real); + CTR_WRITE(reset_value[i], real); } } @@ -683,15 +681,16 @@ static void p4_shutdown(struct op_msrs const * const msrs) int i; for (i = 0 ; i < num_counters ; ++i) { - if (CTR_IS_RESERVED(msrs,i)) + if (CTR_IS_RESERVED(msrs, i)) release_perfctr_nmi(msrs->counters[i].addr); } - /* some of the control registers are specially reserved in + /* + * some of the control registers are specially reserved in * conjunction with the counter registers (hence the starting offset). * This saves a few bits. */ for (i = num_counters ; i < num_controls ; ++i) { - if (CTRL_IS_RESERVED(msrs,i)) + if (CTRL_IS_RESERVED(msrs, i)) release_evntsel_nmi(msrs->controls[i].addr); } } diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 6a0fca7..22e0576 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -580,7 +580,7 @@ static int __cpuinit amd_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { int cpu = (long)hcpu; - switch(action) { + switch (action) { case CPU_ONLINE: case CPU_ONLINE_FROZEN: smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0); diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 8e07718..006599d 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -1043,35 +1043,44 @@ static void __init pcibios_fixup_irqs(void) if (io_apic_assign_pci_irqs) { int irq; - if (pin) { - /* - * interrupt pins are numbered starting - * from 1 - */ - pin--; - irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, - PCI_SLOT(dev->devfn), pin); - /* - * Busses behind bridges are typically not listed in the MP-table. - * In this case we have to look up the IRQ based on the parent bus, - * parent slot, and pin number. The SMP code detects such bridged - * busses itself so we should get into this branch reliably. - */ - if (irq < 0 && dev->bus->parent) { /* go back to the bridge */ - struct pci_dev *bridge = dev->bus->self; - - pin = (pin + PCI_SLOT(dev->devfn)) % 4; - irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, - PCI_SLOT(bridge->devfn), pin); - if (irq >= 0) - dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", - pci_name(bridge), - 'A' + pin, irq); - } - if (irq >= 0) { - dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); - dev->irq = irq; - } + if (!pin) + continue; + + /* + * interrupt pins are numbered starting from 1 + */ + pin--; + irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, + PCI_SLOT(dev->devfn), pin); + /* + * Busses behind bridges are typically not listed in the + * MP-table. In this case we have to look up the IRQ + * based on the parent bus, parent slot, and pin number. + * The SMP code detects such bridged busses itself so we + * should get into this branch reliably. + */ + if (irq < 0 && dev->bus->parent) { + /* go back to the bridge */ + struct pci_dev *bridge = dev->bus->self; + int bus; + + pin = (pin + PCI_SLOT(dev->devfn)) % 4; + bus = bridge->bus->number; + irq = IO_APIC_get_PCI_irq_vector(bus, + PCI_SLOT(bridge->devfn), pin); + if (irq >= 0) + dev_warn(&dev->dev, + "using bridge %s INT %c to " + "get IRQ %d\n", + pci_name(bridge), + 'A' + pin, irq); + } + if (irq >= 0) { + dev_info(&dev->dev, + "PCI->APIC IRQ transform: INT %c " + "-> IRQ %d\n", + 'A' + pin, irq); + dev->irq = irq; } } #endif diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index 4fc7e87..d1e9b53 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S @@ -1,5 +1,3 @@ -.text - /* * This may not use any stack, nor any variable that is not "NoSave": * @@ -12,17 +10,18 @@ #include <asm/segment.h> #include <asm/page.h> #include <asm/asm-offsets.h> +#include <asm/processor-flags.h> - .text +.text ENTRY(swsusp_arch_suspend) - movl %esp, saved_context_esp movl %ebx, saved_context_ebx movl %ebp, saved_context_ebp movl %esi, saved_context_esi movl %edi, saved_context_edi - pushfl ; popl saved_context_eflags + pushfl + popl saved_context_eflags call swsusp_save ret @@ -59,7 +58,7 @@ done: movl mmu_cr4_features, %ecx jecxz 1f # cr4 Pentium and higher, skip if zero movl %ecx, %edx - andl $~(1<<7), %edx; # PGE + andl $~(X86_CR4_PGE), %edx movl %edx, %cr4; # turn off PGE 1: movl %cr3, %eax; # flush TLB @@ -74,7 +73,8 @@ done: movl saved_context_esi, %esi movl saved_context_edi, %edi - pushl saved_context_eflags ; popfl + pushl saved_context_eflags + popfl xorl %eax, %eax diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index a4e201b..7dcd321 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -812,7 +812,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) /* Early in boot, while setting up the initial pagetable, assume everything is pinned. */ -static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) +static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) { #ifdef CONFIG_FLATMEM BUG_ON(mem_map); /* should only be used early */ @@ -822,7 +822,7 @@ static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) /* Early release_pte assumes that all pts are pinned, since there's only init_mm and anything attached to that is pinned. */ -static void xen_release_pte_init(u32 pfn) +static void xen_release_pte_init(unsigned long pfn) { make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); } @@ -838,7 +838,7 @@ static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) /* This needs to make sure the new pte page is pinned iff its being attached to a pinned pagetable. */ -static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) +static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) { struct page *page = pfn_to_page(pfn); @@ -856,12 +856,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level) } } -static void xen_alloc_pte(struct mm_struct *mm, u32 pfn) +static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PTE); } -static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn) +static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PMD); } @@ -909,7 +909,7 @@ static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) } /* This should never happen until we're OK to use struct page */ -static void xen_release_ptpage(u32 pfn, unsigned level) +static void xen_release_ptpage(unsigned long pfn, unsigned level) { struct page *page = pfn_to_page(pfn); @@ -923,23 +923,23 @@ static void xen_release_ptpage(u32 pfn, unsigned level) } } -static void xen_release_pte(u32 pfn) +static void xen_release_pte(unsigned long pfn) { xen_release_ptpage(pfn, PT_PTE); } -static void xen_release_pmd(u32 pfn) +static void xen_release_pmd(unsigned long pfn) { xen_release_ptpage(pfn, PT_PMD); } #if PAGETABLE_LEVELS == 4 -static void xen_alloc_pud(struct mm_struct *mm, u32 pfn) +static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) { xen_alloc_ptpage(mm, pfn, PT_PUD); } -static void xen_release_pud(u32 pfn) +static void xen_release_pud(unsigned long pfn) { xen_release_ptpage(pfn, PT_PUD); } diff --git a/block/Makefile b/block/Makefile index 208000b..bfe7304 100644 --- a/block/Makefile +++ b/block/Makefile @@ -4,8 +4,8 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ - blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \ - cmd-filter.o + blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ + ioctl.o genhd.o scsi_ioctl.o cmd-filter.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/as-iosched.c b/block/as-iosched.c index cf4eb0e..71f0abb 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data *ad) del_timer(&ad->antic_timer); ad->antic_status = ANTIC_FINISHED; /* see as_work_handler */ - kblockd_schedule_work(&ad->antic_work); + kblockd_schedule_work(ad->q, &ad->antic_work); } } @@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned long data) aic = ad->io_context->aic; ad->antic_status = ANTIC_FINISHED; - kblockd_schedule_work(&ad->antic_work); + kblockd_schedule_work(q, &ad->antic_work); if (aic->ttime_samples == 0) { /* process anticipated on has exited or timed out*/ @@ -745,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq) */ static int as_can_anticipate(struct as_data *ad, struct request *rq) { +#if 0 /* disable for now, we need to check tag level as well */ + /* + * SSD device without seek penalty, disable idling + */ + if (blk_queue_nonrot(ad->q)) axman + return 0; +#endif + if (!ad->io_context) /* * Last request submitted was a write @@ -844,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq) if (ad->changed_batch && ad->nr_dispatched == 1) { ad->current_batch_expires = jiffies + ad->batch_expire[ad->batch_data_dir]; - kblockd_schedule_work(&ad->antic_work); + kblockd_schedule_work(q, &ad->antic_work); ad->changed_batch = 0; if (ad->batch_data_dir == REQ_SYNC) diff --git a/block/blk-barrier.c b/block/blk-barrier.c index a09ead1..5c99ff8 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -293,7 +293,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) bio->bi_end_io = bio_end_empty_barrier; bio->bi_private = &wait; bio->bi_bdev = bdev; - submit_bio(1 << BIO_RW_BARRIER, bio); + submit_bio(WRITE_BARRIER, bio); wait_for_completion(&wait); @@ -315,3 +315,73 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) return ret; } EXPORT_SYMBOL(blkdev_issue_flush); + +static void blkdev_discard_end_io(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + + bio_put(bio); +} + +/** + * blkdev_issue_discard - queue a discard + * @bdev: blockdev to issue discard for + * @sector: start sector + * @nr_sects: number of sectors to discard + * @gfp_mask: memory allocation flags (for bio_alloc) + * + * Description: + * Issue a discard request for the sectors in question. Does not wait. + */ +int blkdev_issue_discard(struct block_device *bdev, + sector_t sector, sector_t nr_sects, gfp_t gfp_mask) +{ + struct request_queue *q; + struct bio *bio; + int ret = 0; + + if (bdev->bd_disk == NULL) + return -ENXIO; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + if (!q->prepare_discard_fn) + return -EOPNOTSUPP; + + while (nr_sects && !ret) { + bio = bio_alloc(gfp_mask, 0); + if (!bio) + return -ENOMEM; + + bio->bi_end_io = blkdev_discard_end_io; + bio->bi_bdev = bdev; + + bio->bi_sector = sector; + + if (nr_sects > q->max_hw_sectors) { + bio->bi_size = q->max_hw_sectors << 9; + nr_sects -= q->max_hw_sectors; + sector += q->max_hw_sectors; + } else { + bio->bi_size = nr_sects << 9; + nr_sects = 0; + } + bio_get(bio); + submit_bio(DISCARD_BARRIER, bio); + + /* Check if it failed immediately */ + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + bio_put(bio); + } + return ret; +} +EXPORT_SYMBOL(blkdev_issue_discard); diff --git a/block/blk-core.c b/block/blk-core.c index 2cba5ef..2d053b5 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -26,8 +26,6 @@ #include <linux/swap.h> #include <linux/writeback.h> #include <linux/task_io_accounting_ops.h> -#include <linux/interrupt.h> -#include <linux/cpu.h> #include <linux/blktrace_api.h> #include <linux/fault-inject.h> @@ -50,27 +48,26 @@ struct kmem_cache *blk_requestq_cachep; */ static struct workqueue_struct *kblockd_workqueue; -static DEFINE_PER_CPU(struct list_head, blk_cpu_done); - static void drive_stat_acct(struct request *rq, int new_io) { struct hd_struct *part; int rw = rq_data_dir(rq); + int cpu; if (!blk_fs_request(rq) || !rq->rq_disk) return; - part = get_part(rq->rq_disk, rq->sector); + cpu = part_stat_lock(); + part = disk_map_sector_rcu(rq->rq_disk, rq->sector); + if (!new_io) - __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector); + part_stat_inc(cpu, part, merges[rw]); else { - disk_round_stats(rq->rq_disk); - rq->rq_disk->in_flight++; - if (part) { - part_round_stats(part); - part->in_flight++; - } + part_round_stats(cpu, part); + part_inc_in_flight(part); } + + part_stat_unlock(); } void blk_queue_congestion_threshold(struct request_queue *q) @@ -113,7 +110,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq) memset(rq, 0, sizeof(*rq)); INIT_LIST_HEAD(&rq->queuelist); - INIT_LIST_HEAD(&rq->donelist); + INIT_LIST_HEAD(&rq->timeout_list); + rq->cpu = -1; rq->q = q; rq->sector = rq->hard_sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); @@ -308,7 +306,7 @@ void blk_unplug_timeout(unsigned long data) blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, q->rq.count[READ] + q->rq.count[WRITE]); - kblockd_schedule_work(&q->unplug_work); + kblockd_schedule_work(q, &q->unplug_work); } void blk_unplug(struct request_queue *q) @@ -325,6 +323,21 @@ void blk_unplug(struct request_queue *q) } EXPORT_SYMBOL(blk_unplug); +static void blk_invoke_request_fn(struct request_queue *q) +{ + /* + * one level of recursion is ok and is much faster than kicking + * the unplug handling + */ + if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + q->request_fn(q); + queue_flag_clear(QUEUE_FLAG_REENTER, q); + } else { + queue_flag_set(QUEUE_FLAG_PLUGGED, q); + kblockd_schedule_work(q, &q->unplug_work); + } +} + /** * blk_start_queue - restart a previously stopped queue * @q: The &struct request_queue in question @@ -339,18 +352,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - - /* - * one level of recursion is ok and is much faster than kicking - * the unplug handling - */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else { - blk_plug_device(q); - kblockd_schedule_work(&q->unplug_work); - } + blk_invoke_request_fn(q); } EXPORT_SYMBOL(blk_start_queue); @@ -408,15 +410,8 @@ void __blk_run_queue(struct request_queue *q) * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!elv_queue_empty(q)) { - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else { - blk_plug_device(q); - kblockd_schedule_work(&q->unplug_work); - } - } + if (!elv_queue_empty(q)) + blk_invoke_request_fn(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -441,6 +436,14 @@ void blk_put_queue(struct request_queue *q) void blk_cleanup_queue(struct request_queue *q) { + /* + * We know we have process context here, so we can be a little + * cautious and ensure that pending block actions on this device + * are done before moving on. Going into this function, we should + * not have processes doing IO to this device. + */ + blk_sync_queue(q); + mutex_lock(&q->sysfs_lock); queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); mutex_unlock(&q->sysfs_lock); @@ -496,6 +499,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) } init_timer(&q->unplug_timer); + setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); + INIT_LIST_HEAD(&q->timeout_list); kobject_init(&q->kobj, &blk_queue_ktype); @@ -531,7 +536,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node); * request queue; this lock will be taken also from interrupt context, so irq * disabling is needed for it. * - * Function returns a pointer to the initialized request queue, or NULL if + * Function returns a pointer to the initialized request queue, or %NULL if * it didn't succeed. * * Note: @@ -569,7 +574,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->request_fn = rfn; q->prep_rq_fn = NULL; q->unplug_fn = generic_unplug_device; - q->queue_flags = (1 << QUEUE_FLAG_CLUSTER); + q->queue_flags = (1 << QUEUE_FLAG_CLUSTER | + 1 << QUEUE_FLAG_STACKABLE); q->queue_lock = lock; blk_queue_segment_boundary(q, 0xffffffff); @@ -624,10 +630,6 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) blk_rq_init(q, rq); - /* - * first three bits are identical in rq->cmd_flags and bio->bi_rw, - * see bio.h and blkdev.h - */ rq->cmd_flags = rw | REQ_ALLOCED; if (priv) { @@ -888,9 +890,11 @@ EXPORT_SYMBOL(blk_get_request); */ void blk_start_queueing(struct request_queue *q) { - if (!blk_queue_plugged(q)) + if (!blk_queue_plugged(q)) { + if (unlikely(blk_queue_stopped(q))) + return; q->request_fn(q); - else + } else __generic_unplug_device(q); } EXPORT_SYMBOL(blk_start_queueing); @@ -907,6 +911,8 @@ EXPORT_SYMBOL(blk_start_queueing); */ void blk_requeue_request(struct request_queue *q, struct request *rq) { + blk_delete_timer(rq); + blk_clear_rq_complete(rq); blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); if (blk_rq_tagged(rq)) @@ -917,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) EXPORT_SYMBOL(blk_requeue_request); /** - * blk_insert_request - insert a special request in to a request queue + * blk_insert_request - insert a special request into a request queue * @q: request queue where request should be inserted * @rq: request to be inserted * @at_head: insert request at head or tail of queue @@ -927,8 +933,8 @@ EXPORT_SYMBOL(blk_requeue_request); * Many block devices need to execute commands asynchronously, so they don't * block the whole kernel from preemption during request execution. This is * accomplished normally by inserting aritficial requests tagged as - * REQ_SPECIAL in to the corresponding request queue, and letting them be - * scheduled for actual execution by the request queue. + * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them + * be scheduled for actual execution by the request queue. * * We have the option of inserting the head or the tail of the queue. * Typically we use the tail for new ioctls and so forth. We use the head @@ -982,8 +988,22 @@ static inline void add_request(struct request_queue *q, struct request *req) __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); } -/* - * disk_round_stats() - Round off the performance stats on a struct +static void part_round_stats_single(int cpu, struct hd_struct *part, + unsigned long now) +{ + if (now == part->stamp) + return; + + if (part->in_flight) { + __part_stat_add(cpu, part, time_in_queue, + part->in_flight * (now - part->stamp)); + __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); + } + part->stamp = now; +} + +/** + * part_round_stats() - Round off the performance stats on a struct * disk_stats. * * The average IO queue length and utilisation statistics are maintained @@ -997,36 +1017,15 @@ static inline void add_request(struct request_queue *q, struct request *req) * /proc/diskstats. This accounts immediately for all queue usage up to * the current jiffies and restarts the counters again. */ -void disk_round_stats(struct gendisk *disk) +void part_round_stats(int cpu, struct hd_struct *part) { unsigned long now = jiffies; - if (now == disk->stamp) - return; - - if (disk->in_flight) { - __disk_stat_add(disk, time_in_queue, - disk->in_flight * (now - disk->stamp)); - __disk_stat_add(disk, io_ticks, (now - disk->stamp)); - } - disk->stamp = now; -} -EXPORT_SYMBOL_GPL(disk_round_stats); - -void part_round_stats(struct hd_struct *part) -{ - unsigned long now = jiffies; - - if (now == part->stamp) - return; - - if (part->in_flight) { - __part_stat_add(part, time_in_queue, - part->in_flight * (now - part->stamp)); - __part_stat_add(part, io_ticks, (now - part->stamp)); - } - part->stamp = now; + if (part->partno) + part_round_stats_single(cpu, &part_to_disk(part)->part0, now); + part_round_stats_single(cpu, part, now); } +EXPORT_SYMBOL_GPL(part_round_stats); /* * queue lock must be held @@ -1070,6 +1069,7 @@ EXPORT_SYMBOL(blk_put_request); void init_request_from_bio(struct request *req, struct bio *bio) { + req->cpu = bio->bi_comp_cpu; req->cmd_type = REQ_TYPE_FS; /* @@ -1081,7 +1081,12 @@ void init_request_from_bio(struct request *req, struct bio *bio) /* * REQ_BARRIER implies no merging, but lets make it explicit */ - if (unlikely(bio_barrier(bio))) + if (unlikely(bio_discard(bio))) { + req->cmd_flags |= REQ_DISCARD; + if (bio_barrier(bio)) + req->cmd_flags |= REQ_SOFTBARRIER; + req->q->prepare_discard_fn(req->q, req); + } else if (unlikely(bio_barrier(bio))) req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); if (bio_sync(bio)) @@ -1099,7 +1104,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; - int el_ret, nr_sectors, barrier, err; + int el_ret, nr_sectors, barrier, discard, err; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); int rw_flags; @@ -1114,7 +1119,14 @@ static int __make_request(struct request_queue *q, struct bio *bio) blk_queue_bounce(q, &bio); barrier = bio_barrier(bio); - if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { + if (unlikely(barrier) && bio_has_data(bio) && + (q->next_ordered == QUEUE_ORDERED_NONE)) { + err = -EOPNOTSUPP; + goto end_io; + } + + discard = bio_discard(bio); + if (unlikely(discard) && !q->prepare_discard_fn) { err = -EOPNOTSUPP; goto end_io; } @@ -1138,6 +1150,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) req->biotail = bio; req->nr_sectors = req->hard_nr_sectors += nr_sectors; req->ioprio = ioprio_best(req->ioprio, prio); + if (!blk_rq_cpu_valid(req)) + req->cpu = bio->bi_comp_cpu; drive_stat_acct(req, 0); if (!attempt_back_merge(q, req)) elv_merged_request(q, req, el_ret); @@ -1165,6 +1179,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) req->sector = req->hard_sector = bio->bi_sector; req->nr_sectors = req->hard_nr_sectors += nr_sectors; req->ioprio = ioprio_best(req->ioprio, prio); + if (!blk_rq_cpu_valid(req)) + req->cpu = bio->bi_comp_cpu; drive_stat_acct(req, 0); if (!attempt_front_merge(q, req)) elv_merged_request(q, req, el_ret); @@ -1200,13 +1216,15 @@ get_rq: init_request_from_bio(req, bio); spin_lock_irq(q->queue_lock); + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || + bio_flagged(bio, BIO_CPU_AFFINE)) + req->cpu = blk_cpu_to_group(smp_processor_id()); if (elv_queue_empty(q)) blk_plug_device(q); add_request(q, req); out: if (sync) __generic_unplug_device(q); - spin_unlock_irq(q->queue_lock); return 0; @@ -1260,8 +1278,9 @@ __setup("fail_make_request=", setup_fail_make_request); static int should_fail_request(struct bio *bio) { - if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) || - (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail)) + struct hd_struct *part = bio->bi_bdev->bd_part; + + if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail) return should_fail(&fail_make_request, bio->bi_size); return 0; @@ -1314,7 +1333,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) } /** - * generic_make_request: hand a buffer to its device driver for I/O + * generic_make_request - hand a buffer to its device driver for I/O * @bio: The bio describing the location in memory and on the device. * * generic_make_request() is used to make I/O requests of block @@ -1409,7 +1428,8 @@ end_io: if (bio_check_eod(bio, nr_sectors)) goto end_io; - if (bio_empty_barrier(bio) && !q->prepare_flush_fn) { + if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) || + (bio_discard(bio) && !q->prepare_discard_fn)) { err = -EOPNOTSUPP; goto end_io; } @@ -1471,13 +1491,13 @@ void generic_make_request(struct bio *bio) EXPORT_SYMBOL(generic_make_request); /** - * submit_bio: submit a bio to the block device layer for I/O + * submit_bio - submit a bio to the block device layer for I/O * @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead) * @bio: The &struct bio which describes the I/O * * submit_bio() is very similar in purpose to generic_make_request(), and * uses that function to do most of the work. Both are fairly rough - * interfaces, @bio must be presetup and ready for I/O. + * interfaces; @bio must be presetup and ready for I/O. * */ void submit_bio(int rw, struct bio *bio) @@ -1490,11 +1510,7 @@ void submit_bio(int rw, struct bio *bio) * If it's a regular read/write or a barrier with data attached, * go through the normal accounting stuff before submission. */ - if (!bio_empty_barrier(bio)) { - - BIO_BUG_ON(!bio->bi_size); - BIO_BUG_ON(!bio->bi_io_vec); - + if (bio_has_data(bio)) { if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { @@ -1517,9 +1533,90 @@ void submit_bio(int rw, struct bio *bio) EXPORT_SYMBOL(submit_bio); /** + * blk_rq_check_limits - Helper function to check a request for the queue limit + * @q: the queue + * @rq: the request being checked + * + * Description: + * @rq may have been made based on weaker limitations of upper-level queues + * in request stacking drivers, and it may violate the limitation of @q. + * Since the block layer and the underlying device driver trust @rq + * after it is inserted to @q, it should be checked against @q before + * the insertion using this generic function. + * + * This function should also be useful for request stacking drivers + * in some cases below, so export this fuction. + * Request stacking drivers like request-based dm may change the queue + * limits while requests are in the queue (e.g. dm's table swapping). + * Such request stacking drivers should check those requests agaist + * the new queue limits again when they dispatch those requests, + * although such checkings are also done against the old queue limits + * when submitting requests. + */ +int blk_rq_check_limits(struct request_queue *q, struct request *rq) +{ + if (rq->nr_sectors > q->max_sectors || + rq->data_len > q->max_hw_sectors << 9) { + printk(KERN_ERR "%s: over max size limit.\n", __func__); + return -EIO; + } + + /* + * queue's settings related to segment counting like q->bounce_pfn + * may differ from that of other stacking queues. + * Recalculate it to check the request correctly on this queue's + * limitation. + */ + blk_recalc_rq_segments(rq); + if (rq->nr_phys_segments > q->max_phys_segments || + rq->nr_phys_segments > q->max_hw_segments) { + printk(KERN_ERR "%s: over max segments limit.\n", __func__); + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL_GPL(blk_rq_check_limits); + +/** + * blk_insert_cloned_request - Helper for stacking drivers to submit a request + * @q: the queue to submit the request + * @rq: the request being queued + */ +int blk_insert_cloned_request(struct request_queue *q, struct request *rq) +{ + unsigned long flags; + + if (blk_rq_check_limits(q, rq)) + return -EIO; + +#ifdef CONFIG_FAIL_MAKE_REQUEST + if (rq->rq_disk && rq->rq_disk->part0.make_it_fail && + should_fail(&fail_make_request, blk_rq_bytes(rq))) + return -EIO; +#endif + + spin_lock_irqsave(q->queue_lock, flags); + + /* + * Submitting request must be dequeued before calling this function + * because it will be linked to another request_queue + */ + BUG_ON(blk_queued_rq(rq)); + + drive_stat_acct(rq, 1); + __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); + + spin_unlock_irqrestore(q->queue_lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(blk_insert_cloned_request); + +/** * __end_that_request_first - end I/O on a request * @req: the request being processed - * @error: 0 for success, < 0 for error + * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete * * Description: @@ -1527,8 +1624,8 @@ EXPORT_SYMBOL(submit_bio); * for the next range of segments (if any) in the cluster. * * Return: - * 0 - we are done with this request, call end_that_request_last() - * 1 - still buffers pending for this request + * %0 - we are done with this request, call end_that_request_last() + * %1 - still buffers pending for this request **/ static int __end_that_request_first(struct request *req, int error, int nr_bytes) @@ -1539,7 +1636,7 @@ static int __end_that_request_first(struct request *req, int error, blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); /* - * for a REQ_BLOCK_PC request, we want to carry any eventual + * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual * sense key with us all the way through */ if (!blk_pc_request(req)) @@ -1552,11 +1649,14 @@ static int __end_that_request_first(struct request *req, int error, } if (blk_fs_request(req) && req->rq_disk) { - struct hd_struct *part = get_part(req->rq_disk, req->sector); const int rw = rq_data_dir(req); + struct hd_struct *part; + int cpu; - all_stat_add(req->rq_disk, part, sectors[rw], - nr_bytes >> 9, req->sector); + cpu = part_stat_lock(); + part = disk_map_sector_rcu(req->rq_disk, req->sector); + part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9); + part_stat_unlock(); } total_bytes = bio_nbytes = 0; @@ -1641,88 +1741,14 @@ static int __end_that_request_first(struct request *req, int error, } /* - * splice the completion data to a local structure and hand off to - * process_completion_queue() to complete the requests - */ -static void blk_done_softirq(struct softirq_action *h) -{ - struct list_head *cpu_list, local_list; - - local_irq_disable(); - cpu_list = &__get_cpu_var(blk_cpu_done); - list_replace_init(cpu_list, &local_list); - local_irq_enable(); - - while (!list_empty(&local_list)) { - struct request *rq; - - rq = list_entry(local_list.next, struct request, donelist); - list_del_init(&rq->donelist); - rq->q->softirq_done_fn(rq); - } -} - -static int __cpuinit blk_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - /* - * If a CPU goes away, splice its entries to the current CPU - * and trigger a run of the softirq - */ - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { - int cpu = (unsigned long) hcpu; - - local_irq_disable(); - list_splice_init(&per_cpu(blk_cpu_done, cpu), - &__get_cpu_var(blk_cpu_done)); - raise_softirq_irqoff(BLOCK_SOFTIRQ); - local_irq_enable(); - } - - return NOTIFY_OK; -} - - -static struct notifier_block blk_cpu_notifier __cpuinitdata = { - .notifier_call = blk_cpu_notify, -}; - -/** - * blk_complete_request - end I/O on a request - * @req: the request being processed - * - * Description: - * Ends all I/O on a request. It does not handle partial completions, - * unless the driver actually implements this in its completion callback - * through requeueing. The actual completion happens out-of-order, - * through a softirq handler. The user must have registered a completion - * callback through blk_queue_softirq_done(). - **/ - -void blk_complete_request(struct request *req) -{ - struct list_head *cpu_list; - unsigned long flags; - - BUG_ON(!req->q->softirq_done_fn); - - local_irq_save(flags); - - cpu_list = &__get_cpu_var(blk_cpu_done); - list_add_tail(&req->donelist, cpu_list); - raise_softirq_irqoff(BLOCK_SOFTIRQ); - - local_irq_restore(flags); -} -EXPORT_SYMBOL(blk_complete_request); - -/* * queue lock must be held */ static void end_that_request_last(struct request *req, int error) { struct gendisk *disk = req->rq_disk; + blk_delete_timer(req); + if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -1740,16 +1766,18 @@ static void end_that_request_last(struct request *req, int error) if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); - struct hd_struct *part = get_part(disk, req->sector); - - __all_stat_inc(disk, part, ios[rw], req->sector); - __all_stat_add(disk, part, ticks[rw], duration, req->sector); - disk_round_stats(disk); - disk->in_flight--; - if (part) { - part_round_stats(part); - part->in_flight--; - } + struct hd_struct *part; + int cpu; + + cpu = part_stat_lock(); + part = disk_map_sector_rcu(disk, req->sector); + + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_round_stats(cpu, part); + part_dec_in_flight(part); + + part_stat_unlock(); } if (req->end_io) @@ -1762,17 +1790,6 @@ static void end_that_request_last(struct request *req, int error) } } -static inline void __end_request(struct request *rq, int uptodate, - unsigned int nr_bytes) -{ - int error = 0; - - if (uptodate <= 0) - error = uptodate ? uptodate : -EIO; - - __blk_end_request(rq, error, nr_bytes); -} - /** * blk_rq_bytes - Returns bytes left to complete in the entire request * @rq: the request being processed @@ -1803,74 +1820,57 @@ unsigned int blk_rq_cur_bytes(struct request *rq) EXPORT_SYMBOL_GPL(blk_rq_cur_bytes); /** - * end_queued_request - end all I/O on a queued request - * @rq: the request being processed - * @uptodate: error value or 0/1 uptodate flag - * - * Description: - * Ends all I/O on a request, and removes it from the block layer queues. - * Not suitable for normal IO completion, unless the driver still has - * the request attached to the block layer. - * - **/ -void end_queued_request(struct request *rq, int uptodate) -{ - __end_request(rq, uptodate, blk_rq_bytes(rq)); -} -EXPORT_SYMBOL(end_queued_request); - -/** - * end_dequeued_request - end all I/O on a dequeued request - * @rq: the request being processed - * @uptodate: error value or 0/1 uptodate flag - * - * Description: - * Ends all I/O on a request. The request must already have been - * dequeued using blkdev_dequeue_request(), as is normally the case - * for most drivers. - * - **/ -void end_dequeued_request(struct request *rq, int uptodate) -{ - __end_request(rq, uptodate, blk_rq_bytes(rq)); -} -EXPORT_SYMBOL(end_dequeued_request); - - -/** * end_request - end I/O on the current segment of the request * @req: the request being processed - * @uptodate: error value or 0/1 uptodate flag + * @uptodate: error value or %0/%1 uptodate flag * * Description: * Ends I/O on the current segment of a request. If that is the only * remaining segment, the request is also completed and freed. * - * This is a remnant of how older block drivers handled IO completions. - * Modern drivers typically end IO on the full request in one go, unless + * This is a remnant of how older block drivers handled I/O completions. + * Modern drivers typically end I/O on the full request in one go, unless * they have a residual value to account for. For that case this function * isn't really useful, unless the residual just happens to be the * full current segment. In other words, don't use this function in new - * code. Either use end_request_completely(), or the - * end_that_request_chunk() (along with end_that_request_last()) for - * partial completions. - * + * code. Use blk_end_request() or __blk_end_request() to end a request. **/ void end_request(struct request *req, int uptodate) { - __end_request(req, uptodate, req->hard_cur_sectors << 9); + int error = 0; + + if (uptodate <= 0) + error = uptodate ? uptodate : -EIO; + + __blk_end_request(req, error, req->hard_cur_sectors << 9); } EXPORT_SYMBOL(end_request); +static int end_that_request_data(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes) +{ + if (rq->bio) { + if (__end_that_request_first(rq, error, nr_bytes)) + return 1; + + /* Bidi request must be completed as a whole */ + if (blk_bidi_rq(rq) && + __end_that_request_first(rq->next_rq, error, bidi_bytes)) + return 1; + } + + return 0; +} + /** * blk_end_io - Generic end_io function to complete a request. * @rq: the request being processed - * @error: 0 for success, < 0 for error + * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq * @drv_callback: function called between completion of bios in the request * and completion of the request. - * If the callback returns non 0, this helper returns without + * If the callback returns non %0, this helper returns without * completion of the request. * * Description: @@ -1878,8 +1878,8 @@ EXPORT_SYMBOL(end_request); * If @rq has leftover, sets it up for the next range of segments. * * Return: - * 0 - we are done with this request - * 1 - this request is not freed yet, it still has pending buffers. + * %0 - we are done with this request + * %1 - this request is not freed yet, it still has pending buffers. **/ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes, @@ -1888,15 +1888,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, struct request_queue *q = rq->q; unsigned long flags = 0UL; - if (blk_fs_request(rq) || blk_pc_request(rq)) { - if (__end_that_request_first(rq, error, nr_bytes)) - return 1; - - /* Bidi request must be completed as a whole */ - if (blk_bidi_rq(rq) && - __end_that_request_first(rq->next_rq, error, bidi_bytes)) - return 1; - } + if (end_that_request_data(rq, error, nr_bytes, bidi_bytes)) + return 1; /* Special feature for tricky drivers */ if (drv_callback && drv_callback(rq)) @@ -1914,7 +1907,7 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, /** * blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed - * @error: 0 for success, < 0 for error + * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete * * Description: @@ -1922,8 +1915,8 @@ static int blk_end_io(struct request *rq, int error, unsigned int nr_bytes, * If @rq has leftover, sets it up for the next range of segments. * * Return: - * 0 - we are done with this request - * 1 - still buffers pending for this request + * %0 - we are done with this request + * %1 - still buffers pending for this request **/ int blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { @@ -1934,22 +1927,20 @@ EXPORT_SYMBOL_GPL(blk_end_request); /** * __blk_end_request - Helper function for drivers to complete the request. * @rq: the request being processed - * @error: 0 for success, < 0 for error + * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete * * Description: * Must be called with queue lock held unlike blk_end_request(). * * Return: - * 0 - we are done with this request - * 1 - still buffers pending for this request + * %0 - we are done with this request + * %1 - still buffers pending for this request **/ int __blk_end_request(struct request *rq, int error, unsigned int nr_bytes) { - if (blk_fs_request(rq) || blk_pc_request(rq)) { - if (__end_that_request_first(rq, error, nr_bytes)) - return 1; - } + if (rq->bio && __end_that_request_first(rq, error, nr_bytes)) + return 1; add_disk_randomness(rq->rq_disk); @@ -1962,7 +1953,7 @@ EXPORT_SYMBOL_GPL(__blk_end_request); /** * blk_end_bidi_request - Helper function for drivers to complete bidi request. * @rq: the bidi request being processed - * @error: 0 for success, < 0 for error + * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete @rq * @bidi_bytes: number of bytes to complete @rq->next_rq * @@ -1970,8 +1961,8 @@ EXPORT_SYMBOL_GPL(__blk_end_request); * Ends I/O on a number of bytes attached to @rq and @rq->next_rq. * * Return: - * 0 - we are done with this request - * 1 - still buffers pending for this request + * %0 - we are done with this request + * %1 - still buffers pending for this request **/ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes) @@ -1981,13 +1972,43 @@ int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, EXPORT_SYMBOL_GPL(blk_end_bidi_request); /** + * blk_update_request - Special helper function for request stacking drivers + * @rq: the request being processed + * @error: %0 for success, < %0 for error + * @nr_bytes: number of bytes to complete @rq + * + * Description: + * Ends I/O on a number of bytes attached to @rq, but doesn't complete + * the request structure even if @rq doesn't have leftover. + * If @rq has leftover, sets it up for the next range of segments. + * + * This special helper function is only for request stacking drivers + * (e.g. request-based dm) so that they can handle partial completion. + * Actual device drivers should use blk_end_request instead. + */ +void blk_update_request(struct request *rq, int error, unsigned int nr_bytes) +{ + if (!end_that_request_data(rq, error, nr_bytes, 0)) { + /* + * These members are not updated in end_that_request_data() + * when all bios are completed. + * Update them so that the request stacking driver can find + * how many bytes remain in the request later. + */ + rq->nr_sectors = rq->hard_nr_sectors = 0; + rq->current_nr_sectors = rq->hard_cur_sectors = 0; + } +} +EXPORT_SYMBOL_GPL(blk_update_request); + +/** * blk_end_request_callback - Special helper function for tricky drivers * @rq: the request being processed - * @error: 0 for success, < 0 for error + * @error: %0 for success, < %0 for error * @nr_bytes: number of bytes to complete * @drv_callback: function called between completion of bios in the request * and completion of the request. - * If the callback returns non 0, this helper returns without + * If the callback returns non %0, this helper returns without * completion of the request. * * Description: @@ -2000,10 +2021,10 @@ EXPORT_SYMBOL_GPL(blk_end_bidi_request); * Don't use this interface in other places anymore. * * Return: - * 0 - we are done with this request - * 1 - this request is not freed yet. - * this request still has pending buffers or - * the driver doesn't want to finish this request yet. + * %0 - we are done with this request + * %1 - this request is not freed yet. + * this request still has pending buffers or + * the driver doesn't want to finish this request yet. **/ int blk_end_request_callback(struct request *rq, int error, unsigned int nr_bytes, @@ -2016,15 +2037,17 @@ EXPORT_SYMBOL_GPL(blk_end_request_callback); void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { - /* first two bits are identical in rq->cmd_flags and bio->bi_rw */ + /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and + we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ rq->cmd_flags |= (bio->bi_rw & 3); - rq->nr_phys_segments = bio_phys_segments(q, bio); - rq->nr_hw_segments = bio_hw_segments(q, bio); + if (bio_has_data(bio)) { + rq->nr_phys_segments = bio_phys_segments(q, bio); + rq->buffer = bio_data(bio); + } rq->current_nr_sectors = bio_cur_sectors(bio); rq->hard_cur_sectors = rq->current_nr_sectors; rq->hard_nr_sectors = rq->nr_sectors = bio_sectors(bio); - rq->buffer = bio_data(bio); rq->data_len = bio->bi_size; rq->bio = rq->biotail = bio; @@ -2033,7 +2056,35 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, rq->rq_disk = bio->bi_bdev->bd_disk; } -int kblockd_schedule_work(struct work_struct *work) +/** + * blk_lld_busy - Check if underlying low-level drivers of a device are busy + * @q : the queue of the device being checked + * + * Description: + * Check if underlying low-level drivers of a device are busy. + * If the drivers want to export their busy state, they must set own + * exporting function using blk_queue_lld_busy() first. + * + * Basically, this function is used only by request stacking drivers + * to stop dispatching requests to underlying devices when underlying + * devices are busy. This behavior helps more I/O merging on the queue + * of the request stacking driver and prevents I/O throughput regression + * on burst I/O load. + * + * Return: + * 0 - Not busy (The request stacking driver should dispatch request) + * 1 - Busy (The request stacking driver should stop dispatching request) + */ +int blk_lld_busy(struct request_queue *q) +{ + if (q->lld_busy_fn) + return q->lld_busy_fn(q); + + return 0; +} +EXPORT_SYMBOL_GPL(blk_lld_busy); + +int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) { return queue_work(kblockd_workqueue, work); } @@ -2047,8 +2098,6 @@ EXPORT_SYMBOL(kblockd_flush_work); int __init blk_dev_init(void) { - int i; - kblockd_workqueue = create_workqueue("kblockd"); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); @@ -2059,12 +2108,6 @@ int __init blk_dev_init(void) blk_requestq_cachep = kmem_cache_create("blkdev_queue", sizeof(struct request_queue), 0, SLAB_PANIC, NULL); - for_each_possible_cpu(i) - INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); - - open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); - register_hotcpu_notifier(&blk_cpu_notifier); - return 0; } diff --git a/block/blk-exec.c b/block/blk-exec.c index 9bceff7..6af716d 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -16,7 +16,7 @@ /** * blk_end_sync_rq - executes a completion event on a request * @rq: request to complete - * @error: end io status of the request + * @error: end I/O status of the request */ static void blk_end_sync_rq(struct request *rq, int error) { @@ -41,7 +41,7 @@ static void blk_end_sync_rq(struct request *rq, int error) * @done: I/O completion handler * * Description: - * Insert a fully prepared request at the back of the io scheduler queue + * Insert a fully prepared request at the back of the I/O scheduler queue * for execution. Don't wait for completion. */ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, @@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(blk_execute_rq_nowait); * @at_head: insert request at head or tail of queue * * Description: - * Insert a fully prepared request at the back of the io scheduler queue + * Insert a fully prepared request at the back of the I/O scheduler queue * for execution and wait for completion. */ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 3f1a847..61a8e2f 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -108,51 +108,51 @@ new_segment: EXPORT_SYMBOL(blk_rq_map_integrity_sg); /** - * blk_integrity_compare - Compare integrity profile of two block devices - * @b1: Device to compare - * @b2: Device to compare + * blk_integrity_compare - Compare integrity profile of two disks + * @gd1: Disk to compare + * @gd2: Disk to compare * * Description: Meta-devices like DM and MD need to verify that all * sub-devices use the same integrity format before advertising to * upper layers that they can send/receive integrity metadata. This - * function can be used to check whether two block devices have + * function can be used to check whether two gendisk devices have * compatible integrity formats. */ -int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2) +int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) { - struct blk_integrity *b1 = bd1->bd_disk->integrity; - struct blk_integrity *b2 = bd2->bd_disk->integrity; + struct blk_integrity *b1 = gd1->integrity; + struct blk_integrity *b2 = gd2->integrity; - BUG_ON(bd1->bd_disk == NULL); - BUG_ON(bd2->bd_disk == NULL); + if (!b1 && !b2) + return 0; if (!b1 || !b2) - return 0; + return -1; if (b1->sector_size != b2->sector_size) { printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, - bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, + gd1->disk_name, gd2->disk_name, b1->sector_size, b2->sector_size); return -1; } if (b1->tuple_size != b2->tuple_size) { printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__, - bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, + gd1->disk_name, gd2->disk_name, b1->tuple_size, b2->tuple_size); return -1; } if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__, - bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, + gd1->disk_name, gd2->disk_name, b1->tag_size, b2->tag_size); return -1; } if (strcmp(b1->name, b2->name)) { printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__, - bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, + gd1->disk_name, gd2->disk_name, b1->name, b2->name); return -1; } @@ -331,7 +331,8 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) return -1; if (kobject_init_and_add(&bi->kobj, &integrity_ktype, - &disk->dev.kobj, "%s", "integrity")) { + &disk_to_dev(disk)->kobj, + "%s", "integrity")) { kmem_cache_free(integrity_cachep, bi); return -1; } @@ -375,7 +376,7 @@ void blk_integrity_unregister(struct gendisk *disk) kobject_uevent(&bi->kobj, KOBJ_REMOVE); kobject_del(&bi->kobj); - kobject_put(&disk->dev.kobj); kmem_cache_free(integrity_cachep, bi); + disk->integrity = NULL; } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-map.c b/block/blk-map.c index af37e4a..4849fa3 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -41,10 +41,10 @@ static int __blk_rq_unmap_user(struct bio *bio) } static int __blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned int len) + struct rq_map_data *map_data, void __user *ubuf, + unsigned int len, int null_mapped, gfp_t gfp_mask) { unsigned long uaddr; - unsigned int alignment; struct bio *bio, *orig_bio; int reading, ret; @@ -55,15 +55,17 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * direct dma. else, set up kernel bounce buffers */ uaddr = (unsigned long) ubuf; - alignment = queue_dma_alignment(q) | q->dma_pad_mask; - if (!(uaddr & alignment) && !(len & alignment)) - bio = bio_map_user(q, NULL, uaddr, len, reading); + if (blk_rq_aligned(q, ubuf, len) && !map_data) + bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); else - bio = bio_copy_user(q, uaddr, len, reading); + bio = bio_copy_user(q, map_data, uaddr, len, reading, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); + if (null_mapped) + bio->bi_flags |= (1 << BIO_NULL_MAPPED); + orig_bio = bio; blk_queue_bounce(q, &bio); @@ -85,17 +87,19 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, } /** - * blk_rq_map_user - map user data to a request, for REQ_BLOCK_PC usage + * blk_rq_map_user - map user data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request structure to fill + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @ubuf: the user buffer * @len: length of user data + * @gfp_mask: memory allocation flags * * Description: - * Data will be mapped directly for zero copy io, if possible. Otherwise + * Data will be mapped directly for zero copy I/O, if possible. Otherwise * a kernel bounce buffer is used. * - * A matching blk_rq_unmap_user() must be issued at the end of io, while + * A matching blk_rq_unmap_user() must be issued at the end of I/O, while * still in process context. * * Note: The mapped bio may need to be bounced through blk_queue_bounce() @@ -105,16 +109,22 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq, * unmapping. */ int blk_rq_map_user(struct request_queue *q, struct request *rq, - void __user *ubuf, unsigned long len) + struct rq_map_data *map_data, void __user *ubuf, + unsigned long len, gfp_t gfp_mask) { unsigned long bytes_read = 0; struct bio *bio = NULL; - int ret; + int ret, null_mapped = 0; if (len > (q->max_hw_sectors << 9)) return -EINVAL; - if (!len || !ubuf) + if (!len) return -EINVAL; + if (!ubuf) { + if (!map_data || rq_data_dir(rq) != READ) + return -EINVAL; + null_mapped = 1; + } while (bytes_read != len) { unsigned long map_len, end, start; @@ -132,7 +142,8 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, if (end - start > BIO_MAX_PAGES) map_len -= PAGE_SIZE; - ret = __blk_rq_map_user(q, rq, ubuf, map_len); + ret = __blk_rq_map_user(q, rq, map_data, ubuf, map_len, + null_mapped, gfp_mask); if (ret < 0) goto unmap_rq; if (!bio) @@ -154,18 +165,20 @@ unmap_rq: EXPORT_SYMBOL(blk_rq_map_user); /** - * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage + * blk_rq_map_user_iov - map user data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request to map data to + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @iov: pointer to the iovec * @iov_count: number of elements in the iovec * @len: I/O byte count + * @gfp_mask: memory allocation flags * * Description: - * Data will be mapped directly for zero copy io, if possible. Otherwise + * Data will be mapped directly for zero copy I/O, if possible. Otherwise * a kernel bounce buffer is used. * - * A matching blk_rq_unmap_user() must be issued at the end of io, while + * A matching blk_rq_unmap_user() must be issued at the end of I/O, while * still in process context. * * Note: The mapped bio may need to be bounced through blk_queue_bounce() @@ -175,7 +188,8 @@ EXPORT_SYMBOL(blk_rq_map_user); * unmapping. */ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, - struct sg_iovec *iov, int iov_count, unsigned int len) + struct rq_map_data *map_data, struct sg_iovec *iov, + int iov_count, unsigned int len, gfp_t gfp_mask) { struct bio *bio; int i, read = rq_data_dir(rq) == READ; @@ -193,10 +207,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, } } - if (unaligned || (q->dma_pad_mask & len)) - bio = bio_copy_user_iov(q, iov, iov_count, read); + if (unaligned || (q->dma_pad_mask & len) || map_data) + bio = bio_copy_user_iov(q, map_data, iov, iov_count, read, + gfp_mask); else - bio = bio_map_user_iov(q, NULL, iov, iov_count, read); + bio = bio_map_user_iov(q, NULL, iov, iov_count, read, gfp_mask); if (IS_ERR(bio)) return PTR_ERR(bio); @@ -216,6 +231,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, rq->buffer = rq->data = NULL; return 0; } +EXPORT_SYMBOL(blk_rq_map_user_iov); /** * blk_rq_unmap_user - unmap a request with user data @@ -224,7 +240,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, * Description: * Unmap a rq previously mapped by blk_rq_map_user(). The caller must * supply the original rq->bio from the blk_rq_map_user() return, since - * the io completion may have changed rq->bio. + * the I/O completion may have changed rq->bio. */ int blk_rq_unmap_user(struct bio *bio) { @@ -250,7 +266,7 @@ int blk_rq_unmap_user(struct bio *bio) EXPORT_SYMBOL(blk_rq_unmap_user); /** - * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage + * blk_rq_map_kern - map kernel data to a request, for REQ_TYPE_BLOCK_PC usage * @q: request queue where request should be inserted * @rq: request to fill * @kbuf: the kernel buffer @@ -264,8 +280,6 @@ EXPORT_SYMBOL(blk_rq_unmap_user); int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, unsigned int len, gfp_t gfp_mask) { - unsigned long kaddr; - unsigned int alignment; int reading = rq_data_dir(rq) == READ; int do_copy = 0; struct bio *bio; @@ -275,11 +289,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (!len || !kbuf) return -EINVAL; - kaddr = (unsigned long)kbuf; - alignment = queue_dma_alignment(q) | q->dma_pad_mask; - do_copy = ((kaddr & alignment) || (len & alignment) || - object_is_on_stack(kbuf)); - + do_copy = !blk_rq_aligned(q, kbuf, len) || object_is_on_stack(kbuf); if (do_copy) bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); else diff --git a/block/blk-merge.c b/block/blk-merge.c index 5efc9e7..908d3e1 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -11,7 +11,7 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) { - if (blk_fs_request(rq)) { + if (blk_fs_request(rq) || blk_discard_rq(rq)) { rq->hard_sector += nsect; rq->hard_nr_sectors -= nsect; @@ -41,12 +41,9 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) void blk_recalc_rq_segments(struct request *rq) { int nr_phys_segs; - int nr_hw_segs; unsigned int phys_size; - unsigned int hw_size; struct bio_vec *bv, *bvprv = NULL; int seg_size; - int hw_seg_size; int cluster; struct req_iterator iter; int high, highprv = 1; @@ -56,8 +53,8 @@ void blk_recalc_rq_segments(struct request *rq) return; cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); - hw_seg_size = seg_size = 0; - phys_size = hw_size = nr_phys_segs = nr_hw_segs = 0; + seg_size = 0; + phys_size = nr_phys_segs = 0; rq_for_each_segment(bv, rq, iter) { /* * the trick here is making sure that a high page is never @@ -66,7 +63,7 @@ void blk_recalc_rq_segments(struct request *rq) */ high = page_to_pfn(bv->bv_page) > q->bounce_pfn; if (high || highprv) - goto new_hw_segment; + goto new_segment; if (cluster) { if (seg_size + bv->bv_len > q->max_segment_size) goto new_segment; @@ -74,40 +71,19 @@ void blk_recalc_rq_segments(struct request *rq) goto new_segment; if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) goto new_segment; - if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) - goto new_hw_segment; seg_size += bv->bv_len; - hw_seg_size += bv->bv_len; bvprv = bv; continue; } new_segment: - if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) && - !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) - hw_seg_size += bv->bv_len; - else { -new_hw_segment: - if (nr_hw_segs == 1 && - hw_seg_size > rq->bio->bi_hw_front_size) - rq->bio->bi_hw_front_size = hw_seg_size; - hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len; - nr_hw_segs++; - } - nr_phys_segs++; bvprv = bv; seg_size = bv->bv_len; highprv = high; } - if (nr_hw_segs == 1 && - hw_seg_size > rq->bio->bi_hw_front_size) - rq->bio->bi_hw_front_size = hw_seg_size; - if (hw_seg_size > rq->biotail->bi_hw_back_size) - rq->biotail->bi_hw_back_size = hw_seg_size; rq->nr_phys_segments = nr_phys_segs; - rq->nr_hw_segments = nr_hw_segs; } void blk_recount_segments(struct request_queue *q, struct bio *bio) @@ -120,7 +96,6 @@ void blk_recount_segments(struct request_queue *q, struct bio *bio) blk_recalc_rq_segments(&rq); bio->bi_next = nxt; bio->bi_phys_segments = rq.nr_phys_segments; - bio->bi_hw_segments = rq.nr_hw_segments; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); @@ -131,13 +106,17 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, if (!test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags)) return 0; - if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) - return 0; if (bio->bi_size + nxt->bi_size > q->max_segment_size) return 0; + if (!bio_has_data(bio)) + return 1; + + if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt))) + return 0; + /* - * bio and nxt are contigous in memory, check if the queue allows + * bio and nxt are contiguous in memory; check if the queue allows * these two to be merged into one */ if (BIO_SEG_BOUNDARY(q, bio, nxt)) @@ -146,22 +125,6 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, return 0; } -static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio, - struct bio *nxt) -{ - if (!bio_flagged(bio, BIO_SEG_VALID)) - blk_recount_segments(q, bio); - if (!bio_flagged(nxt, BIO_SEG_VALID)) - blk_recount_segments(q, nxt); - if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) || - BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size)) - return 0; - if (bio->bi_hw_back_size + nxt->bi_hw_front_size > q->max_segment_size) - return 0; - - return 1; -} - /* * map a request to scatterlist, return number of sg entries setup. Caller * must make sure sg can hold rq->nr_phys_segments entries @@ -275,10 +238,9 @@ static inline int ll_new_hw_segment(struct request_queue *q, struct request *req, struct bio *bio) { - int nr_hw_segs = bio_hw_segments(q, bio); int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_hw_segments + nr_hw_segs > q->max_hw_segments + if (req->nr_phys_segments + nr_phys_segs > q->max_hw_segments || req->nr_phys_segments + nr_phys_segs > q->max_phys_segments) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) @@ -290,7 +252,6 @@ static inline int ll_new_hw_segment(struct request_queue *q, * This will form the start of a new hw segment. Bump both * counters. */ - req->nr_hw_segments += nr_hw_segs; req->nr_phys_segments += nr_phys_segs; return 1; } @@ -299,7 +260,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { unsigned short max_sectors; - int len; if (unlikely(blk_pc_request(req))) max_sectors = q->max_hw_sectors; @@ -316,19 +276,6 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, blk_recount_segments(q, req->biotail); if (!bio_flagged(bio, BIO_SEG_VALID)) blk_recount_segments(q, bio); - len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size; - if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)) - && !BIOVEC_VIRT_OVERSIZE(len)) { - int mergeable = ll_new_mergeable(q, req, bio); - - if (mergeable) { - if (req->nr_hw_segments == 1) - req->bio->bi_hw_front_size = len; - if (bio->bi_hw_segments == 1) - bio->bi_hw_back_size = len; - } - return mergeable; - } return ll_new_hw_segment(q, req, bio); } @@ -337,7 +284,6 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, struct bio *bio) { unsigned short max_sectors; - int len; if (unlikely(blk_pc_request(req))) max_sectors = q->max_hw_sectors; @@ -351,23 +297,10 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, q->last_merge = NULL; return 0; } - len = bio->bi_hw_back_size + req->bio->bi_hw_front_size; if (!bio_flagged(bio, BIO_SEG_VALID)) blk_recount_segments(q, bio); if (!bio_flagged(req->bio, BIO_SEG_VALID)) blk_recount_segments(q, req->bio); - if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) && - !BIOVEC_VIRT_OVERSIZE(len)) { - int mergeable = ll_new_mergeable(q, req, bio); - - if (mergeable) { - if (bio->bi_hw_segments == 1) - bio->bi_hw_front_size = len; - if (req->nr_hw_segments == 1) - req->biotail->bi_hw_back_size = len; - } - return mergeable; - } return ll_new_hw_segment(q, req, bio); } @@ -376,7 +309,6 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, struct request *next) { int total_phys_segments; - int total_hw_segments; /* * First check if the either of the requests are re-queued @@ -398,26 +330,11 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, if (total_phys_segments > q->max_phys_segments) return 0; - total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; - if (blk_hw_contig_segment(q, req->biotail, next->bio)) { - int len = req->biotail->bi_hw_back_size + - next->bio->bi_hw_front_size; - /* - * propagate the combined length to the end of the requests - */ - if (req->nr_hw_segments == 1) - req->bio->bi_hw_front_size = len; - if (next->nr_hw_segments == 1) - next->biotail->bi_hw_back_size = len; - total_hw_segments--; - } - - if (total_hw_segments > q->max_hw_segments) + if (total_phys_segments > q->max_hw_segments) return 0; /* Merge is OK... */ req->nr_phys_segments = total_phys_segments; - req->nr_hw_segments = total_hw_segments; return 1; } @@ -470,17 +387,21 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); if (req->rq_disk) { - struct hd_struct *part - = get_part(req->rq_disk, req->sector); - disk_round_stats(req->rq_disk); - req->rq_disk->in_flight--; - if (part) { - part_round_stats(part); - part->in_flight--; - } + struct hd_struct *part; + int cpu; + + cpu = part_stat_lock(); + part = disk_map_sector_rcu(req->rq_disk, req->sector); + + part_round_stats(cpu, part); + part_dec_in_flight(part); + + part_stat_unlock(); } req->ioprio = ioprio_best(req->ioprio, next->ioprio); + if (blk_rq_cpu_valid(next)) + req->cpu = next->cpu; __blk_put_request(q, next); return 1; diff --git a/block/blk-settings.c b/block/blk-settings.c index dfc7701..b21dcdb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -33,6 +33,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) EXPORT_SYMBOL(blk_queue_prep_rq); /** + * blk_queue_set_discard - set a discard_sectors function for queue + * @q: queue + * @dfn: prepare_discard function + * + * It's possible for a queue to register a discard callback which is used + * to transform a discard request into the appropriate type for the + * hardware. If none is registered, then discard requests are failed + * with %EOPNOTSUPP. + * + */ +void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn) +{ + q->prepare_discard_fn = dfn; +} +EXPORT_SYMBOL(blk_queue_set_discard); + +/** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue * @mbfn: merge_bvec_fn @@ -60,6 +77,24 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) } EXPORT_SYMBOL(blk_queue_softirq_done); +void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) +{ + q->rq_timeout = timeout; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); + +void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) +{ + q->rq_timed_out_fn = fn; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); + +void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn) +{ + q->lld_busy_fn = fn; +} +EXPORT_SYMBOL_GPL(blk_queue_lld_busy); + /** * blk_queue_make_request - define an alternate make_request function for a device * @q: the request queue for the device to be affected @@ -127,7 +162,7 @@ EXPORT_SYMBOL(blk_queue_make_request); * Different hardware can have different requirements as to what pages * it can do I/O directly to. A low level driver can call * blk_queue_bounce_limit to have lower memory pages allocated as bounce - * buffers for doing I/O to pages residing above @page. + * buffers for doing I/O to pages residing above @dma_addr. **/ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) { @@ -212,7 +247,7 @@ EXPORT_SYMBOL(blk_queue_max_phys_segments); * Description: * Enables a low level driver to set an upper limit on the number of * hw data segments in a request. This would be the largest number of - * address/length pairs the host adapter can actually give as once + * address/length pairs the host adapter can actually give at once * to the device. **/ void blk_queue_max_hw_segments(struct request_queue *q, @@ -393,7 +428,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary); * @mask: alignment mask * * description: - * set required memory and length aligment for direct dma transactions. + * set required memory and length alignment for direct dma transactions. * this is used when buiding direct io requests for the queue. * **/ @@ -409,7 +444,7 @@ EXPORT_SYMBOL(blk_queue_dma_alignment); * @mask: alignment mask * * description: - * update required memory and length aligment for direct dma transactions. + * update required memory and length alignment for direct dma transactions. * If the requested alignment is larger than the current alignment, then * the current queue alignment is updated to the new value, otherwise it * is left alone. The design of this is to allow multiple objects diff --git a/block/blk-softirq.c b/block/blk-softirq.c new file mode 100644 index 0000000..e660d26 --- /dev/null +++ b/block/blk-softirq.c @@ -0,0 +1,175 @@ +/* + * Functions related to softirq rq completions + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> + +#include "blk.h" + +static DEFINE_PER_CPU(struct list_head, blk_cpu_done); + +/* + * Softirq action handler - move entries to local list and loop over them + * while passing them to the queue registered handler. + */ +static void blk_done_softirq(struct softirq_action *h) +{ + struct list_head *cpu_list, local_list; + + local_irq_disable(); + cpu_list = &__get_cpu_var(blk_cpu_done); + list_replace_init(cpu_list, &local_list); + local_irq_enable(); + + while (!list_empty(&local_list)) { + struct request *rq; + + rq = list_entry(local_list.next, struct request, csd.list); + list_del_init(&rq->csd.list); + rq->q->softirq_done_fn(rq); + } +} + +#if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) +static void trigger_softirq(void *data) +{ + struct request *rq = data; + unsigned long flags; + struct list_head *list; + + local_irq_save(flags); + list = &__get_cpu_var(blk_cpu_done); + list_add_tail(&rq->csd.list, list); + + if (list->next == &rq->csd.list) + raise_softirq_irqoff(BLOCK_SOFTIRQ); + + local_irq_restore(flags); +} + +/* + * Setup and invoke a run of 'trigger_softirq' on the given cpu. + */ +static int raise_blk_irq(int cpu, struct request *rq) +{ + if (cpu_online(cpu)) { + struct call_single_data *data = &rq->csd; + + data->func = trigger_softirq; + data->info = rq; + data->flags = 0; + + __smp_call_function_single(cpu, data); + return 0; + } + + return 1; +} +#else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ +static int raise_blk_irq(int cpu, struct request *rq) +{ + return 1; +} +#endif + +static int __cpuinit blk_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + int cpu = (unsigned long) hcpu; + + local_irq_disable(); + list_splice_init(&per_cpu(blk_cpu_done, cpu), + &__get_cpu_var(blk_cpu_done)); + raise_softirq_irqoff(BLOCK_SOFTIRQ); + local_irq_enable(); + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata blk_cpu_notifier = { + .notifier_call = blk_cpu_notify, +}; + +void __blk_complete_request(struct request *req) +{ + struct request_queue *q = req->q; + unsigned long flags; + int ccpu, cpu, group_cpu; + + BUG_ON(!q->softirq_done_fn); + + local_irq_save(flags); + cpu = smp_processor_id(); + group_cpu = blk_cpu_to_group(cpu); + + /* + * Select completion CPU + */ + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) + ccpu = req->cpu; + else + ccpu = cpu; + + if (ccpu == cpu || ccpu == group_cpu) { + struct list_head *list; +do_local: + list = &__get_cpu_var(blk_cpu_done); + list_add_tail(&req->csd.list, list); + + /* + * if the list only contains our just added request, + * signal a raise of the softirq. If there are already + * entries there, someone already raised the irq but it + * hasn't run yet. + */ + if (list->next == &req->csd.list) + raise_softirq_irqoff(BLOCK_SOFTIRQ); + } else if (raise_blk_irq(ccpu, req)) + goto do_local; + + local_irq_restore(flags); +} + +/** + * blk_complete_request - end I/O on a request + * @req: the request being processed + * + * Description: + * Ends all I/O on a request. It does not handle partial completions, + * unless the driver actually implements this in its completion callback + * through requeueing. The actual completion happens out-of-order, + * through a softirq handler. The user must have registered a completion + * callback through blk_queue_softirq_done(). + **/ +void blk_complete_request(struct request *req) +{ + if (unlikely(blk_should_fake_timeout(req->q))) + return; + if (!blk_mark_rq_complete(req)) + __blk_complete_request(req); +} +EXPORT_SYMBOL(blk_complete_request); + +__init int blk_softirq_init(void) +{ + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(&per_cpu(blk_cpu_done, i)); + + open_softirq(BLOCK_SOFTIRQ, blk_done_softirq); + register_hotcpu_notifier(&blk_cpu_notifier); + return 0; +} +subsys_initcall(blk_softirq_init); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 304ec73..21e275d 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -156,6 +156,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, return ret; } +static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page) +{ + unsigned int set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags); + + return queue_var_show(set != 0, page); +} + +static ssize_t +queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) +{ + ssize_t ret = -EINVAL; +#if defined(CONFIG_USE_GENERIC_SMP_HELPERS) + unsigned long val; + + ret = queue_var_store(&val, page, count); + spin_lock_irq(q->queue_lock); + if (val) + queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + else + queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); + spin_unlock_irq(q->queue_lock); +#endif + return ret; +} static struct queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, @@ -197,6 +221,12 @@ static struct queue_sysfs_entry queue_nomerges_entry = { .store = queue_nomerges_store, }; +static struct queue_sysfs_entry queue_rq_affinity_entry = { + .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR }, + .show = queue_rq_affinity_show, + .store = queue_rq_affinity_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -205,6 +235,7 @@ static struct attribute *default_attrs[] = { &queue_iosched_entry.attr, &queue_hw_sector_size_entry.attr, &queue_nomerges_entry.attr, + &queue_rq_affinity_entry.attr, NULL, }; @@ -310,7 +341,7 @@ int blk_register_queue(struct gendisk *disk) if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk->dev.kobj), + ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), "%s", "queue"); if (ret < 0) return ret; @@ -339,6 +370,6 @@ void blk_unregister_queue(struct gendisk *disk) kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); } } diff --git a/block/blk-tag.c b/block/blk-tag.c index ed5166f..c0d419e 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -29,7 +29,7 @@ EXPORT_SYMBOL(blk_queue_find_tag); * __blk_free_tags - release a given set of tag maintenance info * @bqt: the tag map to free * - * Tries to free the specified @bqt@. Returns true if it was + * Tries to free the specified @bqt. Returns true if it was * actually freed and false if there are still references using it */ static int __blk_free_tags(struct blk_queue_tag *bqt) @@ -78,7 +78,7 @@ void __blk_queue_free_tags(struct request_queue *q) * blk_free_tags - release a given set of tag maintenance info * @bqt: the tag map to free * - * For externally managed @bqt@ frees the map. Callers of this + * For externally managed @bqt frees the map. Callers of this * function must guarantee to have released all the queues that * might have been using this tag map. */ @@ -94,7 +94,7 @@ EXPORT_SYMBOL(blk_free_tags); * @q: the request queue for the device * * Notes: - * This is used to disabled tagged queuing to a device, yet leave + * This is used to disable tagged queuing to a device, yet leave * queue in function. **/ void blk_queue_free_tags(struct request_queue *q) @@ -271,7 +271,7 @@ EXPORT_SYMBOL(blk_queue_resize_tags); * @rq: the request that has completed * * Description: - * Typically called when end_that_request_first() returns 0, meaning + * Typically called when end_that_request_first() returns %0, meaning * all transfers have been done for a request. It's important to call * this function before end_that_request_last(), as that will put the * request back on the free list thus corrupting the internal tag list. @@ -337,6 +337,7 @@ EXPORT_SYMBOL(blk_queue_end_tag); int blk_queue_start_tag(struct request_queue *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; + unsigned max_depth, offset; int tag; if (unlikely((rq->cmd_flags & REQ_QUEUED))) { @@ -350,10 +351,19 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) /* * Protect against shared tag maps, as we may not have exclusive * access to the tag map. + * + * We reserve a few tags just for sync IO, since we don't want + * to starve sync IO on behalf of flooding async IO. */ + max_depth = bqt->max_depth; + if (rq_is_sync(rq)) + offset = 0; + else + offset = max_depth >> 2; + do { - tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth); - if (tag >= bqt->max_depth) + tag = find_next_zero_bit(bqt->tag_map, max_depth, offset); + if (tag >= max_depth) return 1; } while (test_and_set_bit_lock(tag, bqt->tag_map)); diff --git a/block/blk-timeout.c b/block/blk-timeout.c new file mode 100644 index 0000000..972a63f --- /dev/null +++ b/block/blk-timeout.c @@ -0,0 +1,238 @@ +/* + * Functions related to generic timeout handling of requests. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/blkdev.h> +#include <linux/fault-inject.h> + +#include "blk.h" + +#ifdef CONFIG_FAIL_IO_TIMEOUT + +static DECLARE_FAULT_ATTR(fail_io_timeout); + +static int __init setup_fail_io_timeout(char *str) +{ + return setup_fault_attr(&fail_io_timeout, str); +} +__setup("fail_io_timeout=", setup_fail_io_timeout); + +int blk_should_fake_timeout(struct request_queue *q) +{ + if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags)) + return 0; + + return should_fail(&fail_io_timeout, 1); +} + +static int __init fail_io_timeout_debugfs(void) +{ + return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout"); +} + +late_initcall(fail_io_timeout_debugfs); + +ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct gendisk *disk = dev_to_disk(dev); + int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags); + + return sprintf(buf, "%d\n", set != 0); +} + +ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct gendisk *disk = dev_to_disk(dev); + int val; + + if (count) { + struct request_queue *q = disk->queue; + char *p = (char *) buf; + + val = simple_strtoul(p, &p, 10); + spin_lock_irq(q->queue_lock); + if (val) + queue_flag_set(QUEUE_FLAG_FAIL_IO, q); + else + queue_flag_clear(QUEUE_FLAG_FAIL_IO, q); + spin_unlock_irq(q->queue_lock); + } + + return count; +} + +#endif /* CONFIG_FAIL_IO_TIMEOUT */ + +/* + * blk_delete_timer - Delete/cancel timer for a given function. + * @req: request that we are canceling timer for + * + */ +void blk_delete_timer(struct request *req) +{ + struct request_queue *q = req->q; + + /* + * Nothing to detach + */ + if (!q->rq_timed_out_fn || !req->deadline) + return; + + list_del_init(&req->timeout_list); + + if (list_empty(&q->timeout_list)) + del_timer(&q->timeout); +} + +static void blk_rq_timed_out(struct request *req) +{ + struct request_queue *q = req->q; + enum blk_eh_timer_return ret; + + ret = q->rq_timed_out_fn(req); + switch (ret) { + case BLK_EH_HANDLED: + __blk_complete_request(req); + break; + case BLK_EH_RESET_TIMER: + blk_clear_rq_complete(req); + blk_add_timer(req); + break; + case BLK_EH_NOT_HANDLED: + /* + * LLD handles this for now but in the future + * we can send a request msg to abort the command + * and we can move more of the generic scsi eh code to + * the blk layer. + */ + break; + default: + printk(KERN_ERR "block: bad eh return: %d\n", ret); + break; + } +} + +void blk_rq_timed_out_timer(unsigned long data) +{ + struct request_queue *q = (struct request_queue *) data; + unsigned long flags, uninitialized_var(next), next_set = 0; + struct request *rq, *tmp; + + spin_lock_irqsave(q->queue_lock, flags); + + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) { + if (time_after_eq(jiffies, rq->deadline)) { + list_del_init(&rq->timeout_list); + + /* + * Check if we raced with end io completion + */ + if (blk_mark_rq_complete(rq)) + continue; + blk_rq_timed_out(rq); + } + if (!next_set) { + next = rq->deadline; + next_set = 1; + } else if (time_after(next, rq->deadline)) + next = rq->deadline; + } + + if (next_set && !list_empty(&q->timeout_list)) + mod_timer(&q->timeout, round_jiffies(next)); + + spin_unlock_irqrestore(q->queue_lock, flags); +} + +/** + * blk_abort_request -- Request request recovery for the specified command + * @req: pointer to the request of interest + * + * This function requests that the block layer start recovery for the + * request by deleting the timer and calling the q's timeout function. + * LLDDs who implement their own error recovery MAY ignore the timeout + * event if they generated blk_abort_req. Must hold queue lock. + */ +void blk_abort_request(struct request *req) +{ + if (blk_mark_rq_complete(req)) + return; + blk_delete_timer(req); + blk_rq_timed_out(req); +} +EXPORT_SYMBOL_GPL(blk_abort_request); + +/** + * blk_add_timer - Start timeout timer for a single request + * @req: request that is about to start running. + * + * Notes: + * Each request has its own timer, and as it is added to the queue, we + * set up the timer. When the request completes, we cancel the timer. + */ +void blk_add_timer(struct request *req) +{ + struct request_queue *q = req->q; + unsigned long expiry; + + if (!q->rq_timed_out_fn) + return; + + BUG_ON(!list_empty(&req->timeout_list)); + BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); + + if (req->timeout) + req->deadline = jiffies + req->timeout; + else { + req->deadline = jiffies + q->rq_timeout; + /* + * Some LLDs, like scsi, peek at the timeout to prevent + * a command from being retried forever. + */ + req->timeout = q->rq_timeout; + } + list_add_tail(&req->timeout_list, &q->timeout_list); + + /* + * If the timer isn't already pending or this timeout is earlier + * than an existing one, modify the timer. Round to next nearest + * second. + */ + expiry = round_jiffies(req->deadline); + + /* + * We use ->deadline == 0 to detect whether a timer was added or + * not, so just increase to next jiffy for that specific case + */ + if (unlikely(!req->deadline)) + req->deadline = 1; + + if (!timer_pending(&q->timeout) || + time_before(expiry, q->timeout.expires)) + mod_timer(&q->timeout, expiry); +} + +/** + * blk_abort_queue -- Abort all request on given queue + * @queue: pointer to queue + * + */ +void blk_abort_queue(struct request_queue *q) +{ + unsigned long flags; + struct request *rq, *tmp; + + spin_lock_irqsave(q->queue_lock, flags); + + elv_abort_queue(q); + + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) + blk_abort_request(rq); + + spin_unlock_irqrestore(q->queue_lock, flags); + +} +EXPORT_SYMBOL_GPL(blk_abort_queue); diff --git a/block/blk.h b/block/blk.h index c79f30e..e5c5797 100644 --- a/block/blk.h +++ b/block/blk.h @@ -17,6 +17,42 @@ void __blk_queue_free_tags(struct request_queue *q); void blk_unplug_work(struct work_struct *work); void blk_unplug_timeout(unsigned long data); +void blk_rq_timed_out_timer(unsigned long data); +void blk_delete_timer(struct request *); +void blk_add_timer(struct request *); + +/* + * Internal atomic flags for request handling + */ +enum rq_atomic_flags { + REQ_ATOM_COMPLETE = 0, +}; + +/* + * EH timer and IO completion will both attempt to 'grab' the request, make + * sure that only one of them suceeds + */ +static inline int blk_mark_rq_complete(struct request *rq) +{ + return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); +} + +static inline void blk_clear_rq_complete(struct request *rq) +{ + clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); +} + +#ifdef CONFIG_FAIL_IO_TIMEOUT +int blk_should_fake_timeout(struct request_queue *); +ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); +ssize_t part_timeout_store(struct device *, struct device_attribute *, + const char *, size_t); +#else +static inline int blk_should_fake_timeout(struct request_queue *q) +{ + return 0; +} +#endif struct io_context *current_io_context(gfp_t gfp_flags, int node); @@ -59,4 +95,16 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) #endif /* BLK_DEV_INTEGRITY */ +static inline int blk_cpu_to_group(int cpu) +{ +#ifdef CONFIG_SCHED_MC + cpumask_t mask = cpu_coregroup_map(cpu); + return first_cpu(mask); +#elif defined(CONFIG_SCHED_SMT) + return first_cpu(per_cpu(cpu_sibling_map, cpu)); +#else + return cpu; +#endif +} + #endif diff --git a/block/blktrace.c b/block/blktrace.c index eb9651c..85049a7 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -111,23 +111,9 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, */ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; -/* - * Bio action bits of interest - */ -static u32 bio_act[9] __read_mostly = { 0, BLK_TC_ACT(BLK_TC_BARRIER), BLK_TC_ACT(BLK_TC_SYNC), 0, BLK_TC_ACT(BLK_TC_AHEAD), 0, 0, 0, BLK_TC_ACT(BLK_TC_META) }; - -/* - * More could be added as needed, taking care to increment the decrementer - * to get correct indexing - */ -#define trace_barrier_bit(rw) \ - (((rw) & (1 << BIO_RW_BARRIER)) >> (BIO_RW_BARRIER - 0)) -#define trace_sync_bit(rw) \ - (((rw) & (1 << BIO_RW_SYNC)) >> (BIO_RW_SYNC - 1)) -#define trace_ahead_bit(rw) \ - (((rw) & (1 << BIO_RW_AHEAD)) << (2 - BIO_RW_AHEAD)) -#define trace_meta_bit(rw) \ - (((rw) & (1 << BIO_RW_META)) >> (BIO_RW_META - 3)) +/* The ilog2() calls fall out because they're constant */ +#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \ + (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) ) /* * The worker for the various blk_add_trace*() types. Fills out a @@ -147,10 +133,11 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, return; what |= ddir_act[rw & WRITE]; - what |= bio_act[trace_barrier_bit(rw)]; - what |= bio_act[trace_sync_bit(rw)]; - what |= bio_act[trace_ahead_bit(rw)]; - what |= bio_act[trace_meta_bit(rw)]; + what |= MASK_TC_BIT(rw, BARRIER); + what |= MASK_TC_BIT(rw, SYNC); + what |= MASK_TC_BIT(rw, AHEAD); + what |= MASK_TC_BIT(rw, META); + what |= MASK_TC_BIT(rw, DISCARD); pid = tsk->pid; if (unlikely(act_log_check(bt, what, sector, pid))) @@ -382,7 +369,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, if (!buts->buf_size || !buts->buf_nr) return -EINVAL; - strcpy(buts->name, name); + strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); + buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; /* * some device names have larger paths - convert the slashes diff --git a/block/bsg.c b/block/bsg.c index 0aae8d7..56cb343 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -283,7 +283,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) next_rq->cmd_type = rq->cmd_type; dxferp = (void*)(unsigned long)hdr->din_xferp; - ret = blk_rq_map_user(q, next_rq, dxferp, hdr->din_xfer_len); + ret = blk_rq_map_user(q, next_rq, NULL, dxferp, + hdr->din_xfer_len, GFP_KERNEL); if (ret) goto out; } @@ -298,7 +299,8 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, int has_write_perm) dxfer_len = 0; if (dxfer_len) { - ret = blk_rq_map_user(q, rq, dxferp, dxfer_len); + ret = blk_rq_map_user(q, rq, NULL, dxferp, dxfer_len, + GFP_KERNEL); if (ret) goto out; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1e2aff8..6a062ee 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -39,6 +39,7 @@ static int cfq_slice_idle = HZ / 125; #define CFQ_MIN_TT (2) #define CFQ_SLICE_SCALE (5) +#define CFQ_HW_QUEUE_MIN (5) #define RQ_CIC(rq) \ ((struct cfq_io_context *) (rq)->elevator_private) @@ -86,7 +87,14 @@ struct cfq_data { int rq_in_driver; int sync_flight; + + /* + * queue-depth detection + */ + int rq_queued; int hw_tag; + int hw_tag_samples; + int rq_in_driver_peak; /* * idle window management @@ -244,7 +252,7 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) { if (cfqd->busy_queues) { cfq_log(cfqd, "schedule dispatch"); - kblockd_schedule_work(&cfqd->unplug_work); + kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); } } @@ -654,15 +662,6 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq) cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", cfqd->rq_in_driver); - /* - * If the depth is larger 1, it really could be queueing. But lets - * make the mark a little higher - idling could still be good for - * low queueing, and a low queueing number could also just indicate - * a SCSI mid layer like behaviour where limit+1 is often seen. - */ - if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) - cfqd->hw_tag = 1; - cfqd->last_position = rq->hard_sector + rq->hard_nr_sectors; } @@ -686,6 +685,7 @@ static void cfq_remove_request(struct request *rq) list_del_init(&rq->queuelist); cfq_del_rq_rb(rq); + cfqq->cfqd->rq_queued--; if (rq_is_meta(rq)) { WARN_ON(!cfqq->meta_pending); cfqq->meta_pending--; @@ -878,6 +878,14 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) struct cfq_io_context *cic; unsigned long sl; + /* + * SSD device without seek penalty, disable idling. But only do so + * for devices that support queuing, otherwise we still have a problem + * with sync vs async workloads. + */ + if (blk_queue_nonrot(cfqd->queue) && cfqd->hw_tag) + return; + WARN_ON(!RB_EMPTY_ROOT(&cfqq->sort_list)); WARN_ON(cfq_cfqq_slice_new(cfqq)); @@ -1833,6 +1841,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, { struct cfq_io_context *cic = RQ_CIC(rq); + cfqd->rq_queued++; if (rq_is_meta(rq)) cfqq->meta_pending++; @@ -1880,6 +1889,31 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq) cfq_rq_enqueued(cfqd, cfqq, rq); } +/* + * Update hw_tag based on peak queue depth over 50 samples under + * sufficient load. + */ +static void cfq_update_hw_tag(struct cfq_data *cfqd) +{ + if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak) + cfqd->rq_in_driver_peak = cfqd->rq_in_driver; + + if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && + cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) + return; + + if (cfqd->hw_tag_samples++ < 50) + return; + + if (cfqd->rq_in_driver_peak >= CFQ_HW_QUEUE_MIN) + cfqd->hw_tag = 1; + else + cfqd->hw_tag = 0; + + cfqd->hw_tag_samples = 0; + cfqd->rq_in_driver_peak = 0; +} + static void cfq_completed_request(struct request_queue *q, struct request *rq) { struct cfq_queue *cfqq = RQ_CFQQ(rq); @@ -1890,6 +1924,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) now = jiffies; cfq_log_cfqq(cfqd, cfqq, "complete"); + cfq_update_hw_tag(cfqd); + WARN_ON(!cfqd->rq_in_driver); WARN_ON(!cfqq->dispatched); cfqd->rq_in_driver--; @@ -2200,6 +2236,7 @@ static void *cfq_init_queue(struct request_queue *q) cfqd->cfq_slice[1] = cfq_slice_sync; cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; + cfqd->hw_tag = 1; return cfqd; } diff --git a/block/cmd-filter.c b/block/cmd-filter.c index 79c1499..e669aed 100644 --- a/block/cmd-filter.c +++ b/block/cmd-filter.c @@ -211,14 +211,10 @@ int blk_register_filter(struct gendisk *disk) { int ret; struct blk_cmd_filter *filter = &disk->queue->cmd_filter; - struct kobject *parent = kobject_get(disk->holder_dir->parent); - if (!parent) - return -ENODEV; - - ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent, + ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, + &disk_to_dev(disk)->kobj, "%s", "cmd_filter"); - if (ret < 0) return ret; @@ -231,7 +227,6 @@ void blk_unregister_filter(struct gendisk *disk) struct blk_cmd_filter *filter = &disk->queue->cmd_filter; kobject_put(&filter->kobj); - kobject_put(disk->holder_dir->parent); } EXPORT_SYMBOL(blk_unregister_filter); #endif diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index c23177e..1e559fb 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -788,6 +788,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return compat_hdio_getgeo(disk, bdev, compat_ptr(arg)); case BLKFLSBUF: case BLKROSET: + case BLKDISCARD: /* * the ones below are implemented in blkdev_locked_ioctl, * but we call blkdev_ioctl, which gets the lock for us diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 342448c..fd31117 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -33,7 +33,7 @@ struct deadline_data { */ struct rb_root sort_list[2]; struct list_head fifo_list[2]; - + /* * next in sort order. read, write or both are NULL */ @@ -53,7 +53,11 @@ struct deadline_data { static void deadline_move_request(struct deadline_data *, struct request *); -#define RQ_RB_ROOT(dd, rq) (&(dd)->sort_list[rq_data_dir((rq))]) +static inline struct rb_root * +deadline_rb_root(struct deadline_data *dd, struct request *rq) +{ + return &dd->sort_list[rq_data_dir(rq)]; +} /* * get the request after `rq' in sector-sorted order @@ -72,15 +76,11 @@ deadline_latter_request(struct request *rq) static void deadline_add_rq_rb(struct deadline_data *dd, struct request *rq) { - struct rb_root *root = RQ_RB_ROOT(dd, rq); + struct rb_root *root = deadline_rb_root(dd, rq); struct request *__alias; -retry: - __alias = elv_rb_add(root, rq); - if (unlikely(__alias)) { + while (unlikely(__alias = elv_rb_add(root, rq))) deadline_move_request(dd, __alias); - goto retry; - } } static inline void @@ -91,7 +91,7 @@ deadline_del_rq_rb(struct deadline_data *dd, struct request *rq) if (dd->next_rq[data_dir] == rq) dd->next_rq[data_dir] = deadline_latter_request(rq); - elv_rb_del(RQ_RB_ROOT(dd, rq), rq); + elv_rb_del(deadline_rb_root(dd, rq), rq); } /* @@ -106,7 +106,7 @@ deadline_add_request(struct request_queue *q, struct request *rq) deadline_add_rq_rb(dd, rq); /* - * set expire time (only used for reads) and add to fifo list + * set expire time and add to fifo list */ rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]); list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]); @@ -162,7 +162,7 @@ static void deadline_merged_request(struct request_queue *q, * if the merge was a front merge, we need to reposition request */ if (type == ELEVATOR_FRONT_MERGE) { - elv_rb_del(RQ_RB_ROOT(dd, req), req); + elv_rb_del(deadline_rb_root(dd, req), req); deadline_add_rq_rb(dd, req); } } @@ -212,7 +212,7 @@ deadline_move_request(struct deadline_data *dd, struct request *rq) dd->next_rq[WRITE] = NULL; dd->next_rq[data_dir] = deadline_latter_request(rq); - dd->last_sector = rq->sector + rq->nr_sectors; + dd->last_sector = rq_end_sector(rq); /* * take it off the sort and fifo list, move @@ -222,7 +222,7 @@ deadline_move_request(struct deadline_data *dd, struct request *rq) } /* - * deadline_check_fifo returns 0 if there are no expired reads on the fifo, + * deadline_check_fifo returns 0 if there are no expired requests on the fifo, * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir]) */ static inline int deadline_check_fifo(struct deadline_data *dd, int ddir) @@ -258,17 +258,9 @@ static int deadline_dispatch_requests(struct request_queue *q, int force) else rq = dd->next_rq[READ]; - if (rq) { - /* we have a "next request" */ - - if (dd->last_sector != rq->sector) - /* end the batch on a non sequential request */ - dd->batching += dd->fifo_batch; - - if (dd->batching < dd->fifo_batch) - /* we are still entitled to batch */ - goto dispatch_request; - } + if (rq && dd->batching < dd->fifo_batch) + /* we have a next request are still entitled to batch */ + goto dispatch_request; /* * at this point we are not running a batch. select the appropriate diff --git a/block/elevator.c b/block/elevator.c index ed6f8f3..0451892 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -34,8 +34,9 @@ #include <linux/delay.h> #include <linux/blktrace_api.h> #include <linux/hash.h> +#include <linux/uaccess.h> -#include <asm/uaccess.h> +#include "blk.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); @@ -75,6 +76,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) return 0; /* + * Don't merge file system requests and discard requests + */ + if (bio_discard(bio) != bio_discard(rq->bio)) + return 0; + + /* * different data direction or already started, don't merge */ if (bio_data_dir(bio) != rq_data_dir(rq)) @@ -438,6 +445,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); + if (blk_discard_rq(rq) != blk_discard_rq(pos)) + break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; if (pos->cmd_flags & stop_flags) @@ -607,7 +616,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) break; case ELEVATOR_INSERT_SORT: - BUG_ON(!blk_fs_request(rq)); + BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq)); rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { @@ -692,7 +701,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where, * this request is scheduling boundary, update * end_sector */ - if (blk_fs_request(rq)) { + if (blk_fs_request(rq) || blk_discard_rq(rq)) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } @@ -745,7 +754,7 @@ struct request *elv_next_request(struct request_queue *q) * not ever see it. */ if (blk_empty_barrier(rq)) { - end_queued_request(rq, 1); + __blk_end_request(rq, 0, blk_rq_bytes(rq)); continue; } if (!(rq->cmd_flags & REQ_STARTED)) { @@ -764,6 +773,12 @@ struct request *elv_next_request(struct request_queue *q) */ rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + + /* + * We are now handing the request to the hardware, + * add the timeout handler + */ + blk_add_timer(rq); } if (!q->boundary_rq || q->boundary_rq == rq) { @@ -782,7 +797,6 @@ struct request *elv_next_request(struct request_queue *q) * device can handle */ rq->nr_phys_segments++; - rq->nr_hw_segments++; } if (!q->prep_rq_fn) @@ -805,14 +819,13 @@ struct request *elv_next_request(struct request_queue *q) * so that we don't add it again */ --rq->nr_phys_segments; - --rq->nr_hw_segments; } rq = NULL; break; } else if (ret == BLKPREP_KILL) { rq->cmd_flags |= REQ_QUIET; - end_queued_request(rq, 0); + __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); } else { printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); break; @@ -901,6 +914,19 @@ int elv_may_queue(struct request_queue *q, int rw) return ELV_MQUEUE_MAY; } +void elv_abort_queue(struct request_queue *q) +{ + struct request *rq; + + while (!list_empty(&q->queue_head)) { + rq = list_entry_rq(q->queue_head.next); + rq->cmd_flags |= REQ_QUIET; + blk_add_trace_rq(q, rq, BLK_TA_ABORT); + __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); + } +} +EXPORT_SYMBOL(elv_abort_queue); + void elv_completed_request(struct request_queue *q, struct request *rq) { elevator_t *e = q->elevator; diff --git a/block/genhd.c b/block/genhd.c index e0ce23ac..4cd3433 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -16,6 +16,7 @@ #include <linux/kobj_map.h> #include <linux/buffer_head.h> #include <linux/mutex.h> +#include <linux/idr.h> #include "blk.h" @@ -24,8 +25,194 @@ static DEFINE_MUTEX(block_class_lock); struct kobject *block_depr; #endif +/* for extended dynamic devt allocation, currently only one major is used */ +#define MAX_EXT_DEVT (1 << MINORBITS) + +/* For extended devt allocation. ext_devt_mutex prevents look up + * results from going away underneath its user. + */ +static DEFINE_MUTEX(ext_devt_mutex); +static DEFINE_IDR(ext_devt_idr); + static struct device_type disk_type; +/** + * disk_get_part - get partition + * @disk: disk to look partition from + * @partno: partition number + * + * Look for partition @partno from @disk. If found, increment + * reference count and return it. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * Pointer to the found partition on success, NULL if not found. + */ +struct hd_struct *disk_get_part(struct gendisk *disk, int partno) +{ + struct hd_struct *part = NULL; + struct disk_part_tbl *ptbl; + + if (unlikely(partno < 0)) + return NULL; + + rcu_read_lock(); + + ptbl = rcu_dereference(disk->part_tbl); + if (likely(partno < ptbl->len)) { + part = rcu_dereference(ptbl->part[partno]); + if (part) + get_device(part_to_dev(part)); + } + + rcu_read_unlock(); + + return part; +} +EXPORT_SYMBOL_GPL(disk_get_part); + +/** + * disk_part_iter_init - initialize partition iterator + * @piter: iterator to initialize + * @disk: disk to iterate over + * @flags: DISK_PITER_* flags + * + * Initialize @piter so that it iterates over partitions of @disk. + * + * CONTEXT: + * Don't care. + */ +void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, + unsigned int flags) +{ + struct disk_part_tbl *ptbl; + + rcu_read_lock(); + ptbl = rcu_dereference(disk->part_tbl); + + piter->disk = disk; + piter->part = NULL; + + if (flags & DISK_PITER_REVERSE) + piter->idx = ptbl->len - 1; + else if (flags & DISK_PITER_INCL_PART0) + piter->idx = 0; + else + piter->idx = 1; + + piter->flags = flags; + + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(disk_part_iter_init); + +/** + * disk_part_iter_next - proceed iterator to the next partition and return it + * @piter: iterator of interest + * + * Proceed @piter to the next partition and return it. + * + * CONTEXT: + * Don't care. + */ +struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) +{ + struct disk_part_tbl *ptbl; + int inc, end; + + /* put the last partition */ + disk_put_part(piter->part); + piter->part = NULL; + + /* get part_tbl */ + rcu_read_lock(); + ptbl = rcu_dereference(piter->disk->part_tbl); + + /* determine iteration parameters */ + if (piter->flags & DISK_PITER_REVERSE) { + inc = -1; + if (piter->flags & DISK_PITER_INCL_PART0) + end = -1; + else + end = 0; + } else { + inc = 1; + end = ptbl->len; + } + + /* iterate to the next partition */ + for (; piter->idx != end; piter->idx += inc) { + struct hd_struct *part; + + part = rcu_dereference(ptbl->part[piter->idx]); + if (!part) + continue; + if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) + continue; + + get_device(part_to_dev(part)); + piter->part = part; + piter->idx += inc; + break; + } + + rcu_read_unlock(); + + return piter->part; +} +EXPORT_SYMBOL_GPL(disk_part_iter_next); + +/** + * disk_part_iter_exit - finish up partition iteration + * @piter: iter of interest + * + * Called when iteration is over. Cleans up @piter. + * + * CONTEXT: + * Don't care. + */ +void disk_part_iter_exit(struct disk_part_iter *piter) +{ + disk_put_part(piter->part); + piter->part = NULL; +} +EXPORT_SYMBOL_GPL(disk_part_iter_exit); + +/** + * disk_map_sector_rcu - map sector to partition + * @disk: gendisk of interest + * @sector: sector to map + * + * Find out which partition @sector maps to on @disk. This is + * primarily used for stats accounting. + * + * CONTEXT: + * RCU read locked. The returned partition pointer is valid only + * while preemption is disabled. + * + * RETURNS: + * Found partition on success, part0 is returned if no partition matches + */ +struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector) +{ + struct disk_part_tbl *ptbl; + int i; + + ptbl = rcu_dereference(disk->part_tbl); + + for (i = 1; i < ptbl->len; i++) { + struct hd_struct *part = rcu_dereference(ptbl->part[i]); + + if (part && part->start_sect <= sector && + sector < part->start_sect + part->nr_sects) + return part; + } + return &disk->part0; +} +EXPORT_SYMBOL_GPL(disk_map_sector_rcu); + /* * Can be deleted altogether. Later. * @@ -43,14 +230,14 @@ static inline int major_to_index(int major) } #ifdef CONFIG_PROC_FS -void blkdev_show(struct seq_file *f, off_t offset) +void blkdev_show(struct seq_file *seqf, off_t offset) { struct blk_major_name *dp; if (offset < BLKDEV_MAJOR_HASH_SIZE) { mutex_lock(&block_class_lock); for (dp = major_names[offset]; dp; dp = dp->next) - seq_printf(f, "%3d %s\n", dp->major, dp->name); + seq_printf(seqf, "%3d %s\n", dp->major, dp->name); mutex_unlock(&block_class_lock); } } @@ -136,6 +323,118 @@ EXPORT_SYMBOL(unregister_blkdev); static struct kobj_map *bdev_map; +/** + * blk_mangle_minor - scatter minor numbers apart + * @minor: minor number to mangle + * + * Scatter consecutively allocated @minor number apart if MANGLE_DEVT + * is enabled. Mangling twice gives the original value. + * + * RETURNS: + * Mangled value. + * + * CONTEXT: + * Don't care. + */ +static int blk_mangle_minor(int minor) +{ +#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT + int i; + + for (i = 0; i < MINORBITS / 2; i++) { + int low = minor & (1 << i); + int high = minor & (1 << (MINORBITS - 1 - i)); + int distance = MINORBITS - 1 - 2 * i; + + minor ^= low | high; /* clear both bits */ + low <<= distance; /* swap the positions */ + high >>= distance; + minor |= low | high; /* and set */ + } +#endif + return minor; +} + +/** + * blk_alloc_devt - allocate a dev_t for a partition + * @part: partition to allocate dev_t for + * @gfp_mask: memory allocation flag + * @devt: out parameter for resulting dev_t + * + * Allocate a dev_t for block device. + * + * RETURNS: + * 0 on success, allocated dev_t is returned in *@devt. -errno on + * failure. + * + * CONTEXT: + * Might sleep. + */ +int blk_alloc_devt(struct hd_struct *part, dev_t *devt) +{ + struct gendisk *disk = part_to_disk(part); + int idx, rc; + + /* in consecutive minor range? */ + if (part->partno < disk->minors) { + *devt = MKDEV(disk->major, disk->first_minor + part->partno); + return 0; + } + + /* allocate ext devt */ + do { + if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL)) + return -ENOMEM; + rc = idr_get_new(&ext_devt_idr, part, &idx); + } while (rc == -EAGAIN); + + if (rc) + return rc; + + if (idx > MAX_EXT_DEVT) { + idr_remove(&ext_devt_idr, idx); + return -EBUSY; + } + + *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx)); + return 0; +} + +/** + * blk_free_devt - free a dev_t + * @devt: dev_t to free + * + * Free @devt which was allocated using blk_alloc_devt(). + * + * CONTEXT: + * Might sleep. + */ +void blk_free_devt(dev_t devt) +{ + might_sleep(); + + if (devt == MKDEV(0, 0)) + return; + + if (MAJOR(devt) == BLOCK_EXT_MAJOR) { + mutex_lock(&ext_devt_mutex); + idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); + mutex_unlock(&ext_devt_mutex); + } +} + +static char *bdevt_str(dev_t devt, char *buf) +{ + if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) { + char tbuf[BDEVT_SIZE]; + snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt)); + snprintf(buf, BDEVT_SIZE, "%-9s", tbuf); + } else + snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt)); + + return buf; +} + /* * Register device numbers dev..(dev+range-1) * range must be nonzero @@ -157,11 +456,11 @@ void blk_unregister_region(dev_t devt, unsigned long range) EXPORT_SYMBOL(blk_unregister_region); -static struct kobject *exact_match(dev_t devt, int *part, void *data) +static struct kobject *exact_match(dev_t devt, int *partno, void *data) { struct gendisk *p = data; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int exact_lock(dev_t devt, void *data) @@ -179,21 +478,46 @@ static int exact_lock(dev_t devt, void *data) * * This function registers the partitioning information in @disk * with the kernel. + * + * FIXME: error handling */ void add_disk(struct gendisk *disk) { struct backing_dev_info *bdi; + dev_t devt; int retval; + /* minors == 0 indicates to use ext devt from part0 and should + * be accompanied with EXT_DEVT flag. Make sure all + * parameters make sense. + */ + WARN_ON(disk->minors && !(disk->major || disk->first_minor)); + WARN_ON(!disk->minors && !(disk->flags & GENHD_FL_EXT_DEVT)); + disk->flags |= GENHD_FL_UP; - blk_register_region(MKDEV(disk->major, disk->first_minor), - disk->minors, NULL, exact_match, exact_lock, disk); + + retval = blk_alloc_devt(&disk->part0, &devt); + if (retval) { + WARN_ON(1); + return; + } + disk_to_dev(disk)->devt = devt; + + /* ->major and ->first_minor aren't supposed to be + * dereferenced from here on, but set them just in case. + */ + disk->major = MAJOR(devt); + disk->first_minor = MINOR(devt); + + blk_register_region(disk_devt(disk), disk->minors, NULL, + exact_match, exact_lock, disk); register_disk(disk); blk_register_queue(disk); bdi = &disk->queue->backing_dev_info; - bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor)); - retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi"); + bdi_register_dev(bdi, disk_devt(disk)); + retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, + "bdi"); WARN_ON(retval); } @@ -202,78 +526,71 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */ void unlink_gendisk(struct gendisk *disk) { - sysfs_remove_link(&disk->dev.kobj, "bdi"); + sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_queue(disk); - blk_unregister_region(MKDEV(disk->major, disk->first_minor), - disk->minors); + blk_unregister_region(disk_devt(disk), disk->minors); } /** * get_gendisk - get partitioning information for a given device - * @dev: device to get partitioning information for + * @devt: device to get partitioning information for + * @part: returned partition index * * This function gets the structure containing partitioning - * information for the given device @dev. + * information for the given device @devt. */ -struct gendisk *get_gendisk(dev_t devt, int *part) +struct gendisk *get_gendisk(dev_t devt, int *partno) { - struct kobject *kobj = kobj_lookup(bdev_map, devt, part); - struct device *dev = kobj_to_dev(kobj); + struct gendisk *disk = NULL; + + if (MAJOR(devt) != BLOCK_EXT_MAJOR) { + struct kobject *kobj; + + kobj = kobj_lookup(bdev_map, devt, partno); + if (kobj) + disk = dev_to_disk(kobj_to_dev(kobj)); + } else { + struct hd_struct *part; - return kobj ? dev_to_disk(dev) : NULL; + mutex_lock(&ext_devt_mutex); + part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); + if (part && get_disk(part_to_disk(part))) { + *partno = part->partno; + disk = part_to_disk(part); + } + mutex_unlock(&ext_devt_mutex); + } + + return disk; } -/* - * print a partitions - intended for places where the root filesystem can't be - * mounted and thus to give the victim some idea of what went wrong +/** + * bdget_disk - do bdget() by gendisk and partition number + * @disk: gendisk of interest + * @partno: partition number + * + * Find partition @partno from @disk, do bdget() on it. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * Resulting block_device on success, NULL on failure. */ -static int printk_partition(struct device *dev, void *data) +struct block_device *bdget_disk(struct gendisk *disk, int partno) { - struct gendisk *sgp; - char buf[BDEVNAME_SIZE]; - int n; - - if (dev->type != &disk_type) - goto exit; + struct hd_struct *part; + struct block_device *bdev = NULL; - sgp = dev_to_disk(dev); - /* - * Don't show empty devices or things that have been surpressed - */ - if (get_capacity(sgp) == 0 || - (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) - goto exit; + part = disk_get_part(disk, partno); + if (part) + bdev = bdget(part_devt(part)); + disk_put_part(part); - /* - * Note, unlike /proc/partitions, I am showing the numbers in - * hex - the same format as the root= option takes. - */ - printk("%02x%02x %10llu %s", - sgp->major, sgp->first_minor, - (unsigned long long)get_capacity(sgp) >> 1, - disk_name(sgp, 0, buf)); - if (sgp->driverfs_dev != NULL && - sgp->driverfs_dev->driver != NULL) - printk(" driver: %s\n", - sgp->driverfs_dev->driver->name); - else - printk(" (driver?)\n"); - - /* now show the partitions */ - for (n = 0; n < sgp->minors - 1; ++n) { - if (sgp->part[n] == NULL) - goto exit; - if (sgp->part[n]->nr_sects == 0) - goto exit; - printk(" %02x%02x %10llu %s\n", - sgp->major, n + 1 + sgp->first_minor, - (unsigned long long)sgp->part[n]->nr_sects >> 1, - disk_name(sgp, n + 1, buf)); - } -exit: - return 0; + return bdev; } +EXPORT_SYMBOL(bdget_disk); /* * print a full list of all partitions - intended for places where the root @@ -282,120 +599,145 @@ exit: */ void __init printk_all_partitions(void) { - mutex_lock(&block_class_lock); - class_for_each_device(&block_class, NULL, NULL, printk_partition); - mutex_unlock(&block_class_lock); + struct class_dev_iter iter; + struct device *dev; + + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); + while ((dev = class_dev_iter_next(&iter))) { + struct gendisk *disk = dev_to_disk(dev); + struct disk_part_iter piter; + struct hd_struct *part; + char name_buf[BDEVNAME_SIZE]; + char devt_buf[BDEVT_SIZE]; + + /* + * Don't show empty devices or things that have been + * surpressed + */ + if (get_capacity(disk) == 0 || + (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) + continue; + + /* + * Note, unlike /proc/partitions, I am showing the + * numbers in hex - the same format as the root= + * option takes. + */ + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0); + while ((part = disk_part_iter_next(&piter))) { + bool is_part0 = part == &disk->part0; + + printk("%s%s %10llu %s", is_part0 ? "" : " ", + bdevt_str(part_devt(part), devt_buf), + (unsigned long long)part->nr_sects >> 1, + disk_name(disk, part->partno, name_buf)); + if (is_part0) { + if (disk->driverfs_dev != NULL && + disk->driverfs_dev->driver != NULL) + printk(" driver: %s\n", + disk->driverfs_dev->driver->name); + else + printk(" (driver?)\n"); + } else + printk("\n"); + } + disk_part_iter_exit(&piter); + } + class_dev_iter_exit(&iter); } #ifdef CONFIG_PROC_FS /* iterator */ -static int find_start(struct device *dev, void *data) +static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos) { - loff_t *k = data; + loff_t skip = *pos; + struct class_dev_iter *iter; + struct device *dev; - if (dev->type != &disk_type) - return 0; - if (!*k) - return 1; - (*k)--; - return 0; + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) + return ERR_PTR(-ENOMEM); + + seqf->private = iter; + class_dev_iter_init(iter, &block_class, NULL, &disk_type); + do { + dev = class_dev_iter_next(iter); + if (!dev) + return NULL; + } while (skip--); + + return dev_to_disk(dev); } -static void *part_start(struct seq_file *part, loff_t *pos) +static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos) { struct device *dev; - loff_t k = *pos; - - if (!k) - part->private = (void *)1LU; /* tell show to print header */ - mutex_lock(&block_class_lock); - dev = class_find_device(&block_class, NULL, &k, find_start); - if (dev) { - put_device(dev); + (*pos)++; + dev = class_dev_iter_next(seqf->private); + if (dev) return dev_to_disk(dev); - } + return NULL; } -static int find_next(struct device *dev, void *data) +static void disk_seqf_stop(struct seq_file *seqf, void *v) { - if (dev->type == &disk_type) - return 1; - return 0; -} + struct class_dev_iter *iter = seqf->private; -static void *part_next(struct seq_file *part, void *v, loff_t *pos) -{ - struct gendisk *gp = v; - struct device *dev; - ++*pos; - dev = class_find_device(&block_class, &gp->dev, NULL, find_next); - if (dev) { - put_device(dev); - return dev_to_disk(dev); + /* stop is called even after start failed :-( */ + if (iter) { + class_dev_iter_exit(iter); + kfree(iter); } - return NULL; } -static void part_stop(struct seq_file *part, void *v) +static void *show_partition_start(struct seq_file *seqf, loff_t *pos) { - mutex_unlock(&block_class_lock); + static void *p; + + p = disk_seqf_start(seqf, pos); + if (!IS_ERR(p) && p && !*pos) + seq_puts(seqf, "major minor #blocks name\n\n"); + return p; } -static int show_partition(struct seq_file *part, void *v) +static int show_partition(struct seq_file *seqf, void *v) { struct gendisk *sgp = v; - int n; + struct disk_part_iter piter; + struct hd_struct *part; char buf[BDEVNAME_SIZE]; - /* - * Print header if start told us to do. This is to preserve - * the original behavior of not printing header if no - * partition exists. This hackery will be removed later with - * class iteration clean up. - */ - if (part->private) { - seq_puts(part, "major minor #blocks name\n\n"); - part->private = NULL; - } - /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || - (sgp->minors == 1 && (sgp->flags & GENHD_FL_REMOVABLE))) + if (!get_capacity(sgp) || (!disk_partitionable(sgp) && + (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) return 0; /* show the full disk and all non-0 size partitions of it */ - seq_printf(part, "%4d %4d %10llu %s\n", - sgp->major, sgp->first_minor, - (unsigned long long)get_capacity(sgp) >> 1, - disk_name(sgp, 0, buf)); - for (n = 0; n < sgp->minors - 1; n++) { - if (!sgp->part[n]) - continue; - if (sgp->part[n]->nr_sects == 0) - continue; - seq_printf(part, "%4d %4d %10llu %s\n", - sgp->major, n + 1 + sgp->first_minor, - (unsigned long long)sgp->part[n]->nr_sects >> 1 , - disk_name(sgp, n + 1, buf)); - } + disk_part_iter_init(&piter, sgp, DISK_PITER_INCL_PART0); + while ((part = disk_part_iter_next(&piter))) + seq_printf(seqf, "%4d %7d %10llu %s\n", + MAJOR(part_devt(part)), MINOR(part_devt(part)), + (unsigned long long)part->nr_sects >> 1, + disk_name(sgp, part->partno, buf)); + disk_part_iter_exit(&piter); return 0; } const struct seq_operations partitions_op = { - .start = part_start, - .next = part_next, - .stop = part_stop, + .start = show_partition_start, + .next = disk_seqf_next, + .stop = disk_seqf_stop, .show = show_partition }; #endif -static struct kobject *base_probe(dev_t devt, int *part, void *data) +static struct kobject *base_probe(dev_t devt, int *partno, void *data) { if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0) /* Make old-style 2.4 aliases work */ @@ -431,29 +773,29 @@ static ssize_t disk_range_show(struct device *dev, return sprintf(buf, "%d\n", disk->minors); } -static ssize_t disk_removable_show(struct device *dev, +static ssize_t disk_ext_range_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", - (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); + return sprintf(buf, "%d\n", disk_max_parts(disk)); } -static ssize_t disk_ro_show(struct device *dev, +static ssize_t disk_removable_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%d\n", disk->policy ? 1 : 0); + return sprintf(buf, "%d\n", + (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0)); } -static ssize_t disk_size_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t disk_ro_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk)); + return sprintf(buf, "%d\n", get_disk_ro(disk) ? 1 : 0); } static ssize_t disk_capability_show(struct device *dev, @@ -464,73 +806,26 @@ static ssize_t disk_capability_show(struct device *dev, return sprintf(buf, "%x\n", disk->flags); } -static ssize_t disk_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gendisk *disk = dev_to_disk(dev); - - preempt_disable(); - disk_round_stats(disk); - preempt_enable(); - return sprintf(buf, - "%8lu %8lu %8llu %8u " - "%8lu %8lu %8llu %8u " - "%8u %8u %8u" - "\n", - disk_stat_read(disk, ios[READ]), - disk_stat_read(disk, merges[READ]), - (unsigned long long)disk_stat_read(disk, sectors[READ]), - jiffies_to_msecs(disk_stat_read(disk, ticks[READ])), - disk_stat_read(disk, ios[WRITE]), - disk_stat_read(disk, merges[WRITE]), - (unsigned long long)disk_stat_read(disk, sectors[WRITE]), - jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])), - disk->in_flight, - jiffies_to_msecs(disk_stat_read(disk, io_ticks)), - jiffies_to_msecs(disk_stat_read(disk, time_in_queue))); -} - -#ifdef CONFIG_FAIL_MAKE_REQUEST -static ssize_t disk_fail_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct gendisk *disk = dev_to_disk(dev); - - return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0); -} - -static ssize_t disk_fail_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct gendisk *disk = dev_to_disk(dev); - int i; - - if (count > 0 && sscanf(buf, "%d", &i) > 0) { - if (i == 0) - disk->flags &= ~GENHD_FL_FAIL; - else - disk->flags |= GENHD_FL_FAIL; - } - - return count; -} - -#endif - static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); +static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL); static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL); static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL); -static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL); +static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); -static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL); +static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); #ifdef CONFIG_FAIL_MAKE_REQUEST static struct device_attribute dev_attr_fail = - __ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store); + __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); +#endif +#ifdef CONFIG_FAIL_IO_TIMEOUT +static struct device_attribute dev_attr_fail_timeout = + __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show, + part_timeout_store); #endif static struct attribute *disk_attrs[] = { &dev_attr_range.attr, + &dev_attr_ext_range.attr, &dev_attr_removable.attr, &dev_attr_ro.attr, &dev_attr_size.attr, @@ -539,6 +834,9 @@ static struct attribute *disk_attrs[] = { #ifdef CONFIG_FAIL_MAKE_REQUEST &dev_attr_fail.attr, #endif +#ifdef CONFIG_FAIL_IO_TIMEOUT + &dev_attr_fail_timeout.attr, +#endif NULL }; @@ -551,13 +849,87 @@ static struct attribute_group *disk_attr_groups[] = { NULL }; +static void disk_free_ptbl_rcu_cb(struct rcu_head *head) +{ + struct disk_part_tbl *ptbl = + container_of(head, struct disk_part_tbl, rcu_head); + + kfree(ptbl); +} + +/** + * disk_replace_part_tbl - replace disk->part_tbl in RCU-safe way + * @disk: disk to replace part_tbl for + * @new_ptbl: new part_tbl to install + * + * Replace disk->part_tbl with @new_ptbl in RCU-safe way. The + * original ptbl is freed using RCU callback. + * + * LOCKING: + * Matching bd_mutx locked. + */ +static void disk_replace_part_tbl(struct gendisk *disk, + struct disk_part_tbl *new_ptbl) +{ + struct disk_part_tbl *old_ptbl = disk->part_tbl; + + rcu_assign_pointer(disk->part_tbl, new_ptbl); + if (old_ptbl) + call_rcu(&old_ptbl->rcu_head, disk_free_ptbl_rcu_cb); +} + +/** + * disk_expand_part_tbl - expand disk->part_tbl + * @disk: disk to expand part_tbl for + * @partno: expand such that this partno can fit in + * + * Expand disk->part_tbl such that @partno can fit in. disk->part_tbl + * uses RCU to allow unlocked dereferencing for stats and other stuff. + * + * LOCKING: + * Matching bd_mutex locked, might sleep. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int disk_expand_part_tbl(struct gendisk *disk, int partno) +{ + struct disk_part_tbl *old_ptbl = disk->part_tbl; + struct disk_part_tbl *new_ptbl; + int len = old_ptbl ? old_ptbl->len : 0; + int target = partno + 1; + size_t size; + int i; + + /* disk_max_parts() is zero during initialization, ignore if so */ + if (disk_max_parts(disk) && target > disk_max_parts(disk)) + return -EINVAL; + + if (target <= len) + return 0; + + size = sizeof(*new_ptbl) + target * sizeof(new_ptbl->part[0]); + new_ptbl = kzalloc_node(size, GFP_KERNEL, disk->node_id); + if (!new_ptbl) + return -ENOMEM; + + INIT_RCU_HEAD(&new_ptbl->rcu_head); + new_ptbl->len = target; + + for (i = 0; i < len; i++) + rcu_assign_pointer(new_ptbl->part[i], old_ptbl->part[i]); + + disk_replace_part_tbl(disk, new_ptbl); + return 0; +} + static void disk_release(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); kfree(disk->random); - kfree(disk->part); - free_disk_stats(disk); + disk_replace_part_tbl(disk, NULL); + free_part_stats(&disk->part0); kfree(disk); } struct class block_class = { @@ -578,83 +950,31 @@ static struct device_type disk_type = { * The output looks suspiciously like /proc/partitions with a bunch of * extra fields. */ - -static void *diskstats_start(struct seq_file *part, loff_t *pos) -{ - struct device *dev; - loff_t k = *pos; - - mutex_lock(&block_class_lock); - dev = class_find_device(&block_class, NULL, &k, find_start); - if (dev) { - put_device(dev); - return dev_to_disk(dev); - } - return NULL; -} - -static void *diskstats_next(struct seq_file *part, void *v, loff_t *pos) -{ - struct gendisk *gp = v; - struct device *dev; - - ++*pos; - dev = class_find_device(&block_class, &gp->dev, NULL, find_next); - if (dev) { - put_device(dev); - return dev_to_disk(dev); - } - return NULL; -} - -static void diskstats_stop(struct seq_file *part, void *v) -{ - mutex_unlock(&block_class_lock); -} - -static int diskstats_show(struct seq_file *s, void *v) +static int diskstats_show(struct seq_file *seqf, void *v) { struct gendisk *gp = v; + struct disk_part_iter piter; + struct hd_struct *hd; char buf[BDEVNAME_SIZE]; - int n = 0; + int cpu; /* - if (&gp->dev.kobj.entry == block_class.devices.next) - seq_puts(s, "major minor name" + if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) + seq_puts(seqf, "major minor name" " rio rmerge rsect ruse wio wmerge " "wsect wuse running use aveq" "\n\n"); */ - preempt_disable(); - disk_round_stats(gp); - preempt_enable(); - seq_printf(s, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n", - gp->major, n + gp->first_minor, disk_name(gp, n, buf), - disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]), - (unsigned long long)disk_stat_read(gp, sectors[0]), - jiffies_to_msecs(disk_stat_read(gp, ticks[0])), - disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]), - (unsigned long long)disk_stat_read(gp, sectors[1]), - jiffies_to_msecs(disk_stat_read(gp, ticks[1])), - gp->in_flight, - jiffies_to_msecs(disk_stat_read(gp, io_ticks)), - jiffies_to_msecs(disk_stat_read(gp, time_in_queue))); - - /* now show all non-0 size partitions of it */ - for (n = 0; n < gp->minors - 1; n++) { - struct hd_struct *hd = gp->part[n]; - - if (!hd || !hd->nr_sects) - continue; - - preempt_disable(); - part_round_stats(hd); - preempt_enable(); - seq_printf(s, "%4d %4d %s %lu %lu %llu " + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); + while ((hd = disk_part_iter_next(&piter))) { + cpu = part_stat_lock(); + part_round_stats(cpu, hd); + part_stat_unlock(); + seq_printf(seqf, "%4d %7d %s %lu %lu %llu " "%u %lu %lu %llu %u %u %u %u\n", - gp->major, n + gp->first_minor + 1, - disk_name(gp, n + 1, buf), + MAJOR(part_devt(hd)), MINOR(part_devt(hd)), + disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[0]), part_stat_read(hd, merges[0]), (unsigned long long)part_stat_read(hd, sectors[0]), @@ -668,14 +988,15 @@ static int diskstats_show(struct seq_file *s, void *v) jiffies_to_msecs(part_stat_read(hd, time_in_queue)) ); } + disk_part_iter_exit(&piter); return 0; } const struct seq_operations diskstats_op = { - .start = diskstats_start, - .next = diskstats_next, - .stop = diskstats_stop, + .start = disk_seqf_start, + .next = disk_seqf_next, + .stop = disk_seqf_stop, .show = diskstats_show }; #endif /* CONFIG_PROC_FS */ @@ -690,7 +1011,7 @@ static void media_change_notify_thread(struct work_struct *work) * set enviroment vars to indicate which event this is for * so that user space will know to go check the media status. */ - kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp); + kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp); put_device(gd->driverfs_dev); } @@ -703,42 +1024,29 @@ void genhd_media_change_notify(struct gendisk *disk) EXPORT_SYMBOL_GPL(genhd_media_change_notify); #endif /* 0 */ -struct find_block { - const char *name; - int part; -}; - -static int match_id(struct device *dev, void *data) +dev_t blk_lookup_devt(const char *name, int partno) { - struct find_block *find = data; + dev_t devt = MKDEV(0, 0); + struct class_dev_iter iter; + struct device *dev; - if (dev->type != &disk_type) - return 0; - if (strcmp(dev->bus_id, find->name) == 0) { + class_dev_iter_init(&iter, &block_class, NULL, &disk_type); + while ((dev = class_dev_iter_next(&iter))) { struct gendisk *disk = dev_to_disk(dev); - if (find->part < disk->minors) - return 1; - } - return 0; -} + struct hd_struct *part; -dev_t blk_lookup_devt(const char *name, int part) -{ - struct device *dev; - dev_t devt = MKDEV(0, 0); - struct find_block find; + if (strcmp(dev->bus_id, name)) + continue; - mutex_lock(&block_class_lock); - find.name = name; - find.part = part; - dev = class_find_device(&block_class, NULL, &find, match_id); - if (dev) { - put_device(dev); - devt = MKDEV(MAJOR(dev->devt), - MINOR(dev->devt) + part); + part = disk_get_part(disk, partno); + if (part) { + devt = part_devt(part); + disk_put_part(part); + break; + } + disk_put_part(part); } - mutex_unlock(&block_class_lock); - + class_dev_iter_exit(&iter); return devt; } EXPORT_SYMBOL(blk_lookup_devt); @@ -747,6 +1055,7 @@ struct gendisk *alloc_disk(int minors) { return alloc_disk_node(minors, -1); } +EXPORT_SYMBOL(alloc_disk); struct gendisk *alloc_disk_node(int minors, int node_id) { @@ -755,32 +1064,28 @@ struct gendisk *alloc_disk_node(int minors, int node_id) disk = kmalloc_node(sizeof(struct gendisk), GFP_KERNEL | __GFP_ZERO, node_id); if (disk) { - if (!init_disk_stats(disk)) { + if (!init_part_stats(&disk->part0)) { kfree(disk); return NULL; } - if (minors > 1) { - int size = (minors - 1) * sizeof(struct hd_struct *); - disk->part = kmalloc_node(size, - GFP_KERNEL | __GFP_ZERO, node_id); - if (!disk->part) { - free_disk_stats(disk); - kfree(disk); - return NULL; - } + if (disk_expand_part_tbl(disk, 0)) { + free_part_stats(&disk->part0); + kfree(disk); + return NULL; } + disk->part_tbl->part[0] = &disk->part0; + disk->minors = minors; rand_initialize_disk(disk); - disk->dev.class = &block_class; - disk->dev.type = &disk_type; - device_initialize(&disk->dev); + disk_to_dev(disk)->class = &block_class; + disk_to_dev(disk)->type = &disk_type; + device_initialize(disk_to_dev(disk)); INIT_WORK(&disk->async_notify, media_change_notify_thread); + disk->node_id = node_id; } return disk; } - -EXPORT_SYMBOL(alloc_disk); EXPORT_SYMBOL(alloc_disk_node); struct kobject *get_disk(struct gendisk *disk) @@ -793,7 +1098,7 @@ struct kobject *get_disk(struct gendisk *disk) owner = disk->fops->owner; if (owner && !try_module_get(owner)) return NULL; - kobj = kobject_get(&disk->dev.kobj); + kobj = kobject_get(&disk_to_dev(disk)->kobj); if (kobj == NULL) { module_put(owner); return NULL; @@ -807,27 +1112,28 @@ EXPORT_SYMBOL(get_disk); void put_disk(struct gendisk *disk) { if (disk) - kobject_put(&disk->dev.kobj); + kobject_put(&disk_to_dev(disk)->kobj); } EXPORT_SYMBOL(put_disk); void set_device_ro(struct block_device *bdev, int flag) { - if (bdev->bd_contains != bdev) - bdev->bd_part->policy = flag; - else - bdev->bd_disk->policy = flag; + bdev->bd_part->policy = flag; } EXPORT_SYMBOL(set_device_ro); void set_disk_ro(struct gendisk *disk, int flag) { - int i; - disk->policy = flag; - for (i = 0; i < disk->minors - 1; i++) - if (disk->part[i]) disk->part[i]->policy = flag; + struct disk_part_iter piter; + struct hd_struct *part; + + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0); + while ((part = disk_part_iter_next(&piter))) + part->policy = flag; + disk_part_iter_exit(&piter); } EXPORT_SYMBOL(set_disk_ro); @@ -836,18 +1142,15 @@ int bdev_read_only(struct block_device *bdev) { if (!bdev) return 0; - else if (bdev->bd_contains != bdev) - return bdev->bd_part->policy; - else - return bdev->bd_disk->policy; + return bdev->bd_part->policy; } EXPORT_SYMBOL(bdev_read_only); -int invalidate_partition(struct gendisk *disk, int index) +int invalidate_partition(struct gendisk *disk, int partno) { int res = 0; - struct block_device *bdev = bdget_disk(disk, index); + struct block_device *bdev = bdget_disk(disk, partno); if (bdev) { fsync_bdev(bdev); res = __invalidate_device(bdev); diff --git a/block/ioctl.c b/block/ioctl.c index 77185e5..38bee321 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -12,11 +12,12 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user { struct block_device *bdevp; struct gendisk *disk; + struct hd_struct *part; struct blkpg_ioctl_arg a; struct blkpg_partition p; + struct disk_part_iter piter; long long start, length; - int part; - int i; + int partno; int err; if (!capable(CAP_SYS_ADMIN)) @@ -28,8 +29,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user disk = bdev->bd_disk; if (bdev != bdev->bd_contains) return -EINVAL; - part = p.pno; - if (part <= 0 || part >= disk->minors) + partno = p.pno; + if (partno <= 0) return -EINVAL; switch (a.op) { case BLKPG_ADD_PARTITION: @@ -43,36 +44,37 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user || pstart < 0 || plength < 0) return -EINVAL; } - /* partition number in use? */ + mutex_lock(&bdev->bd_mutex); - if (disk->part[part - 1]) { - mutex_unlock(&bdev->bd_mutex); - return -EBUSY; - } - /* overlap? */ - for (i = 0; i < disk->minors - 1; i++) { - struct hd_struct *s = disk->part[i]; - if (!s) - continue; - if (!(start+length <= s->start_sect || - start >= s->start_sect + s->nr_sects)) { + /* overlap? */ + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) { + if (!(start + length <= part->start_sect || + start >= part->start_sect + part->nr_sects)) { + disk_part_iter_exit(&piter); mutex_unlock(&bdev->bd_mutex); return -EBUSY; } } + disk_part_iter_exit(&piter); + /* all seems OK */ - err = add_partition(disk, part, start, length, ADDPART_FLAG_NONE); + err = add_partition(disk, partno, start, length, + ADDPART_FLAG_NONE); mutex_unlock(&bdev->bd_mutex); return err; case BLKPG_DEL_PARTITION: - if (!disk->part[part-1]) - return -ENXIO; - if (disk->part[part - 1]->nr_sects == 0) + part = disk_get_part(disk, partno); + if (!part) return -ENXIO; - bdevp = bdget_disk(disk, part); + + bdevp = bdget(part_devt(part)); + disk_put_part(part); if (!bdevp) return -ENOMEM; + mutex_lock(&bdevp->bd_mutex); if (bdevp->bd_openers) { mutex_unlock(&bdevp->bd_mutex); @@ -84,7 +86,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user invalidate_bdev(bdevp); mutex_lock_nested(&bdev->bd_mutex, 1); - delete_partition(disk, part); + delete_partition(disk, partno); mutex_unlock(&bdev->bd_mutex); mutex_unlock(&bdevp->bd_mutex); bdput(bdevp); @@ -100,7 +102,7 @@ static int blkdev_reread_part(struct block_device *bdev) struct gendisk *disk = bdev->bd_disk; int res; - if (disk->minors == 1 || bdev != bdev->bd_contains) + if (!disk_partitionable(disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -111,6 +113,69 @@ static int blkdev_reread_part(struct block_device *bdev) return res; } +static void blk_ioc_discard_endio(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + complete(bio->bi_private); +} + +static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, + uint64_t len) +{ + struct request_queue *q = bdev_get_queue(bdev); + int ret = 0; + + if (start & 511) + return -EINVAL; + if (len & 511) + return -EINVAL; + start >>= 9; + len >>= 9; + + if (start + len > (bdev->bd_inode->i_size >> 9)) + return -EINVAL; + + if (!q->prepare_discard_fn) + return -EOPNOTSUPP; + + while (len && !ret) { + DECLARE_COMPLETION_ONSTACK(wait); + struct bio *bio; + + bio = bio_alloc(GFP_KERNEL, 0); + if (!bio) + return -ENOMEM; + + bio->bi_end_io = blk_ioc_discard_endio; + bio->bi_bdev = bdev; + bio->bi_private = &wait; + bio->bi_sector = start; + + if (len > q->max_hw_sectors) { + bio->bi_size = q->max_hw_sectors << 9; + len -= q->max_hw_sectors; + start += q->max_hw_sectors; + } else { + bio->bi_size = len << 9; + len = 0; + } + submit_bio(DISCARD_NOBARRIER, bio); + + wait_for_completion(&wait); + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + bio_put(bio); + } + return ret; +} + static int put_ushort(unsigned long arg, unsigned short val) { return put_user(val, (unsigned short __user *)arg); @@ -258,6 +323,19 @@ int blkdev_ioctl(struct inode *inode, struct file *file, unsigned cmd, set_device_ro(bdev, n); unlock_kernel(); return 0; + + case BLKDISCARD: { + uint64_t range[2]; + + if (!(file->f_mode & FMODE_WRITE)) + return -EBADF; + + if (copy_from_user(range, (void __user *)arg, sizeof(range))) + return -EFAULT; + + return blk_ioctl_discard(bdev, range[0], range[1]); + } + case HDIO_GETGEO: { struct hd_geometry geo; diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index ec4b7f2..c34272a 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -185,6 +185,7 @@ void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok); __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok); __set_bit(GPCMD_SET_STREAMING, filter->write_ok); + __set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok); } EXPORT_SYMBOL_GPL(blk_set_cmd_filter_defaults); @@ -313,11 +314,12 @@ static int sg_io(struct file *file, struct request_queue *q, goto out; } - ret = blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count, - hdr->dxfer_len); + ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count, + hdr->dxfer_len, GFP_KERNEL); kfree(iov); } else if (hdr->dxfer_len) - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, + GFP_KERNEL); if (ret) goto out; diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 0841095..8dd3336 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -165,8 +165,11 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) "firmware_node"); ret = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, "physical_node"); - if (acpi_dev->wakeup.flags.valid) + if (acpi_dev->wakeup.flags.valid) { device_set_wakeup_capable(dev, true); + device_set_wakeup_enable(dev, + acpi_dev->wakeup.state.enabled); + } } return 0; diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c index 4ebbba2..bf5b04d 100644 --- a/drivers/acpi/sleep/proc.c +++ b/drivers/acpi/sleep/proc.c @@ -377,6 +377,14 @@ acpi_system_wakeup_device_seq_show(struct seq_file *seq, void *offset) return 0; } +static void physical_device_enable_wakeup(struct acpi_device *adev) +{ + struct device *dev = acpi_get_physical_device(adev->handle); + + if (dev && device_can_wakeup(dev)) + device_set_wakeup_enable(dev, adev->wakeup.state.enabled); +} + static ssize_t acpi_system_write_wakeup_device(struct file *file, const char __user * buffer, @@ -411,6 +419,7 @@ acpi_system_write_wakeup_device(struct file *file, } } if (found_dev) { + physical_device_enable_wakeup(found_dev); list_for_each_safe(node, next, &acpi_wakeup_device_list) { struct acpi_device *dev = container_of(node, struct @@ -428,6 +437,7 @@ acpi_system_write_wakeup_device(struct file *file, dev->pnp.bus_id, found_dev->pnp.bus_id); dev->wakeup.state.enabled = found_dev->wakeup.state.enabled; + physical_device_enable_wakeup(dev); } } } diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 11c8c19..f17cd4b 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -663,7 +663,7 @@ config HAVE_PATA_PLATFORM config PATA_PLATFORM tristate "Generic platform device PATA support" - depends on EMBEDDED || ARCH_RPC || PPC || HAVE_PATA_PLATFORM + depends on EMBEDDED || PPC || HAVE_PATA_PLATFORM help This option enables support for generic directly connected ATA devices commonly found on embedded systems. diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 2e1a7cb..aeadd00 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -267,8 +267,8 @@ struct ahci_port_priv { * per PM slot */ }; -static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); -static int ahci_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val); +static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); +static int ahci_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static unsigned int ahci_qc_issue(struct ata_queued_cmd *qc); static bool ahci_qc_fill_rtf(struct ata_queued_cmd *qc); @@ -316,6 +316,7 @@ static struct device_attribute *ahci_shost_attrs[] = { static struct device_attribute *ahci_sdev_attrs[] = { &dev_attr_sw_activity, + &dev_attr_unload_heads, NULL }; @@ -820,10 +821,10 @@ static unsigned ahci_scr_offset(struct ata_port *ap, unsigned int sc_reg) return 0; } -static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val) { - void __iomem *port_mmio = ahci_port_base(ap); - int offset = ahci_scr_offset(ap, sc_reg); + void __iomem *port_mmio = ahci_port_base(link->ap); + int offset = ahci_scr_offset(link->ap, sc_reg); if (offset) { *val = readl(port_mmio + offset); @@ -832,10 +833,10 @@ static int ahci_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) return -EINVAL; } -static int ahci_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int ahci_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) { - void __iomem *port_mmio = ahci_port_base(ap); - int offset = ahci_scr_offset(ap, sc_reg); + void __iomem *port_mmio = ahci_port_base(link->ap); + int offset = ahci_scr_offset(link->ap, sc_reg); if (offset) { writel(val, port_mmio + offset); @@ -973,7 +974,7 @@ static void ahci_disable_alpm(struct ata_port *ap) writel(PORT_IRQ_PHYRDY, port_mmio + PORT_IRQ_STAT); /* go ahead and clean out PhyRdy Change from Serror too */ - ahci_scr_write(ap, SCR_ERROR, ((1 << 16) | (1 << 18))); + ahci_scr_write(&ap->link, SCR_ERROR, ((1 << 16) | (1 << 18))); /* * Clear flag to indicate that we should ignore all PhyRdy @@ -1937,8 +1938,8 @@ static void ahci_error_intr(struct ata_port *ap, u32 irq_stat) ata_ehi_push_desc(host_ehi, "irq_stat 0x%08x", irq_stat); /* AHCI needs SError cleared; otherwise, it might lock up */ - ahci_scr_read(ap, SCR_ERROR, &serror); - ahci_scr_write(ap, SCR_ERROR, serror); + ahci_scr_read(&ap->link, SCR_ERROR, &serror); + ahci_scr_write(&ap->link, SCR_ERROR, serror); host_ehi->serror |= serror; /* some controllers set IRQ_IF_ERR on device errors, ignore it */ @@ -2027,7 +2028,7 @@ static void ahci_port_intr(struct ata_port *ap) if ((hpriv->flags & AHCI_HFLAG_NO_HOTPLUG) && (status & PORT_IRQ_PHYRDY)) { status &= ~PORT_IRQ_PHYRDY; - ahci_scr_write(ap, SCR_ERROR, ((1 << 16) | (1 << 18))); + ahci_scr_write(&ap->link, SCR_ERROR, ((1 << 16) | (1 << 18))); } if (unlikely(status & PORT_IRQ_ERROR)) { diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c index e6b4606..e9e32ed 100644 --- a/drivers/ata/ata_piix.c +++ b/drivers/ata/ata_piix.c @@ -165,8 +165,10 @@ static void piix_set_dmamode(struct ata_port *ap, struct ata_device *adev); static void ich_set_dmamode(struct ata_port *ap, struct ata_device *adev); static int ich_pata_cable_detect(struct ata_port *ap); static u8 piix_vmw_bmdma_status(struct ata_port *ap); -static int piix_sidpr_scr_read(struct ata_port *ap, unsigned int reg, u32 *val); -static int piix_sidpr_scr_write(struct ata_port *ap, unsigned int reg, u32 val); +static int piix_sidpr_scr_read(struct ata_link *link, + unsigned int reg, u32 *val); +static int piix_sidpr_scr_write(struct ata_link *link, + unsigned int reg, u32 val); #ifdef CONFIG_PM static int piix_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg); static int piix_pci_device_resume(struct pci_dev *pdev); @@ -278,12 +280,15 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller IDE (PCH) */ { 0x8086, 0x3b20, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata }, /* SATA Controller IDE (PCH) */ + { 0x8086, 0x3b21, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, + /* SATA Controller IDE (PCH) */ { 0x8086, 0x3b26, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, /* SATA Controller IDE (PCH) */ + { 0x8086, 0x3b28, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata }, + /* SATA Controller IDE (PCH) */ { 0x8086, 0x3b2d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_2port_sata }, /* SATA Controller IDE (PCH) */ { 0x8086, 0x3b2e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata }, - { } /* terminate list */ }; @@ -582,6 +587,7 @@ static const struct ich_laptop ich_laptop[] = { { 0x27DF, 0x1025, 0x0110 }, /* ICH7 on Acer 3682WLMi */ { 0x27DF, 0x1043, 0x1267 }, /* ICH7 on Asus W5F */ { 0x27DF, 0x103C, 0x30A1 }, /* ICH7 on HP Compaq nc2400 */ + { 0x27DF, 0x1071, 0xD221 }, /* ICH7 on Hercules EC-900 */ { 0x24CA, 0x1025, 0x0061 }, /* ICH4 on ACER Aspire 2023WLMi */ { 0x24CA, 0x1025, 0x003d }, /* ICH4 on ACER TM290 */ { 0x266F, 0x1025, 0x0066 }, /* ICH6 on ACER Aspire 1694WLMi */ @@ -885,23 +891,9 @@ static void ich_set_dmamode(struct ata_port *ap, struct ata_device *adev) * Serial ATA Index/Data Pair Superset Registers access * * Beginning from ICH8, there's a sane way to access SCRs using index - * and data register pair located at BAR5. This creates an - * interesting problem of mapping two SCRs to one port. - * - * Although they have separate SCRs, the master and slave aren't - * independent enough to be treated as separate links - e.g. softreset - * resets both. Also, there's no protocol defined for hard resetting - * singled device sharing the virtual port (no defined way to acquire - * device signature). This is worked around by merging the SCR values - * into one sensible value and requesting follow-up SRST after - * hardreset. - * - * SCR merging is perfomed in nibbles which is the unit contents in - * SCRs are organized. If two values are equal, the value is used. - * When they differ, merge table which lists precedence of possible - * values is consulted and the first match or the last entry when - * nothing matches is used. When there's no merge table for the - * specific nibble, value from the first port is used. + * and data register pair located at BAR5 which means that we have + * separate SCRs for master and slave. This is handled using libata + * slave_link facility. */ static const int piix_sidx_map[] = { [SCR_STATUS] = 0, @@ -909,120 +901,38 @@ static const int piix_sidx_map[] = { [SCR_CONTROL] = 1, }; -static void piix_sidpr_sel(struct ata_device *dev, unsigned int reg) +static void piix_sidpr_sel(struct ata_link *link, unsigned int reg) { - struct ata_port *ap = dev->link->ap; + struct ata_port *ap = link->ap; struct piix_host_priv *hpriv = ap->host->private_data; - iowrite32(((ap->port_no * 2 + dev->devno) << 8) | piix_sidx_map[reg], + iowrite32(((ap->port_no * 2 + link->pmp) << 8) | piix_sidx_map[reg], hpriv->sidpr + PIIX_SIDPR_IDX); } -static int piix_sidpr_read(struct ata_device *dev, unsigned int reg) -{ - struct piix_host_priv *hpriv = dev->link->ap->host->private_data; - - piix_sidpr_sel(dev, reg); - return ioread32(hpriv->sidpr + PIIX_SIDPR_DATA); -} - -static void piix_sidpr_write(struct ata_device *dev, unsigned int reg, u32 val) -{ - struct piix_host_priv *hpriv = dev->link->ap->host->private_data; - - piix_sidpr_sel(dev, reg); - iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA); -} - -static u32 piix_merge_scr(u32 val0, u32 val1, const int * const *merge_tbl) -{ - u32 val = 0; - int i, mi; - - for (i = 0, mi = 0; i < 32 / 4; i++) { - u8 c0 = (val0 >> (i * 4)) & 0xf; - u8 c1 = (val1 >> (i * 4)) & 0xf; - u8 merged = c0; - const int *cur; - - /* if no merge preference, assume the first value */ - cur = merge_tbl[mi]; - if (!cur) - goto done; - mi++; - - /* if two values equal, use it */ - if (c0 == c1) - goto done; - - /* choose the first match or the last from the merge table */ - while (*cur != -1) { - if (c0 == *cur || c1 == *cur) - break; - cur++; - } - if (*cur == -1) - cur--; - merged = *cur; - done: - val |= merged << (i * 4); - } - - return val; -} - -static int piix_sidpr_scr_read(struct ata_port *ap, unsigned int reg, u32 *val) +static int piix_sidpr_scr_read(struct ata_link *link, + unsigned int reg, u32 *val) { - const int * const sstatus_merge_tbl[] = { - /* DET */ (const int []){ 1, 3, 0, 4, 3, -1 }, - /* SPD */ (const int []){ 2, 1, 0, -1 }, - /* IPM */ (const int []){ 6, 2, 1, 0, -1 }, - NULL, - }; - const int * const scontrol_merge_tbl[] = { - /* DET */ (const int []){ 1, 0, 4, 0, -1 }, - /* SPD */ (const int []){ 0, 2, 1, 0, -1 }, - /* IPM */ (const int []){ 0, 1, 2, 3, 0, -1 }, - NULL, - }; - u32 v0, v1; + struct piix_host_priv *hpriv = link->ap->host->private_data; if (reg >= ARRAY_SIZE(piix_sidx_map)) return -EINVAL; - if (!(ap->flags & ATA_FLAG_SLAVE_POSS)) { - *val = piix_sidpr_read(&ap->link.device[0], reg); - return 0; - } - - v0 = piix_sidpr_read(&ap->link.device[0], reg); - v1 = piix_sidpr_read(&ap->link.device[1], reg); - - switch (reg) { - case SCR_STATUS: - *val = piix_merge_scr(v0, v1, sstatus_merge_tbl); - break; - case SCR_ERROR: - *val = v0 | v1; - break; - case SCR_CONTROL: - *val = piix_merge_scr(v0, v1, scontrol_merge_tbl); - break; - } - + piix_sidpr_sel(link, reg); + *val = ioread32(hpriv->sidpr + PIIX_SIDPR_DATA); return 0; } -static int piix_sidpr_scr_write(struct ata_port *ap, unsigned int reg, u32 val) +static int piix_sidpr_scr_write(struct ata_link *link, + unsigned int reg, u32 val) { + struct piix_host_priv *hpriv = link->ap->host->private_data; + if (reg >= ARRAY_SIZE(piix_sidx_map)) return -EINVAL; - piix_sidpr_write(&ap->link.device[0], reg, val); - - if (ap->flags & ATA_FLAG_SLAVE_POSS) - piix_sidpr_write(&ap->link.device[1], reg, val); - + piix_sidpr_sel(link, reg); + iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA); return 0; } @@ -1363,28 +1273,28 @@ static const int *__devinit piix_init_sata_map(struct pci_dev *pdev, return map; } -static void __devinit piix_init_sidpr(struct ata_host *host) +static int __devinit piix_init_sidpr(struct ata_host *host) { struct pci_dev *pdev = to_pci_dev(host->dev); struct piix_host_priv *hpriv = host->private_data; - struct ata_device *dev0 = &host->ports[0]->link.device[0]; + struct ata_link *link0 = &host->ports[0]->link; u32 scontrol; - int i; + int i, rc; /* check for availability */ for (i = 0; i < 4; i++) if (hpriv->map[i] == IDE) - return; + return 0; if (!(host->ports[0]->flags & PIIX_FLAG_SIDPR)) - return; + return 0; if (pci_resource_start(pdev, PIIX_SIDPR_BAR) == 0 || pci_resource_len(pdev, PIIX_SIDPR_BAR) != PIIX_SIDPR_LEN) - return; + return 0; if (pcim_iomap_regions(pdev, 1 << PIIX_SIDPR_BAR, DRV_NAME)) - return; + return 0; hpriv->sidpr = pcim_iomap_table(pdev)[PIIX_SIDPR_BAR]; @@ -1392,7 +1302,7 @@ static void __devinit piix_init_sidpr(struct ata_host *host) * Give it a test drive by inhibiting power save modes which * we'll do anyway. */ - scontrol = piix_sidpr_read(dev0, SCR_CONTROL); + piix_sidpr_scr_read(link0, SCR_CONTROL, &scontrol); /* if IPM is already 3, SCR access is probably working. Don't * un-inhibit power save modes as BIOS might have inhibited @@ -1400,18 +1310,30 @@ static void __devinit piix_init_sidpr(struct ata_host *host) */ if ((scontrol & 0xf00) != 0x300) { scontrol |= 0x300; - piix_sidpr_write(dev0, SCR_CONTROL, scontrol); - scontrol = piix_sidpr_read(dev0, SCR_CONTROL); + piix_sidpr_scr_write(link0, SCR_CONTROL, scontrol); + piix_sidpr_scr_read(link0, SCR_CONTROL, &scontrol); if ((scontrol & 0xf00) != 0x300) { dev_printk(KERN_INFO, host->dev, "SCR access via " "SIDPR is available but doesn't work\n"); - return; + return 0; } } - host->ports[0]->ops = &piix_sidpr_sata_ops; - host->ports[1]->ops = &piix_sidpr_sata_ops; + /* okay, SCRs available, set ops and ask libata for slave_link */ + for (i = 0; i < 2; i++) { + struct ata_port *ap = host->ports[i]; + + ap->ops = &piix_sidpr_sata_ops; + + if (ap->flags & ATA_FLAG_SLAVE_POSS) { + rc = ata_slave_link_init(ap); + if (rc) + return rc; + } + } + + return 0; } static void piix_iocfg_bit18_quirk(struct pci_dev *pdev) @@ -1521,7 +1443,9 @@ static int __devinit piix_init_one(struct pci_dev *pdev, /* initialize controller */ if (port_flags & ATA_FLAG_SATA) { piix_init_pcs(host, piix_map_db_table[ent->driver_data]); - piix_init_sidpr(host); + rc = piix_init_sidpr(host); + if (rc) + return rc; } /* apply IOCFG bit18 quirk */ diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 79e3a8e..1ee9499 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -163,6 +163,67 @@ MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION); +/* + * Iterator helpers. Don't use directly. + * + * LOCKING: + * Host lock or EH context. + */ +struct ata_link *__ata_port_next_link(struct ata_port *ap, + struct ata_link *link, bool dev_only) +{ + /* NULL link indicates start of iteration */ + if (!link) { + if (dev_only && sata_pmp_attached(ap)) + return ap->pmp_link; + return &ap->link; + } + + /* we just iterated over the host master link, what's next? */ + if (link == &ap->link) { + if (!sata_pmp_attached(ap)) { + if (unlikely(ap->slave_link) && !dev_only) + return ap->slave_link; + return NULL; + } + return ap->pmp_link; + } + + /* slave_link excludes PMP */ + if (unlikely(link == ap->slave_link)) + return NULL; + + /* iterate to the next PMP link */ + if (++link < ap->pmp_link + ap->nr_pmp_links) + return link; + return NULL; +} + +/** + * ata_dev_phys_link - find physical link for a device + * @dev: ATA device to look up physical link for + * + * Look up physical link which @dev is attached to. Note that + * this is different from @dev->link only when @dev is on slave + * link. For all other cases, it's the same as @dev->link. + * + * LOCKING: + * Don't care. + * + * RETURNS: + * Pointer to the found physical link. + */ +struct ata_link *ata_dev_phys_link(struct ata_device *dev) +{ + struct ata_port *ap = dev->link->ap; + + if (!ap->slave_link) + return dev->link; + if (!dev->devno) + return &ap->link; + return ap->slave_link; +} + /** * ata_force_cbl - force cable type according to libata.force * @ap: ATA port of interest @@ -206,7 +267,8 @@ void ata_force_cbl(struct ata_port *ap) * the host link and all fan-out ports connected via PMP. If the * device part is specified as 0 (e.g. 1.00:), it specifies the * first fan-out link not the host link. Device number 15 always - * points to the host link whether PMP is attached or not. + * points to the host link whether PMP is attached or not. If the + * controller has slave link, device number 16 points to it. * * LOCKING: * EH context. @@ -214,12 +276,11 @@ void ata_force_cbl(struct ata_port *ap) static void ata_force_link_limits(struct ata_link *link) { bool did_spd = false; - int linkno, i; + int linkno = link->pmp; + int i; if (ata_is_host_link(link)) - linkno = 15; - else - linkno = link->pmp; + linkno += 15; for (i = ata_force_tbl_size - 1; i >= 0; i--) { const struct ata_force_ent *fe = &ata_force_tbl[i]; @@ -266,9 +327,9 @@ static void ata_force_xfermask(struct ata_device *dev) int alt_devno = devno; int i; - /* allow n.15 for the first device attached to host port */ - if (ata_is_host_link(dev->link) && devno == 0) - alt_devno = 15; + /* allow n.15/16 for devices attached to host port */ + if (ata_is_host_link(dev->link)) + alt_devno += 15; for (i = ata_force_tbl_size - 1; i >= 0; i--) { const struct ata_force_ent *fe = &ata_force_tbl[i]; @@ -320,9 +381,9 @@ static void ata_force_horkage(struct ata_device *dev) int alt_devno = devno; int i; - /* allow n.15 for the first device attached to host port */ - if (ata_is_host_link(dev->link) && devno == 0) - alt_devno = 15; + /* allow n.15/16 for devices attached to host port */ + if (ata_is_host_link(dev->link)) + alt_devno += 15; for (i = 0; i < ata_force_tbl_size; i++) { const struct ata_force_ent *fe = &ata_force_tbl[i]; @@ -2681,7 +2742,7 @@ static void sata_print_link_status(struct ata_link *link) return; sata_scr_read(link, SCR_CONTROL, &scontrol); - if (ata_link_online(link)) { + if (ata_phys_link_online(link)) { tmp = (sstatus >> 4) & 0xf; ata_link_printk(link, KERN_INFO, "SATA link up %s (SStatus %X SControl %X)\n", @@ -3372,6 +3433,12 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline, unsigned long nodev_deadline = ata_deadline(start, ATA_TMOUT_FF_WAIT); int warned = 0; + /* Slave readiness can't be tested separately from master. On + * M/S emulation configuration, this function should be called + * only on the master and it will handle both master and slave. + */ + WARN_ON(link == link->ap->slave_link); + if (time_after(nodev_deadline, deadline)) nodev_deadline = deadline; @@ -3593,7 +3660,7 @@ int ata_std_prereset(struct ata_link *link, unsigned long deadline) } /* no point in trying softreset on offline link */ - if (ata_link_offline(link)) + if (ata_phys_link_offline(link)) ehc->i.action &= ~ATA_EH_SOFTRESET; return 0; @@ -3671,7 +3738,7 @@ int sata_link_hardreset(struct ata_link *link, const unsigned long *timing, if (rc) goto out; /* if link is offline nothing more to do */ - if (ata_link_offline(link)) + if (ata_phys_link_offline(link)) goto out; /* Link is online. From this point, -ENODEV too is an error. */ @@ -4868,10 +4935,8 @@ int sata_scr_valid(struct ata_link *link) int sata_scr_read(struct ata_link *link, int reg, u32 *val) { if (ata_is_host_link(link)) { - struct ata_port *ap = link->ap; - if (sata_scr_valid(link)) - return ap->ops->scr_read(ap, reg, val); + return link->ap->ops->scr_read(link, reg, val); return -EOPNOTSUPP; } @@ -4897,10 +4962,8 @@ int sata_scr_read(struct ata_link *link, int reg, u32 *val) int sata_scr_write(struct ata_link *link, int reg, u32 val) { if (ata_is_host_link(link)) { - struct ata_port *ap = link->ap; - if (sata_scr_valid(link)) - return ap->ops->scr_write(ap, reg, val); + return link->ap->ops->scr_write(link, reg, val); return -EOPNOTSUPP; } @@ -4925,13 +4988,12 @@ int sata_scr_write(struct ata_link *link, int reg, u32 val) int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) { if (ata_is_host_link(link)) { - struct ata_port *ap = link->ap; int rc; if (sata_scr_valid(link)) { - rc = ap->ops->scr_write(ap, reg, val); + rc = link->ap->ops->scr_write(link, reg, val); if (rc == 0) - rc = ap->ops->scr_read(ap, reg, &val); + rc = link->ap->ops->scr_read(link, reg, &val); return rc; } return -EOPNOTSUPP; @@ -4941,7 +5003,7 @@ int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) } /** - * ata_link_online - test whether the given link is online + * ata_phys_link_online - test whether the given link is online * @link: ATA link to test * * Test whether @link is online. Note that this function returns @@ -4952,20 +5014,20 @@ int sata_scr_write_flush(struct ata_link *link, int reg, u32 val) * None. * * RETURNS: - * 1 if the port online status is available and online. + * True if the port online status is available and online. */ -int ata_link_online(struct ata_link *link) +bool ata_phys_link_online(struct ata_link *link) { u32 sstatus; if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0 && (sstatus & 0xf) == 0x3) - return 1; - return 0; + return true; + return false; } /** - * ata_link_offline - test whether the given link is offline + * ata_phys_link_offline - test whether the given link is offline * @link: ATA link to test * * Test whether @link is offline. Note that this function @@ -4976,16 +5038,68 @@ int ata_link_online(struct ata_link *link) * None. * * RETURNS: - * 1 if the port offline status is available and offline. + * True if the port offline status is available and offline. */ -int ata_link_offline(struct ata_link *link) +bool ata_phys_link_offline(struct ata_link *link) { u32 sstatus; if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0 && (sstatus & 0xf) != 0x3) - return 1; - return 0; + return true; + return false; +} + +/** + * ata_link_online - test whether the given link is online + * @link: ATA link to test + * + * Test whether @link is online. This is identical to + * ata_phys_link_online() when there's no slave link. When + * there's a slave link, this function should only be called on + * the master link and will return true if any of M/S links is + * online. + * + * LOCKING: + * None. + * + * RETURNS: + * True if the port online status is available and online. + */ +bool ata_link_online(struct ata_link *link) +{ + struct ata_link *slave = link->ap->slave_link; + + WARN_ON(link == slave); /* shouldn't be called on slave link */ + + return ata_phys_link_online(link) || + (slave && ata_phys_link_online(slave)); +} + +/** + * ata_link_offline - test whether the given link is offline + * @link: ATA link to test + * + * Test whether @link is offline. This is identical to + * ata_phys_link_offline() when there's no slave link. When + * there's a slave link, this function should only be called on + * the master link and will return true if both M/S links are + * offline. + * + * LOCKING: + * None. + * + * RETURNS: + * True if the port offline status is available and offline. + */ +bool ata_link_offline(struct ata_link *link) +{ + struct ata_link *slave = link->ap->slave_link; + + WARN_ON(link == slave); /* shouldn't be called on slave link */ + + return ata_phys_link_offline(link) && + (!slave || ata_phys_link_offline(slave)); } #ifdef CONFIG_PM @@ -5127,11 +5241,11 @@ int ata_port_start(struct ata_port *ap) */ void ata_dev_init(struct ata_device *dev) { - struct ata_link *link = dev->link; + struct ata_link *link = ata_dev_phys_link(dev); struct ata_port *ap = link->ap; unsigned long flags; - /* SATA spd limit is bound to the first device */ + /* SATA spd limit is bound to the attached device, reset together */ link->sata_spd_limit = link->hw_sata_spd_limit; link->sata_spd = 0; @@ -5264,6 +5378,7 @@ struct ata_port *ata_port_alloc(struct ata_host *host) INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan); INIT_LIST_HEAD(&ap->eh_done_q); init_waitqueue_head(&ap->eh_wait_q); + init_completion(&ap->park_req_pending); init_timer_deferrable(&ap->fastdrain_timer); ap->fastdrain_timer.function = ata_eh_fastdrain_timerfn; ap->fastdrain_timer.data = (unsigned long)ap; @@ -5294,6 +5409,7 @@ static void ata_host_release(struct device *gendev, void *res) scsi_host_put(ap->scsi_host); kfree(ap->pmp_link); + kfree(ap->slave_link); kfree(ap); host->ports[i] = NULL; } @@ -5414,6 +5530,68 @@ struct ata_host *ata_host_alloc_pinfo(struct device *dev, return host; } +/** + * ata_slave_link_init - initialize slave link + * @ap: port to initialize slave link for + * + * Create and initialize slave link for @ap. This enables slave + * link handling on the port. + * + * In libata, a port contains links and a link contains devices. + * There is single host link but if a PMP is attached to it, + * there can be multiple fan-out links. On SATA, there's usually + * a single device connected to a link but PATA and SATA + * controllers emulating TF based interface can have two - master + * and slave. + * + * However, there are a few controllers which don't fit into this + * abstraction too well - SATA controllers which emulate TF + * interface with both master and slave devices but also have + * separate SCR register sets for each device. These controllers + * need separate links for physical link handling + * (e.g. onlineness, link speed) but should be treated like a + * traditional M/S controller for everything else (e.g. command + * issue, softreset). + * + * slave_link is libata's way of handling this class of + * controllers without impacting core layer too much. For + * anything other than physical link handling, the default host + * link is used for both master and slave. For physical link + * handling, separate @ap->slave_link is used. All dirty details + * are implemented inside libata core layer. From LLD's POV, the + * only difference is that prereset, hardreset and postreset are + * called once more for the slave link, so the reset sequence + * looks like the following. + * + * prereset(M) -> prereset(S) -> hardreset(M) -> hardreset(S) -> + * softreset(M) -> postreset(M) -> postreset(S) + * + * Note that softreset is called only for the master. Softreset + * resets both M/S by definition, so SRST on master should handle + * both (the standard method will work just fine). + * + * LOCKING: + * Should be called before host is registered. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +int ata_slave_link_init(struct ata_port *ap) +{ + struct ata_link *link; + + WARN_ON(ap->slave_link); + WARN_ON(ap->flags & ATA_FLAG_PMP); + + link = kzalloc(sizeof(*link), GFP_KERNEL); + if (!link) + return -ENOMEM; + + ata_link_init(ap, link, 1); + ap->slave_link = link; + return 0; +} + static void ata_host_stop(struct device *gendev, void *res) { struct ata_host *host = dev_get_drvdata(gendev); @@ -5640,6 +5818,8 @@ int ata_host_register(struct ata_host *host, struct scsi_host_template *sht) /* init sata_spd_limit to the current value */ sata_link_init_spd(&ap->link); + if (ap->slave_link) + sata_link_init_spd(ap->slave_link); /* print per-port info to dmesg */ xfer_mask = ata_pack_xfermask(ap->pio_mask, ap->mwdma_mask, @@ -6260,10 +6440,12 @@ EXPORT_SYMBOL_GPL(ata_base_port_ops); EXPORT_SYMBOL_GPL(sata_port_ops); EXPORT_SYMBOL_GPL(ata_dummy_port_ops); EXPORT_SYMBOL_GPL(ata_dummy_port_info); +EXPORT_SYMBOL_GPL(__ata_port_next_link); EXPORT_SYMBOL_GPL(ata_std_bios_param); EXPORT_SYMBOL_GPL(ata_host_init); EXPORT_SYMBOL_GPL(ata_host_alloc); EXPORT_SYMBOL_GPL(ata_host_alloc_pinfo); +EXPORT_SYMBOL_GPL(ata_slave_link_init); EXPORT_SYMBOL_GPL(ata_host_start); EXPORT_SYMBOL_GPL(ata_host_register); EXPORT_SYMBOL_GPL(ata_host_activate); diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c1db2f2..a93247c 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -33,6 +33,7 @@ */ #include <linux/kernel.h> +#include <linux/blkdev.h> #include <linux/pci.h> #include <scsi/scsi.h> #include <scsi/scsi_host.h> @@ -79,6 +80,8 @@ enum { */ ATA_EH_PRERESET_TIMEOUT = 10000, ATA_EH_FASTDRAIN_INTERVAL = 3000, + + ATA_EH_UA_TRIES = 5, }; /* The following table determines how we sequence resets. Each entry @@ -457,29 +460,29 @@ static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, * RETURNS: * EH_HANDLED or EH_NOT_HANDLED */ -enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) +enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) { struct Scsi_Host *host = cmd->device->host; struct ata_port *ap = ata_shost_to_port(host); unsigned long flags; struct ata_queued_cmd *qc; - enum scsi_eh_timer_return ret; + enum blk_eh_timer_return ret; DPRINTK("ENTER\n"); if (ap->ops->error_handler) { - ret = EH_NOT_HANDLED; + ret = BLK_EH_NOT_HANDLED; goto out; } - ret = EH_HANDLED; + ret = BLK_EH_HANDLED; spin_lock_irqsave(ap->lock, flags); qc = ata_qc_from_tag(ap, ap->link.active_tag); if (qc) { WARN_ON(qc->scsicmd != cmd); qc->flags |= ATA_QCFLAG_EH_SCHEDULED; qc->err_mask |= AC_ERR_TIMEOUT; - ret = EH_NOT_HANDLED; + ret = BLK_EH_NOT_HANDLED; } spin_unlock_irqrestore(ap->lock, flags); @@ -831,7 +834,7 @@ void ata_qc_schedule_eh(struct ata_queued_cmd *qc) * Note that ATA_QCFLAG_FAILED is unconditionally set after * this function completes. */ - scsi_req_abort_cmd(qc->scsicmd); + blk_abort_request(qc->scsicmd->request); } /** @@ -1357,6 +1360,37 @@ static int ata_eh_read_log_10h(struct ata_device *dev, } /** + * atapi_eh_tur - perform ATAPI TEST_UNIT_READY + * @dev: target ATAPI device + * @r_sense_key: out parameter for sense_key + * + * Perform ATAPI TEST_UNIT_READY. + * + * LOCKING: + * EH context (may sleep). + * + * RETURNS: + * 0 on success, AC_ERR_* mask on failure. + */ +static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) +{ + u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; + struct ata_taskfile tf; + unsigned int err_mask; + + ata_tf_init(dev, &tf); + + tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; + tf.command = ATA_CMD_PACKET; + tf.protocol = ATAPI_PROT_NODATA; + + err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); + if (err_mask == AC_ERR_DEV) + *r_sense_key = tf.feature >> 4; + return err_mask; +} + +/** * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE * @dev: device to perform REQUEST_SENSE to * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) @@ -1756,7 +1790,7 @@ static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) static unsigned int ata_eh_speed_down(struct ata_device *dev, unsigned int eflags, unsigned int err_mask) { - struct ata_link *link = dev->link; + struct ata_link *link = ata_dev_phys_link(dev); int xfer_ok = 0; unsigned int verdict; unsigned int action = 0; @@ -1880,7 +1914,8 @@ static void ata_eh_link_autopsy(struct ata_link *link) for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link) + if (!(qc->flags & ATA_QCFLAG_FAILED) || + ata_dev_phys_link(qc->dev) != link) continue; /* inherit upper level err_mask */ @@ -1967,6 +2002,23 @@ void ata_eh_autopsy(struct ata_port *ap) ata_port_for_each_link(link, ap) ata_eh_link_autopsy(link); + /* Handle the frigging slave link. Autopsy is done similarly + * but actions and flags are transferred over to the master + * link and handled from there. + */ + if (ap->slave_link) { + struct ata_eh_context *mehc = &ap->link.eh_context; + struct ata_eh_context *sehc = &ap->slave_link->eh_context; + + ata_eh_link_autopsy(ap->slave_link); + + ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); + mehc->i.action |= sehc->i.action; + mehc->i.dev_action[1] |= sehc->i.dev_action[1]; + mehc->i.flags |= sehc->i.flags; + ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); + } + /* Autopsy of fanout ports can affect host link autopsy. * Perform host link autopsy last. */ @@ -2001,7 +2053,8 @@ static void ata_eh_link_report(struct ata_link *link) for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); - if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link || + if (!(qc->flags & ATA_QCFLAG_FAILED) || + ata_dev_phys_link(qc->dev) != link || ((qc->flags & ATA_QCFLAG_QUIET) && qc->err_mask == AC_ERR_DEV)) continue; @@ -2068,7 +2121,7 @@ static void ata_eh_link_report(struct ata_link *link) char cdb_buf[70] = ""; if (!(qc->flags & ATA_QCFLAG_FAILED) || - qc->dev->link != link || !qc->err_mask) + ata_dev_phys_link(qc->dev) != link || !qc->err_mask) continue; if (qc->dma_dir != DMA_NONE) { @@ -2160,12 +2213,14 @@ void ata_eh_report(struct ata_port *ap) } static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, - unsigned int *classes, unsigned long deadline) + unsigned int *classes, unsigned long deadline, + bool clear_classes) { struct ata_device *dev; - ata_link_for_each_dev(dev, link) - classes[dev->devno] = ATA_DEV_UNKNOWN; + if (clear_classes) + ata_link_for_each_dev(dev, link) + classes[dev->devno] = ATA_DEV_UNKNOWN; return reset(link, classes, deadline); } @@ -2187,17 +2242,20 @@ int ata_eh_reset(struct ata_link *link, int classify, ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) { struct ata_port *ap = link->ap; + struct ata_link *slave = ap->slave_link; struct ata_eh_context *ehc = &link->eh_context; + struct ata_eh_context *sehc = &slave->eh_context; unsigned int *classes = ehc->classes; unsigned int lflags = link->flags; int verbose = !(ehc->i.flags & ATA_EHI_QUIET); int max_tries = 0, try = 0; + struct ata_link *failed_link; struct ata_device *dev; unsigned long deadline, now; ata_reset_fn_t reset; unsigned long flags; u32 sstatus; - int nr_known, rc; + int nr_unknown, rc; /* * Prepare to reset @@ -2252,8 +2310,30 @@ int ata_eh_reset(struct ata_link *link, int classify, } if (prereset) { - rc = prereset(link, - ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT)); + unsigned long deadline = ata_deadline(jiffies, + ATA_EH_PRERESET_TIMEOUT); + + if (slave) { + sehc->i.action &= ~ATA_EH_RESET; + sehc->i.action |= ehc->i.action; + } + + rc = prereset(link, deadline); + + /* If present, do prereset on slave link too. Reset + * is skipped iff both master and slave links report + * -ENOENT or clear ATA_EH_RESET. + */ + if (slave && (rc == 0 || rc == -ENOENT)) { + int tmp; + + tmp = prereset(slave, deadline); + if (tmp != -ENOENT) + rc = tmp; + + ehc->i.action |= sehc->i.action; + } + if (rc) { if (rc == -ENOENT) { ata_link_printk(link, KERN_DEBUG, @@ -2302,25 +2382,51 @@ int ata_eh_reset(struct ata_link *link, int classify, else ehc->i.flags |= ATA_EHI_DID_SOFTRESET; - rc = ata_do_reset(link, reset, classes, deadline); - if (rc && rc != -EAGAIN) + rc = ata_do_reset(link, reset, classes, deadline, true); + if (rc && rc != -EAGAIN) { + failed_link = link; goto fail; + } + + /* hardreset slave link if existent */ + if (slave && reset == hardreset) { + int tmp; + + if (verbose) + ata_link_printk(slave, KERN_INFO, + "hard resetting link\n"); + ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); + tmp = ata_do_reset(slave, reset, classes, deadline, + false); + switch (tmp) { + case -EAGAIN: + rc = -EAGAIN; + case 0: + break; + default: + failed_link = slave; + rc = tmp; + goto fail; + } + } + + /* perform follow-up SRST if necessary */ if (reset == hardreset && ata_eh_followup_srst_needed(link, rc, classes)) { - /* okay, let's do follow-up softreset */ reset = softreset; if (!reset) { ata_link_printk(link, KERN_ERR, "follow-up softreset required " "but no softreset avaliable\n"); + failed_link = link; rc = -EINVAL; goto fail; } ata_eh_about_to_do(link, NULL, ATA_EH_RESET); - rc = ata_do_reset(link, reset, classes, deadline); + rc = ata_do_reset(link, reset, classes, deadline, true); } } else { if (verbose) @@ -2341,7 +2447,7 @@ int ata_eh_reset(struct ata_link *link, int classify, dev->pio_mode = XFER_PIO_0; dev->flags &= ~ATA_DFLAG_SLEEPING; - if (ata_link_offline(link)) + if (ata_phys_link_offline(ata_dev_phys_link(dev))) continue; /* apply class override */ @@ -2354,6 +2460,8 @@ int ata_eh_reset(struct ata_link *link, int classify, /* record current link speed */ if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) link->sata_spd = (sstatus >> 4) & 0xf; + if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) + slave->sata_spd = (sstatus >> 4) & 0xf; /* thaw the port */ if (ata_is_host_link(link)) @@ -2366,12 +2474,17 @@ int ata_eh_reset(struct ata_link *link, int classify, * reset and here. This race is mediated by cross checking * link onlineness and classification result later. */ - if (postreset) + if (postreset) { postreset(link, classes); + if (slave) + postreset(slave, classes); + } /* clear cached SError */ spin_lock_irqsave(link->ap->lock, flags); link->eh_info.serror = 0; + if (slave) + slave->eh_info.serror = 0; spin_unlock_irqrestore(link->ap->lock, flags); /* Make sure onlineness and classification result correspond. @@ -2381,19 +2494,21 @@ int ata_eh_reset(struct ata_link *link, int classify, * link onlineness and classification result, those conditions * can be reliably detected and retried. */ - nr_known = 0; + nr_unknown = 0; ata_link_for_each_dev(dev, link) { /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ - if (classes[dev->devno] == ATA_DEV_UNKNOWN) + if (classes[dev->devno] == ATA_DEV_UNKNOWN) { classes[dev->devno] = ATA_DEV_NONE; - else - nr_known++; + if (ata_phys_link_online(ata_dev_phys_link(dev))) + nr_unknown++; + } } - if (classify && !nr_known && ata_link_online(link)) { + if (classify && nr_unknown) { if (try < max_tries) { ata_link_printk(link, KERN_WARNING, "link online but " "device misclassified, retrying\n"); + failed_link = link; rc = -EAGAIN; goto fail; } @@ -2404,6 +2519,8 @@ int ata_eh_reset(struct ata_link *link, int classify, /* reset successful, schedule revalidation */ ata_eh_done(link, NULL, ATA_EH_RESET); + if (slave) + ata_eh_done(slave, NULL, ATA_EH_RESET); ehc->last_reset = jiffies; ehc->i.action |= ATA_EH_REVALIDATE; @@ -2411,6 +2528,8 @@ int ata_eh_reset(struct ata_link *link, int classify, out: /* clear hotplug flag */ ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; + if (slave) + sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; spin_lock_irqsave(ap->lock, flags); ap->pflags &= ~ATA_PFLAG_RESETTING; @@ -2431,7 +2550,7 @@ int ata_eh_reset(struct ata_link *link, int classify, if (time_before(now, deadline)) { unsigned long delta = deadline - now; - ata_link_printk(link, KERN_WARNING, + ata_link_printk(failed_link, KERN_WARNING, "reset failed (errno=%d), retrying in %u secs\n", rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); @@ -2439,13 +2558,92 @@ int ata_eh_reset(struct ata_link *link, int classify, delta = schedule_timeout_uninterruptible(delta); } - if (rc == -EPIPE || try == max_tries - 1) + if (try == max_tries - 1) { sata_down_spd_limit(link); + if (slave) + sata_down_spd_limit(slave); + } else if (rc == -EPIPE) + sata_down_spd_limit(failed_link); + if (hardreset) reset = hardreset; goto retry; } +static inline void ata_eh_pull_park_action(struct ata_port *ap) +{ + struct ata_link *link; + struct ata_device *dev; + unsigned long flags; + + /* + * This function can be thought of as an extended version of + * ata_eh_about_to_do() specially crafted to accommodate the + * requirements of ATA_EH_PARK handling. Since the EH thread + * does not leave the do {} while () loop in ata_eh_recover as + * long as the timeout for a park request to *one* device on + * the port has not expired, and since we still want to pick + * up park requests to other devices on the same port or + * timeout updates for the same device, we have to pull + * ATA_EH_PARK actions from eh_info into eh_context.i + * ourselves at the beginning of each pass over the loop. + * + * Additionally, all write accesses to &ap->park_req_pending + * through INIT_COMPLETION() (see below) or complete_all() + * (see ata_scsi_park_store()) are protected by the host lock. + * As a result we have that park_req_pending.done is zero on + * exit from this function, i.e. when ATA_EH_PARK actions for + * *all* devices on port ap have been pulled into the + * respective eh_context structs. If, and only if, + * park_req_pending.done is non-zero by the time we reach + * wait_for_completion_timeout(), another ATA_EH_PARK action + * has been scheduled for at least one of the devices on port + * ap and we have to cycle over the do {} while () loop in + * ata_eh_recover() again. + */ + + spin_lock_irqsave(ap->lock, flags); + INIT_COMPLETION(ap->park_req_pending); + ata_port_for_each_link(link, ap) { + ata_link_for_each_dev(dev, link) { + struct ata_eh_info *ehi = &link->eh_info; + + link->eh_context.i.dev_action[dev->devno] |= + ehi->dev_action[dev->devno] & ATA_EH_PARK; + ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); + } + } + spin_unlock_irqrestore(ap->lock, flags); +} + +static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) +{ + struct ata_eh_context *ehc = &dev->link->eh_context; + struct ata_taskfile tf; + unsigned int err_mask; + + ata_tf_init(dev, &tf); + if (park) { + ehc->unloaded_mask |= 1 << dev->devno; + tf.command = ATA_CMD_IDLEIMMEDIATE; + tf.feature = 0x44; + tf.lbal = 0x4c; + tf.lbam = 0x4e; + tf.lbah = 0x55; + } else { + ehc->unloaded_mask &= ~(1 << dev->devno); + tf.command = ATA_CMD_CHK_POWER; + } + + tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf.protocol |= ATA_PROT_NODATA; + err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); + if (park && (err_mask || tf.lbal != 0xc4)) { + ata_dev_printk(dev, KERN_ERR, "head unload failed!\n"); + ehc->unloaded_mask &= ~(1 << dev->devno); + } +} + static int ata_eh_revalidate_and_attach(struct ata_link *link, struct ata_device **r_failed_dev) { @@ -2472,7 +2670,7 @@ static int ata_eh_revalidate_and_attach(struct ata_link *link, if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { WARN_ON(dev->class == ATA_DEV_PMP); - if (ata_link_offline(link)) { + if (ata_phys_link_offline(ata_dev_phys_link(dev))) { rc = -EIO; goto err; } @@ -2610,6 +2808,53 @@ int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) return rc; } +/** + * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset + * @dev: ATAPI device to clear UA for + * + * Resets and other operations can make an ATAPI device raise + * UNIT ATTENTION which causes the next operation to fail. This + * function clears UA. + * + * LOCKING: + * EH context (may sleep). + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int atapi_eh_clear_ua(struct ata_device *dev) +{ + int i; + + for (i = 0; i < ATA_EH_UA_TRIES; i++) { + u8 sense_buffer[SCSI_SENSE_BUFFERSIZE]; + u8 sense_key = 0; + unsigned int err_mask; + + err_mask = atapi_eh_tur(dev, &sense_key); + if (err_mask != 0 && err_mask != AC_ERR_DEV) { + ata_dev_printk(dev, KERN_WARNING, "TEST_UNIT_READY " + "failed (err_mask=0x%x)\n", err_mask); + return -EIO; + } + + if (!err_mask || sense_key != UNIT_ATTENTION) + return 0; + + err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); + if (err_mask) { + ata_dev_printk(dev, KERN_WARNING, "failed to clear " + "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); + return -EIO; + } + } + + ata_dev_printk(dev, KERN_WARNING, + "UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES); + + return 0; +} + static int ata_link_nr_enabled(struct ata_link *link) { struct ata_device *dev; @@ -2697,7 +2942,7 @@ static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) /* This is the last chance, better to slow * down than lose it. */ - sata_down_spd_limit(dev->link); + sata_down_spd_limit(ata_dev_phys_link(dev)); ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); } } @@ -2707,7 +2952,7 @@ static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) ata_dev_disable(dev); /* detach if offline */ - if (ata_link_offline(dev->link)) + if (ata_phys_link_offline(ata_dev_phys_link(dev))) ata_eh_detach_dev(dev); /* schedule probe if necessary */ @@ -2755,7 +3000,7 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, struct ata_device *dev; int nr_failed_devs; int rc; - unsigned long flags; + unsigned long flags, deadline; DPRINTK("ENTER\n"); @@ -2829,6 +3074,56 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, } } + do { + unsigned long now; + + /* + * clears ATA_EH_PARK in eh_info and resets + * ap->park_req_pending + */ + ata_eh_pull_park_action(ap); + + deadline = jiffies; + ata_port_for_each_link(link, ap) { + ata_link_for_each_dev(dev, link) { + struct ata_eh_context *ehc = &link->eh_context; + unsigned long tmp; + + if (dev->class != ATA_DEV_ATA) + continue; + if (!(ehc->i.dev_action[dev->devno] & + ATA_EH_PARK)) + continue; + tmp = dev->unpark_deadline; + if (time_before(deadline, tmp)) + deadline = tmp; + else if (time_before_eq(tmp, jiffies)) + continue; + if (ehc->unloaded_mask & (1 << dev->devno)) + continue; + + ata_eh_park_issue_cmd(dev, 1); + } + } + + now = jiffies; + if (time_before_eq(deadline, now)) + break; + + deadline = wait_for_completion_timeout(&ap->park_req_pending, + deadline - now); + } while (deadline); + ata_port_for_each_link(link, ap) { + ata_link_for_each_dev(dev, link) { + if (!(link->eh_context.unloaded_mask & + (1 << dev->devno))) + continue; + + ata_eh_park_issue_cmd(dev, 0); + ata_eh_done(link, dev, ATA_EH_PARK); + } + } + /* the rest */ ata_port_for_each_link(link, ap) { struct ata_eh_context *ehc = &link->eh_context; @@ -2852,6 +3147,20 @@ int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, ehc->i.flags &= ~ATA_EHI_SETMODE; } + /* If reset has been issued, clear UA to avoid + * disrupting the current users of the device. + */ + if (ehc->i.flags & ATA_EHI_DID_RESET) { + ata_link_for_each_dev(dev, link) { + if (dev->class != ATA_DEV_ATAPI) + continue; + rc = atapi_eh_clear_ua(dev); + if (rc) + goto dev_fail; + } + } + + /* configure link power saving */ if (ehc->i.action & ATA_EH_LPM) ata_link_for_each_dev(dev, link) ata_dev_enable_pm(dev, ap->pm_policy); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index b9d3ba4..59fe051 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -183,6 +183,105 @@ DEVICE_ATTR(link_power_management_policy, S_IRUGO | S_IWUSR, ata_scsi_lpm_show, ata_scsi_lpm_put); EXPORT_SYMBOL_GPL(dev_attr_link_power_management_policy); +static ssize_t ata_scsi_park_show(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(device); + struct ata_port *ap; + struct ata_link *link; + struct ata_device *dev; + unsigned long flags; + unsigned int uninitialized_var(msecs); + int rc = 0; + + ap = ata_shost_to_port(sdev->host); + + spin_lock_irqsave(ap->lock, flags); + dev = ata_scsi_find_dev(ap, sdev); + if (!dev) { + rc = -ENODEV; + goto unlock; + } + if (dev->flags & ATA_DFLAG_NO_UNLOAD) { + rc = -EOPNOTSUPP; + goto unlock; + } + + link = dev->link; + if (ap->pflags & ATA_PFLAG_EH_IN_PROGRESS && + link->eh_context.unloaded_mask & (1 << dev->devno) && + time_after(dev->unpark_deadline, jiffies)) + msecs = jiffies_to_msecs(dev->unpark_deadline - jiffies); + else + msecs = 0; + +unlock: + spin_unlock_irq(ap->lock); + + return rc ? rc : snprintf(buf, 20, "%u\n", msecs); +} + +static ssize_t ata_scsi_park_store(struct device *device, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct scsi_device *sdev = to_scsi_device(device); + struct ata_port *ap; + struct ata_device *dev; + long int input; + unsigned long flags; + int rc; + + rc = strict_strtol(buf, 10, &input); + if (rc || input < -2) + return -EINVAL; + if (input > ATA_TMOUT_MAX_PARK) { + rc = -EOVERFLOW; + input = ATA_TMOUT_MAX_PARK; + } + + ap = ata_shost_to_port(sdev->host); + + spin_lock_irqsave(ap->lock, flags); + dev = ata_scsi_find_dev(ap, sdev); + if (unlikely(!dev)) { + rc = -ENODEV; + goto unlock; + } + if (dev->class != ATA_DEV_ATA) { + rc = -EOPNOTSUPP; + goto unlock; + } + + if (input >= 0) { + if (dev->flags & ATA_DFLAG_NO_UNLOAD) { + rc = -EOPNOTSUPP; + goto unlock; + } + + dev->unpark_deadline = ata_deadline(jiffies, input); + dev->link->eh_info.dev_action[dev->devno] |= ATA_EH_PARK; + ata_port_schedule_eh(ap); + complete(&ap->park_req_pending); + } else { + switch (input) { + case -1: + dev->flags &= ~ATA_DFLAG_NO_UNLOAD; + break; + case -2: + dev->flags |= ATA_DFLAG_NO_UNLOAD; + break; + } + } +unlock: + spin_unlock_irqrestore(ap->lock, flags); + + return rc ? rc : len; +} +DEVICE_ATTR(unload_heads, S_IRUGO | S_IWUSR, + ata_scsi_park_show, ata_scsi_park_store); +EXPORT_SYMBOL_GPL(dev_attr_unload_heads); + static void ata_scsi_set_sense(struct scsi_cmnd *cmd, u8 sk, u8 asc, u8 ascq) { cmd->result = (DRIVER_SENSE << 24) | SAM_STAT_CHECK_CONDITION; @@ -269,6 +368,12 @@ DEVICE_ATTR(sw_activity, S_IWUGO | S_IRUGO, ata_scsi_activity_show, ata_scsi_activity_store); EXPORT_SYMBOL_GPL(dev_attr_sw_activity); +struct device_attribute *ata_common_sdev_attrs[] = { + &dev_attr_unload_heads, + NULL +}; +EXPORT_SYMBOL_GPL(ata_common_sdev_attrs); + static void ata_scsi_invalid_field(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { @@ -954,6 +1059,9 @@ static int atapi_drain_needed(struct request *rq) static int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev) { + if (!ata_id_has_unload(dev->id)) + dev->flags |= ATA_DFLAG_NO_UNLOAD; + /* configure max sectors */ blk_queue_max_sectors(sdev->request_queue, dev->max_sectors); @@ -977,6 +1085,10 @@ static int ata_scsi_dev_config(struct scsi_device *sdev, blk_queue_dma_drain(q, atapi_drain_needed, buf, ATAPI_MAX_DRAIN); } else { + if (ata_id_is_ssd(dev->id)) + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, + sdev->request_queue); + /* ATA devices must be sector aligned */ blk_queue_update_dma_alignment(sdev->request_queue, ATA_SECT_SIZE - 1); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index ade5c75..fe2839e 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -70,6 +70,7 @@ extern int atapi_passthru16; extern int libata_fua; extern int libata_noacpi; extern int libata_allow_tpm; +extern struct ata_link *ata_dev_phys_link(struct ata_device *dev); extern void ata_force_cbl(struct ata_port *ap); extern u64 ata_tf_to_lba(const struct ata_taskfile *tf); extern u64 ata_tf_to_lba48(const struct ata_taskfile *tf); @@ -107,6 +108,8 @@ extern void ata_qc_issue(struct ata_queued_cmd *qc); extern void __ata_qc_complete(struct ata_queued_cmd *qc); extern int atapi_check_dma(struct ata_queued_cmd *qc); extern void swap_buf_le16(u16 *buf, unsigned int buf_words); +extern bool ata_phys_link_online(struct ata_link *link); +extern bool ata_phys_link_offline(struct ata_link *link); extern void ata_dev_init(struct ata_device *dev); extern void ata_link_init(struct ata_port *ap, struct ata_link *link, int pmp); extern int sata_link_init_spd(struct ata_link *link); @@ -152,7 +155,7 @@ extern int ata_bus_probe(struct ata_port *ap); /* libata-eh.c */ extern unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd); extern void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd); -extern enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); +extern enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd); extern void ata_scsi_error(struct Scsi_Host *host); extern void ata_port_wait_eh(struct ata_port *ap); extern void ata_eh_fastdrain_timerfn(unsigned long arg); diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c index d3932901..1266924c 100644 --- a/drivers/ata/pata_bf54x.c +++ b/drivers/ata/pata_bf54x.c @@ -1632,6 +1632,8 @@ static int __devinit bfin_atapi_probe(struct platform_device *pdev) return -ENODEV; } + dev_set_drvdata(&pdev->dev, host); + return 0; } @@ -1648,6 +1650,7 @@ static int __devexit bfin_atapi_remove(struct platform_device *pdev) struct ata_host *host = dev_get_drvdata(dev); ata_host_detach(host); + dev_set_drvdata(&pdev->dev, NULL); peripheral_free_list(atapi_io_port); @@ -1655,27 +1658,44 @@ static int __devexit bfin_atapi_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -int bfin_atapi_suspend(struct platform_device *pdev, pm_message_t state) +static int bfin_atapi_suspend(struct platform_device *pdev, pm_message_t state) { - return 0; + struct ata_host *host = dev_get_drvdata(&pdev->dev); + if (host) + return ata_host_suspend(host, state); + else + return 0; } -int bfin_atapi_resume(struct platform_device *pdev) +static int bfin_atapi_resume(struct platform_device *pdev) { + struct ata_host *host = dev_get_drvdata(&pdev->dev); + int ret; + + if (host) { + ret = bfin_reset_controller(host); + if (ret) { + printk(KERN_ERR DRV_NAME ": Error during HW init\n"); + return ret; + } + ata_host_resume(host); + } + return 0; } +#else +#define bfin_atapi_suspend NULL +#define bfin_atapi_resume NULL #endif static struct platform_driver bfin_atapi_driver = { .probe = bfin_atapi_probe, .remove = __devexit_p(bfin_atapi_remove), + .suspend = bfin_atapi_suspend, + .resume = bfin_atapi_resume, .driver = { .name = DRV_NAME, .owner = THIS_MODULE, -#ifdef CONFIG_PM - .suspend = bfin_atapi_suspend, - .resume = bfin_atapi_resume, -#endif }, }; diff --git a/drivers/ata/pata_sil680.c b/drivers/ata/pata_sil680.c index e970b22..a598bb3 100644 --- a/drivers/ata/pata_sil680.c +++ b/drivers/ata/pata_sil680.c @@ -230,7 +230,7 @@ static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio) tmpbyte & 1, tmpbyte & 0x30); *try_mmio = 0; -#ifdef CONFIG_PPC_MERGE +#ifdef CONFIG_PPC if (machine_is(cell)) *try_mmio = (tmpbyte & 1) || pci_resource_start(pdev, 5); #endif diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 3924e72..1a56db9 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -469,10 +469,10 @@ static bool sata_fsl_qc_fill_rtf(struct ata_queued_cmd *qc) return true; } -static int sata_fsl_scr_write(struct ata_port *ap, unsigned int sc_reg_in, - u32 val) +static int sata_fsl_scr_write(struct ata_link *link, + unsigned int sc_reg_in, u32 val) { - struct sata_fsl_host_priv *host_priv = ap->host->private_data; + struct sata_fsl_host_priv *host_priv = link->ap->host->private_data; void __iomem *ssr_base = host_priv->ssr_base; unsigned int sc_reg; @@ -493,10 +493,10 @@ static int sata_fsl_scr_write(struct ata_port *ap, unsigned int sc_reg_in, return 0; } -static int sata_fsl_scr_read(struct ata_port *ap, unsigned int sc_reg_in, - u32 *val) +static int sata_fsl_scr_read(struct ata_link *link, + unsigned int sc_reg_in, u32 *val) { - struct sata_fsl_host_priv *host_priv = ap->host->private_data; + struct sata_fsl_host_priv *host_priv = link->ap->host->private_data; void __iomem *ssr_base = host_priv->ssr_base; unsigned int sc_reg; @@ -645,12 +645,12 @@ static int sata_fsl_port_start(struct ata_port *ap) * Workaround for 8315DS board 3gbps link-up issue, * currently limit SATA port to GEN1 speed */ - sata_fsl_scr_read(ap, SCR_CONTROL, &temp); + sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp); temp &= ~(0xF << 4); temp |= (0x1 << 4); - sata_fsl_scr_write(ap, SCR_CONTROL, temp); + sata_fsl_scr_write(&ap->link, SCR_CONTROL, temp); - sata_fsl_scr_read(ap, SCR_CONTROL, &temp); + sata_fsl_scr_read(&ap->link, SCR_CONTROL, &temp); dev_printk(KERN_WARNING, dev, "scr_control, speed limited to %x\n", temp); #endif @@ -868,7 +868,7 @@ issue_srst: ioread32(CQ + hcr_base), ioread32(CA + hcr_base), ioread32(CC + hcr_base)); - sata_fsl_scr_read(ap, SCR_ERROR, &Serror); + sata_fsl_scr_read(&ap->link, SCR_ERROR, &Serror); DPRINTK("HStatus = 0x%x\n", ioread32(hcr_base + HSTATUS)); DPRINTK("HControl = 0x%x\n", ioread32(hcr_base + HCONTROL)); @@ -972,9 +972,9 @@ static void sata_fsl_error_intr(struct ata_port *ap) * Handle & Clear SError */ - sata_fsl_scr_read(ap, SCR_ERROR, &SError); + sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError); if (unlikely(SError & 0xFFFF0000)) { - sata_fsl_scr_write(ap, SCR_ERROR, SError); + sata_fsl_scr_write(&ap->link, SCR_ERROR, SError); } DPRINTK("error_intr,hStat=0x%x,CE=0x%x,DE =0x%x,SErr=0x%x\n", @@ -1091,7 +1091,7 @@ static void sata_fsl_host_intr(struct ata_port *ap) hstatus = ioread32(hcr_base + HSTATUS); - sata_fsl_scr_read(ap, SCR_ERROR, &SError); + sata_fsl_scr_read(&ap->link, SCR_ERROR, &SError); if (unlikely(SError & 0xFFFF0000)) { DPRINTK("serror @host_intr : 0x%x\n", SError); diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c index 5032c32..fbbd87c 100644 --- a/drivers/ata/sata_inic162x.c +++ b/drivers/ata/sata_inic162x.c @@ -269,9 +269,9 @@ static void inic_reset_port(void __iomem *port_base) writeb(0xff, port_base + PORT_IRQ_STAT); } -static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val) +static int inic_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val) { - void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR; + void __iomem *scr_addr = inic_port_base(link->ap) + PORT_SCR; void __iomem *addr; if (unlikely(sc_reg >= ARRAY_SIZE(scr_map))) @@ -286,9 +286,9 @@ static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val) return 0; } -static int inic_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val) +static int inic_scr_write(struct ata_link *link, unsigned sc_reg, u32 val) { - void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR; + void __iomem *scr_addr = inic_port_base(link->ap) + PORT_SCR; if (unlikely(sc_reg >= ARRAY_SIZE(scr_map))) return -EINVAL; diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index c815f8e..2b24ae5 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -493,10 +493,10 @@ struct mv_hw_ops { void (*reset_bus)(struct ata_host *host, void __iomem *mmio); }; -static int mv_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val); -static int mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val); -static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val); -static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val); +static int mv_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val); +static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val); +static int mv5_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val); +static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val); static int mv_port_start(struct ata_port *ap); static void mv_port_stop(struct ata_port *ap); static int mv_qc_defer(struct ata_queued_cmd *qc); @@ -1070,23 +1070,23 @@ static unsigned int mv_scr_offset(unsigned int sc_reg_in) return ofs; } -static int mv_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val) +static int mv_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val) { unsigned int ofs = mv_scr_offset(sc_reg_in); if (ofs != 0xffffffffU) { - *val = readl(mv_ap_base(ap) + ofs); + *val = readl(mv_ap_base(link->ap) + ofs); return 0; } else return -EINVAL; } -static int mv_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val) +static int mv_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val) { unsigned int ofs = mv_scr_offset(sc_reg_in); if (ofs != 0xffffffffU) { - writelfl(val, mv_ap_base(ap) + ofs); + writelfl(val, mv_ap_base(link->ap) + ofs); return 0; } else return -EINVAL; @@ -2251,11 +2251,11 @@ static unsigned int mv5_scr_offset(unsigned int sc_reg_in) return ofs; } -static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val) +static int mv5_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val) { - struct mv_host_priv *hpriv = ap->host->private_data; + struct mv_host_priv *hpriv = link->ap->host->private_data; void __iomem *mmio = hpriv->base; - void __iomem *addr = mv5_phy_base(mmio, ap->port_no); + void __iomem *addr = mv5_phy_base(mmio, link->ap->port_no); unsigned int ofs = mv5_scr_offset(sc_reg_in); if (ofs != 0xffffffffU) { @@ -2265,11 +2265,11 @@ static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val) return -EINVAL; } -static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val) +static int mv5_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val) { - struct mv_host_priv *hpriv = ap->host->private_data; + struct mv_host_priv *hpriv = link->ap->host->private_data; void __iomem *mmio = hpriv->base; - void __iomem *addr = mv5_phy_base(mmio, ap->port_no); + void __iomem *addr = mv5_phy_base(mmio, link->ap->port_no); unsigned int ofs = mv5_scr_offset(sc_reg_in); if (ofs != 0xffffffffU) { diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 14601dc..fae3841 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -302,8 +302,8 @@ static void nv_ck804_host_stop(struct ata_host *host); static irqreturn_t nv_generic_interrupt(int irq, void *dev_instance); static irqreturn_t nv_nf2_interrupt(int irq, void *dev_instance); static irqreturn_t nv_ck804_interrupt(int irq, void *dev_instance); -static int nv_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); -static int nv_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val); +static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); +static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static void nv_nf2_freeze(struct ata_port *ap); static void nv_nf2_thaw(struct ata_port *ap); @@ -1511,21 +1511,21 @@ static irqreturn_t nv_ck804_interrupt(int irq, void *dev_instance) return ret; } -static int nv_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - *val = ioread32(ap->ioaddr.scr_addr + (sc_reg * 4)); + *val = ioread32(link->ap->ioaddr.scr_addr + (sc_reg * 4)); return 0; } -static int nv_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - iowrite32(val, ap->ioaddr.scr_addr + (sc_reg * 4)); + iowrite32(val, link->ap->ioaddr.scr_addr + (sc_reg * 4)); return 0; } @@ -2218,9 +2218,9 @@ static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis) if (!pp->qc_active) return; - if (ap->ops->scr_read(ap, SCR_ERROR, &serror)) + if (ap->ops->scr_read(&ap->link, SCR_ERROR, &serror)) return; - ap->ops->scr_write(ap, SCR_ERROR, serror); + ap->ops->scr_write(&ap->link, SCR_ERROR, serror); if (ata_stat & ATA_ERR) { ata_ehi_clear_desc(ehi); diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c index 030665b..750d8cd 100644 --- a/drivers/ata/sata_promise.c +++ b/drivers/ata/sata_promise.c @@ -137,8 +137,8 @@ struct pdc_port_priv { dma_addr_t pkt_dma; }; -static int pdc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); -static int pdc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val); +static int pdc_sata_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); +static int pdc_sata_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static int pdc_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static int pdc_common_port_start(struct ata_port *ap); static int pdc_sata_port_start(struct ata_port *ap); @@ -386,19 +386,21 @@ static int pdc_sata_cable_detect(struct ata_port *ap) return ATA_CBL_SATA; } -static int pdc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int pdc_sata_scr_read(struct ata_link *link, + unsigned int sc_reg, u32 *val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - *val = readl(ap->ioaddr.scr_addr + (sc_reg * 4)); + *val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4)); return 0; } -static int pdc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int pdc_sata_scr_write(struct ata_link *link, + unsigned int sc_reg, u32 val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - writel(val, ap->ioaddr.scr_addr + (sc_reg * 4)); + writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4)); return 0; } @@ -731,7 +733,7 @@ static void pdc_error_intr(struct ata_port *ap, struct ata_queued_cmd *qc, if (sata_scr_valid(&ap->link)) { u32 serror; - pdc_sata_scr_read(ap, SCR_ERROR, &serror); + pdc_sata_scr_read(&ap->link, SCR_ERROR, &serror); ehi->serror |= serror; } diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c index 1600107..a000c86 100644 --- a/drivers/ata/sata_qstor.c +++ b/drivers/ata/sata_qstor.c @@ -111,8 +111,8 @@ struct qs_port_priv { qs_state_t state; }; -static int qs_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); -static int qs_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val); +static int qs_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); +static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static int qs_ata_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static int qs_port_start(struct ata_port *ap); static void qs_host_stop(struct ata_host *host); @@ -242,11 +242,11 @@ static int qs_prereset(struct ata_link *link, unsigned long deadline) return ata_sff_prereset(link, deadline); } -static int qs_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int qs_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - *val = readl(ap->ioaddr.scr_addr + (sc_reg * 8)); + *val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 8)); return 0; } @@ -256,11 +256,11 @@ static void qs_error_handler(struct ata_port *ap) ata_std_error_handler(ap); } -static int qs_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - writel(val, ap->ioaddr.scr_addr + (sc_reg * 8)); + writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 8)); return 0; } diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index 88bf421..031d7b7 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c @@ -115,8 +115,8 @@ static int sil_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); static int sil_pci_device_resume(struct pci_dev *pdev); #endif static void sil_dev_config(struct ata_device *dev); -static int sil_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); -static int sil_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val); +static int sil_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); +static int sil_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static int sil_set_mode(struct ata_link *link, struct ata_device **r_failed); static void sil_freeze(struct ata_port *ap); static void sil_thaw(struct ata_port *ap); @@ -317,9 +317,9 @@ static inline void __iomem *sil_scr_addr(struct ata_port *ap, return NULL; } -static int sil_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int sil_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val) { - void __iomem *mmio = sil_scr_addr(ap, sc_reg); + void __iomem *mmio = sil_scr_addr(link->ap, sc_reg); if (mmio) { *val = readl(mmio); @@ -328,9 +328,9 @@ static int sil_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) return -EINVAL; } -static int sil_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int sil_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) { - void __iomem *mmio = sil_scr_addr(ap, sc_reg); + void __iomem *mmio = sil_scr_addr(link->ap, sc_reg); if (mmio) { writel(val, mmio); @@ -352,8 +352,8 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2) * controllers continue to assert IRQ as long as * SError bits are pending. Clear SError immediately. */ - sil_scr_read(ap, SCR_ERROR, &serror); - sil_scr_write(ap, SCR_ERROR, serror); + sil_scr_read(&ap->link, SCR_ERROR, &serror); + sil_scr_write(&ap->link, SCR_ERROR, serror); /* Sometimes spurious interrupts occur, double check * it's PHYRDY CHG. diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index 84ffcc2..4621807 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -340,8 +340,8 @@ struct sil24_port_priv { }; static void sil24_dev_config(struct ata_device *dev); -static int sil24_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val); -static int sil24_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val); +static int sil24_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val); +static int sil24_scr_write(struct ata_link *link, unsigned sc_reg, u32 val); static int sil24_qc_defer(struct ata_queued_cmd *qc); static void sil24_qc_prep(struct ata_queued_cmd *qc); static unsigned int sil24_qc_issue(struct ata_queued_cmd *qc); @@ -504,9 +504,9 @@ static int sil24_scr_map[] = { [SCR_ACTIVE] = 3, }; -static int sil24_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val) +static int sil24_scr_read(struct ata_link *link, unsigned sc_reg, u32 *val) { - void __iomem *scr_addr = sil24_port_base(ap) + PORT_SCONTROL; + void __iomem *scr_addr = sil24_port_base(link->ap) + PORT_SCONTROL; if (sc_reg < ARRAY_SIZE(sil24_scr_map)) { void __iomem *addr; @@ -517,9 +517,9 @@ static int sil24_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val) return -EINVAL; } -static int sil24_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val) +static int sil24_scr_write(struct ata_link *link, unsigned sc_reg, u32 val) { - void __iomem *scr_addr = sil24_port_base(ap) + PORT_SCONTROL; + void __iomem *scr_addr = sil24_port_base(link->ap) + PORT_SCONTROL; if (sc_reg < ARRAY_SIZE(sil24_scr_map)) { void __iomem *addr; diff --git a/drivers/ata/sata_sis.c b/drivers/ata/sata_sis.c index 1010b30..9c43b4e 100644 --- a/drivers/ata/sata_sis.c +++ b/drivers/ata/sata_sis.c @@ -64,8 +64,8 @@ enum { }; static int sis_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); -static int sis_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); -static int sis_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val); +static int sis_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); +static int sis_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static const struct pci_device_id sis_pci_tbl[] = { { PCI_VDEVICE(SI, 0x0180), sis_180 }, /* SiS 964/180 */ @@ -134,10 +134,11 @@ static unsigned int get_scr_cfg_addr(struct ata_port *ap, unsigned int sc_reg) return addr; } -static u32 sis_scr_cfg_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static u32 sis_scr_cfg_read(struct ata_link *link, + unsigned int sc_reg, u32 *val) { - struct pci_dev *pdev = to_pci_dev(ap->host->dev); - unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg); + struct pci_dev *pdev = to_pci_dev(link->ap->host->dev); + unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg); u32 val2 = 0; u8 pmr; @@ -158,10 +159,11 @@ static u32 sis_scr_cfg_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) return 0; } -static int sis_scr_cfg_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int sis_scr_cfg_write(struct ata_link *link, + unsigned int sc_reg, u32 val) { - struct pci_dev *pdev = to_pci_dev(ap->host->dev); - unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg); + struct pci_dev *pdev = to_pci_dev(link->ap->host->dev); + unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg); u8 pmr; if (sc_reg == SCR_ERROR) /* doesn't exist in PCI cfg space */ @@ -178,8 +180,9 @@ static int sis_scr_cfg_write(struct ata_port *ap, unsigned int sc_reg, u32 val) return 0; } -static int sis_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int sis_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val) { + struct ata_port *ap = link->ap; struct pci_dev *pdev = to_pci_dev(ap->host->dev); u8 pmr; @@ -187,7 +190,7 @@ static int sis_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) return -EINVAL; if (ap->flags & SIS_FLAG_CFGSCR) - return sis_scr_cfg_read(ap, sc_reg, val); + return sis_scr_cfg_read(link, sc_reg, val); pci_read_config_byte(pdev, SIS_PMR, &pmr); @@ -202,8 +205,9 @@ static int sis_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) return 0; } -static int sis_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int sis_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) { + struct ata_port *ap = link->ap; struct pci_dev *pdev = to_pci_dev(ap->host->dev); u8 pmr; @@ -213,7 +217,7 @@ static int sis_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) pci_read_config_byte(pdev, SIS_PMR, &pmr); if (ap->flags & SIS_FLAG_CFGSCR) - return sis_scr_cfg_write(ap, sc_reg, val); + return sis_scr_cfg_write(link, sc_reg, val); else { iowrite32(val, ap->ioaddr.scr_addr + (sc_reg * 4)); if ((pdev->device == 0x0182) || (pdev->device == 0x0183) || diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c index fb13b82..609d147 100644 --- a/drivers/ata/sata_svw.c +++ b/drivers/ata/sata_svw.c @@ -123,20 +123,22 @@ static int k2_sata_check_atapi_dma(struct ata_queued_cmd *qc) } } -static int k2_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int k2_sata_scr_read(struct ata_link *link, + unsigned int sc_reg, u32 *val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - *val = readl(ap->ioaddr.scr_addr + (sc_reg * 4)); + *val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4)); return 0; } -static int k2_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int k2_sata_scr_write(struct ata_link *link, + unsigned int sc_reg, u32 val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - writel(val, ap->ioaddr.scr_addr + (sc_reg * 4)); + writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4)); return 0; } diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c index db529b8..019575b 100644 --- a/drivers/ata/sata_uli.c +++ b/drivers/ata/sata_uli.c @@ -57,8 +57,8 @@ struct uli_priv { }; static int uli_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); -static int uli_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); -static int uli_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val); +static int uli_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); +static int uli_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static const struct pci_device_id uli_pci_tbl[] = { { PCI_VDEVICE(AL, 0x5289), uli_5289 }, @@ -107,39 +107,39 @@ static unsigned int get_scr_cfg_addr(struct ata_port *ap, unsigned int sc_reg) return hpriv->scr_cfg_addr[ap->port_no] + (4 * sc_reg); } -static u32 uli_scr_cfg_read(struct ata_port *ap, unsigned int sc_reg) +static u32 uli_scr_cfg_read(struct ata_link *link, unsigned int sc_reg) { - struct pci_dev *pdev = to_pci_dev(ap->host->dev); - unsigned int cfg_addr = get_scr_cfg_addr(ap, sc_reg); + struct pci_dev *pdev = to_pci_dev(link->ap->host->dev); + unsigned int cfg_addr = get_scr_cfg_addr(link->ap, sc_reg); u32 val; pci_read_config_dword(pdev, cfg_addr, &val); return val; } -static void uli_scr_cfg_write(struct ata_port *ap, unsigned int scr, u32 val) +static void uli_scr_cfg_write(struct ata_link *link, unsigned int scr, u32 val) { - struct pci_dev *pdev = to_pci_dev(ap->host->dev); - unsigned int cfg_addr = get_scr_cfg_addr(ap, scr); + struct pci_dev *pdev = to_pci_dev(link->ap->host->dev); + unsigned int cfg_addr = get_scr_cfg_addr(link->ap, scr); pci_write_config_dword(pdev, cfg_addr, val); } -static int uli_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int uli_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - *val = uli_scr_cfg_read(ap, sc_reg); + *val = uli_scr_cfg_read(link, sc_reg); return 0; } -static int uli_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int uli_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) { if (sc_reg > SCR_CONTROL) //SCR_CONTROL=2, SCR_ERROR=1, SCR_STATUS=0 return -EINVAL; - uli_scr_cfg_write(ap, sc_reg, val); + uli_scr_cfg_write(link, sc_reg, val); return 0; } diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index 96deeb3..1cfa745 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -68,8 +68,8 @@ enum { }; static int svia_init_one(struct pci_dev *pdev, const struct pci_device_id *ent); -static int svia_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val); -static int svia_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val); +static int svia_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); +static int svia_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); static void svia_noop_freeze(struct ata_port *ap); static int vt6420_prereset(struct ata_link *link, unsigned long deadline); static int vt6421_pata_cable_detect(struct ata_port *ap); @@ -152,19 +152,19 @@ MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(pci, svia_pci_tbl); MODULE_VERSION(DRV_VERSION); -static int svia_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int svia_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - *val = ioread32(ap->ioaddr.scr_addr + (4 * sc_reg)); + *val = ioread32(link->ap->ioaddr.scr_addr + (4 * sc_reg)); return 0; } -static int svia_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int svia_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - iowrite32(val, ap->ioaddr.scr_addr + (4 * sc_reg)); + iowrite32(val, link->ap->ioaddr.scr_addr + (4 * sc_reg)); return 0; } @@ -210,20 +210,20 @@ static int vt6420_prereset(struct ata_link *link, unsigned long deadline) goto skip_scr; /* Resume phy. This is the old SATA resume sequence */ - svia_scr_write(ap, SCR_CONTROL, 0x300); - svia_scr_read(ap, SCR_CONTROL, &scontrol); /* flush */ + svia_scr_write(link, SCR_CONTROL, 0x300); + svia_scr_read(link, SCR_CONTROL, &scontrol); /* flush */ /* wait for phy to become ready, if necessary */ do { msleep(200); - svia_scr_read(ap, SCR_STATUS, &sstatus); + svia_scr_read(link, SCR_STATUS, &sstatus); if ((sstatus & 0xf) != 1) break; } while (time_before(jiffies, timeout)); /* open code sata_print_link_status() */ - svia_scr_read(ap, SCR_STATUS, &sstatus); - svia_scr_read(ap, SCR_CONTROL, &scontrol); + svia_scr_read(link, SCR_STATUS, &sstatus); + svia_scr_read(link, SCR_CONTROL, &scontrol); online = (sstatus & 0xf) == 0x3; @@ -232,7 +232,7 @@ static int vt6420_prereset(struct ata_link *link, unsigned long deadline) online ? "up" : "down", sstatus, scontrol); /* SStatus is read one more time */ - svia_scr_read(ap, SCR_STATUS, &sstatus); + svia_scr_read(link, SCR_STATUS, &sstatus); if (!online) { /* tell EH to bail */ diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c index f3d635c..c57cdff 100644 --- a/drivers/ata/sata_vsc.c +++ b/drivers/ata/sata_vsc.c @@ -98,20 +98,22 @@ enum { VSC_SATA_INT_PHY_CHANGE), }; -static int vsc_sata_scr_read(struct ata_port *ap, unsigned int sc_reg, u32 *val) +static int vsc_sata_scr_read(struct ata_link *link, + unsigned int sc_reg, u32 *val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - *val = readl(ap->ioaddr.scr_addr + (sc_reg * 4)); + *val = readl(link->ap->ioaddr.scr_addr + (sc_reg * 4)); return 0; } -static int vsc_sata_scr_write(struct ata_port *ap, unsigned int sc_reg, u32 val) +static int vsc_sata_scr_write(struct ata_link *link, + unsigned int sc_reg, u32 val) { if (sc_reg > SCR_CONTROL) return -EINVAL; - writel(val, ap->ioaddr.scr_addr + (sc_reg * 4)); + writel(val, link->ap->ioaddr.scr_addr + (sc_reg * 4)); return 0; } diff --git a/drivers/base/base.h b/drivers/base/base.h index 31dc0cd..0a5f055 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -54,7 +54,7 @@ struct driver_private { */ struct class_private { struct kset class_subsys; - struct list_head class_devices; + struct klist class_devices; struct list_head class_interfaces; struct kset class_dirs; struct mutex class_mutex; diff --git a/drivers/base/class.c b/drivers/base/class.c index cc5e28c..eb85e43 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -135,6 +135,20 @@ static void remove_class_attrs(struct class *cls) } } +static void klist_class_dev_get(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_class); + + get_device(dev); +} + +static void klist_class_dev_put(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_class); + + put_device(dev); +} + int __class_register(struct class *cls, struct lock_class_key *key) { struct class_private *cp; @@ -145,7 +159,7 @@ int __class_register(struct class *cls, struct lock_class_key *key) cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; - INIT_LIST_HEAD(&cp->class_devices); + klist_init(&cp->class_devices, klist_class_dev_get, klist_class_dev_put); INIT_LIST_HEAD(&cp->class_interfaces); kset_init(&cp->class_dirs); __mutex_init(&cp->class_mutex, "struct class mutex", key); @@ -269,6 +283,71 @@ char *make_class_name(const char *name, struct kobject *kobj) #endif /** + * class_dev_iter_init - initialize class device iterator + * @iter: class iterator to initialize + * @class: the class we wanna iterate over + * @start: the device to start iterating from, if any + * @type: device_type of the devices to iterate over, NULL for all + * + * Initialize class iterator @iter such that it iterates over devices + * of @class. If @start is set, the list iteration will start there, + * otherwise if it is NULL, the iteration starts at the beginning of + * the list. + */ +void class_dev_iter_init(struct class_dev_iter *iter, struct class *class, + struct device *start, const struct device_type *type) +{ + struct klist_node *start_knode = NULL; + + if (start) + start_knode = &start->knode_class; + klist_iter_init_node(&class->p->class_devices, &iter->ki, start_knode); + iter->type = type; +} +EXPORT_SYMBOL_GPL(class_dev_iter_init); + +/** + * class_dev_iter_next - iterate to the next device + * @iter: class iterator to proceed + * + * Proceed @iter to the next device and return it. Returns NULL if + * iteration is complete. + * + * The returned device is referenced and won't be released till + * iterator is proceed to the next device or exited. The caller is + * free to do whatever it wants to do with the device including + * calling back into class code. + */ +struct device *class_dev_iter_next(struct class_dev_iter *iter) +{ + struct klist_node *knode; + struct device *dev; + + while (1) { + knode = klist_next(&iter->ki); + if (!knode) + return NULL; + dev = container_of(knode, struct device, knode_class); + if (!iter->type || iter->type == dev->type) + return dev; + } +} +EXPORT_SYMBOL_GPL(class_dev_iter_next); + +/** + * class_dev_iter_exit - finish iteration + * @iter: class iterator to finish + * + * Finish an iteration. Always call this function after iteration is + * complete whether the iteration ran till the end or not. + */ +void class_dev_iter_exit(struct class_dev_iter *iter) +{ + klist_iter_exit(&iter->ki); +} +EXPORT_SYMBOL_GPL(class_dev_iter_exit); + +/** * class_for_each_device - device iterator * @class: the class we're iterating * @start: the device to start with in the list, if any. @@ -283,13 +362,13 @@ char *make_class_name(const char *name, struct kobject *kobj) * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * - * Note, we hold class->class_mutex in this function, so it can not be - * re-acquired in @fn, otherwise it will self-deadlocking. For - * example, calls to add or remove class members would be verboten. + * @fn is allowed to do anything including calling back into class + * code. There's no locking restriction. */ int class_for_each_device(struct class *class, struct device *start, void *data, int (*fn)(struct device *, void *)) { + struct class_dev_iter iter; struct device *dev; int error = 0; @@ -301,20 +380,13 @@ int class_for_each_device(struct class *class, struct device *start, return -EINVAL; } - mutex_lock(&class->p->class_mutex); - list_for_each_entry(dev, &class->p->class_devices, node) { - if (start) { - if (start == dev) - start = NULL; - continue; - } - dev = get_device(dev); + class_dev_iter_init(&iter, class, start, NULL); + while ((dev = class_dev_iter_next(&iter))) { error = fn(dev, data); - put_device(dev); if (error) break; } - mutex_unlock(&class->p->class_mutex); + class_dev_iter_exit(&iter); return error; } @@ -337,16 +409,15 @@ EXPORT_SYMBOL_GPL(class_for_each_device); * * Note, you will need to drop the reference with put_device() after use. * - * We hold class->class_mutex in this function, so it can not be - * re-acquired in @match, otherwise it will self-deadlocking. For - * example, calls to add or remove class members would be verboten. + * @fn is allowed to do anything including calling back into class + * code. There's no locking restriction. */ struct device *class_find_device(struct class *class, struct device *start, void *data, int (*match)(struct device *, void *)) { + struct class_dev_iter iter; struct device *dev; - int found = 0; if (!class) return NULL; @@ -356,29 +427,23 @@ struct device *class_find_device(struct class *class, struct device *start, return NULL; } - mutex_lock(&class->p->class_mutex); - list_for_each_entry(dev, &class->p->class_devices, node) { - if (start) { - if (start == dev) - start = NULL; - continue; - } - dev = get_device(dev); + class_dev_iter_init(&iter, class, start, NULL); + while ((dev = class_dev_iter_next(&iter))) { if (match(dev, data)) { - found = 1; + get_device(dev); break; - } else - put_device(dev); + } } - mutex_unlock(&class->p->class_mutex); + class_dev_iter_exit(&iter); - return found ? dev : NULL; + return dev; } EXPORT_SYMBOL_GPL(class_find_device); int class_interface_register(struct class_interface *class_intf) { struct class *parent; + struct class_dev_iter iter; struct device *dev; if (!class_intf || !class_intf->class) @@ -391,8 +456,10 @@ int class_interface_register(struct class_interface *class_intf) mutex_lock(&parent->p->class_mutex); list_add_tail(&class_intf->node, &parent->p->class_interfaces); if (class_intf->add_dev) { - list_for_each_entry(dev, &parent->p->class_devices, node) + class_dev_iter_init(&iter, parent, NULL, NULL); + while ((dev = class_dev_iter_next(&iter))) class_intf->add_dev(dev, class_intf); + class_dev_iter_exit(&iter); } mutex_unlock(&parent->p->class_mutex); @@ -402,6 +469,7 @@ int class_interface_register(struct class_interface *class_intf) void class_interface_unregister(struct class_interface *class_intf) { struct class *parent = class_intf->class; + struct class_dev_iter iter; struct device *dev; if (!parent) @@ -410,8 +478,10 @@ void class_interface_unregister(struct class_interface *class_intf) mutex_lock(&parent->p->class_mutex); list_del_init(&class_intf->node); if (class_intf->remove_dev) { - list_for_each_entry(dev, &parent->p->class_devices, node) + class_dev_iter_init(&iter, parent, NULL, NULL); + while ((dev = class_dev_iter_next(&iter))) class_intf->remove_dev(dev, class_intf); + class_dev_iter_exit(&iter); } mutex_unlock(&parent->p->class_mutex); diff --git a/drivers/base/core.c b/drivers/base/core.c index d021c98..b98cb14 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -536,7 +536,6 @@ void device_initialize(struct device *dev) klist_init(&dev->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->dma_pools); - INIT_LIST_HEAD(&dev->node); init_MUTEX(&dev->sem); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); @@ -916,7 +915,8 @@ int device_add(struct device *dev) if (dev->class) { mutex_lock(&dev->class->p->class_mutex); /* tie the class to the device */ - list_add_tail(&dev->node, &dev->class->p->class_devices); + klist_add_tail(&dev->knode_class, + &dev->class->p->class_devices); /* notify any interfaces that the device is here */ list_for_each_entry(class_intf, @@ -1032,7 +1032,7 @@ void device_del(struct device *dev) if (class_intf->remove_dev) class_intf->remove_dev(dev, class_intf); /* remove the device from the class list */ - list_del_init(&dev->node); + klist_del(&dev->knode_class); mutex_unlock(&dev->class->p->class_mutex); } device_remove_file(dev, &uevent_attr); diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c index 0c39782..aa69556 100644 --- a/drivers/block/aoe/aoeblk.c +++ b/drivers/block/aoe/aoeblk.c @@ -109,12 +109,12 @@ static const struct attribute_group attr_group = { static int aoedisk_add_sysfs(struct aoedev *d) { - return sysfs_create_group(&d->gd->dev.kobj, &attr_group); + return sysfs_create_group(&disk_to_dev(d->gd)->kobj, &attr_group); } void aoedisk_rm_sysfs(struct aoedev *d) { - sysfs_remove_group(&d->gd->dev.kobj, &attr_group); + sysfs_remove_group(&disk_to_dev(d->gd)->kobj, &attr_group); } static int @@ -276,7 +276,7 @@ aoeblk_gdalloc(void *vp) gd->first_minor = d->sysminor * AOE_PARTITIONS; gd->fops = &aoe_bdops; gd->private_data = d; - gd->capacity = d->ssize; + set_capacity(gd, d->ssize); snprintf(gd->disk_name, sizeof gd->disk_name, "etherd/e%ld.%d", d->aoemajor, d->aoeminor); diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 2f17462..961d29a 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -645,7 +645,7 @@ aoecmd_sleepwork(struct work_struct *work) unsigned long flags; u64 ssize; - ssize = d->gd->capacity; + ssize = get_capacity(d->gd); bd = bdget_disk(d->gd, 0); if (bd) { @@ -707,7 +707,7 @@ ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id) if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE)) return; if (d->gd != NULL) { - d->gd->capacity = ssize; + set_capacity(d->gd, ssize); d->flags |= DEVFL_NEWSIZE; } else d->flags |= DEVFL_GDALLOC; @@ -756,12 +756,17 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector unsigned long n_sect = bio->bi_size >> 9; const int rw = bio_data_dir(bio); struct hd_struct *part; + int cpu; - part = get_part(disk, sector); - all_stat_inc(disk, part, ios[rw], sector); - all_stat_add(disk, part, ticks[rw], duration, sector); - all_stat_add(disk, part, sectors[rw], n_sect, sector); - all_stat_add(disk, part, io_ticks, duration, sector); + cpu = part_stat_lock(); + part = disk_map_sector_rcu(disk, sector); + + part_stat_inc(cpu, part, ios[rw]); + part_stat_add(cpu, part, ticks[rw], duration); + part_stat_add(cpu, part, sectors[rw], n_sect); + part_stat_add(cpu, part, io_ticks, duration); + + part_stat_unlock(); } void diff --git a/drivers/block/aoe/aoedev.c b/drivers/block/aoe/aoedev.c index a1d813a..6a8038d 100644 --- a/drivers/block/aoe/aoedev.c +++ b/drivers/block/aoe/aoedev.c @@ -91,7 +91,7 @@ aoedev_downdev(struct aoedev *d) } if (d->gd) - d->gd->capacity = 0; + set_capacity(d->gd, 0); d->flags &= ~DEVFL_UP; } diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index b73116e..1e1f915 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -3460,8 +3460,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not"); hba[i]->cmd_pool_bits = - kmalloc(((hba[i]->nr_cmds + BITS_PER_LONG - - 1) / BITS_PER_LONG) * sizeof(unsigned long), GFP_KERNEL); + kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) + * sizeof(unsigned long), GFP_KERNEL); hba[i]->cmd_pool = (CommandList_struct *) pci_alloc_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct), @@ -3493,8 +3493,8 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, /* command and error info recs zeroed out before they are used */ memset(hba[i]->cmd_pool_bits, 0, - ((hba[i]->nr_cmds + BITS_PER_LONG - - 1) / BITS_PER_LONG) * sizeof(unsigned long)); + DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG) + * sizeof(unsigned long)); hba[i]->num_luns = 0; hba[i]->highest_lun = -1; diff --git a/drivers/block/cciss_scsi.c b/drivers/block/cciss_scsi.c index e1233aa..a3fd87b 100644 --- a/drivers/block/cciss_scsi.c +++ b/drivers/block/cciss_scsi.c @@ -365,7 +365,7 @@ struct scsi2map { static int cciss_scsi_add_entry(int ctlr, int hostno, - unsigned char *scsi3addr, int devtype, + struct cciss_scsi_dev_t *device, struct scsi2map *added, int *nadded) { /* assumes hba[ctlr]->scsi_ctlr->lock is held */ @@ -384,12 +384,12 @@ cciss_scsi_add_entry(int ctlr, int hostno, lun = 0; /* Is this device a non-zero lun of a multi-lun device */ /* byte 4 of the 8-byte LUN addr will contain the logical unit no. */ - if (scsi3addr[4] != 0) { + if (device->scsi3addr[4] != 0) { /* Search through our list and find the device which */ /* has the same 8 byte LUN address, excepting byte 4. */ /* Assign the same bus and target for this new LUN. */ /* Use the logical unit number from the firmware. */ - memcpy(addr1, scsi3addr, 8); + memcpy(addr1, device->scsi3addr, 8); addr1[4] = 0; for (i = 0; i < n; i++) { sd = &ccissscsi[ctlr].dev[i]; @@ -399,7 +399,7 @@ cciss_scsi_add_entry(int ctlr, int hostno, if (memcmp(addr1, addr2, 8) == 0) { bus = sd->bus; target = sd->target; - lun = scsi3addr[4]; + lun = device->scsi3addr[4]; break; } } @@ -420,8 +420,12 @@ cciss_scsi_add_entry(int ctlr, int hostno, added[*nadded].lun = sd->lun; (*nadded)++; - memcpy(&sd->scsi3addr[0], scsi3addr, 8); - sd->devtype = devtype; + memcpy(sd->scsi3addr, device->scsi3addr, 8); + memcpy(sd->vendor, device->vendor, sizeof(sd->vendor)); + memcpy(sd->revision, device->revision, sizeof(sd->revision)); + memcpy(sd->device_id, device->device_id, sizeof(sd->device_id)); + sd->devtype = device->devtype; + ccissscsi[ctlr].ndevices++; /* initially, (before registering with scsi layer) we don't @@ -487,6 +491,22 @@ static void fixup_botched_add(int ctlr, char *scsi3addr) CPQ_TAPE_UNLOCK(ctlr, flags); } +static int device_is_the_same(struct cciss_scsi_dev_t *dev1, + struct cciss_scsi_dev_t *dev2) +{ + return dev1->devtype == dev2->devtype && + memcmp(dev1->scsi3addr, dev2->scsi3addr, + sizeof(dev1->scsi3addr)) == 0 && + memcmp(dev1->device_id, dev2->device_id, + sizeof(dev1->device_id)) == 0 && + memcmp(dev1->vendor, dev2->vendor, + sizeof(dev1->vendor)) == 0 && + memcmp(dev1->model, dev2->model, + sizeof(dev1->model)) == 0 && + memcmp(dev1->revision, dev2->revision, + sizeof(dev1->revision)) == 0; +} + static int adjust_cciss_scsi_table(int ctlr, int hostno, struct cciss_scsi_dev_t sd[], int nsds) @@ -532,7 +552,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, for (j=0;j<nsds;j++) { if (SCSI3ADDR_EQ(sd[j].scsi3addr, csd->scsi3addr)) { - if (sd[j].devtype == csd->devtype) + if (device_is_the_same(&sd[j], csd)) found=2; else found=1; @@ -548,22 +568,26 @@ adjust_cciss_scsi_table(int ctlr, int hostno, cciss_scsi_remove_entry(ctlr, hostno, i, removed, &nremoved); /* remove ^^^, hence i not incremented */ - } - else if (found == 1) { /* device is different kind */ + } else if (found == 1) { /* device is different in some way */ changes++; - printk("cciss%d: device c%db%dt%dl%d type changed " - "(device type now %s).\n", - ctlr, hostno, csd->bus, csd->target, csd->lun, - scsi_device_type(csd->devtype)); + printk("cciss%d: device c%db%dt%dl%d has changed.\n", + ctlr, hostno, csd->bus, csd->target, csd->lun); cciss_scsi_remove_entry(ctlr, hostno, i, removed, &nremoved); /* remove ^^^, hence i not incremented */ - if (cciss_scsi_add_entry(ctlr, hostno, - &sd[j].scsi3addr[0], sd[j].devtype, + if (cciss_scsi_add_entry(ctlr, hostno, &sd[j], added, &nadded) != 0) /* we just removed one, so add can't fail. */ BUG(); csd->devtype = sd[j].devtype; + memcpy(csd->device_id, sd[j].device_id, + sizeof(csd->device_id)); + memcpy(csd->vendor, sd[j].vendor, + sizeof(csd->vendor)); + memcpy(csd->model, sd[j].model, + sizeof(csd->model)); + memcpy(csd->revision, sd[j].revision, + sizeof(csd->revision)); } else /* device is same as it ever was, */ i++; /* so just move along. */ } @@ -577,7 +601,7 @@ adjust_cciss_scsi_table(int ctlr, int hostno, csd = &ccissscsi[ctlr].dev[j]; if (SCSI3ADDR_EQ(sd[i].scsi3addr, csd->scsi3addr)) { - if (sd[i].devtype == csd->devtype) + if (device_is_the_same(&sd[i], csd)) found=2; /* found device */ else found=1; /* found a bug. */ @@ -586,16 +610,14 @@ adjust_cciss_scsi_table(int ctlr, int hostno, } if (!found) { changes++; - if (cciss_scsi_add_entry(ctlr, hostno, - - &sd[i].scsi3addr[0], sd[i].devtype, + if (cciss_scsi_add_entry(ctlr, hostno, &sd[i], added, &nadded) != 0) break; } else if (found == 1) { /* should never happen... */ changes++; - printk("cciss%d: device unexpectedly changed type\n", - ctlr); + printk(KERN_WARNING "cciss%d: device " + "unexpectedly changed\n", ctlr); /* but if it does happen, we just ignore that device */ } } @@ -1012,7 +1034,8 @@ cciss_scsi_interpret_error(CommandList_struct *cp) static int cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, - unsigned char *buf, unsigned char bufsize) + unsigned char page, unsigned char *buf, + unsigned char bufsize) { int rc; CommandList_struct *cp; @@ -1032,8 +1055,8 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, ei = cp->err_info; cdb[0] = CISS_INQUIRY; - cdb[1] = 0; - cdb[2] = 0; + cdb[1] = (page != 0); + cdb[2] = page; cdb[3] = 0; cdb[4] = bufsize; cdb[5] = 0; @@ -1053,6 +1076,25 @@ cciss_scsi_do_inquiry(ctlr_info_t *c, unsigned char *scsi3addr, return rc; } +/* Get the device id from inquiry page 0x83 */ +static int cciss_scsi_get_device_id(ctlr_info_t *c, unsigned char *scsi3addr, + unsigned char *device_id, int buflen) +{ + int rc; + unsigned char *buf; + + if (buflen > 16) + buflen = 16; + buf = kzalloc(64, GFP_KERNEL); + if (!buf) + return -1; + rc = cciss_scsi_do_inquiry(c, scsi3addr, 0x83, buf, 64); + if (rc == 0) + memcpy(device_id, &buf[8], buflen); + kfree(buf); + return rc != 0; +} + static int cciss_scsi_do_report_phys_luns(ctlr_info_t *c, ReportLunData_struct *buf, int bufsize) @@ -1142,25 +1184,21 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) ctlr_info_t *c; __u32 num_luns=0; unsigned char *ch; - /* unsigned char found[CCISS_MAX_SCSI_DEVS_PER_HBA]; */ - struct cciss_scsi_dev_t currentsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; + struct cciss_scsi_dev_t *currentsd, *this_device; int ncurrent=0; int reportlunsize = sizeof(*ld_buff) + CISS_MAX_PHYS_LUN * 8; int i; c = (ctlr_info_t *) hba[cntl_num]; ld_buff = kzalloc(reportlunsize, GFP_KERNEL); - if (ld_buff == NULL) { - printk(KERN_ERR "cciss: out of memory\n"); - return; - } inq_buff = kmalloc(OBDR_TAPE_INQ_SIZE, GFP_KERNEL); - if (inq_buff == NULL) { - printk(KERN_ERR "cciss: out of memory\n"); - kfree(ld_buff); - return; + currentsd = kzalloc(sizeof(*currentsd) * + (CCISS_MAX_SCSI_DEVS_PER_HBA+1), GFP_KERNEL); + if (ld_buff == NULL || inq_buff == NULL || currentsd == NULL) { + printk(KERN_ERR "cciss: out of memory\n"); + goto out; } - + this_device = ¤tsd[CCISS_MAX_SCSI_DEVS_PER_HBA]; if (cciss_scsi_do_report_phys_luns(c, ld_buff, reportlunsize) == 0) { ch = &ld_buff->LUNListLength[0]; num_luns = ((ch[0]<<24) | (ch[1]<<16) | (ch[2]<<8) | ch[3]) / 8; @@ -1179,23 +1217,34 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) /* adjust our table of devices */ - for(i=0; i<num_luns; i++) - { - int devtype; - + for (i = 0; i < num_luns; i++) { /* for each physical lun, do an inquiry */ if (ld_buff->LUN[i][3] & 0xC0) continue; memset(inq_buff, 0, OBDR_TAPE_INQ_SIZE); memcpy(&scsi3addr[0], &ld_buff->LUN[i][0], 8); - if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, inq_buff, - (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) { + if (cciss_scsi_do_inquiry(hba[cntl_num], scsi3addr, 0, inq_buff, + (unsigned char) OBDR_TAPE_INQ_SIZE) != 0) /* Inquiry failed (msg printed already) */ - devtype = 0; /* so we will skip this device. */ - } else /* what kind of device is this? */ - devtype = (inq_buff[0] & 0x1f); - - switch (devtype) + continue; /* so we will skip this device. */ + + this_device->devtype = (inq_buff[0] & 0x1f); + this_device->bus = -1; + this_device->target = -1; + this_device->lun = -1; + memcpy(this_device->scsi3addr, scsi3addr, 8); + memcpy(this_device->vendor, &inq_buff[8], + sizeof(this_device->vendor)); + memcpy(this_device->model, &inq_buff[16], + sizeof(this_device->model)); + memcpy(this_device->revision, &inq_buff[32], + sizeof(this_device->revision)); + memset(this_device->device_id, 0, + sizeof(this_device->device_id)); + cciss_scsi_get_device_id(hba[cntl_num], scsi3addr, + this_device->device_id, sizeof(this_device->device_id)); + + switch (this_device->devtype) { case 0x05: /* CD-ROM */ { @@ -1220,15 +1269,10 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) if (ncurrent >= CCISS_MAX_SCSI_DEVS_PER_HBA) { printk(KERN_INFO "cciss%d: %s ignored, " "too many devices.\n", cntl_num, - scsi_device_type(devtype)); + scsi_device_type(this_device->devtype)); break; } - memcpy(¤tsd[ncurrent].scsi3addr[0], - &scsi3addr[0], 8); - currentsd[ncurrent].devtype = devtype; - currentsd[ncurrent].bus = -1; - currentsd[ncurrent].target = -1; - currentsd[ncurrent].lun = -1; + currentsd[ncurrent] = *this_device; ncurrent++; break; default: @@ -1240,6 +1284,7 @@ cciss_update_non_disk_devices(int cntl_num, int hostno) out: kfree(inq_buff); kfree(ld_buff); + kfree(currentsd); return; } diff --git a/drivers/block/cciss_scsi.h b/drivers/block/cciss_scsi.h index d9c2c58..7b75024 100644 --- a/drivers/block/cciss_scsi.h +++ b/drivers/block/cciss_scsi.h @@ -66,6 +66,10 @@ struct cciss_scsi_dev_t { int devtype; int bus, target, lun; /* as presented to the OS */ unsigned char scsi3addr[8]; /* as presented to the HW */ + unsigned char device_id[16]; /* from inquiry pg. 0x83 */ + unsigned char vendor[8]; /* bytes 8-15 of inquiry data */ + unsigned char model[16]; /* bytes 16-31 of inquiry data */ + unsigned char revision[4]; /* bytes 32-35 of inquiry data */ }; struct cciss_scsi_hba_t { diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index 09c1434..3d96752 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -424,7 +424,7 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev) hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t), &(hba[i]->cmd_pool_dhandle)); hba[i]->cmd_pool_bits = kcalloc( - (NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long), + DIV_ROUND_UP(NR_CMDS, BITS_PER_LONG), sizeof(unsigned long), GFP_KERNEL); if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 395f8ea..cf64ddf 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -423,8 +423,15 @@ static struct floppy_raw_cmd *raw_cmd, default_raw_cmd; * 1581's logical side 0 is on physical side 1, whereas the Sharp's logical * side 0 is on physical side 0 (but with the misnamed sector IDs). * 'stretch' should probably be renamed to something more general, like - * 'options'. Other parameters should be self-explanatory (see also - * setfdprm(8)). + * 'options'. + * + * Bits 2 through 9 of 'stretch' tell the number of the first sector. + * The LSB (bit 2) is flipped. For most disks, the first sector + * is 1 (represented by 0x00<<2). For some CP/M and music sampler + * disks (such as Ensoniq EPS 16plus) it is 0 (represented as 0x01<<2). + * For Amstrad CPC disks it is 0xC1 (represented as 0xC0<<2). + * + * Other parameters should be self-explanatory (see also setfdprm(8)). */ /* Size @@ -1355,20 +1362,20 @@ static void fdc_specify(void) } /* Convert step rate from microseconds to milliseconds and 4 bits */ - srt = 16 - (DP->srt * scale_dtr / 1000 + NOMINAL_DTR - 1) / NOMINAL_DTR; + srt = 16 - DIV_ROUND_UP(DP->srt * scale_dtr / 1000, NOMINAL_DTR); if (slow_floppy) { srt = srt / 4; } SUPBOUND(srt, 0xf); INFBOUND(srt, 0); - hlt = (DP->hlt * scale_dtr / 2 + NOMINAL_DTR - 1) / NOMINAL_DTR; + hlt = DIV_ROUND_UP(DP->hlt * scale_dtr / 2, NOMINAL_DTR); if (hlt < 0x01) hlt = 0x01; else if (hlt > 0x7f) hlt = hlt_max_code; - hut = (DP->hut * scale_dtr / 16 + NOMINAL_DTR - 1) / NOMINAL_DTR; + hut = DIV_ROUND_UP(DP->hut * scale_dtr / 16, NOMINAL_DTR); if (hut < 0x1) hut = 0x1; else if (hut > 0xf) @@ -2236,9 +2243,9 @@ static void setup_format_params(int track) } } } - if (_floppy->stretch & FD_ZEROBASED) { + if (_floppy->stretch & FD_SECTBASEMASK) { for (count = 0; count < F_SECT_PER_TRACK; count++) - here[count].sect--; + here[count].sect += FD_SECTBASE(_floppy) - 1; } } @@ -2385,7 +2392,7 @@ static void rw_interrupt(void) #ifdef FLOPPY_SANITY_CHECK if (nr_sectors / ssize > - (in_sector_offset + current_count_sectors + ssize - 1) / ssize) { + DIV_ROUND_UP(in_sector_offset + current_count_sectors, ssize)) { DPRINT("long rw: %x instead of %lx\n", nr_sectors, current_count_sectors); printk("rs=%d s=%d\n", R_SECTOR, SECTOR); @@ -2649,7 +2656,7 @@ static int make_raw_rw_request(void) } HEAD = fsector_t / _floppy->sect; - if (((_floppy->stretch & (FD_SWAPSIDES | FD_ZEROBASED)) || + if (((_floppy->stretch & (FD_SWAPSIDES | FD_SECTBASEMASK)) || TESTF(FD_NEED_TWADDLE)) && fsector_t < _floppy->sect) max_sector = _floppy->sect; @@ -2679,7 +2686,7 @@ static int make_raw_rw_request(void) CODE2SIZE; SECT_PER_TRACK = _floppy->sect << 2 >> SIZECODE; SECTOR = ((fsector_t % _floppy->sect) << 2 >> SIZECODE) + - ((_floppy->stretch & FD_ZEROBASED) ? 0 : 1); + FD_SECTBASE(_floppy); /* tracksize describes the size which can be filled up with sectors * of size ssize. @@ -3311,7 +3318,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, g->head <= 0 || g->track <= 0 || g->track > UDP->tracks >> STRETCH(g) || /* check if reserved bits are set */ - (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_ZEROBASED)) != 0) + (g->stretch & ~(FD_STRETCH | FD_SWAPSIDES | FD_SECTBASEMASK)) != 0) return -EINVAL; if (type) { if (!capable(CAP_SYS_ADMIN)) @@ -3356,7 +3363,7 @@ static inline int set_geometry(unsigned int cmd, struct floppy_struct *g, if (DRS->maxblock > user_params[drive].sect || DRS->maxtrack || ((user_params[drive].sect ^ oldStretch) & - (FD_SWAPSIDES | FD_ZEROBASED))) + (FD_SWAPSIDES | FD_SECTBASEMASK))) invalidate_drive(bdev); else process_fd_request(); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 1778e4a..7b33512 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -403,7 +403,7 @@ static int nbd_do_it(struct nbd_device *lo) BUG_ON(lo->magic != LO_MAGIC); lo->pid = current->pid; - ret = sysfs_create_file(&lo->disk->dev.kobj, &pid_attr.attr); + ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); if (ret) { printk(KERN_ERR "nbd: sysfs_create_file failed!"); return ret; @@ -412,7 +412,7 @@ static int nbd_do_it(struct nbd_device *lo) while ((req = nbd_read_stat(lo)) != NULL) nbd_end_request(req); - sysfs_remove_file(&lo->disk->dev.kobj, &pid_attr.attr); + sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); return 0; } diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 29b7a64..0e07715 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2544,7 +2544,7 @@ static int pkt_make_request(struct request_queue *q, struct bio *bio) if (last_zone != zone) { BUG_ON(last_zone != zone + pd->settings.size); first_sectors = last_zone - bio->bi_sector; - bp = bio_split(bio, bio_split_pool, first_sectors); + bp = bio_split(bio, first_sectors); BUG_ON(!bp); pkt_make_request(q, &bp->bio1); pkt_make_request(q, &bp->bio2); @@ -2911,7 +2911,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) if (!disk->queue) goto out_mem2; - pd->pkt_dev = MKDEV(disk->major, disk->first_minor); + pd->pkt_dev = MKDEV(pktdev_major, idx); ret = pkt_new_dev(pd, dev); if (ret) goto out_new_dev; diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index d797e20..936466f 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -199,7 +199,8 @@ static void ps3disk_do_request(struct ps3_storage_device *dev, if (blk_fs_request(req)) { if (ps3disk_submit_request_sg(dev, req)) break; - } else if (req->cmd_type == REQ_TYPE_FLUSH) { + } else if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_FLUSH) { if (ps3disk_submit_flush_request(dev, req)) break; } else { @@ -257,7 +258,8 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) return IRQ_HANDLED; } - if (req->cmd_type == REQ_TYPE_FLUSH) { + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_FLUSH) { read = 0; num_sectors = req->hard_cur_sectors; op = "flush"; @@ -405,7 +407,8 @@ static void ps3disk_prepare_flush(struct request_queue *q, struct request *req) dev_dbg(&dev->sbd.core, "%s:%u\n", __func__, __LINE__); - req->cmd_type = REQ_TYPE_FLUSH; + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_FLUSH; } static unsigned long ps3disk_mask; @@ -538,7 +541,7 @@ static int ps3disk_remove(struct ps3_system_bus_device *_dev) struct ps3disk_private *priv = dev->sbd.core.driver_data; mutex_lock(&ps3disk_mask_mutex); - __clear_bit(priv->gendisk->first_minor / PS3DISK_MINORS, + __clear_bit(MINOR(disk_devt(priv->gendisk)) / PS3DISK_MINORS, &ps3disk_mask); mutex_unlock(&ps3disk_mask_mutex); del_gendisk(priv->gendisk); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 4225109..6ec5fc0 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -47,20 +47,20 @@ static void blk_done(struct virtqueue *vq) spin_lock_irqsave(&vblk->lock, flags); while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) { - int uptodate; + int error; switch (vbr->status) { case VIRTIO_BLK_S_OK: - uptodate = 1; + error = 0; break; case VIRTIO_BLK_S_UNSUPP: - uptodate = -ENOTTY; + error = -ENOTTY; break; default: - uptodate = 0; + error = -EIO; break; } - end_dequeued_request(vbr->req, uptodate); + __blk_end_request(vbr->req, error, blk_rq_bytes(vbr->req)); list_del(&vbr->list); mempool_free(vbr, vblk->pool); } @@ -84,11 +84,11 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, if (blk_fs_request(vbr->req)) { vbr->out_hdr.type = 0; vbr->out_hdr.sector = vbr->req->sector; - vbr->out_hdr.ioprio = vbr->req->ioprio; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else if (blk_pc_request(vbr->req)) { vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD; vbr->out_hdr.sector = 0; - vbr->out_hdr.ioprio = vbr->req->ioprio; + vbr->out_hdr.ioprio = req_get_ioprio(vbr->req); } else { /* We don't put anything else in the queue. */ BUG(); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3ca643c..bff602c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -105,15 +105,17 @@ static DEFINE_SPINLOCK(blkif_io_lock); #define GRANT_INVALID_REF 0 #define PARTS_PER_DISK 16 +#define PARTS_PER_EXT_DISK 256 #define BLKIF_MAJOR(dev) ((dev)>>8) #define BLKIF_MINOR(dev) ((dev) & 0xff) -#define DEV_NAME "xvd" /* name in /dev */ +#define EXT_SHIFT 28 +#define EXTENDED (1<<EXT_SHIFT) +#define VDEV_IS_EXTENDED(dev) ((dev)&(EXTENDED)) +#define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) -/* Information about our VBDs. */ -#define MAX_VBDS 64 -static LIST_HEAD(vbds_list); +#define DEV_NAME "xvd" /* name in /dev */ static int get_id_from_freelist(struct blkfront_info *info) { @@ -386,31 +388,60 @@ static int xlvbd_barrier(struct blkfront_info *info) } -static int xlvbd_alloc_gendisk(int minor, blkif_sector_t capacity, - int vdevice, u16 vdisk_info, u16 sector_size, - struct blkfront_info *info) +static int xlvbd_alloc_gendisk(blkif_sector_t capacity, + struct blkfront_info *info, + u16 vdisk_info, u16 sector_size) { struct gendisk *gd; int nr_minors = 1; int err = -ENODEV; + unsigned int offset; + int minor; + int nr_parts; BUG_ON(info->gd != NULL); BUG_ON(info->rq != NULL); - if ((minor % PARTS_PER_DISK) == 0) - nr_minors = PARTS_PER_DISK; + if ((info->vdevice>>EXT_SHIFT) > 1) { + /* this is above the extended range; something is wrong */ + printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", info->vdevice); + return -ENODEV; + } + + if (!VDEV_IS_EXTENDED(info->vdevice)) { + minor = BLKIF_MINOR(info->vdevice); + nr_parts = PARTS_PER_DISK; + } else { + minor = BLKIF_MINOR_EXT(info->vdevice); + nr_parts = PARTS_PER_EXT_DISK; + } + + if ((minor % nr_parts) == 0) + nr_minors = nr_parts; gd = alloc_disk(nr_minors); if (gd == NULL) goto out; - if (nr_minors > 1) - sprintf(gd->disk_name, "%s%c", DEV_NAME, - 'a' + minor / PARTS_PER_DISK); - else - sprintf(gd->disk_name, "%s%c%d", DEV_NAME, - 'a' + minor / PARTS_PER_DISK, - minor % PARTS_PER_DISK); + offset = minor / nr_parts; + + if (nr_minors > 1) { + if (offset < 26) + sprintf(gd->disk_name, "%s%c", DEV_NAME, 'a' + offset); + else + sprintf(gd->disk_name, "%s%c%c", DEV_NAME, + 'a' + ((offset / 26)-1), 'a' + (offset % 26)); + } else { + if (offset < 26) + sprintf(gd->disk_name, "%s%c%d", DEV_NAME, + 'a' + offset, + minor & (nr_parts - 1)); + else + sprintf(gd->disk_name, "%s%c%c%d", DEV_NAME, + 'a' + ((offset / 26) - 1), + 'a' + (offset % 26), + minor & (nr_parts - 1)); + } gd->major = XENVBD_MAJOR; gd->first_minor = minor; @@ -699,8 +730,13 @@ static int blkfront_probe(struct xenbus_device *dev, err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device", "%i", &vdevice); if (err != 1) { - xenbus_dev_fatal(dev, err, "reading virtual-device"); - return err; + /* go looking in the extended area instead */ + err = xenbus_scanf(XBT_NIL, dev->nodename, "virtual-device-ext", + "%i", &vdevice); + if (err != 1) { + xenbus_dev_fatal(dev, err, "reading virtual-device"); + return err; + } } info = kzalloc(sizeof(*info), GFP_KERNEL); @@ -861,9 +897,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) info->feature_barrier = 0; - err = xlvbd_alloc_gendisk(BLKIF_MINOR(info->vdevice), - sectors, info->vdevice, - binfo, sector_size, info); + err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size); if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c index 1e55a65..32f3a8e 100644 --- a/drivers/bluetooth/bpa10x.c +++ b/drivers/bluetooth/bpa10x.c @@ -256,7 +256,6 @@ static inline int bpa10x_submit_intr_urb(struct hci_dev *hdev) BT_ERR("%s urb %p submission failed (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); - kfree(buf); } usb_free_urb(urb); @@ -298,7 +297,6 @@ static inline int bpa10x_submit_bulk_urb(struct hci_dev *hdev) BT_ERR("%s urb %p submission failed (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); - kfree(buf); } usb_free_urb(urb); diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 29ae998..af472e0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -102,6 +102,7 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0a5c, 0x2101), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, /* Broadcom BCM2046 */ + { USB_DEVICE(0x0a5c, 0x2146), .driver_info = BTUSB_RESET }, { USB_DEVICE(0x0a5c, 0x2151), .driver_info = BTUSB_RESET }, /* Apple MacBook Pro with Broadcom chip */ @@ -113,6 +114,7 @@ static struct usb_device_id blacklist_table[] = { /* Targus ACB10US */ { USB_DEVICE(0x0a5c, 0x2100), .driver_info = BTUSB_RESET }, + { USB_DEVICE(0x0a5c, 0x2154), .driver_info = BTUSB_RESET }, /* ANYCOM Bluetooth USB-200 and USB-250 */ { USB_DEVICE(0x0a5c, 0x2111), .driver_info = BTUSB_RESET }, @@ -150,6 +152,9 @@ static struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x050d, 0x0012), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, { USB_DEVICE(0x050d, 0x0013), .driver_info = BTUSB_RESET | BTUSB_WRONG_SCO_MTU }, + /* Belkin F8T016 device */ + { USB_DEVICE(0x050d, 0x016a), .driver_info = BTUSB_RESET }, + /* Digianswer devices */ { USB_DEVICE(0x08fd, 0x0001), .driver_info = BTUSB_DIGIANSWER }, { USB_DEVICE(0x08fd, 0x0002), .driver_info = BTUSB_IGNORE }, @@ -271,7 +276,6 @@ static int btusb_submit_intr_urb(struct hci_dev *hdev) BT_ERR("%s urb %p submission failed (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); - kfree(buf); } usb_free_urb(urb); @@ -354,7 +358,6 @@ static int btusb_submit_bulk_urb(struct hci_dev *hdev) BT_ERR("%s urb %p submission failed (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); - kfree(buf); } usb_free_urb(urb); @@ -475,7 +478,6 @@ static int btusb_submit_isoc_urb(struct hci_dev *hdev) BT_ERR("%s urb %p submission failed (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); - kfree(buf); } usb_free_urb(urb); diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 74031de..d47f2f8 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,7 +2097,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - ret = blk_rq_map_user(q, rq, ubuf, len); + ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL); if (ret) break; diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 1231d95..d6ba77a 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c @@ -624,14 +624,14 @@ static void gdrom_readdisk_dma(struct work_struct *work) ctrl_outb(1, GDROM_DMA_STATUS_REG); wait_event_interruptible_timeout(request_queue, gd.transfer == 0, GDROM_DEFAULT_TIMEOUT); - err = gd.transfer; + err = gd.transfer ? -EIO : 0; gd.transfer = 0; gd.pending = 0; /* now seek to take the request spinlock * before handling ending the request */ spin_lock(&gdrom_lock); list_del_init(&req->queuelist); - end_dequeued_request(req, 1 - err); + __blk_end_request(req, err, blk_rq_bytes(req)); } spin_unlock(&gdrom_lock); kfree(read_command); diff --git a/drivers/char/random.c b/drivers/char/random.c index 7ce1ac4..6af435b 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -661,10 +661,10 @@ void add_disk_randomness(struct gendisk *disk) if (!disk || !disk->random) return; /* first major is 1, so we get >= 0x200 here */ - DEBUG_ENT("disk event %d:%d\n", disk->major, disk->first_minor); + DEBUG_ENT("disk event %d:%d\n", + MAJOR(disk_devt(disk)), MINOR(disk_devt(disk))); - add_timer_randomness(disk->random, - 0x100 + MKDEV(disk->major, disk->first_minor)); + add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); } #endif diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 94df917..0778d99 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -364,7 +364,7 @@ static void dw_dma_tasklet(unsigned long data) int i; status_block = dma_readl(dw, RAW.BLOCK); - status_xfer = dma_readl(dw, RAW.BLOCK); + status_xfer = dma_readl(dw, RAW.XFER); status_err = dma_readl(dw, RAW.ERROR); dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n", diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c index d568c65..d9e7a49 100644 --- a/drivers/hwmon/abituguru3.c +++ b/drivers/hwmon/abituguru3.c @@ -279,7 +279,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "OTES1 Fan", 36, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x0011, NULL /* Abit AT8 32X, need DMI string */, { + { 0x0011, "AT8 32X(ATI RD580-ULI M1575)", { { "CPU Core", 0, 0, 10, 1, 0 }, { "DDR", 1, 0, 20, 1, 0 }, { "DDR VTT", 2, 0, 10, 1, 0 }, @@ -303,6 +303,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "SYS Fan", 34, 2, 60, 1, 0 }, { "AUX1 Fan", 35, 2, 60, 1, 0 }, { "AUX2 Fan", 36, 2, 60, 1, 0 }, + { "AUX3 Fan", 37, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, { 0x0012, NULL /* Abit AN8 32X, need DMI string */, { diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index f113308..d793cc0 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -46,6 +46,8 @@ #include <linux/err.h> #include <linux/mutex.h> #include <linux/sysfs.h> +#include <linux/string.h> +#include <linux/dmi.h> #include <asm/io.h> #define DRVNAME "it87" @@ -236,6 +238,8 @@ struct it87_sio_data { /* Values read from Super-I/O config space */ u8 revision; u8 vid_value; + /* Values set based on DMI strings */ + u8 skip_pwm; }; /* For each registered chip, we need to keep some data in memory. @@ -964,6 +968,7 @@ static int __init it87_find(unsigned short *address, { int err = -ENODEV; u16 chip_type; + const char *board_vendor, *board_name; superio_enter(); chip_type = force_id ? force_id : superio_inw(DEVID); @@ -1022,6 +1027,24 @@ static int __init it87_find(unsigned short *address, pr_info("it87: in7 is VCCH (+5V Stand-By)\n"); } + /* Disable specific features based on DMI strings */ + board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR); + board_name = dmi_get_system_info(DMI_BOARD_NAME); + if (board_vendor && board_name) { + if (strcmp(board_vendor, "nVIDIA") == 0 + && strcmp(board_name, "FN68PT") == 0) { + /* On the Shuttle SN68PT, FAN_CTL2 is apparently not + connected to a fan, but to something else. One user + has reported instant system power-off when changing + the PWM2 duty cycle, so we disable it. + I use the board name string as the trigger in case + the same board is ever used in other systems. */ + pr_info("it87: Disabling pwm2 due to " + "hardware constraints\n"); + sio_data->skip_pwm = (1 << 1); + } + } + exit: superio_exit(); return err; @@ -1168,25 +1191,33 @@ static int __devinit it87_probe(struct platform_device *pdev) } if (enable_pwm_interface) { - if ((err = device_create_file(dev, - &sensor_dev_attr_pwm1_enable.dev_attr)) - || (err = device_create_file(dev, - &sensor_dev_attr_pwm2_enable.dev_attr)) - || (err = device_create_file(dev, - &sensor_dev_attr_pwm3_enable.dev_attr)) - || (err = device_create_file(dev, - &sensor_dev_attr_pwm1.dev_attr)) - || (err = device_create_file(dev, - &sensor_dev_attr_pwm2.dev_attr)) - || (err = device_create_file(dev, - &sensor_dev_attr_pwm3.dev_attr)) - || (err = device_create_file(dev, - &dev_attr_pwm1_freq)) - || (err = device_create_file(dev, - &dev_attr_pwm2_freq)) - || (err = device_create_file(dev, - &dev_attr_pwm3_freq))) - goto ERROR4; + if (!(sio_data->skip_pwm & (1 << 0))) { + if ((err = device_create_file(dev, + &sensor_dev_attr_pwm1_enable.dev_attr)) + || (err = device_create_file(dev, + &sensor_dev_attr_pwm1.dev_attr)) + || (err = device_create_file(dev, + &dev_attr_pwm1_freq))) + goto ERROR4; + } + if (!(sio_data->skip_pwm & (1 << 1))) { + if ((err = device_create_file(dev, + &sensor_dev_attr_pwm2_enable.dev_attr)) + || (err = device_create_file(dev, + &sensor_dev_attr_pwm2.dev_attr)) + || (err = device_create_file(dev, + &dev_attr_pwm2_freq))) + goto ERROR4; + } + if (!(sio_data->skip_pwm & (1 << 2))) { + if ((err = device_create_file(dev, + &sensor_dev_attr_pwm3_enable.dev_attr)) + || (err = device_create_file(dev, + &sensor_dev_attr_pwm3.dev_attr)) + || (err = device_create_file(dev, + &dev_attr_pwm3_freq))) + goto ERROR4; + } } if (data->type == it8712 || data->type == it8716 @@ -1546,6 +1577,7 @@ static int __init sm_it87_init(void) unsigned short isa_address=0; struct it87_sio_data sio_data; + memset(&sio_data, 0, sizeof(struct it87_sio_data)); err = it87_find(&isa_address, &sio_data); if (err) return err; diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 8e93a79..052879a 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -780,10 +780,6 @@ config BLK_DEV_IDEDMA_PMAC to transfer data to and from memory. Saying Y is safe and improves performance. -config BLK_DEV_IDE_SWARM - tristate "IDE for Sibyte evaluation boards" - depends on SIBYTE_SB1xxx_SOC - config BLK_DEV_IDE_AU1XXX bool "IDE for AMD Alchemy Au1200" depends on SOC_AU1200 diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 49a8c58..03c2cb6 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1113,7 +1113,7 @@ static ide_startstop_t cdrom_start_rw(ide_drive_t *drive, struct request *rq) if (write) { /* disk has become write protected */ - if (cd->disk->policy) { + if (get_disk_ro(cd->disk)) { cdrom_end_request(drive, 0); return ide_stopped; } @@ -1661,7 +1661,9 @@ static int ide_cdrom_probe_capabilities(ide_drive_t *drive) cdi->mask &= ~CDC_PLAY_AUDIO; mechtype = buf[8 + 6] >> 5; - if (mechtype == mechtype_caddy || mechtype == mechtype_popup) + if (mechtype == mechtype_caddy || + mechtype == mechtype_popup || + (drive->atapi_flags & IDE_AFLAG_NO_AUTOCLOSE)) cdi->mask |= CDC_CLOSE_TRAY; if (cdi->sanyo_slot > 0) { @@ -1859,6 +1861,8 @@ static const struct cd_list_entry ide_cd_quirks_list[] = { { "MATSHITADVD-ROM SR-8176", NULL, IDE_AFLAG_PLAY_AUDIO_OK }, { "MATSHITADVD-ROM SR-8174", NULL, IDE_AFLAG_PLAY_AUDIO_OK }, { "Optiarc DVD RW AD-5200A", NULL, IDE_AFLAG_PLAY_AUDIO_OK }, + { "Optiarc DVD RW AD-7200A", NULL, IDE_AFLAG_PLAY_AUDIO_OK }, + { "Optiarc DVD RW AD-7543A", NULL, IDE_AFLAG_NO_AUTOCLOSE }, { NULL, NULL, 0 } }; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 07ef88b..33ea8c0 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -41,6 +41,12 @@ #include <asm/io.h> #include <asm/div64.h> +#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) +#define IDE_DISK_MINORS (1 << PARTN_BITS) +#else +#define IDE_DISK_MINORS 0 +#endif + struct ide_disk_obj { ide_drive_t *drive; ide_driver_t *driver; @@ -1151,8 +1157,7 @@ static int ide_disk_probe(ide_drive_t *drive) if (!idkp) goto failed; - g = alloc_disk_node(1 << PARTN_BITS, - hwif_to_node(drive->hwif)); + g = alloc_disk_node(IDE_DISK_MINORS, hwif_to_node(drive->hwif)); if (!g) goto out_free_idkp; @@ -1178,9 +1183,11 @@ static int ide_disk_probe(ide_drive_t *drive) } else drive->attach = 1; - g->minors = 1 << PARTN_BITS; + g->minors = IDE_DISK_MINORS; g->driverfs_dev = &drive->gendev; - g->flags = drive->removable ? GENHD_FL_REMOVABLE : 0; + g->flags |= GENHD_FL_EXT_DEVT; + if (drive->removable) + g->flags |= GENHD_FL_REMOVABLE; set_capacity(g, idedisk_capacity(drive)); g->fops = &idedisk_ops; add_disk(g); diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c index adc6827..3fa07c0 100644 --- a/drivers/ide/ide-dma.c +++ b/drivers/ide/ide-dma.c @@ -211,7 +211,7 @@ int ide_build_dmatable (ide_drive_t *drive, struct request *rq) xcount = bcount & 0xffff; if (is_trm290) xcount = ((xcount >> 2) - 1) << 16; - if (xcount == 0x0000) { + else if (xcount == 0x0000) { /* * Most chipsets correctly interpret a length of 0x0000 as 64KB, * but at least one (e.g. CS5530) misinterprets it as zero (!). diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 994e410..70aa86c 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -1188,7 +1188,7 @@ static struct kobject *exact_match(dev_t dev, int *part, void *data) { struct gendisk *p = data; *part &= (1 << PARTN_BITS) - 1; - return &p->dev.kobj; + return &disk_to_dev(p)->kobj; } static int exact_lock(dev_t dev, void *data) @@ -1492,7 +1492,7 @@ static struct device_attribute *ide_port_attrs[] = { static int ide_sysfs_register_port(ide_hwif_t *hwif) { - int i, rc; + int i, uninitialized_var(rc); for (i = 0; ide_port_attrs[i]; i++) { rc = device_create_file(hwif->portdev, ide_port_attrs[i]); diff --git a/drivers/ide/mips/Makefile b/drivers/ide/mips/Makefile index 677c7b2..5873fa0 100644 --- a/drivers/ide/mips/Makefile +++ b/drivers/ide/mips/Makefile @@ -1,4 +1,3 @@ -obj-$(CONFIG_BLK_DEV_IDE_SWARM) += swarm.o obj-$(CONFIG_BLK_DEV_IDE_AU1XXX) += au1xxx-ide.o EXTRA_CFLAGS := -Idrivers/ide diff --git a/drivers/ide/mips/swarm.c b/drivers/ide/mips/swarm.c deleted file mode 100644 index 39c9ee9..0000000 --- a/drivers/ide/mips/swarm.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (C) 2001, 2002, 2003 Broadcom Corporation - * Copyright (C) 2004 MontaVista Software Inc. - * Author: Manish Lachwani, mlachwani@mvista.com - * Copyright (C) 2004 MIPS Technologies, Inc. All rights reserved. - * Author: Maciej W. Rozycki <macro@mips.com> - * Copyright (c) 2006, 2008 Maciej W. Rozycki - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -/* - * Derived loosely from ide-pmac.c, so: - * Copyright (C) 1998 Paul Mackerras. - * Copyright (C) 1995-1998 Mark Lord - */ - -/* - * Boards with SiByte processors so far have supported IDE devices via - * the Generic Bus, PCI bus, and built-in PCMCIA interface. In all - * cases, byte-swapping must be avoided for these devices (whereas - * other PCI devices, for example, will require swapping). Any - * SiByte-targetted kernel including IDE support will include this - * file. Probing of a Generic Bus for an IDE device is controlled by - * the definition of "SIBYTE_HAVE_IDE", which is provided by - * <asm/sibyte/board.h> for Broadcom boards. - */ - -#include <linux/ide.h> -#include <linux/ioport.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/platform_device.h> - -#include <asm/io.h> - -#include <asm/sibyte/board.h> -#include <asm/sibyte/sb1250_genbus.h> -#include <asm/sibyte/sb1250_regs.h> - -#define DRV_NAME "ide-swarm" - -static char swarm_ide_string[] = DRV_NAME; - -static struct resource swarm_ide_resource = { - .name = "SWARM GenBus IDE", - .flags = IORESOURCE_MEM, -}; - -static struct platform_device *swarm_ide_dev; - -static const struct ide_port_info swarm_port_info = { - .name = DRV_NAME, - .host_flags = IDE_HFLAG_MMIO | IDE_HFLAG_NO_DMA, -}; - -/* - * swarm_ide_probe - if the board header indicates the existence of - * Generic Bus IDE, allocate a HWIF for it. - */ -static int __devinit swarm_ide_probe(struct device *dev) -{ - u8 __iomem *base; - struct ide_host *host; - phys_t offset, size; - int i, rc; - hw_regs_t hw, *hws[] = { &hw, NULL, NULL, NULL }; - - if (!SIBYTE_HAVE_IDE) - return -ENODEV; - - base = ioremap(A_IO_EXT_BASE, 0x800); - offset = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_START_ADDR, IDE_CS)); - size = __raw_readq(base + R_IO_EXT_REG(R_IO_EXT_MULT_SIZE, IDE_CS)); - iounmap(base); - - offset = G_IO_START_ADDR(offset) << S_IO_ADDRBASE; - size = (G_IO_MULT_SIZE(size) + 1) << S_IO_REGSIZE; - if (offset < A_PHYS_GENBUS || offset >= A_PHYS_GENBUS_END) { - printk(KERN_INFO DRV_NAME - ": IDE interface at GenBus disabled\n"); - return -EBUSY; - } - - printk(KERN_INFO DRV_NAME ": IDE interface at GenBus slot %i\n", - IDE_CS); - - swarm_ide_resource.start = offset; - swarm_ide_resource.end = offset + size - 1; - if (request_resource(&iomem_resource, &swarm_ide_resource)) { - printk(KERN_ERR DRV_NAME - ": can't request I/O memory resource\n"); - return -EBUSY; - } - - base = ioremap(offset, size); - - memset(&hw, 0, sizeof(hw)); - for (i = 0; i <= 7; i++) - hw.io_ports_array[i] = - (unsigned long)(base + ((0x1f0 + i) << 5)); - hw.io_ports.ctl_addr = - (unsigned long)(base + (0x3f6 << 5)); - hw.irq = K_INT_GB_IDE; - hw.chipset = ide_generic; - - rc = ide_host_add(&swarm_port_info, hws, &host); - if (rc) - goto err; - - dev_set_drvdata(dev, host); - - return 0; -err: - release_resource(&swarm_ide_resource); - iounmap(base); - return rc; -} - -static struct device_driver swarm_ide_driver = { - .name = swarm_ide_string, - .bus = &platform_bus_type, - .probe = swarm_ide_probe, -}; - -static void swarm_ide_platform_release(struct device *device) -{ - struct platform_device *pldev; - - /* free device */ - pldev = to_platform_device(device); - kfree(pldev); -} - -static int __devinit swarm_ide_init_module(void) -{ - struct platform_device *pldev; - int err; - - printk(KERN_INFO "SWARM IDE driver\n"); - - if (driver_register(&swarm_ide_driver)) { - printk(KERN_ERR "Driver registration failed\n"); - err = -ENODEV; - goto out; - } - - if (!(pldev = kzalloc(sizeof (*pldev), GFP_KERNEL))) { - err = -ENOMEM; - goto out_unregister_driver; - } - - pldev->name = swarm_ide_string; - pldev->id = 0; - pldev->dev.release = swarm_ide_platform_release; - - if (platform_device_register(pldev)) { - err = -ENODEV; - goto out_free_pldev; - } - - if (!pldev->dev.driver) { - /* - * The driver was not bound to this device, there was - * no hardware at this address. Unregister it, as the - * release fuction will take care of freeing the - * allocated structure - */ - platform_device_unregister (pldev); - } - - swarm_ide_dev = pldev; - - return 0; - -out_free_pldev: - kfree(pldev); - -out_unregister_driver: - driver_unregister(&swarm_ide_driver); -out: - return err; -} - -module_init(swarm_ide_init_module); diff --git a/drivers/leds/leds-fsg.c b/drivers/leds/leds-fsg.c index be0e121..3493515 100644 --- a/drivers/leds/leds-fsg.c +++ b/drivers/leds/leds-fsg.c @@ -161,6 +161,16 @@ static int fsg_led_probe(struct platform_device *pdev) { int ret; + /* Map the LED chip select address space */ + latch_address = (unsigned short *) ioremap(IXP4XX_EXP_BUS_BASE(2), 512); + if (!latch_address) { + ret = -ENOMEM; + goto failremap; + } + + latch_value = 0xffff; + *latch_address = latch_value; + ret = led_classdev_register(&pdev->dev, &fsg_wlan_led); if (ret < 0) goto failwlan; @@ -185,20 +195,8 @@ static int fsg_led_probe(struct platform_device *pdev) if (ret < 0) goto failring; - /* Map the LED chip select address space */ - latch_address = (unsigned short *) ioremap(IXP4XX_EXP_BUS_BASE(2), 512); - if (!latch_address) { - ret = -ENOMEM; - goto failremap; - } - - latch_value = 0xffff; - *latch_address = latch_value; - return ret; - failremap: - led_classdev_unregister(&fsg_ring_led); failring: led_classdev_unregister(&fsg_sync_led); failsync: @@ -210,14 +208,14 @@ static int fsg_led_probe(struct platform_device *pdev) failwan: led_classdev_unregister(&fsg_wlan_led); failwlan: + iounmap(latch_address); + failremap: return ret; } static int fsg_led_remove(struct platform_device *pdev) { - iounmap(latch_address); - led_classdev_unregister(&fsg_wlan_led); led_classdev_unregister(&fsg_wan_led); led_classdev_unregister(&fsg_sata_led); @@ -225,6 +223,8 @@ static int fsg_led_remove(struct platform_device *pdev) led_classdev_unregister(&fsg_sync_led); led_classdev_unregister(&fsg_ring_led); + iounmap(latch_address); + return 0; } diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c index 146c069..f508729 100644 --- a/drivers/leds/leds-pca955x.c +++ b/drivers/leds/leds-pca955x.c @@ -248,11 +248,10 @@ static int __devinit pca955x_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct pca955x_led *pca955x; - int i; - int err = -ENODEV; struct pca955x_chipdef *chip; struct i2c_adapter *adapter; struct led_platform_data *pdata; + int i, err; chip = &pca955x_chipdefs[id->driver_data]; adapter = to_i2c_adapter(client->dev.parent); @@ -282,43 +281,41 @@ static int __devinit pca955x_probe(struct i2c_client *client, } } + pca955x = kzalloc(sizeof(*pca955x) * chip->bits, GFP_KERNEL); + if (!pca955x) + return -ENOMEM; + + i2c_set_clientdata(client, pca955x); + for (i = 0; i < chip->bits; i++) { - pca955x = kzalloc(sizeof(struct pca955x_led), GFP_KERNEL); - if (!pca955x) { - err = -ENOMEM; - goto exit; - } + pca955x[i].chipdef = chip; + pca955x[i].client = client; + pca955x[i].led_num = i; - pca955x->chipdef = chip; - pca955x->client = client; - pca955x->led_num = i; /* Platform data can specify LED names and default triggers */ if (pdata) { if (pdata->leds[i].name) - snprintf(pca955x->name, 32, "pca955x:%s", - pdata->leds[i].name); + snprintf(pca955x[i].name, + sizeof(pca955x[i].name), "pca955x:%s", + pdata->leds[i].name); if (pdata->leds[i].default_trigger) - pca955x->led_cdev.default_trigger = + pca955x[i].led_cdev.default_trigger = pdata->leds[i].default_trigger; } else { - snprintf(pca955x->name, 32, "pca955x:%d", i); + snprintf(pca955x[i].name, sizeof(pca955x[i].name), + "pca955x:%d", i); } - spin_lock_init(&pca955x->lock); - pca955x->led_cdev.name = pca955x->name; - pca955x->led_cdev.brightness_set = - pca955x_led_set; + spin_lock_init(&pca955x[i].lock); - /* - * Client data is a pointer to the _first_ pca955x_led - * struct - */ - if (i == 0) - i2c_set_clientdata(client, pca955x); + pca955x[i].led_cdev.name = pca955x[i].name; + pca955x[i].led_cdev.brightness_set = pca955x_led_set; - INIT_WORK(&(pca955x->work), pca955x_led_work); + INIT_WORK(&pca955x[i].work, pca955x_led_work); - led_classdev_register(&client->dev, &(pca955x->led_cdev)); + err = led_classdev_register(&client->dev, &pca955x[i].led_cdev); + if (err < 0) + goto exit; } /* Turn off LEDs */ @@ -336,23 +333,32 @@ static int __devinit pca955x_probe(struct i2c_client *client, pca955x_write_psc(client, 1, 0); return 0; + exit: + while (i--) { + led_classdev_unregister(&pca955x[i].led_cdev); + cancel_work_sync(&pca955x[i].work); + } + + kfree(pca955x); + i2c_set_clientdata(client, NULL); + return err; } static int __devexit pca955x_remove(struct i2c_client *client) { struct pca955x_led *pca955x = i2c_get_clientdata(client); - int leds = pca955x->chipdef->bits; int i; - for (i = 0; i < leds; i++) { - led_classdev_unregister(&(pca955x->led_cdev)); - cancel_work_sync(&(pca955x->work)); - kfree(pca955x); - pca955x = pca955x + 1; + for (i = 0; i < pca955x->chipdef->bits; i++) { + led_classdev_unregister(&pca955x[i].led_cdev); + cancel_work_sync(&pca955x[i].work); } + kfree(pca955x); + i2c_set_clientdata(client, NULL); + return 0; } diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index b262c00..5b91915 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -426,7 +426,7 @@ static int list_devices(struct dm_ioctl *param, size_t param_size) old_nl->next = (uint32_t) ((void *) nl - (void *) old_nl); disk = dm_disk(hc->md); - nl->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + nl->dev = huge_encode_dev(disk_devt(disk)); nl->next = 0; strcpy(nl->name, hc->name); @@ -539,7 +539,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) if (dm_suspended(md)) param->flags |= DM_SUSPEND_FLAG; - param->dev = huge_encode_dev(MKDEV(disk->major, disk->first_minor)); + param->dev = huge_encode_dev(disk_devt(disk)); /* * Yes, this will be out of date by the time it gets back @@ -548,7 +548,7 @@ static int __dev_status(struct mapped_device *md, struct dm_ioctl *param) */ param->open_count = dm_open_count(md); - if (disk->policy) + if (get_disk_ro(disk)) param->flags |= DM_READONLY_FLAG; param->event_nr = dm_get_event_nr(md); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c2fcf28..3d38481 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -33,6 +33,7 @@ struct pgpath { unsigned fail_count; /* Cumulative failure count */ struct dm_path path; + struct work_struct deactivate_path; }; #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) @@ -112,6 +113,7 @@ static struct workqueue_struct *kmultipathd, *kmpath_handlerd; static void process_queued_ios(struct work_struct *work); static void trigger_event(struct work_struct *work); static void activate_path(struct work_struct *work); +static void deactivate_path(struct work_struct *work); /*----------------------------------------------- @@ -122,8 +124,10 @@ static struct pgpath *alloc_pgpath(void) { struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); - if (pgpath) + if (pgpath) { pgpath->path.is_active = 1; + INIT_WORK(&pgpath->deactivate_path, deactivate_path); + } return pgpath; } @@ -133,6 +137,14 @@ static void free_pgpath(struct pgpath *pgpath) kfree(pgpath); } +static void deactivate_path(struct work_struct *work) +{ + struct pgpath *pgpath = + container_of(work, struct pgpath, deactivate_path); + + blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue); +} + static struct priority_group *alloc_priority_group(void) { struct priority_group *pg; @@ -870,6 +882,7 @@ static int fail_path(struct pgpath *pgpath) pgpath->path.dev->name, m->nr_valid_paths); queue_work(kmultipathd, &m->trigger_event); + queue_work(kmultipathd, &pgpath->deactivate_path); out: spin_unlock_irqrestore(&m->lock, flags); diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 4de90ab..b745d8a 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -284,8 +284,8 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio, memset(major_minor, 0, sizeof(major_minor)); sprintf(major_minor, "%d:%d", - bio->bi_bdev->bd_disk->major, - bio->bi_bdev->bd_disk->first_minor); + MAJOR(disk_devt(bio->bi_bdev->bd_disk)), + MINOR(disk_devt(bio->bi_bdev->bd_disk))); /* * Test to see which stripe drive triggered the event diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ace998c..327de03 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -377,13 +377,14 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio) static void start_io_acct(struct dm_io *io) { struct mapped_device *md = io->md; + int cpu; io->start_time = jiffies; - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); - dm_disk(md)->in_flight = atomic_inc_return(&md->pending); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_unlock(); + dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); } static int end_io_acct(struct dm_io *io) @@ -391,15 +392,16 @@ static int end_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->bio; unsigned long duration = jiffies - io->start_time; - int pending; + int pending, cpu; int rw = bio_data_dir(bio); - preempt_disable(); - disk_round_stats(dm_disk(md)); - preempt_enable(); - dm_disk(md)->in_flight = pending = atomic_dec_return(&md->pending); + cpu = part_stat_lock(); + part_round_stats(cpu, &dm_disk(md)->part0); + part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); + part_stat_unlock(); - disk_stat_add(dm_disk(md), ticks[rw], duration); + dm_disk(md)->part0.in_flight = pending = + atomic_dec_return(&md->pending); return !pending; } @@ -885,6 +887,7 @@ static int dm_request(struct request_queue *q, struct bio *bio) int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; + int cpu; /* * There is no use in forwarding any barrier request since we can't @@ -897,8 +900,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); - disk_stat_inc(dm_disk(md), ios[rw]); - disk_stat_add(dm_disk(md), sectors[rw], bio_sectors(bio)); + cpu = part_stat_lock(); + part_stat_inc(cpu, &dm_disk(md)->part0, ios[rw]); + part_stat_add(cpu, &dm_disk(md)->part0, sectors[rw], bio_sectors(bio)); + part_stat_unlock(); /* * If we're suspended we have to queue @@ -1146,7 +1151,7 @@ static void unlock_fs(struct mapped_device *md); static void free_dev(struct mapped_device *md) { - int minor = md->disk->first_minor; + int minor = MINOR(disk_devt(md->disk)); if (md->suspended_bdev) { unlock_fs(md); @@ -1182,7 +1187,7 @@ static void event_callback(void *context) list_splice_init(&md->uevent_list, &uevents); spin_unlock_irqrestore(&md->uevent_lock, flags); - dm_send_uevents(&uevents, &md->disk->dev.kobj); + dm_send_uevents(&uevents, &disk_to_dev(md->disk)->kobj); atomic_inc(&md->event_nr); wake_up(&md->eventq); @@ -1267,7 +1272,7 @@ static struct mapped_device *dm_find_md(dev_t dev) md = idr_find(&_minor_idr, minor); if (md && (md == MINOR_ALLOCED || - (dm_disk(md)->first_minor != minor) || + (MINOR(disk_devt(dm_disk(md))) != minor) || test_bit(DMF_FREEING, &md->flags))) { md = NULL; goto out; @@ -1318,7 +1323,8 @@ void dm_put(struct mapped_device *md) if (atomic_dec_and_lock(&md->holders, &_minor_lock)) { map = dm_get_table(md); - idr_replace(&_minor_idr, MINOR_ALLOCED, dm_disk(md)->first_minor); + idr_replace(&_minor_idr, MINOR_ALLOCED, + MINOR(disk_devt(dm_disk(md)))); set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); if (!dm_suspended(md)) { @@ -1638,7 +1644,7 @@ out: *---------------------------------------------------------------*/ void dm_kobject_uevent(struct mapped_device *md) { - kobject_uevent(&md->disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(md->disk)->kobj, KOBJ_CHANGE); } uint32_t dm_next_uevent_seq(struct mapped_device *md) diff --git a/drivers/md/linear.c b/drivers/md/linear.c index b1eebf8..b9cbee68 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -318,14 +318,18 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) mddev_t *mddev = q->queuedata; dev_info_t *tmp_dev; sector_t block; + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); tmp_dev = which_dev(mddev, bio->bi_sector); block = bio->bi_sector >> 1; @@ -349,7 +353,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio) * split it. */ struct bio_pair *bp; - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, ((tmp_dev->offset + tmp_dev->size)<<1) - bio->bi_sector); if (linear_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); diff --git a/drivers/md/md.c b/drivers/md/md.c index deeac4b..0a3a4bd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1464,10 +1464,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) if ((err = kobject_add(&rdev->kobj, &mddev->kobj, "dev-%s", b))) goto fail; - if (rdev->bdev->bd_part) - ko = &rdev->bdev->bd_part->dev.kobj; - else - ko = &rdev->bdev->bd_disk->dev.kobj; + ko = &part_to_dev(rdev->bdev->bd_part)->kobj; if ((err = sysfs_create_link(&rdev->kobj, ko, "block"))) { kobject_del(&rdev->kobj); goto fail; @@ -3470,8 +3467,8 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) disk->queue = mddev->queue; add_disk(disk); mddev->gendisk = disk; - error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, - "%s", "md"); + error = kobject_init_and_add(&mddev->kobj, &md_ktype, + &disk_to_dev(disk)->kobj, "%s", "md"); mutex_unlock(&disks_mutex); if (error) printk(KERN_WARNING "md: cannot register %s/md - name in use\n", @@ -3761,7 +3758,7 @@ static int do_md_run(mddev_t * mddev) sysfs_notify(&mddev->kobj, NULL, "array_state"); sysfs_notify(&mddev->kobj, NULL, "sync_action"); sysfs_notify(&mddev->kobj, NULL, "degraded"); - kobject_uevent(&mddev->gendisk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE); return 0; } @@ -5549,8 +5546,8 @@ static int is_mddev_idle(mddev_t *mddev) rcu_read_lock(); rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; - curr_events = disk_stat_read(disk, sectors[0]) + - disk_stat_read(disk, sectors[1]) - + curr_events = part_stat_read(&disk->part0, sectors[0]) + + part_stat_read(&disk->part0, sectors[1]) - atomic_read(&disk->sync_io); /* sync IO will cause sync_io to increase before the disk_stats * as sync_io is counted when a request starts, and diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index c4779cc..8bb8794 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -147,6 +147,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); @@ -158,8 +159,11 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio) mp_bh->master_bio = bio; mp_bh->mddev = mddev; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); mp_bh->path = multipath_map(conf); if (mp_bh->path < 0) { diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 1836106..53508a8 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -399,14 +399,18 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) sector_t chunk; sector_t block, rsect; const int rw = bio_data_dir(bio); + int cpu; if (unlikely(bio_barrier(bio))) { bio_endio(bio, -EOPNOTSUPP); return 0; } - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); chunk_size = mddev->chunk_size >> 10; chunk_sects = mddev->chunk_size >> 9; @@ -423,7 +427,7 @@ static int raid0_make_request (struct request_queue *q, struct bio *bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1))); if (raid0_make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); if (raid0_make_request(q, &bp->bio2)) diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 03a5ab7..b976442 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -779,7 +779,7 @@ static int make_request(struct request_queue *q, struct bio * bio) struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); const int do_sync = bio_sync(bio); - int do_barriers; + int cpu, do_barriers; mdk_rdev_t *blocked_rdev; /* @@ -804,8 +804,11 @@ static int make_request(struct request_queue *q, struct bio * bio) bitmap = mddev->bitmap; - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); /* * make_request() can abort the operation when READA is being @@ -1302,9 +1305,6 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) sbio->bi_size = r1_bio->sectors << 9; sbio->bi_idx = 0; sbio->bi_phys_segments = 0; - sbio->bi_hw_segments = 0; - sbio->bi_hw_front_size = 0; - sbio->bi_hw_back_size = 0; sbio->bi_flags &= ~(BIO_POOL_MASK - 1); sbio->bi_flags |= 1 << BIO_UPTODATE; sbio->bi_next = NULL; @@ -1790,7 +1790,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; bio->bi_size = 0; bio->bi_end_io = NULL; bio->bi_private = NULL; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e34cd0e..8bdc9bf 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -789,6 +789,7 @@ static int make_request(struct request_queue *q, struct bio * bio) mirror_info_t *mirror; r10bio_t *r10_bio; struct bio *read_bio; + int cpu; int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); @@ -816,7 +817,7 @@ static int make_request(struct request_queue *q, struct bio * bio) /* This is a one page bio that upper layers * refuse to split for us, so we need to split it. */ - bp = bio_split(bio, bio_split_pool, + bp = bio_split(bio, chunk_sects - (bio->bi_sector & (chunk_sects - 1)) ); if (make_request(q, &bp->bio1)) generic_make_request(&bp->bio1); @@ -843,8 +844,11 @@ static int make_request(struct request_queue *q, struct bio * bio) */ wait_barrier(conf); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio)); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bio)); + part_stat_unlock(); r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); @@ -1345,9 +1349,6 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) tbio->bi_size = r10_bio->sectors << 9; tbio->bi_idx = 0; tbio->bi_phys_segments = 0; - tbio->bi_hw_segments = 0; - tbio->bi_hw_front_size = 0; - tbio->bi_hw_back_size = 0; tbio->bi_flags &= ~(BIO_POOL_MASK - 1); tbio->bi_flags |= 1 << BIO_UPTODATE; tbio->bi_next = NULL; @@ -1947,7 +1948,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i bio->bi_vcnt = 0; bio->bi_idx = 0; bio->bi_phys_segments = 0; - bio->bi_hw_segments = 0; bio->bi_size = 0; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 224de02..ae16794 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -101,6 +101,40 @@ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); #endif +/* + * We maintain a biased count of active stripes in the bottom 16 bits of + * bi_phys_segments, and a count of processed stripes in the upper 16 bits + */ +static inline int raid5_bi_phys_segments(struct bio *bio) +{ + return bio->bi_phys_segments & 0xffff; +} + +static inline int raid5_bi_hw_segments(struct bio *bio) +{ + return (bio->bi_phys_segments >> 16) & 0xffff; +} + +static inline int raid5_dec_bi_phys_segments(struct bio *bio) +{ + --bio->bi_phys_segments; + return raid5_bi_phys_segments(bio); +} + +static inline int raid5_dec_bi_hw_segments(struct bio *bio) +{ + unsigned short val = raid5_bi_hw_segments(bio); + + --val; + bio->bi_phys_segments = (val << 16) | raid5_bi_phys_segments(bio); + return val; +} + +static inline void raid5_set_bi_hw_segments(struct bio *bio, unsigned int cnt) +{ + bio->bi_phys_segments = raid5_bi_phys_segments(bio) || (cnt << 16); +} + static inline int raid6_next_disk(int disk, int raid_disks) { disk++; @@ -507,7 +541,7 @@ static void ops_complete_biofill(void *stripe_head_ref) while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { rbi2 = r5_next_bio(rbi, dev->sector); - if (--rbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(rbi)) { rbi->bi_next = return_bi; return_bi = rbi; } @@ -1725,7 +1759,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in if (*bip) bi->bi_next = *bip; *bip = bi; - bi->bi_phys_segments ++; + bi->bi_phys_segments++; spin_unlock_irq(&conf->device_lock); spin_unlock(&sh->lock); @@ -1819,7 +1853,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; *return_bi = bi; @@ -1834,7 +1868,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, sh->dev[i].sector + STRIPE_SECTORS) { struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { md_write_end(conf->mddev); bi->bi_next = *return_bi; *return_bi = bi; @@ -1858,7 +1892,7 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); clear_bit(BIO_UPTODATE, &bi->bi_flags); - if (--bi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(bi)) { bi->bi_next = *return_bi; *return_bi = bi; } @@ -2033,7 +2067,7 @@ static void handle_stripe_clean_event(raid5_conf_t *conf, while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { wbi2 = r5_next_bio(wbi, dev->sector); - if (--wbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(wbi)) { md_write_end(conf->mddev); wbi->bi_next = *return_bi; *return_bi = wbi; @@ -2814,7 +2848,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page) copy_data(0, rbi, dev->page, dev->sector); rbi2 = r5_next_bio(rbi, dev->sector); spin_lock_irq(&conf->device_lock); - if (--rbi->bi_phys_segments == 0) { + if (!raid5_dec_bi_phys_segments(rbi)) { rbi->bi_next = return_bi; return_bi = rbi; } @@ -3155,8 +3189,11 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf) if(bi) { conf->retry_read_aligned_list = bi->bi_next; bi->bi_next = NULL; + /* + * this sets the active strip count to 1 and the processed + * strip count to zero (upper 8 bits) + */ bi->bi_phys_segments = 1; /* biased count of active stripes */ - bi->bi_hw_segments = 0; /* count of processed stripes */ } return bi; @@ -3206,8 +3243,7 @@ static int bio_fits_rdev(struct bio *bi) if ((bi->bi_size>>9) > q->max_sectors) return 0; blk_recount_segments(q, bi); - if (bi->bi_phys_segments > q->max_phys_segments || - bi->bi_hw_segments > q->max_hw_segments) + if (bi->bi_phys_segments > q->max_phys_segments) return 0; if (q->merge_bvec_fn) @@ -3351,7 +3387,7 @@ static int make_request(struct request_queue *q, struct bio * bi) sector_t logical_sector, last_sector; struct stripe_head *sh; const int rw = bio_data_dir(bi); - int remaining; + int cpu, remaining; if (unlikely(bio_barrier(bi))) { bio_endio(bi, -EOPNOTSUPP); @@ -3360,8 +3396,11 @@ static int make_request(struct request_queue *q, struct bio * bi) md_write_start(mddev, bi); - disk_stat_inc(mddev->gendisk, ios[rw]); - disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bi)); + cpu = part_stat_lock(); + part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], + bio_sectors(bi)); + part_stat_unlock(); if (rw == READ && mddev->reshape_position == MaxSector && @@ -3468,7 +3507,7 @@ static int make_request(struct request_queue *q, struct bio * bi) } spin_lock_irq(&conf->device_lock); - remaining = --bi->bi_phys_segments; + remaining = raid5_dec_bi_phys_segments(bi); spin_unlock_irq(&conf->device_lock); if (remaining == 0) { @@ -3752,7 +3791,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) sector += STRIPE_SECTORS, scnt++) { - if (scnt < raid_bio->bi_hw_segments) + if (scnt < raid5_bi_hw_segments(raid_bio)) /* already done this stripe */ continue; @@ -3760,7 +3799,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) if (!sh) { /* failed to get a stripe - must wait */ - raid_bio->bi_hw_segments = scnt; + raid5_set_bi_hw_segments(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; } @@ -3768,7 +3807,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) set_bit(R5_ReadError, &sh->dev[dd_idx].flags); if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) { release_stripe(sh); - raid_bio->bi_hw_segments = scnt; + raid5_set_bi_hw_segments(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; } @@ -3778,7 +3817,7 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) handled++; } spin_lock_irq(&conf->device_lock); - remaining = --raid_bio->bi_phys_segments; + remaining = raid5_dec_bi_phys_segments(raid_bio); spin_unlock_irq(&conf->device_lock); if (remaining == 0) bio_endio(raid_bio, 0); diff --git a/drivers/media/common/tuners/tuner-xc2028.h b/drivers/media/common/tuners/tuner-xc2028.h index 216025c..2c5b628 100644 --- a/drivers/media/common/tuners/tuner-xc2028.h +++ b/drivers/media/common/tuners/tuner-xc2028.h @@ -10,6 +10,7 @@ #include "dvb_frontend.h" #define XC2028_DEFAULT_FIRMWARE "xc3028-v27.fw" +#define XC3028L_DEFAULT_FIRMWARE "xc3028L-v36.fw" /* Dmoduler IF (kHz) */ #define XC3028_FE_DEFAULT 0 /* Don't load SCODE */ diff --git a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c index 4eed783..a127a41 100644 --- a/drivers/media/dvb/b2c2/flexcop-fe-tuner.c +++ b/drivers/media/dvb/b2c2/flexcop-fe-tuner.c @@ -491,6 +491,7 @@ static struct s5h1420_config skystar2_rev2_7_s5h1420_config = { .demod_address = 0x53, .invert = 1, .repeated_start_workaround = 1, + .serial_mpeg = 1, }; static struct itd1000_config skystar2_rev2_7_itd1000_config = { diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c index 069d847..0c733c6 100644 --- a/drivers/media/dvb/dvb-core/dmxdev.c +++ b/drivers/media/dvb/dvb-core/dmxdev.c @@ -364,15 +364,16 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, enum dmx_success success) { struct dmxdev_filter *dmxdevfilter = filter->priv; + unsigned long flags; int ret; if (dmxdevfilter->buffer.error) { wake_up(&dmxdevfilter->buffer.queue); return 0; } - spin_lock(&dmxdevfilter->dev->lock); + spin_lock_irqsave(&dmxdevfilter->dev->lock, flags); if (dmxdevfilter->state != DMXDEV_STATE_GO) { - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); return 0; } del_timer(&dmxdevfilter->timer); @@ -391,7 +392,7 @@ static int dvb_dmxdev_section_callback(const u8 *buffer1, size_t buffer1_len, } if (dmxdevfilter->params.sec.flags & DMX_ONESHOT) dmxdevfilter->state = DMXDEV_STATE_DONE; - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); wake_up(&dmxdevfilter->buffer.queue); return 0; } @@ -403,11 +404,12 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, { struct dmxdev_filter *dmxdevfilter = feed->priv; struct dvb_ringbuffer *buffer; + unsigned long flags; int ret; - spin_lock(&dmxdevfilter->dev->lock); + spin_lock_irqsave(&dmxdevfilter->dev->lock, flags); if (dmxdevfilter->params.pes.output == DMX_OUT_DECODER) { - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); return 0; } @@ -417,7 +419,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, else buffer = &dmxdevfilter->dev->dvr_buffer; if (buffer->error) { - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); wake_up(&buffer->queue); return 0; } @@ -428,7 +430,7 @@ static int dvb_dmxdev_ts_callback(const u8 *buffer1, size_t buffer1_len, dvb_ringbuffer_flush(buffer); buffer->error = ret; } - spin_unlock(&dmxdevfilter->dev->lock); + spin_unlock_irqrestore(&dmxdevfilter->dev->lock, flags); wake_up(&buffer->queue); return 0; } diff --git a/drivers/media/dvb/dvb-core/dvb_demux.c b/drivers/media/dvb/dvb-core/dvb_demux.c index e2eca0b..a2c1fd5 100644 --- a/drivers/media/dvb/dvb-core/dvb_demux.c +++ b/drivers/media/dvb/dvb-core/dvb_demux.c @@ -399,7 +399,9 @@ static void dvb_dmx_swfilter_packet(struct dvb_demux *demux, const u8 *buf) void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf, size_t count) { - spin_lock(&demux->lock); + unsigned long flags; + + spin_lock_irqsave(&demux->lock, flags); while (count--) { if (buf[0] == 0x47) @@ -407,16 +409,17 @@ void dvb_dmx_swfilter_packets(struct dvb_demux *demux, const u8 *buf, buf += 188; } - spin_unlock(&demux->lock); + spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter_packets); void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count) { + unsigned long flags; int p = 0, i, j; - spin_lock(&demux->lock); + spin_lock_irqsave(&demux->lock, flags); if (demux->tsbufp) { i = demux->tsbufp; @@ -449,17 +452,18 @@ void dvb_dmx_swfilter(struct dvb_demux *demux, const u8 *buf, size_t count) } bailout: - spin_unlock(&demux->lock); + spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter); void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count) { + unsigned long flags; int p = 0, i, j; u8 tmppack[188]; - spin_lock(&demux->lock); + spin_lock_irqsave(&demux->lock, flags); if (demux->tsbufp) { i = demux->tsbufp; @@ -500,7 +504,7 @@ void dvb_dmx_swfilter_204(struct dvb_demux *demux, const u8 *buf, size_t count) } bailout: - spin_unlock(&demux->lock); + spin_unlock_irqrestore(&demux->lock, flags); } EXPORT_SYMBOL(dvb_dmx_swfilter_204); diff --git a/drivers/media/dvb/frontends/s5h1420.c b/drivers/media/dvb/frontends/s5h1420.c index 747d3fa..2e9fd28 100644 --- a/drivers/media/dvb/frontends/s5h1420.c +++ b/drivers/media/dvb/frontends/s5h1420.c @@ -59,7 +59,7 @@ struct s5h1420_state { * it does not support repeated-start, workaround: write addr-1 * and then read */ - u8 shadow[255]; + u8 shadow[256]; }; static u32 s5h1420_getsymbolrate(struct s5h1420_state* state); @@ -94,8 +94,11 @@ static u8 s5h1420_readreg(struct s5h1420_state *state, u8 reg) if (ret != 3) return ret; } else { - ret = i2c_transfer(state->i2c, &msg[1], 2); - if (ret != 2) + ret = i2c_transfer(state->i2c, &msg[1], 1); + if (ret != 1) + return ret; + ret = i2c_transfer(state->i2c, &msg[2], 1); + if (ret != 1) return ret; } @@ -823,7 +826,7 @@ static int s5h1420_init (struct dvb_frontend* fe) struct s5h1420_state* state = fe->demodulator_priv; /* disable power down and do reset */ - state->CON_1_val = 0x10; + state->CON_1_val = state->config->serial_mpeg << 4; s5h1420_writereg(state, 0x02, state->CON_1_val); msleep(10); s5h1420_reset(state); diff --git a/drivers/media/dvb/frontends/s5h1420.h b/drivers/media/dvb/frontends/s5h1420.h index 4c913f1..ff30813 100644 --- a/drivers/media/dvb/frontends/s5h1420.h +++ b/drivers/media/dvb/frontends/s5h1420.h @@ -32,10 +32,12 @@ struct s5h1420_config u8 demod_address; /* does the inversion require inversion? */ - u8 invert : 1; + u8 invert:1; - u8 repeated_start_workaround : 1; - u8 cdclk_polarity : 1; /* 1 == falling edge, 0 == raising edge */ + u8 repeated_start_workaround:1; + u8 cdclk_polarity:1; /* 1 == falling edge, 0 == raising edge */ + + u8 serial_mpeg:1; }; #if defined(CONFIG_DVB_S5H1420) || (defined(CONFIG_DVB_S5H1420_MODULE) && defined(MODULE)) diff --git a/drivers/media/dvb/siano/sms-cards.c b/drivers/media/dvb/siano/sms-cards.c index cc5efb6..9da260f 100644 --- a/drivers/media/dvb/siano/sms-cards.c +++ b/drivers/media/dvb/siano/sms-cards.c @@ -40,6 +40,8 @@ struct usb_device_id smsusb_id_table[] = { .driver_info = SMS1XXX_BOARD_HAUPPAUGE_OKEMO_B }, { USB_DEVICE(0x2040, 0x5500), .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, + { USB_DEVICE(0x2040, 0x5510), + .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, { USB_DEVICE(0x2040, 0x5580), .driver_info = SMS1XXX_BOARD_HAUPPAUGE_WINDHAM }, { USB_DEVICE(0x2040, 0x5590), @@ -87,7 +89,7 @@ static struct sms_board sms_boards[] = { .fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-nova-b-dvbt-01.fw", }, [SMS1XXX_BOARD_HAUPPAUGE_WINDHAM] = { - .name = "Hauppauge WinTV-Nova-T-MiniStick", + .name = "Hauppauge WinTV MiniStick", .type = SMS_NOVA_B0, .fw[DEVICE_MODE_DVBT_BDA] = "sms1xxx-hcw-55xxx-dvbt-01.fw", }, diff --git a/drivers/media/video/bt8xx/bttv-driver.c b/drivers/media/video/bt8xx/bttv-driver.c index 6ae4cc8..933eaef 100644 --- a/drivers/media/video/bt8xx/bttv-driver.c +++ b/drivers/media/video/bt8xx/bttv-driver.c @@ -3431,7 +3431,7 @@ static int radio_open(struct inode *inode, struct file *file) dprintk("bttv: open minor=%d\n",minor); for (i = 0; i < bttv_num; i++) { - if (bttvs[i].radio_dev->minor == minor) { + if (bttvs[i].radio_dev && bttvs[i].radio_dev->minor == minor) { btv = &bttvs[i]; break; } diff --git a/drivers/media/video/cafe_ccic.c b/drivers/media/video/cafe_ccic.c index c149b7d..5405c30 100644 --- a/drivers/media/video/cafe_ccic.c +++ b/drivers/media/video/cafe_ccic.c @@ -19,6 +19,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/fs.h> +#include <linux/mm.h> #include <linux/pci.h> #include <linux/i2c.h> #include <linux/interrupt.h> diff --git a/drivers/media/video/cpia2/cpia2_usb.c b/drivers/media/video/cpia2/cpia2_usb.c index a4574740..a8a1990 100644 --- a/drivers/media/video/cpia2/cpia2_usb.c +++ b/drivers/media/video/cpia2/cpia2_usb.c @@ -632,7 +632,7 @@ int cpia2_usb_transfer_cmd(struct camera_data *cam, static int submit_urbs(struct camera_data *cam) { struct urb *urb; - int fx, err, i; + int fx, err, i, j; for(i=0; i<NUM_SBUF; ++i) { if (cam->sbuf[i].data) @@ -657,6 +657,9 @@ static int submit_urbs(struct camera_data *cam) } urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL); if (!urb) { + ERR("%s: usb_alloc_urb error!\n", __func__); + for (j = 0; j < i; j++) + usb_free_urb(cam->sbuf[j].urb); return -ENOMEM; } diff --git a/drivers/media/video/cx18/cx18-cards.c b/drivers/media/video/cx18/cx18-cards.c index 8fe5f38..3cb9734 100644 --- a/drivers/media/video/cx18/cx18-cards.c +++ b/drivers/media/video/cx18/cx18-cards.c @@ -163,7 +163,7 @@ static const struct cx18_card cx18_card_h900 = { }, .audio_inputs = { { CX18_CARD_INPUT_AUD_TUNER, - CX18_AV_AUDIO8, 0 }, + CX18_AV_AUDIO5, 0 }, { CX18_CARD_INPUT_LINE_IN1, CX18_AV_AUDIO_SERIAL1, 0 }, }, diff --git a/drivers/media/video/em28xx/em28xx-audio.c b/drivers/media/video/em28xx/em28xx-audio.c index 3c00610..ac3292d 100644 --- a/drivers/media/video/em28xx/em28xx-audio.c +++ b/drivers/media/video/em28xx/em28xx-audio.c @@ -117,10 +117,10 @@ static void em28xx_audio_isocirq(struct urb *urb) if (oldptr + length >= runtime->buffer_size) { unsigned int cnt = - runtime->buffer_size - oldptr - 1; + runtime->buffer_size - oldptr; memcpy(runtime->dma_area + oldptr * stride, cp, cnt * stride); - memcpy(runtime->dma_area, cp + cnt, + memcpy(runtime->dma_area, cp + cnt * stride, length * stride - cnt * stride); } else { memcpy(runtime->dma_area + oldptr * stride, cp, @@ -161,8 +161,14 @@ static int em28xx_init_audio_isoc(struct em28xx *dev) memset(dev->adev->transfer_buffer[i], 0x80, sb_size); urb = usb_alloc_urb(EM28XX_NUM_AUDIO_PACKETS, GFP_ATOMIC); - if (!urb) + if (!urb) { + em28xx_errdev("usb_alloc_urb failed!\n"); + for (j = 0; j < i; j++) { + usb_free_urb(dev->adev->urb[j]); + kfree(dev->adev->transfer_buffer[j]); + } return -ENOMEM; + } urb->dev = dev->udev; urb->context = dev; diff --git a/drivers/media/video/em28xx/em28xx-cards.c b/drivers/media/video/em28xx/em28xx-cards.c index 452da70..de943cf 100644 --- a/drivers/media/video/em28xx/em28xx-cards.c +++ b/drivers/media/video/em28xx/em28xx-cards.c @@ -93,28 +93,6 @@ struct em28xx_board em28xx_boards[] = { .amux = 0, } }, }, - [EM2800_BOARD_KWORLD_USB2800] = { - .name = "Kworld USB2800", - .valid = EM28XX_BOARD_NOT_VALIDATED, - .is_em2800 = 1, - .vchannels = 3, - .tuner_type = TUNER_PHILIPS_FCV1236D, - .tda9887_conf = TDA9887_PRESENT, - .decoder = EM28XX_SAA7113, - .input = { { - .type = EM28XX_VMUX_TELEVISION, - .vmux = SAA7115_COMPOSITE2, - .amux = 0, - }, { - .type = EM28XX_VMUX_COMPOSITE1, - .vmux = SAA7115_COMPOSITE0, - .amux = 1, - }, { - .type = EM28XX_VMUX_SVIDEO, - .vmux = SAA7115_SVIDEO3, - .amux = 1, - } }, - }, [EM2820_BOARD_KWORLD_PVRTV2800RF] = { .name = "Kworld PVR TV 2800 RF", .is_em2800 = 0, @@ -599,7 +577,7 @@ struct em28xx_board em28xx_boards[] = { }, { .type = EM28XX_VMUX_COMPOSITE1, .vmux = TVP5150_COMPOSITE1, - .amux = 1, + .amux = 3, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, @@ -952,22 +930,23 @@ struct em28xx_board em28xx_boards[] = { }, [EM2880_BOARD_KWORLD_DVB_310U] = { .name = "KWorld DVB-T 310U", - .valid = EM28XX_BOARD_NOT_VALIDATED, .vchannels = 3, .tuner_type = TUNER_XC2028, + .has_dvb = 1, + .mts_firmware = 1, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, - .amux = 0, + .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE1, .vmux = TVP5150_COMPOSITE1, - .amux = 1, - }, { + .amux = EM28XX_AMUX_AC97_LINE_IN, + }, { /* S-video has not been tested yet */ .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, - .amux = 1, + .amux = EM28XX_AMUX_AC97_LINE_IN, } }, }, [EM2881_BOARD_DNT_DA2_HYBRID] = { @@ -1282,6 +1261,7 @@ static struct em28xx_reg_seq em2882_terratec_hybrid_xs_digital[] = { static struct em28xx_hash_table em28xx_eeprom_hash [] = { /* P/N: SA 60002070465 Tuner: TVF7533-MF */ {0x6ce05a8f, EM2820_BOARD_PROLINK_PLAYTV_USB2, TUNER_YMEC_TVF_5533MF}, + {0x966a0441, EM2880_BOARD_KWORLD_DVB_310U, TUNER_XC2028}, }; /* I2C devicelist hash table for devices with generic USB IDs */ @@ -1552,9 +1532,12 @@ static void em28xx_setup_xc3028(struct em28xx *dev, struct xc2028_ctrl *ctl) /* djh - Not sure which demod we need here */ ctl->demod = XC3028_FE_DEFAULT; break; + case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600: + ctl->demod = XC3028_FE_DEFAULT; + ctl->fname = XC3028L_DEFAULT_FIRMWARE; + break; case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950: case EM2880_BOARD_PINNACLE_PCTV_HD_PRO: - case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600: /* FIXME: Better to specify the needed IF */ ctl->demod = XC3028_FE_DEFAULT; break; @@ -1764,6 +1747,20 @@ void em28xx_card_setup(struct em28xx *dev) break; case EM2820_BOARD_UNKNOWN: case EM2800_BOARD_UNKNOWN: + /* + * The K-WORLD DVB-T 310U is detected as an MSI Digivox AD. + * + * This occurs because they share identical USB vendor and + * product IDs. + * + * What we do here is look up the EEPROM hash of the K-WORLD + * and if it is found then we decide that we do not have + * a DIGIVOX and reset the device to the K-WORLD instead. + * + * This solution is only valid if they do not share eeprom + * hash identities which has not been determined as yet. + */ + case EM2880_BOARD_MSI_DIGIVOX_AD: if (!em28xx_hint_board(dev)) em28xx_set_model(dev); break; diff --git a/drivers/media/video/em28xx/em28xx-dvb.c b/drivers/media/video/em28xx/em28xx-dvb.c index 4b992bc..d2b1a1a 100644 --- a/drivers/media/video/em28xx/em28xx-dvb.c +++ b/drivers/media/video/em28xx/em28xx-dvb.c @@ -452,6 +452,15 @@ static int dvb_init(struct em28xx *dev) goto out_free; } break; + case EM2880_BOARD_KWORLD_DVB_310U: + dvb->frontend = dvb_attach(zl10353_attach, + &em28xx_zl10353_with_xc3028, + &dev->i2c_adap); + if (attach_xc3028(0x61, dev) < 0) { + result = -EINVAL; + goto out_free; + } + break; default: printk(KERN_ERR "%s/2: The frontend of your DVB/ATSC card" " isn't supported yet\n", diff --git a/drivers/media/video/gspca/gspca.c b/drivers/media/video/gspca/gspca.c index 7be6928..ac95c55 100644 --- a/drivers/media/video/gspca/gspca.c +++ b/drivers/media/video/gspca/gspca.c @@ -459,6 +459,7 @@ static int create_urbs(struct gspca_dev *gspca_dev, urb = usb_alloc_urb(npkt, GFP_KERNEL); if (!urb) { err("usb_alloc_urb failed"); + destroy_urbs(gspca_dev); return -ENOMEM; } urb->transfer_buffer = usb_buffer_alloc(gspca_dev->dev, @@ -468,8 +469,8 @@ static int create_urbs(struct gspca_dev *gspca_dev, if (urb->transfer_buffer == NULL) { usb_free_urb(urb); - destroy_urbs(gspca_dev); err("usb_buffer_urb failed"); + destroy_urbs(gspca_dev); return -ENOMEM; } gspca_dev->urb[n] = urb; diff --git a/drivers/media/video/gspca/pac7311.c b/drivers/media/video/gspca/pac7311.c index d4be518..ba865b7 100644 --- a/drivers/media/video/gspca/pac7311.c +++ b/drivers/media/video/gspca/pac7311.c @@ -1063,6 +1063,7 @@ static __devinitdata struct usb_device_id device_table[] = { {USB_DEVICE(0x093a, 0x2621), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2624), .driver_info = SENSOR_PAC7302}, {USB_DEVICE(0x093a, 0x2626), .driver_info = SENSOR_PAC7302}, + {USB_DEVICE(0x093a, 0x262a), .driver_info = SENSOR_PAC7302}, {} }; MODULE_DEVICE_TABLE(usb, device_table); diff --git a/drivers/media/video/gspca/sonixb.c b/drivers/media/video/gspca/sonixb.c index 5dd78c6..12b81ae 100644 --- a/drivers/media/video/gspca/sonixb.c +++ b/drivers/media/video/gspca/sonixb.c @@ -232,7 +232,7 @@ static struct ctrl sd_ctrls[] = { static struct v4l2_pix_format vga_mode[] = { {160, 120, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 160, - .sizeimage = 160 * 120 * 5 / 4, + .sizeimage = 160 * 120, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 2 | MODE_RAW}, {160, 120, V4L2_PIX_FMT_SN9C10X, V4L2_FIELD_NONE, @@ -264,7 +264,7 @@ static struct v4l2_pix_format sif_mode[] = { .priv = 1 | MODE_REDUCED_SIF}, {176, 144, V4L2_PIX_FMT_SBGGR8, V4L2_FIELD_NONE, .bytesperline = 176, - .sizeimage = 176 * 144 * 5 / 4, + .sizeimage = 176 * 144, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 1 | MODE_RAW}, {176, 144, V4L2_PIX_FMT_SN9C10X, V4L2_FIELD_NONE, diff --git a/drivers/media/video/gspca/sonixj.c b/drivers/media/video/gspca/sonixj.c index d75b1d2..572b0f3 100644 --- a/drivers/media/video/gspca/sonixj.c +++ b/drivers/media/video/gspca/sonixj.c @@ -707,6 +707,7 @@ static void i2c_w8(struct gspca_dev *gspca_dev, 0x08, 0, /* value, index */ gspca_dev->usb_buf, 8, 500); + msleep(2); } /* read 5 bytes in gspca_dev->usb_buf */ @@ -976,13 +977,13 @@ static int sd_init(struct gspca_dev *gspca_dev) case BRIDGE_SN9C105: if (regF1 != 0x11) return -ENODEV; - reg_w(gspca_dev, 0x02, regGpio, 2); + reg_w(gspca_dev, 0x01, regGpio, 2); break; case BRIDGE_SN9C120: if (regF1 != 0x12) return -ENODEV; regGpio[1] = 0x70; - reg_w(gspca_dev, 0x02, regGpio, 2); + reg_w(gspca_dev, 0x01, regGpio, 2); break; default: /* case BRIDGE_SN9C110: */ @@ -1183,7 +1184,7 @@ static void sd_start(struct gspca_dev *gspca_dev) static const __u8 CA[] = { 0x28, 0xd8, 0x14, 0xec }; static const __u8 CE[] = { 0x32, 0xdd, 0x2d, 0xdd }; /* MI0360 */ static const __u8 CE_ov76xx[] = - { 0x32, 0xdd, 0x32, 0xdd }; /* OV7630/48 */ + { 0x32, 0xdd, 0x32, 0xdd }; sn9c1xx = sn_tb[(int) sd->sensor]; configure_gpio(gspca_dev, sn9c1xx); @@ -1223,8 +1224,15 @@ static void sd_start(struct gspca_dev *gspca_dev) reg_w(gspca_dev, 0x20, gamma_def, sizeof gamma_def); for (i = 0; i < 8; i++) reg_w(gspca_dev, 0x84, reg84, sizeof reg84); + switch (sd->sensor) { + case SENSOR_OV7660: + reg_w1(gspca_dev, 0x9a, 0x05); + break; + default: reg_w1(gspca_dev, 0x9a, 0x08); reg_w1(gspca_dev, 0x99, 0x59); + break; + } mode = gspca_dev->cam.cam_mode[(int) gspca_dev->curr_mode].priv; if (mode) @@ -1275,8 +1283,8 @@ static void sd_start(struct gspca_dev *gspca_dev) /* reg1 = 0x44; */ /* reg1 = 0x46; (done) */ } else { - reg17 = 0x22; /* 640 MCKSIZE */ - reg1 = 0x06; + reg17 = 0xa2; /* 640 */ + reg1 = 0x44; } break; } @@ -1285,6 +1293,7 @@ static void sd_start(struct gspca_dev *gspca_dev) switch (sd->sensor) { case SENSOR_OV7630: case SENSOR_OV7648: + case SENSOR_OV7660: reg_w(gspca_dev, 0xce, CE_ov76xx, 4); break; default: diff --git a/drivers/media/video/gspca/spca561.c b/drivers/media/video/gspca/spca561.c index cfbc9eb..95fcfcb 100644 --- a/drivers/media/video/gspca/spca561.c +++ b/drivers/media/video/gspca/spca561.c @@ -225,7 +225,7 @@ static int i2c_read(struct gspca_dev *gspca_dev, __u16 reg, __u8 mode) reg_w_val(gspca_dev->dev, 0x8802, (mode | 0x01)); do { reg_r(gspca_dev, 0x8803, 1); - if (!gspca_dev->usb_buf) + if (!gspca_dev->usb_buf[0]) break; } while (--retry); if (retry == 0) diff --git a/drivers/media/video/gspca/zc3xx.c b/drivers/media/video/gspca/zc3xx.c index 8d7c27e..d61ef72 100644 --- a/drivers/media/video/gspca/zc3xx.c +++ b/drivers/media/video/gspca/zc3xx.c @@ -6576,8 +6576,8 @@ static int setlightfreq(struct gspca_dev *gspca_dev) cs2102_60HZ, cs2102_60HZScale}, /* SENSOR_CS2102K 1 */ {cs2102_NoFliker, cs2102_NoFlikerScale, - cs2102_50HZ, cs2102_50HZScale, - cs2102_60HZ, cs2102_60HZScale}, + NULL, NULL, /* currently disabled */ + NULL, NULL}, /* SENSOR_GC0305 2 */ {gc0305_NoFliker, gc0305_NoFliker, gc0305_50HZ, gc0305_50HZ, diff --git a/drivers/media/video/ov511.c b/drivers/media/video/ov511.c index 3d3c48d..c685240 100644 --- a/drivers/media/video/ov511.c +++ b/drivers/media/video/ov511.c @@ -3591,7 +3591,7 @@ static int ov51x_init_isoc(struct usb_ov511 *ov) { struct urb *urb; - int fx, err, n, size; + int fx, err, n, i, size; PDEBUG(3, "*** Initializing capture ***"); @@ -3662,6 +3662,8 @@ ov51x_init_isoc(struct usb_ov511 *ov) urb = usb_alloc_urb(FRAMES_PER_DESC, GFP_KERNEL); if (!urb) { err("init isoc: usb_alloc_urb ret. NULL"); + for (i = 0; i < n; i++) + usb_free_urb(ov->sbuf[i].urb); return -ENOMEM; } ov->sbuf[n].urb = urb; @@ -5651,7 +5653,7 @@ static ssize_t show_exposure(struct device *cd, if (!ov->dev) return -ENODEV; sensor_get_exposure(ov, &exp); - return sprintf(buf, "%d\n", exp >> 8); + return sprintf(buf, "%d\n", exp); } static DEVICE_ATTR(exposure, S_IRUGO, show_exposure, NULL); diff --git a/drivers/media/video/pvrusb2/pvrusb2-devattr.c b/drivers/media/video/pvrusb2/pvrusb2-devattr.c index 88e1751..cbe2a34 100644 --- a/drivers/media/video/pvrusb2/pvrusb2-devattr.c +++ b/drivers/media/video/pvrusb2/pvrusb2-devattr.c @@ -489,6 +489,8 @@ static const struct pvr2_device_desc pvr2_device_751xx = { struct usb_device_id pvr2_device_table[] = { { USB_DEVICE(0x2040, 0x2900), .driver_info = (kernel_ulong_t)&pvr2_device_29xxx}, + { USB_DEVICE(0x2040, 0x2950), /* Logically identical to 2900 */ + .driver_info = (kernel_ulong_t)&pvr2_device_29xxx}, { USB_DEVICE(0x2040, 0x2400), .driver_info = (kernel_ulong_t)&pvr2_device_24xxx}, { USB_DEVICE(0x1164, 0x0622), diff --git a/drivers/media/video/s2255drv.c b/drivers/media/video/s2255drv.c index b1d09d8..92b83fe 100644 --- a/drivers/media/video/s2255drv.c +++ b/drivers/media/video/s2255drv.c @@ -669,7 +669,7 @@ static void s2255_fillbuff(struct s2255_dev *dev, struct s2255_buffer *buf, (unsigned long)vbuf, pos); /* tell v4l buffer was filled */ - buf->vb.field_count++; + buf->vb.field_count = dev->frame_count[chn] * 2; do_gettimeofday(&ts); buf->vb.ts = ts; buf->vb.state = VIDEOBUF_DONE; @@ -1268,6 +1268,7 @@ static int vidioc_streamon(struct file *file, void *priv, enum v4l2_buf_type i) dev->last_frame[chn] = -1; dev->bad_payload[chn] = 0; dev->cur_frame[chn] = 0; + dev->frame_count[chn] = 0; for (j = 0; j < SYS_FRAMES; j++) { dev->buffer[chn].frame[j].ulState = 0; dev->buffer[chn].frame[j].cur_size = 0; diff --git a/drivers/media/video/uvc/uvc_ctrl.c b/drivers/media/video/uvc/uvc_ctrl.c index 6ef3e52..feab12a 100644 --- a/drivers/media/video/uvc/uvc_ctrl.c +++ b/drivers/media/video/uvc/uvc_ctrl.c @@ -592,7 +592,7 @@ int uvc_query_v4l2_ctrl(struct uvc_video_device *video, if (ctrl == NULL) return -EINVAL; - data = kmalloc(8, GFP_KERNEL); + data = kmalloc(ctrl->info->size, GFP_KERNEL); if (data == NULL) return -ENOMEM; diff --git a/drivers/media/video/w9968cf.c b/drivers/media/video/w9968cf.c index 168baab..11edf79 100644 --- a/drivers/media/video/w9968cf.c +++ b/drivers/media/video/w9968cf.c @@ -911,7 +911,6 @@ static int w9968cf_start_transfer(struct w9968cf_device* cam) for (i = 0; i < W9968CF_URBS; i++) { urb = usb_alloc_urb(W9968CF_ISO_PACKETS, GFP_KERNEL); - cam->urb[i] = urb; if (!urb) { for (j = 0; j < i; j++) usb_free_urb(cam->urb[j]); @@ -919,6 +918,7 @@ static int w9968cf_start_transfer(struct w9968cf_device* cam) return -ENOMEM; } + cam->urb[i] = urb; urb->dev = udev; urb->context = (void*)cam; urb->pipe = usb_rcvisocpipe(udev, 1); diff --git a/drivers/media/video/wm8739.c b/drivers/media/video/wm8739.c index 95c79ad..54ac3fe 100644 --- a/drivers/media/video/wm8739.c +++ b/drivers/media/video/wm8739.c @@ -274,10 +274,8 @@ static int wm8739_probe(struct i2c_client *client, client->addr << 1, client->adapter->name); state = kmalloc(sizeof(struct wm8739_state), GFP_KERNEL); - if (state == NULL) { - kfree(client); + if (state == NULL) return -ENOMEM; - } state->vol_l = 0x17; /* 0dB */ state->vol_r = 0x17; /* 0dB */ state->muted = 0; diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c index d842a7c..3282be7 100644 --- a/drivers/media/video/zoran_card.c +++ b/drivers/media/video/zoran_card.c @@ -988,7 +988,7 @@ zoran_open_init_params (struct zoran *zr) zr->v4l_grab_seq = 0; zr->v4l_settings.width = 192; zr->v4l_settings.height = 144; - zr->v4l_settings.format = &zoran_formats[4]; /* YUY2 - YUV-4:2:2 packed */ + zr->v4l_settings.format = &zoran_formats[7]; /* YUY2 - YUV-4:2:2 packed */ zr->v4l_settings.bytesperline = zr->v4l_settings.width * ((zr->v4l_settings.format->depth + 7) / 8); diff --git a/drivers/media/video/zoran_driver.c b/drivers/media/video/zoran_driver.c index ec6f596..2dab9ee 100644 --- a/drivers/media/video/zoran_driver.c +++ b/drivers/media/video/zoran_driver.c @@ -134,7 +134,7 @@ const struct zoran_format zoran_formats[] = { }, { .name = "16-bit RGB BE", ZFMT(-1, - V4L2_PIX_FMT_RGB565, V4L2_COLORSPACE_SRGB), + V4L2_PIX_FMT_RGB565X, V4L2_COLORSPACE_SRGB), .depth = 16, .flags = ZORAN_FORMAT_CAPTURE | ZORAN_FORMAT_OVERLAY, @@ -2737,7 +2737,8 @@ zoran_do_ioctl (struct inode *inode, fh->v4l_settings.format->fourcc; fmt->fmt.pix.colorspace = fh->v4l_settings.format->colorspace; - fmt->fmt.pix.bytesperline = 0; + fmt->fmt.pix.bytesperline = + fh->v4l_settings.bytesperline; if (BUZ_MAX_HEIGHT < (fh->v4l_settings.height * 2)) fmt->fmt.pix.field = @@ -2833,13 +2834,6 @@ zoran_do_ioctl (struct inode *inode, fmt->fmt.pix.pixelformat, (char *) &printformat); - if (fmt->fmt.pix.bytesperline > 0) { - dprintk(5, - KERN_ERR "%s: bpl not supported\n", - ZR_DEVNAME(zr)); - return -EINVAL; - } - /* we can be requested to do JPEG/raw playback/capture */ if (! (fmt->type == V4L2_BUF_TYPE_VIDEO_CAPTURE || @@ -2923,6 +2917,7 @@ zoran_do_ioctl (struct inode *inode, fh->jpg_buffers.buffer_size = zoran_v4l2_calc_bufsize(&fh-> jpg_settings); + fmt->fmt.pix.bytesperline = 0; fmt->fmt.pix.sizeimage = fh->jpg_buffers.buffer_size; @@ -2979,6 +2974,8 @@ zoran_do_ioctl (struct inode *inode, /* tell the user the * results/missing stuff */ + fmt->fmt.pix.bytesperline = + fh->v4l_settings.bytesperline; fmt->fmt.pix.sizeimage = fh->v4l_settings.height * fh->v4l_settings.bytesperline; diff --git a/drivers/memstick/core/mspro_block.c b/drivers/memstick/core/mspro_block.c index d2d2318..6e291bf 100644 --- a/drivers/memstick/core/mspro_block.c +++ b/drivers/memstick/core/mspro_block.c @@ -197,7 +197,7 @@ static int mspro_block_bd_open(struct inode *inode, struct file *filp) static int mspro_block_disk_release(struct gendisk *disk) { struct mspro_block_data *msb = disk->private_data; - int disk_id = disk->first_minor >> MSPRO_BLOCK_PART_SHIFT; + int disk_id = MINOR(disk_devt(disk)) >> MSPRO_BLOCK_PART_SHIFT; mutex_lock(&mspro_block_disk_lock); @@ -828,7 +828,7 @@ static void mspro_block_submit_req(struct request_queue *q) if (msb->eject) { while ((req = elv_next_request(q)) != NULL) - end_queued_request(req, -ENODEV); + __blk_end_request(req, -ENODEV, blk_rq_bytes(req)); return; } diff --git a/drivers/misc/eeepc-laptop.c b/drivers/misc/eeepc-laptop.c index facdb98..1ee8501 100644 --- a/drivers/misc/eeepc-laptop.c +++ b/drivers/misc/eeepc-laptop.c @@ -450,12 +450,14 @@ static int eeepc_get_fan_pwm(void) int value = 0; read_acpi_int(NULL, EEEPC_EC_FAN_PWM, &value); + value = value * 255 / 100; return (value); } static void eeepc_set_fan_pwm(int value) { - value = SENSORS_LIMIT(value, 0, 100); + value = SENSORS_LIMIT(value, 0, 255); + value = value * 100 / 255; ec_write(EEEPC_EC_SC02, value); } @@ -520,15 +522,23 @@ static ssize_t show_sys_hwmon(int (*get)(void), char *buf) static SENSOR_DEVICE_ATTR(_name, _mode, show_##_name, store_##_name, 0); EEEPC_CREATE_SENSOR_ATTR(fan1_input, S_IRUGO, eeepc_get_fan_rpm, NULL); -EEEPC_CREATE_SENSOR_ATTR(fan1_pwm, S_IRUGO | S_IWUSR, +EEEPC_CREATE_SENSOR_ATTR(pwm1, S_IRUGO | S_IWUSR, eeepc_get_fan_pwm, eeepc_set_fan_pwm); EEEPC_CREATE_SENSOR_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, eeepc_get_fan_ctrl, eeepc_set_fan_ctrl); +static ssize_t +show_name(struct device *dev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "eeepc\n"); +} +static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, 0); + static struct attribute *hwmon_attributes[] = { - &sensor_dev_attr_fan1_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1.dev_attr.attr, &sensor_dev_attr_fan1_input.dev_attr.attr, &sensor_dev_attr_pwm1_enable.dev_attr.attr, + &sensor_dev_attr_name.dev_attr.attr, NULL }; diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index a9b6146..efacee0 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -84,7 +84,7 @@ static void mmc_blk_put(struct mmc_blk_data *md) mutex_lock(&open_lock); md->usage--; if (md->usage == 0) { - int devidx = md->disk->first_minor >> MMC_SHIFT; + int devidx = MINOR(disk_devt(md->disk)) >> MMC_SHIFT; __clear_bit(devidx, dev_use); put_disk(md->disk); diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c index 917035e..0000896 100644 --- a/drivers/mmc/host/atmel-mci.c +++ b/drivers/mmc/host/atmel-mci.c @@ -426,8 +426,6 @@ static u32 atmci_submit_data(struct mmc_host *mmc, struct mmc_data *data) host->sg = NULL; host->data = data; - mci_writel(host, BLKR, MCI_BCNT(data->blocks) - | MCI_BLKLEN(data->blksz)); dev_vdbg(&mmc->class_dev, "BLKR=0x%08x\n", MCI_BCNT(data->blocks) | MCI_BLKLEN(data->blksz)); @@ -483,6 +481,10 @@ static void atmci_request(struct mmc_host *mmc, struct mmc_request *mrq) if (data->blocks > 1 && data->blksz & 3) goto fail; atmci_set_timeout(host, data); + + /* Must set block count/size before sending command */ + mci_writel(host, BLKR, MCI_BCNT(data->blocks) + | MCI_BLKLEN(data->blksz)); } iflags = MCI_CMDRDY; diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c index f34f20c..9bf581c 100644 --- a/drivers/mtd/ftl.c +++ b/drivers/mtd/ftl.c @@ -1005,6 +1005,29 @@ static int ftl_writesect(struct mtd_blktrans_dev *dev, return ftl_write((void *)dev, buf, block, 1); } +static int ftl_discardsect(struct mtd_blktrans_dev *dev, + unsigned long sector, unsigned nr_sects) +{ + partition_t *part = (void *)dev; + uint32_t bsize = 1 << part->header.EraseUnitSize; + + DEBUG(1, "FTL erase sector %ld for %d sectors\n", + sector, nr_sects); + + while (nr_sects) { + uint32_t old_addr = part->VirtualBlockMap[sector]; + if (old_addr != 0xffffffff) { + part->VirtualBlockMap[sector] = 0xffffffff; + part->EUNInfo[old_addr/bsize].Deleted++; + if (set_bam_entry(part, old_addr, 0)) + return -EIO; + } + nr_sects--; + sector++; + } + + return 0; +} /*====================================================================*/ static void ftl_freepart(partition_t *part) @@ -1069,6 +1092,7 @@ static struct mtd_blktrans_ops ftl_tr = { .blksize = SECTOR_SIZE, .readsect = ftl_readsect, .writesect = ftl_writesect, + .discard = ftl_discardsect, .getgeo = ftl_getgeo, .add_mtd = ftl_add_mtd, .remove_dev = ftl_remove_dev, diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 9ff007c..681d5ac 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c @@ -32,6 +32,14 @@ struct mtd_blkcore_priv { spinlock_t queue_lock; }; +static int blktrans_discard_request(struct request_queue *q, + struct request *req) +{ + req->cmd_type = REQ_TYPE_LINUX_BLOCK; + req->cmd[0] = REQ_LB_OP_DISCARD; + return 0; +} + static int do_blktrans_request(struct mtd_blktrans_ops *tr, struct mtd_blktrans_dev *dev, struct request *req) @@ -44,6 +52,10 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, buf = req->buffer; + if (req->cmd_type == REQ_TYPE_LINUX_BLOCK && + req->cmd[0] == REQ_LB_OP_DISCARD) + return !tr->discard(dev, block, nsect); + if (!blk_fs_request(req)) return 0; @@ -367,6 +379,10 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr) tr->blkcore_priv->rq->queuedata = tr; blk_queue_hardsect_size(tr->blkcore_priv->rq, tr->blksize); + if (tr->discard) + blk_queue_set_discard(tr->blkcore_priv->rq, + blktrans_discard_request); + tr->blkshift = ffs(tr->blksize) - 1; tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr, diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c index 0b6095b..bcd2bc4 100644 --- a/drivers/net/e1000e/ich8lan.c +++ b/drivers/net/e1000e/ich8lan.c @@ -396,7 +396,7 @@ static s32 e1000_acquire_swflag_ich8lan(struct e1000_hw *hw) u32 extcnf_ctrl; u32 timeout = PHY_CFG_TIMEOUT; - WARN_ON(preempt_count()); + might_sleep(); if (!mutex_trylock(&nvm_mutex)) { WARN(1, KERN_ERR "e1000e mutex contention. Owned by pid %d\n", diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 9c71858..77baff0 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -16,6 +16,7 @@ #include <linux/kernel.h> +#include <linux/sched.h> #include <linux/pci.h> #include <linux/stat.h> #include <linux/topology.h> @@ -484,6 +485,21 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr, #endif /* HAVE_PCI_LEGACY */ #ifdef HAVE_PCI_MMAP + +static int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vma) +{ + unsigned long nr, start, size; + + nr = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; + start = vma->vm_pgoff; + size = pci_resource_len(pdev, resno) >> PAGE_SHIFT; + if (start < size && size - start >= nr) + return 1; + WARN(1, "process \"%s\" tried to map 0x%08lx-0x%08lx on %s BAR %d (size 0x%08lx)\n", + current->comm, start, start+nr, pci_name(pdev), resno, size); + return 0; +} + /** * pci_mmap_resource - map a PCI resource into user memory space * @kobj: kobject for mapping @@ -510,6 +526,9 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, if (i >= PCI_ROM_RESOURCE) return -ENODEV; + if (!pci_mmap_fits(pdev, i, vma)) + return -EINVAL; + /* pci_mmap_page_range() expects the same kind of entry as coming * from /proc/bus/pci/ which is a "user visible" value. If this is * different from the resource itself, arch will do necessary fixup. diff --git a/drivers/pnp/Makefile b/drivers/pnp/Makefile index 26f5abc..e83f34f 100644 --- a/drivers/pnp/Makefile +++ b/drivers/pnp/Makefile @@ -2,12 +2,15 @@ # Makefile for the Linux Plug-and-Play Support. # -obj-y := core.o card.o driver.o resource.o manager.o support.o interface.o quirks.o system.o +obj-y := core.o card.o driver.o resource.o manager.o support.o interface.o quirks.o obj-$(CONFIG_PNPACPI) += pnpacpi/ obj-$(CONFIG_PNPBIOS) += pnpbios/ obj-$(CONFIG_ISAPNP) += isapnp/ +# pnp_system_init goes after pnpacpi/pnpbios init +obj-y += system.o + ifeq ($(CONFIG_PNP_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index c1b9ea3..53561d7 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -268,7 +268,7 @@ static int __init pnpacpi_init(void) return 0; } -subsys_initcall(pnpacpi_init); +fs_initcall(pnpacpi_init); static int __init pnpacpi_setup(char *str) { diff --git a/drivers/pnp/pnpbios/core.c b/drivers/pnp/pnpbios/core.c index 19a4be1..662dfcdd 100644 --- a/drivers/pnp/pnpbios/core.c +++ b/drivers/pnp/pnpbios/core.c @@ -571,7 +571,7 @@ static int __init pnpbios_init(void) return 0; } -subsys_initcall(pnpbios_init); +fs_initcall(pnpbios_init); static int __init pnpbios_thread_init(void) { diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index f118252..52e2743 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -422,6 +422,12 @@ done: return err; } +static int rtc_dev_fasync(int fd, struct file *file, int on) +{ + struct rtc_device *rtc = file->private_data; + return fasync_helper(fd, file, on, &rtc->async_queue); +} + static int rtc_dev_release(struct inode *inode, struct file *file) { struct rtc_device *rtc = file->private_data; @@ -434,16 +440,13 @@ static int rtc_dev_release(struct inode *inode, struct file *file) if (rtc->ops->release) rtc->ops->release(rtc->dev.parent); + if (file->f_flags & FASYNC) + rtc_dev_fasync(-1, file, 0); + clear_bit_unlock(RTC_DEV_BUSY, &rtc->flags); return 0; } -static int rtc_dev_fasync(int fd, struct file *file, int on) -{ - struct rtc_device *rtc = file->private_data; - return fasync_helper(fd, file, on, &rtc->async_queue); -} - static const struct file_operations rtc_dev_fops = { .owner = THIS_MODULE, .llseek = no_llseek, diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index 03c0e40..e3b5c4d 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -76,7 +76,8 @@ dasd_devices_show(struct seq_file *m, void *v) /* Print kdev. */ if (block->gdp) seq_printf(m, " at (%3d:%6d)", - block->gdp->major, block->gdp->first_minor); + MAJOR(disk_devt(block->gdp)), + MINOR(disk_devt(block->gdp))); else seq_printf(m, " at (???:??????)"); /* Print device name. */ diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 711b300..9481e4a 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -114,7 +114,7 @@ dcssblk_assign_free_minor(struct dcssblk_dev_info *dev_info) found = 0; // test if minor available list_for_each_entry(entry, &dcssblk_devices, lh) - if (minor == entry->gd->first_minor) + if (minor == MINOR(disk_devt(entry->gd))) found++; if (!found) break; // got unused minor } @@ -397,7 +397,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char goto unload_seg; } sprintf(dev_info->gd->disk_name, "dcssblk%d", - dev_info->gd->first_minor); + MINOR(disk_devt(dev_info->gd))); list_add_tail(&dev_info->lh, &dcssblk_devices); if (!try_module_get(THIS_MODULE)) { diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 1679e2f..a0b6b46 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -447,51 +447,36 @@ void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, { char s[80]; - sprintf(s, "%s sc:%x ", cdev->dev.bus_id, irq_ptr->schid.sch_no); - + sprintf(s, "qdio: %s ", dev_name(&cdev->dev)); switch (irq_ptr->qib.qfmt) { case QDIO_QETH_QFMT: - sprintf(s + strlen(s), "OSADE "); + sprintf(s + strlen(s), "OSA "); break; case QDIO_ZFCP_QFMT: sprintf(s + strlen(s), "ZFCP "); break; case QDIO_IQDIO_QFMT: - sprintf(s + strlen(s), "HiperSockets "); + sprintf(s + strlen(s), "HS "); break; } - sprintf(s + strlen(s), "using: "); - - if (!is_thinint_irq(irq_ptr)) - sprintf(s + strlen(s), "no"); - sprintf(s + strlen(s), "AdapterInterrupts "); - if (!(irq_ptr->sch_token != 0)) - sprintf(s + strlen(s), "no"); - sprintf(s + strlen(s), "QEBSM "); - if (!(irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED)) - sprintf(s + strlen(s), "no"); - sprintf(s + strlen(s), "OutboundPCI "); - if (!css_general_characteristics.aif_tdd) - sprintf(s + strlen(s), "no"); - sprintf(s + strlen(s), "TDD\n"); - printk(KERN_INFO "qdio: %s", s); - - memset(s, 0, sizeof(s)); - sprintf(s, "%s SIGA required: ", cdev->dev.bus_id); - if (irq_ptr->siga_flag.input) - sprintf(s + strlen(s), "Read "); - if (irq_ptr->siga_flag.output) - sprintf(s + strlen(s), "Write "); - if (irq_ptr->siga_flag.sync) - sprintf(s + strlen(s), "Sync "); - if (!irq_ptr->siga_flag.no_sync_ti) - sprintf(s + strlen(s), "SyncAI "); - if (!irq_ptr->siga_flag.no_sync_out_ti) - sprintf(s + strlen(s), "SyncOutAI "); - if (!irq_ptr->siga_flag.no_sync_out_pci) - sprintf(s + strlen(s), "SyncOutPCI"); + sprintf(s + strlen(s), "on SC %x using ", irq_ptr->schid.sch_no); + sprintf(s + strlen(s), "AI:%d ", is_thinint_irq(irq_ptr)); + sprintf(s + strlen(s), "QEBSM:%d ", (irq_ptr->sch_token) ? 1 : 0); + sprintf(s + strlen(s), "PCI:%d ", + (irq_ptr->qib.ac & QIB_AC_OUTBOUND_PCI_SUPPORTED) ? 1 : 0); + sprintf(s + strlen(s), "TDD:%d ", css_general_characteristics.aif_tdd); + sprintf(s + strlen(s), "SIGA:"); + sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.input) ? "R" : " "); + sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.output) ? "W" : " "); + sprintf(s + strlen(s), "%s", (irq_ptr->siga_flag.sync) ? "S" : " "); + sprintf(s + strlen(s), "%s", + (!irq_ptr->siga_flag.no_sync_ti) ? "A" : " "); + sprintf(s + strlen(s), "%s", + (!irq_ptr->siga_flag.no_sync_out_ti) ? "O" : " "); + sprintf(s + strlen(s), "%s", + (!irq_ptr->siga_flag.no_sync_out_pci) ? "P" : " "); sprintf(s + strlen(s), "\n"); - printk(KERN_INFO "qdio: %s", s); + printk(KERN_INFO "%s", s); } int __init qdio_setup_init(void) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index aa4e77c..8abfd06 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -1139,7 +1139,7 @@ static struct aac_srb * aac_scsi_common(struct fib * fib, struct scsi_cmnd * cmd srbcmd->id = cpu_to_le32(scmd_id(cmd)); srbcmd->lun = cpu_to_le32(cmd->device->lun); srbcmd->flags = cpu_to_le32(flag); - timeout = cmd->timeout_per_command/HZ; + timeout = cmd->request->timeout/HZ; if (timeout == 0) timeout = 1; srbcmd->timeout = cpu_to_le32(timeout); // timeout in seconds diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 822d521..c387c15 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -464,7 +464,6 @@ int __gdth_execute(struct scsi_device *sdev, gdth_cmd_str *gdtcmd, char *cmnd, /* use request field to save the ptr. to completion struct. */ scp->request = (struct request *)&wait; - scp->timeout_per_command = timeout*HZ; scp->cmd_len = 12; scp->cmnd = cmnd; cmndinfo.priority = IOCTL_PRI; @@ -1995,23 +1994,12 @@ static void gdth_putq(gdth_ha_str *ha, Scsi_Cmnd *scp, unchar priority) register Scsi_Cmnd *pscp; register Scsi_Cmnd *nscp; ulong flags; - unchar b, t; TRACE(("gdth_putq() priority %d\n",priority)); spin_lock_irqsave(&ha->smp_lock, flags); - if (!cmndinfo->internal_command) { + if (!cmndinfo->internal_command) cmndinfo->priority = priority; - b = scp->device->channel; - t = scp->device->id; - if (priority >= DEFAULT_PRI) { - if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha,b)].lock) || - (b==ha->virt_bus && t<MAX_HDRIVES && ha->hdr[t].lock)) { - TRACE2(("gdth_putq(): locked IO ->update_timeout()\n")); - cmndinfo->timeout = gdth_update_timeout(scp, 0); - } - } - } if (ha->req_first==NULL) { ha->req_first = scp; /* queue was empty */ @@ -3899,6 +3887,39 @@ static const char *gdth_info(struct Scsi_Host *shp) return ((const char *)ha->binfo.type_string); } +static enum blk_eh_timer_return gdth_timed_out(struct scsi_cmnd *scp) +{ + gdth_ha_str *ha = shost_priv(scp->device->host); + struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); + unchar b, t; + ulong flags; + enum blk_eh_timer_return retval = BLK_EH_NOT_HANDLED; + + TRACE(("%s() cmd 0x%x\n", scp->cmnd[0], __func__)); + b = scp->device->channel; + t = scp->device->id; + + /* + * We don't really honor the command timeout, but we try to + * honor 6 times of the actual command timeout! So reset the + * timer if this is less than 6th timeout on this command! + */ + if (++cmndinfo->timeout_count < 6) + retval = BLK_EH_RESET_TIMER; + + /* Reset the timeout if it is locked IO */ + spin_lock_irqsave(&ha->smp_lock, flags); + if ((b != ha->virt_bus && ha->raw[BUS_L2P(ha, b)].lock) || + (b == ha->virt_bus && t < MAX_HDRIVES && ha->hdr[t].lock)) { + TRACE2(("%s(): locked IO, reset timeout\n", __func__)); + retval = BLK_EH_RESET_TIMER; + } + spin_unlock_irqrestore(&ha->smp_lock, flags); + + return retval; +} + + static int gdth_eh_bus_reset(Scsi_Cmnd *scp) { gdth_ha_str *ha = shost_priv(scp->device->host); @@ -3992,7 +4013,7 @@ static int gdth_queuecommand(struct scsi_cmnd *scp, BUG_ON(!cmndinfo); scp->scsi_done = done; - gdth_update_timeout(scp, scp->timeout_per_command * 6); + cmndinfo->timeout_count = 0; cmndinfo->priority = DEFAULT_PRI; return __gdth_queuecommand(ha, scp, cmndinfo); @@ -4096,12 +4117,10 @@ static int ioc_lockdrv(void __user *arg) ha->hdr[j].lock = 1; spin_unlock_irqrestore(&ha->smp_lock, flags); gdth_wait_completion(ha, ha->bus_cnt, j); - gdth_stop_timeout(ha, ha->bus_cnt, j); } else { spin_lock_irqsave(&ha->smp_lock, flags); ha->hdr[j].lock = 0; spin_unlock_irqrestore(&ha->smp_lock, flags); - gdth_start_timeout(ha, ha->bus_cnt, j); gdth_next(ha); } } @@ -4539,18 +4558,14 @@ static int gdth_ioctl(struct inode *inode, struct file *filep, spin_lock_irqsave(&ha->smp_lock, flags); ha->raw[i].lock = 1; spin_unlock_irqrestore(&ha->smp_lock, flags); - for (j = 0; j < ha->tid_cnt; ++j) { + for (j = 0; j < ha->tid_cnt; ++j) gdth_wait_completion(ha, i, j); - gdth_stop_timeout(ha, i, j); - } } else { spin_lock_irqsave(&ha->smp_lock, flags); ha->raw[i].lock = 0; spin_unlock_irqrestore(&ha->smp_lock, flags); - for (j = 0; j < ha->tid_cnt; ++j) { - gdth_start_timeout(ha, i, j); + for (j = 0; j < ha->tid_cnt; ++j) gdth_next(ha); - } } } break; @@ -4644,6 +4659,7 @@ static struct scsi_host_template gdth_template = { .slave_configure = gdth_slave_configure, .bios_param = gdth_bios_param, .proc_info = gdth_proc_info, + .eh_timed_out = gdth_timed_out, .proc_name = "gdth", .can_queue = GDTH_MAXCMDS, .this_id = -1, diff --git a/drivers/scsi/gdth.h b/drivers/scsi/gdth.h index ca92476..1646444 100644 --- a/drivers/scsi/gdth.h +++ b/drivers/scsi/gdth.h @@ -916,7 +916,7 @@ typedef struct { gdth_cmd_str *internal_cmd_str; /* crier for internal messages*/ dma_addr_t sense_paddr; /* sense dma-addr */ unchar priority; - int timeout; + int timeout_count; /* # of timeout calls */ volatile int wait_for_completion; ushort status; ulong32 info; diff --git a/drivers/scsi/gdth_proc.c b/drivers/scsi/gdth_proc.c index ce0228e..59349a3 100644 --- a/drivers/scsi/gdth_proc.c +++ b/drivers/scsi/gdth_proc.c @@ -748,69 +748,3 @@ static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id) } spin_unlock_irqrestore(&ha->smp_lock, flags); } - -static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id) -{ - ulong flags; - Scsi_Cmnd *scp; - unchar b, t; - - spin_lock_irqsave(&ha->smp_lock, flags); - - for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) { - struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); - if (!cmndinfo->internal_command) { - b = scp->device->channel; - t = scp->device->id; - if (t == (unchar)id && b == (unchar)busnum) { - TRACE2(("gdth_stop_timeout(): update_timeout()\n")); - cmndinfo->timeout = gdth_update_timeout(scp, 0); - } - } - } - spin_unlock_irqrestore(&ha->smp_lock, flags); -} - -static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id) -{ - ulong flags; - Scsi_Cmnd *scp; - unchar b, t; - - spin_lock_irqsave(&ha->smp_lock, flags); - - for (scp = ha->req_first; scp; scp = (Scsi_Cmnd *)scp->SCp.ptr) { - struct gdth_cmndinfo *cmndinfo = gdth_cmnd_priv(scp); - if (!cmndinfo->internal_command) { - b = scp->device->channel; - t = scp->device->id; - if (t == (unchar)id && b == (unchar)busnum) { - TRACE2(("gdth_start_timeout(): update_timeout()\n")); - gdth_update_timeout(scp, cmndinfo->timeout); - } - } - } - spin_unlock_irqrestore(&ha->smp_lock, flags); -} - -static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout) -{ - int oldto; - - oldto = scp->timeout_per_command; - scp->timeout_per_command = timeout; - - if (timeout == 0) { - del_timer(&scp->eh_timeout); - scp->eh_timeout.data = (unsigned long) NULL; - scp->eh_timeout.expires = 0; - } else { - if (scp->eh_timeout.data != (unsigned long) NULL) - del_timer(&scp->eh_timeout); - scp->eh_timeout.data = (unsigned long) scp; - scp->eh_timeout.expires = jiffies + timeout; - add_timer(&scp->eh_timeout); - } - - return oldto; -} diff --git a/drivers/scsi/gdth_proc.h b/drivers/scsi/gdth_proc.h index 45e6fda..9b900cc 100644 --- a/drivers/scsi/gdth_proc.h +++ b/drivers/scsi/gdth_proc.h @@ -20,9 +20,6 @@ static char *gdth_ioctl_alloc(gdth_ha_str *ha, int size, int scratch, ulong64 *paddr); static void gdth_ioctl_free(gdth_ha_str *ha, int size, char *buf, ulong64 paddr); static void gdth_wait_completion(gdth_ha_str *ha, int busnum, int id); -static void gdth_stop_timeout(gdth_ha_str *ha, int busnum, int id); -static void gdth_start_timeout(gdth_ha_str *ha, int busnum, int id); -static int gdth_update_timeout(Scsi_Cmnd *scp, int timeout); #endif diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 7b1502c..87e09f3 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -756,7 +756,7 @@ static int ibmvscsi_queuecommand(struct scsi_cmnd *cmnd, init_event_struct(evt_struct, handle_cmd_rsp, VIOSRP_SRP_FORMAT, - cmnd->timeout_per_command/HZ); + cmnd->request->timeout/HZ); evt_struct->cmnd = cmnd; evt_struct->cmnd_done = done; diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 461331d..81c16cb 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -612,7 +612,7 @@ static int idescsi_queue (struct scsi_cmnd *cmd, pc->req_xfer = pc->buf_size = scsi_bufflen(cmd); pc->scsi_cmd = cmd; pc->done = done; - pc->timeout = jiffies + cmd->timeout_per_command; + pc->timeout = jiffies + cmd->request->timeout; if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) { printk ("ide-scsi: %s: que %lu, cmd = ", drive->name, cmd->serial_number); diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index e7a3a65..d30eb7b 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -3670,7 +3670,8 @@ static int ipr_slave_configure(struct scsi_device *sdev) sdev->no_uld_attach = 1; } if (ipr_is_vset_device(res)) { - sdev->timeout = IPR_VSET_RW_TIMEOUT; + blk_queue_rq_timeout(sdev->request_queue, + IPR_VSET_RW_TIMEOUT); blk_queue_max_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS); } if (ipr_is_vset_device(res) || ipr_is_scsi_disk(res)) diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c index bc9e6dd..ef683f0 100644 --- a/drivers/scsi/ips.c +++ b/drivers/scsi/ips.c @@ -3818,7 +3818,7 @@ ips_send_cmd(ips_ha_t * ha, ips_scb_t * scb) scb->cmd.dcdb.segment_4G = 0; scb->cmd.dcdb.enhanced_sg = 0; - TimeOut = scb->scsi_cmd->timeout_per_command; + TimeOut = scb->scsi_cmd->request->timeout; if (ha->subsys->param[4] & 0x00100000) { /* If NEW Tape DCDB is Supported */ if (!scb->sg_len) { diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 0b7457d..da7b67d 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1476,12 +1476,12 @@ static void iscsi_start_tx(struct iscsi_conn *conn) scsi_queue_work(conn->session->host, &conn->xmitwork); } -static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) +static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) { struct iscsi_cls_session *cls_session; struct iscsi_session *session; struct iscsi_conn *conn; - enum scsi_eh_timer_return rc = EH_NOT_HANDLED; + enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED; cls_session = starget_to_session(scsi_target(scmd->device)); session = cls_session->dd_data; @@ -1494,14 +1494,14 @@ static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) * We are probably in the middle of iscsi recovery so let * that complete and handle the error. */ - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; goto done; } conn = session->leadconn; if (!conn) { /* In the middle of shuting down */ - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; goto done; } @@ -1513,20 +1513,21 @@ static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd) */ if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + (conn->ping_timeout * HZ), jiffies)) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; /* * if we are about to check the transport then give the command * more time */ if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ), jiffies)) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; /* if in the middle of checking the transport then give us more time */ if (conn->ping_task) - rc = EH_RESET_TIMER; + rc = BLK_EH_RESET_TIMER; done: spin_unlock(&session->lock); - debug_scsi("return %s\n", rc == EH_RESET_TIMER ? "timer reset" : "nh"); + debug_scsi("return %s\n", rc == BLK_EH_RESET_TIMER ? + "timer reset" : "nh"); return rc; } diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index 48ee8c7..e155011 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -294,10 +294,10 @@ static void sas_ata_post_internal(struct ata_queued_cmd *qc) } } -static int sas_ata_scr_write(struct ata_port *ap, unsigned int sc_reg_in, +static int sas_ata_scr_write(struct ata_link *link, unsigned int sc_reg_in, u32 val) { - struct domain_device *dev = ap->private_data; + struct domain_device *dev = link->ap->private_data; SAS_DPRINTK("STUB %s\n", __func__); switch (sc_reg_in) { @@ -319,10 +319,10 @@ static int sas_ata_scr_write(struct ata_port *ap, unsigned int sc_reg_in, return 0; } -static int sas_ata_scr_read(struct ata_port *ap, unsigned int sc_reg_in, +static int sas_ata_scr_read(struct ata_link *link, unsigned int sc_reg_in, u32 *val) { - struct domain_device *dev = ap->private_data; + struct domain_device *dev = link->ap->private_data; SAS_DPRINTK("STUB %s\n", __func__); switch (sc_reg_in) { @@ -398,7 +398,7 @@ void sas_ata_task_abort(struct sas_task *task) /* Bounce SCSI-initiated commands to the SCSI EH */ if (qc->scsicmd) { - scsi_req_abort_cmd(qc->scsicmd); + blk_abort_request(qc->scsicmd->request); scsi_schedule_eh(qc->scsicmd->device->host); return; } diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index b4f9368..0001374 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -55,7 +55,7 @@ void sas_unregister_phys(struct sas_ha_struct *sas_ha); int sas_register_ports(struct sas_ha_struct *sas_ha); void sas_unregister_ports(struct sas_ha_struct *sas_ha); -enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); +enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *); int sas_init_queue(struct sas_ha_struct *sas_ha); int sas_init_events(struct sas_ha_struct *sas_ha); diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index a8e3ef3..7448387 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -673,43 +673,43 @@ out: return; } -enum scsi_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) +enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) { struct sas_task *task = TO_SAS_TASK(cmd); unsigned long flags; if (!task) { - cmd->timeout_per_command /= 2; + cmd->request->timeout /= 2; SAS_DPRINTK("command 0x%p, task 0x%p, gone: %s\n", - cmd, task, (cmd->timeout_per_command ? - "EH_RESET_TIMER" : "EH_NOT_HANDLED")); - if (!cmd->timeout_per_command) - return EH_NOT_HANDLED; - return EH_RESET_TIMER; + cmd, task, (cmd->request->timeout ? + "BLK_EH_RESET_TIMER" : "BLK_EH_NOT_HANDLED")); + if (!cmd->request->timeout) + return BLK_EH_NOT_HANDLED; + return BLK_EH_RESET_TIMER; } spin_lock_irqsave(&task->task_state_lock, flags); BUG_ON(task->task_state_flags & SAS_TASK_STATE_ABORTED); if (task->task_state_flags & SAS_TASK_STATE_DONE) { spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_HANDLED\n", - cmd, task); - return EH_HANDLED; + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: " + "BLK_EH_HANDLED\n", cmd, task); + return BLK_EH_HANDLED; } if (!(task->task_state_flags & SAS_TASK_AT_INITIATOR)) { spin_unlock_irqrestore(&task->task_state_lock, flags); SAS_DPRINTK("command 0x%p, task 0x%p, not at initiator: " - "EH_RESET_TIMER\n", + "BLK_EH_RESET_TIMER\n", cmd, task); - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; } task->task_state_flags |= SAS_TASK_STATE_ABORTED; spin_unlock_irqrestore(&task->task_state_lock, flags); - SAS_DPRINTK("command 0x%p, task 0x%p, timed out: EH_NOT_HANDLED\n", + SAS_DPRINTK("command 0x%p, task 0x%p, timed out: BLK_EH_NOT_HANDLED\n", cmd, task); - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) @@ -1039,7 +1039,7 @@ void sas_task_abort(struct sas_task *task) return; } - scsi_req_abort_cmd(sc); + blk_abort_request(sc->request); scsi_schedule_eh(sc->device->host); } diff --git a/drivers/scsi/megaraid/megaraid_sas.c b/drivers/scsi/megaraid/megaraid_sas.c index 97b7633..afe1de9 100644 --- a/drivers/scsi/megaraid/megaraid_sas.c +++ b/drivers/scsi/megaraid/megaraid_sas.c @@ -1167,7 +1167,7 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd) * cmd has not been completed within the timeout period. */ static enum -scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) +blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) { struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr; struct megasas_instance *instance; @@ -1175,7 +1175,7 @@ scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) if (time_after(jiffies, scmd->jiffies_at_alloc + (MEGASAS_DEFAULT_CMD_TIMEOUT * 2) * HZ)) { - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } instance = cmd->instance; @@ -1189,7 +1189,7 @@ scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd) spin_unlock_irqrestore(instance->host->host_lock, flags); } - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; } /** diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index c57c94c..3b7240e 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -4170,8 +4170,8 @@ static int ncr_queue_command (struct ncb *np, struct scsi_cmnd *cmd) ** **---------------------------------------------------- */ - if (np->settle_time && cmd->timeout_per_command >= HZ) { - u_long tlimit = jiffies + cmd->timeout_per_command - HZ; + if (np->settle_time && cmd->request->timeout >= HZ) { + u_long tlimit = jiffies + cmd->request->timeout - HZ; if (time_after(np->settle_time, tlimit)) np->settle_time = tlimit; } diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c index 37f9ba0..b6cd12b 100644 --- a/drivers/scsi/qla1280.c +++ b/drivers/scsi/qla1280.c @@ -2845,7 +2845,7 @@ qla1280_64bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); /* Set ISP command timeout. */ - pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); + pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ); /* Set device target ID and LUN */ pkt->lun = SCSI_LUN_32(cmd); @@ -3114,7 +3114,7 @@ qla1280_32bit_start_scsi(struct scsi_qla_host *ha, struct srb * sp) memset(((char *)pkt + 8), 0, (REQUEST_ENTRY_SIZE - 8)); /* Set ISP command timeout. */ - pkt->timeout = cpu_to_le16(cmd->timeout_per_command/HZ); + pkt->timeout = cpu_to_le16(cmd->request->timeout/HZ); /* Set device target ID and LUN */ pkt->lun = SCSI_LUN_32(cmd); diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 88bebb1..de8279a 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1542,7 +1542,7 @@ static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd) DEBUG2(printk(KERN_INFO "scsi%ld: DEVICE_RESET cmd=%p jiffies = 0x%lx, to=%x," "dpc_flags=%lx, status=%x allowed=%d\n", ha->host_no, - cmd, jiffies, cmd->timeout_per_command / HZ, + cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); /* FIXME: wait for hba to go online */ @@ -1598,7 +1598,7 @@ static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd) DEBUG2(printk(KERN_INFO "scsi%ld: TARGET_DEVICE_RESET cmd=%p jiffies = 0x%lx, " "to=%x,dpc_flags=%lx, status=%x allowed=%d\n", - ha->host_no, cmd, jiffies, cmd->timeout_per_command / HZ, + ha->host_no, cmd, jiffies, cmd->request->timeout / HZ, ha->dpc_flags, cmd->result, cmd->allowed)); stat = qla4xxx_reset_target(ha, ddb_entry); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 762a879..2ac3cb2 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -291,7 +291,6 @@ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask) unsigned long flags; cmd->device = dev; - init_timer(&cmd->eh_timeout); INIT_LIST_HEAD(&cmd->list); spin_lock_irqsave(&dev->list_lock, flags); list_add_tail(&cmd->list, &dev->cmd_list); @@ -652,14 +651,19 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) unsigned long timeout; int rtn = 0; + /* + * We will use a queued command if possible, otherwise we will + * emulate the queuing and calling of completion function ourselves. + */ + atomic_inc(&cmd->device->iorequest_cnt); + /* check if the device is still usable */ if (unlikely(cmd->device->sdev_state == SDEV_DEL)) { /* in SDEV_DEL we error all commands. DID_NO_CONNECT * returns an immediate error upwards, and signals * that the device is no longer present */ cmd->result = DID_NO_CONNECT << 16; - atomic_inc(&cmd->device->iorequest_cnt); - __scsi_done(cmd); + scsi_done(cmd); /* return 0 (because the command has been processed) */ goto out; } @@ -673,6 +677,7 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) * occur until the device transitions out of the * suspend state. */ + scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : device blocked \n")); @@ -715,21 +720,9 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) host->resetting = 0; } - /* - * AK: unlikely race here: for some reason the timer could - * expire before the serial number is set up below. - */ - scsi_add_timer(cmd, cmd->timeout_per_command, scsi_times_out); - scsi_log_send(cmd); /* - * We will use a queued command if possible, otherwise we will - * emulate the queuing and calling of completion function ourselves. - */ - atomic_inc(&cmd->device->iorequest_cnt); - - /* * Before we queue this command, check if the command * length exceeds what the host adapter can handle. */ @@ -745,6 +738,12 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) } spin_lock_irqsave(host->host_lock, flags); + /* + * AK: unlikely race here: for some reason the timer could + * expire before the serial number is set up below. + * + * TODO: kill serial or move to blk layer + */ scsi_cmd_get_serial(host, cmd); if (unlikely(host->shost_state == SHOST_DEL)) { @@ -755,12 +754,8 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) } spin_unlock_irqrestore(host->host_lock, flags); if (rtn) { - if (scsi_delete_timer(cmd)) { - atomic_inc(&cmd->device->iodone_cnt); - scsi_queue_insert(cmd, - (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? - rtn : SCSI_MLQUEUE_HOST_BUSY); - } + scsi_queue_insert(cmd, (rtn == SCSI_MLQUEUE_DEVICE_BUSY) ? + rtn : SCSI_MLQUEUE_HOST_BUSY); SCSI_LOG_MLQUEUE(3, printk("queuecommand : request rejected\n")); } @@ -771,24 +766,6 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) } /** - * scsi_req_abort_cmd -- Request command recovery for the specified command - * @cmd: pointer to the SCSI command of interest - * - * This function requests that SCSI Core start recovery for the - * command by deleting the timer and adding the command to the eh - * queue. It can be called by either LLDDs or SCSI Core. LLDDs who - * implement their own error recovery MAY ignore the timeout event if - * they generated scsi_req_abort_cmd. - */ -void scsi_req_abort_cmd(struct scsi_cmnd *cmd) -{ - if (!scsi_delete_timer(cmd)) - return; - scsi_times_out(cmd); -} -EXPORT_SYMBOL(scsi_req_abort_cmd); - -/** * scsi_done - Enqueue the finished SCSI command into the done queue. * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives * ownership back to SCSI Core -- i.e. the LLDD has finished with it. @@ -803,42 +780,7 @@ EXPORT_SYMBOL(scsi_req_abort_cmd); */ static void scsi_done(struct scsi_cmnd *cmd) { - /* - * We don't have to worry about this one timing out anymore. - * If we are unable to remove the timer, then the command - * has already timed out. In which case, we have no choice but to - * let the timeout function run, as we have no idea where in fact - * that function could really be. It might be on another processor, - * etc, etc. - */ - if (!scsi_delete_timer(cmd)) - return; - __scsi_done(cmd); -} - -/* Private entry to scsi_done() to complete a command when the timer - * isn't running --- used by scsi_times_out */ -void __scsi_done(struct scsi_cmnd *cmd) -{ - struct request *rq = cmd->request; - - /* - * Set the serial numbers back to zero - */ - cmd->serial_number = 0; - - atomic_inc(&cmd->device->iodone_cnt); - if (cmd->result) - atomic_inc(&cmd->device->ioerr_cnt); - - BUG_ON(!rq); - - /* - * The uptodate/nbytes values don't matter, as we allow partial - * completes and thus will check this in the softirq callback - */ - rq->completion_data = cmd; - blk_complete_request(rq); + blk_complete_request(cmd->request); } /* Move this to a header if it becomes more generally useful */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 39ce3ab..fecefa0 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -112,69 +112,8 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) } /** - * scsi_add_timer - Start timeout timer for a single scsi command. - * @scmd: scsi command that is about to start running. - * @timeout: amount of time to allow this command to run. - * @complete: timeout function to call if timer isn't canceled. - * - * Notes: - * This should be turned into an inline function. Each scsi command - * has its own timer, and as it is added to the queue, we set up the - * timer. When the command completes, we cancel the timer. - */ -void scsi_add_timer(struct scsi_cmnd *scmd, int timeout, - void (*complete)(struct scsi_cmnd *)) -{ - - /* - * If the clock was already running for this command, then - * first delete the timer. The timer handling code gets rather - * confused if we don't do this. - */ - if (scmd->eh_timeout.function) - del_timer(&scmd->eh_timeout); - - scmd->eh_timeout.data = (unsigned long)scmd; - scmd->eh_timeout.expires = jiffies + timeout; - scmd->eh_timeout.function = (void (*)(unsigned long)) complete; - - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:" - " %d, (%p)\n", __func__, - scmd, timeout, complete)); - - add_timer(&scmd->eh_timeout); -} - -/** - * scsi_delete_timer - Delete/cancel timer for a given function. - * @scmd: Cmd that we are canceling timer for - * - * Notes: - * This should be turned into an inline function. - * - * Return value: - * 1 if we were able to detach the timer. 0 if we blew it, and the - * timer function has already started to run. - */ -int scsi_delete_timer(struct scsi_cmnd *scmd) -{ - int rtn; - - rtn = del_timer(&scmd->eh_timeout); - - SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p," - " rtn: %d\n", __func__, - scmd, rtn)); - - scmd->eh_timeout.data = (unsigned long)NULL; - scmd->eh_timeout.function = NULL; - - return rtn; -} - -/** * scsi_times_out - Timeout function for normal scsi commands. - * @scmd: Cmd that is timing out. + * @req: request that is timing out. * * Notes: * We do not need to lock this. There is the potential for a race @@ -182,9 +121,11 @@ int scsi_delete_timer(struct scsi_cmnd *scmd) * normal completion function determines that the timer has already * fired, then it mustn't do anything. */ -void scsi_times_out(struct scsi_cmnd *scmd) +enum blk_eh_timer_return scsi_times_out(struct request *req) { - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); + struct scsi_cmnd *scmd = req->special; + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); + enum blk_eh_timer_return rtn = BLK_EH_NOT_HANDLED; scsi_log_completion(scmd, TIMEOUT_ERROR); @@ -196,22 +137,20 @@ void scsi_times_out(struct scsi_cmnd *scmd) eh_timed_out = NULL; if (eh_timed_out) - switch (eh_timed_out(scmd)) { - case EH_HANDLED: - __scsi_done(scmd); - return; - case EH_RESET_TIMER: - scsi_add_timer(scmd, scmd->timeout_per_command, - scsi_times_out); - return; - case EH_NOT_HANDLED: + rtn = eh_timed_out(scmd); + switch (rtn) { + case BLK_EH_NOT_HANDLED: break; + default: + return rtn; } if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { scmd->result |= DID_TIME_OUT << 16; - __scsi_done(scmd); + return BLK_EH_HANDLED; } + + return BLK_EH_NOT_HANDLED; } /** @@ -1793,7 +1732,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag) blk_rq_init(NULL, &req); scmd->request = &req; - memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout)); scmd->cmnd = req.cmd; @@ -1804,8 +1742,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag) scmd->sc_data_direction = DMA_BIDIRECTIONAL; - init_timer(&scmd->eh_timeout); - spin_lock_irqsave(shost->host_lock, flags); shost->tmf_in_progress = 1; spin_unlock_irqrestore(shost->host_lock, flags); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d2884bf..98ee55c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1181,7 +1181,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) cmd->transfersize = req->data_len; cmd->allowed = req->retries; - cmd->timeout_per_command = req->timeout; return BLKPREP_OK; } EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd); @@ -1417,17 +1416,26 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) spin_unlock(shost->host_lock); spin_lock(sdev->request_queue->queue_lock); - __scsi_done(cmd); + blk_complete_request(req); } static void scsi_softirq_done(struct request *rq) { - struct scsi_cmnd *cmd = rq->completion_data; - unsigned long wait_for = (cmd->allowed + 1) * cmd->timeout_per_command; + struct scsi_cmnd *cmd = rq->special; + unsigned long wait_for = (cmd->allowed + 1) * rq->timeout; int disposition; INIT_LIST_HEAD(&cmd->eh_entry); + /* + * Set the serial numbers back to zero + */ + cmd->serial_number = 0; + + atomic_inc(&cmd->device->iodone_cnt); + if (cmd->result) + atomic_inc(&cmd->device->ioerr_cnt); + disposition = scsi_decide_disposition(cmd); if (disposition != SUCCESS && time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) { @@ -1676,6 +1684,7 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) blk_queue_prep_rq(q, scsi_prep_fn); blk_queue_softirq_done(q, scsi_softirq_done); + blk_queue_rq_timed_out(q, scsi_times_out); return q; } diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 79f0f75..6cddd5d 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -4,6 +4,7 @@ #include <linux/device.h> struct request_queue; +struct request; struct scsi_cmnd; struct scsi_device; struct scsi_host_template; @@ -27,7 +28,6 @@ extern void scsi_exit_hosts(void); extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd); extern int scsi_setup_command_freelist(struct Scsi_Host *shost); extern void scsi_destroy_command_freelist(struct Scsi_Host *shost); -extern void __scsi_done(struct scsi_cmnd *cmd); #ifdef CONFIG_SCSI_LOGGING void scsi_log_send(struct scsi_cmnd *cmd); void scsi_log_completion(struct scsi_cmnd *cmd, int disposition); @@ -49,10 +49,7 @@ extern int __init scsi_init_devinfo(void); extern void scsi_exit_devinfo(void); /* scsi_error.c */ -extern void scsi_add_timer(struct scsi_cmnd *, int, - void (*)(struct scsi_cmnd *)); -extern int scsi_delete_timer(struct scsi_cmnd *); -extern void scsi_times_out(struct scsi_cmnd *cmd); +extern enum blk_eh_timer_return scsi_times_out(struct request *req); extern int scsi_error_handler(void *host); extern int scsi_decide_disposition(struct scsi_cmnd *cmd); extern void scsi_eh_wakeup(struct Scsi_Host *shost); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 09d311d..93c28f3 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -561,12 +561,15 @@ sdev_rd_attr (vendor, "%.8s\n"); sdev_rd_attr (model, "%.16s\n"); sdev_rd_attr (rev, "%.4s\n"); +/* + * TODO: can we make these symlinks to the block layer ones? + */ static ssize_t sdev_show_timeout (struct device *dev, struct device_attribute *attr, char *buf) { struct scsi_device *sdev; sdev = to_scsi_device(dev); - return snprintf (buf, 20, "%d\n", sdev->timeout / HZ); + return snprintf(buf, 20, "%d\n", sdev->request_queue->rq_timeout / HZ); } static ssize_t @@ -577,7 +580,7 @@ sdev_store_timeout (struct device *dev, struct device_attribute *attr, int timeout; sdev = to_scsi_device(dev); sscanf (buf, "%d\n", &timeout); - sdev->timeout = timeout * HZ; + blk_queue_rq_timeout(sdev->request_queue, timeout * HZ); return count; } static DEVICE_ATTR(timeout, S_IRUGO | S_IWUSR, sdev_show_timeout, sdev_store_timeout); diff --git a/drivers/scsi/scsi_tgt_lib.c b/drivers/scsi/scsi_tgt_lib.c index f26299d..48ba413 100644 --- a/drivers/scsi/scsi_tgt_lib.c +++ b/drivers/scsi/scsi_tgt_lib.c @@ -362,7 +362,7 @@ static int scsi_map_user_pages(struct scsi_tgt_cmd *tcmd, struct scsi_cmnd *cmd, int err; dprintk("%lx %u\n", uaddr, len); - err = blk_rq_map_user(q, rq, (void *)uaddr, len); + err = blk_rq_map_user(q, rq, NULL, (void *)uaddr, len, GFP_KERNEL); if (err) { /* * TODO: need to fixup sg_tablesize, max_segment_size, diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index cb971f0..d5f7653 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -1926,15 +1926,15 @@ static int fc_vport_match(struct attribute_container *cont, * Notes: * This routine assumes no locks are held on entry. */ -static enum scsi_eh_timer_return +static enum blk_eh_timer_return fc_timed_out(struct scsi_cmnd *scmd) { struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device)); if (rport->port_state == FC_PORTSTATE_BLOCKED) - return EH_RESET_TIMER; + return BLK_EH_RESET_TIMER; - return EH_NOT_HANDLED; + return BLK_EH_NOT_HANDLED; } /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index af9e406..a7b53be 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -87,6 +87,12 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_DISK); MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD); MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC); +#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT) +#define SD_MINORS 16 +#else +#define SD_MINORS 0 +#endif + static int sd_revalidate_disk(struct gendisk *); static int sd_probe(struct device *); static int sd_remove(struct device *); @@ -160,7 +166,7 @@ sd_store_cache_type(struct device *dev, struct device_attribute *attr, sd_print_sense_hdr(sdkp, &sshdr); return -EINVAL; } - sd_revalidate_disk(sdkp->disk); + revalidate_disk(sdkp->disk); return count; } @@ -378,7 +384,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) sector_t block = rq->sector; sector_t threshold; unsigned int this_count = rq->nr_sectors; - unsigned int timeout = sdp->timeout; int ret; if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { @@ -579,7 +584,6 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) SCpnt->transfersize = sdp->sector_size; SCpnt->underflow = this_count << 9; SCpnt->allowed = SD_MAX_RETRIES; - SCpnt->timeout_per_command = timeout; /* * This indicates that the command is ready from our end to be @@ -911,7 +915,7 @@ static void sd_rescan(struct device *dev) struct scsi_disk *sdkp = scsi_disk_get_from_dev(dev); if (sdkp) { - sd_revalidate_disk(sdkp->disk); + revalidate_disk(sdkp->disk); scsi_disk_put(sdkp); } } @@ -1759,6 +1763,52 @@ static int sd_revalidate_disk(struct gendisk *disk) } /** + * sd_format_disk_name - format disk name + * @prefix: name prefix - ie. "sd" for SCSI disks + * @index: index of the disk to format name for + * @buf: output buffer + * @buflen: length of the output buffer + * + * SCSI disk names starts at sda. The 26th device is sdz and the + * 27th is sdaa. The last one for two lettered suffix is sdzz + * which is followed by sdaaa. + * + * This is basically 26 base counting with one extra 'nil' entry + * at the beggining from the second digit on and can be + * determined using similar method as 26 base conversion with the + * index shifted -1 after each digit is computed. + * + * CONTEXT: + * Don't care. + * + * RETURNS: + * 0 on success, -errno on failure. + */ +static int sd_format_disk_name(char *prefix, int index, char *buf, int buflen) +{ + const int base = 'z' - 'a' + 1; + char *begin = buf + strlen(prefix); + char *end = buf + buflen; + char *p; + int unit; + + p = end - 1; + *p = '\0'; + unit = base; + do { + if (p == begin) + return -EINVAL; + *--p = 'a' + (index % unit); + index = (index / unit) - 1; + } while (index >= 0); + + memmove(begin, p, end - p); + memcpy(buf, prefix, strlen(prefix)); + + return 0; +} + +/** * sd_probe - called during driver initialization and whenever a * new scsi device is attached to the system. It is called once * for each scsi device (not just disks) present. @@ -1796,7 +1846,7 @@ static int sd_probe(struct device *dev) if (!sdkp) goto out; - gd = alloc_disk(16); + gd = alloc_disk(SD_MINORS); if (!gd) goto out_free; @@ -1810,8 +1860,8 @@ static int sd_probe(struct device *dev) if (error) goto out_put; - error = -EBUSY; - if (index >= SD_MAX_DISKS) + error = sd_format_disk_name("sd", index, gd->disk_name, DISK_NAME_LEN); + if (error) goto out_free_index; sdkp->device = sdp; @@ -1821,11 +1871,12 @@ static int sd_probe(struct device *dev) sdkp->openers = 0; sdkp->previous_state = 1; - if (!sdp->timeout) { + if (!sdp->request_queue->rq_timeout) { if (sdp->type != TYPE_MOD) - sdp->timeout = SD_TIMEOUT; + blk_queue_rq_timeout(sdp->request_queue, SD_TIMEOUT); else - sdp->timeout = SD_MOD_TIMEOUT; + blk_queue_rq_timeout(sdp->request_queue, + SD_MOD_TIMEOUT); } device_initialize(&sdkp->dev); @@ -1838,24 +1889,12 @@ static int sd_probe(struct device *dev) get_device(&sdp->sdev_gendev); - gd->major = sd_major((index & 0xf0) >> 4); - gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); - gd->minors = 16; - gd->fops = &sd_fops; - - if (index < 26) { - sprintf(gd->disk_name, "sd%c", 'a' + index % 26); - } else if (index < (26 + 1) * 26) { - sprintf(gd->disk_name, "sd%c%c", - 'a' + index / 26 - 1,'a' + index % 26); - } else { - const unsigned int m1 = (index / 26 - 1) / 26 - 1; - const unsigned int m2 = (index / 26 - 1) % 26; - const unsigned int m3 = index % 26; - sprintf(gd->disk_name, "sd%c%c%c", - 'a' + m1, 'a' + m2, 'a' + m3); + if (index < SD_MAX_DISKS) { + gd->major = sd_major((index & 0xf0) >> 4); + gd->first_minor = ((index & 0xf) << 4) | (index & 0xfff00); + gd->minors = SD_MINORS; } - + gd->fops = &sd_fops; gd->private_data = &sdkp->driver; gd->queue = sdkp->device->request_queue; @@ -1864,7 +1903,7 @@ static int sd_probe(struct device *dev) blk_queue_prep_rq(sdp->request_queue, sd_prep_fn); gd->driverfs_dev = &sdp->sdev_gendev; - gd->flags = GENHD_FL_DRIVERFS; + gd->flags = GENHD_FL_EXT_DEVT | GENHD_FL_DRIVERFS; if (sdp->removable) gd->flags |= GENHD_FL_REMOVABLE; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 661f9f2..ba9b9bb 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -47,7 +47,6 @@ static int sg_version_num = 30534; /* 2 digits for each component */ #include <linux/seq_file.h> #include <linux/blkdev.h> #include <linux/delay.h> -#include <linux/scatterlist.h> #include <linux/blktrace_api.h> #include <linux/smp_lock.h> @@ -69,7 +68,6 @@ static void sg_proc_cleanup(void); #endif #define SG_ALLOW_DIO_DEF 0 -#define SG_ALLOW_DIO_CODE /* compile out by commenting this define */ #define SG_MAX_DEVS 32768 @@ -118,8 +116,8 @@ typedef struct sg_scatter_hold { /* holding area for scsi scatter gather info */ unsigned short k_use_sg; /* Count of kernel scatter-gather pieces */ unsigned sglist_len; /* size of malloc'd scatter-gather list ++ */ unsigned bufflen; /* Size of (aggregate) data buffer */ - unsigned b_malloc_len; /* actual len malloc'ed in buffer */ - struct scatterlist *buffer;/* scatter list */ + struct page **pages; + int page_order; char dio_in_use; /* 0->indirect IO (or mmap), 1->dio */ unsigned char cmd_opcode; /* first byte of command */ } Sg_scatter_hold; @@ -137,6 +135,8 @@ typedef struct sg_request { /* SG_MAX_QUEUE requests outstanding per file */ char orphan; /* 1 -> drop on sight, 0 -> normal */ char sg_io_owned; /* 1 -> packet belongs to SG_IO */ volatile char done; /* 0->before bh, 1->before read, 2->read */ + struct request *rq; + struct bio *bio; } Sg_request; typedef struct sg_fd { /* holds the state of a file descriptor */ @@ -175,8 +175,8 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ static int sg_fasync(int fd, struct file *filp, int mode); /* tasklet or soft irq callback */ -static void sg_cmd_done(void *data, char *sense, int result, int resid); -static int sg_start_req(Sg_request * srp); +static void sg_rq_end_io(struct request *rq, int uptodate); +static int sg_start_req(Sg_request *srp, unsigned char *cmd); static void sg_finish_rem_req(Sg_request * srp); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, @@ -188,17 +188,11 @@ static ssize_t sg_new_write(Sg_fd *sfp, struct file *file, int read_only, Sg_request **o_srp); static int sg_common_write(Sg_fd * sfp, Sg_request * srp, unsigned char *cmnd, int timeout, int blocking); -static int sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, - int wr_xf, int *countp, unsigned char __user **up); -static int sg_write_xfer(Sg_request * srp); -static int sg_read_xfer(Sg_request * srp); static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer); static void sg_remove_scat(Sg_scatter_hold * schp); static void sg_build_reserve(Sg_fd * sfp, int req_size); static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); -static struct page *sg_page_malloc(int rqSz, int lowDma, int *retSzp); -static void sg_page_free(struct page *page, int size); static Sg_fd *sg_add_sfp(Sg_device * sdp, int dev); static int sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); static void __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp); @@ -206,7 +200,6 @@ static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); static int sg_res_in_use(Sg_fd * sfp); -static int sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len); static Sg_device *sg_get_dev(int dev); #ifdef CONFIG_SCSI_PROC_FS static int sg_last_dev(void); @@ -529,8 +522,7 @@ sg_new_read(Sg_fd * sfp, char __user *buf, size_t count, Sg_request * srp) err = -EFAULT; goto err_out; } - err = sg_read_xfer(srp); - err_out: +err_out: sg_finish_rem_req(srp); return (0 == err) ? count : err; } @@ -612,7 +604,10 @@ sg_write(struct file *filp, const char __user *buf, size_t count, loff_t * ppos) else hp->dxfer_direction = (mxsize > 0) ? SG_DXFER_FROM_DEV : SG_DXFER_NONE; hp->dxfer_len = mxsize; - hp->dxferp = (char __user *)buf + cmd_size; + if (hp->dxfer_direction == SG_DXFER_TO_DEV) + hp->dxferp = (char __user *)buf + cmd_size; + else + hp->dxferp = NULL; hp->sbp = NULL; hp->timeout = old_hdr.reply_len; /* structure abuse ... */ hp->flags = input_size; /* structure abuse ... */ @@ -732,16 +727,12 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, SCSI_LOG_TIMEOUT(4, printk("sg_common_write: scsi opcode=0x%02x, cmd_size=%d\n", (int) cmnd[0], (int) hp->cmd_len)); - if ((k = sg_start_req(srp))) { + k = sg_start_req(srp, cmnd); + if (k) { SCSI_LOG_TIMEOUT(1, printk("sg_common_write: start_req err=%d\n", k)); sg_finish_rem_req(srp); return k; /* probably out of space --> ENOMEM */ } - if ((k = sg_write_xfer(srp))) { - SCSI_LOG_TIMEOUT(1, printk("sg_common_write: write_xfer, bad address\n")); - sg_finish_rem_req(srp); - return k; - } if (sdp->detached) { sg_finish_rem_req(srp); return -ENODEV; @@ -763,20 +754,11 @@ sg_common_write(Sg_fd * sfp, Sg_request * srp, break; } hp->duration = jiffies_to_msecs(jiffies); -/* Now send everything of to mid-level. The next time we hear about this - packet is when sg_cmd_done() is called (i.e. a callback). */ - if (scsi_execute_async(sdp->device, cmnd, hp->cmd_len, data_dir, srp->data.buffer, - hp->dxfer_len, srp->data.k_use_sg, timeout, - SG_DEFAULT_RETRIES, srp, sg_cmd_done, - GFP_ATOMIC)) { - SCSI_LOG_TIMEOUT(1, printk("sg_common_write: scsi_execute_async failed\n")); - /* - * most likely out of mem, but could also be a bad map - */ - sg_finish_rem_req(srp); - return -ENOMEM; - } else - return 0; + + srp->rq->timeout = timeout; + blk_execute_rq_nowait(sdp->device->request_queue, sdp->disk, + srp->rq, 1, sg_rq_end_io); + return 0; } static int @@ -1192,8 +1174,7 @@ sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) Sg_fd *sfp; unsigned long offset, len, sa; Sg_scatter_hold *rsv_schp; - struct scatterlist *sg; - int k; + int k, length; if ((NULL == vma) || (!(sfp = (Sg_fd *) vma->vm_private_data))) return VM_FAULT_SIGBUS; @@ -1203,15 +1184,14 @@ sg_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_SIGBUS; SCSI_LOG_TIMEOUT(3, printk("sg_vma_fault: offset=%lu, scatg=%d\n", offset, rsv_schp->k_use_sg)); - sg = rsv_schp->buffer; sa = vma->vm_start; - for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); - ++k, sg = sg_next(sg)) { + length = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; - len = (len < sg->length) ? len : sg->length; + len = (len < length) ? len : length; if (offset < len) { - struct page *page; - page = virt_to_page(page_address(sg_page(sg)) + offset); + struct page *page = nth_page(rsv_schp->pages[k], + offset >> PAGE_SHIFT); get_page(page); /* increment page count */ vmf->page = page; return 0; /* success */ @@ -1233,8 +1213,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) Sg_fd *sfp; unsigned long req_sz, len, sa; Sg_scatter_hold *rsv_schp; - int k; - struct scatterlist *sg; + int k, length; if ((!filp) || (!vma) || (!(sfp = (Sg_fd *) filp->private_data))) return -ENXIO; @@ -1248,11 +1227,10 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) return -ENOMEM; /* cannot map more than reserved buffer */ sa = vma->vm_start; - sg = rsv_schp->buffer; - for (k = 0; (k < rsv_schp->k_use_sg) && (sa < vma->vm_end); - ++k, sg = sg_next(sg)) { + length = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg && sa < vma->vm_end; k++) { len = vma->vm_end - sa; - len = (len < sg->length) ? len : sg->length; + len = (len < length) ? len : length; sa += len; } @@ -1263,16 +1241,19 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma) return 0; } -/* This function is a "bottom half" handler that is called by the - * mid level when a command is completed (or has failed). */ -static void -sg_cmd_done(void *data, char *sense, int result, int resid) +/* + * This function is a "bottom half" handler that is called by the mid + * level when a command is completed (or has failed). + */ +static void sg_rq_end_io(struct request *rq, int uptodate) { - Sg_request *srp = data; + struct sg_request *srp = rq->end_io_data; Sg_device *sdp = NULL; Sg_fd *sfp; unsigned long iflags; unsigned int ms; + char *sense; + int result, resid; if (NULL == srp) { printk(KERN_ERR "sg_cmd_done: NULL request\n"); @@ -1286,6 +1267,9 @@ sg_cmd_done(void *data, char *sense, int result, int resid) return; } + sense = rq->sense; + result = rq->errors; + resid = rq->data_len; SCSI_LOG_TIMEOUT(4, printk("sg_cmd_done: %s, pack_id=%d, res=0x%x\n", sdp->disk->disk_name, srp->header.pack_id, result)); @@ -1296,7 +1280,6 @@ sg_cmd_done(void *data, char *sense, int result, int resid) if (0 != result) { struct scsi_sense_hdr sshdr; - memcpy(srp->sense_b, sense, sizeof (srp->sense_b)); srp->header.status = 0xff & result; srp->header.masked_status = status_byte(result); srp->header.msg_status = msg_byte(result); @@ -1634,37 +1617,79 @@ exit_sg(void) idr_destroy(&sg_index_idr); } -static int -sg_start_req(Sg_request * srp) +static int sg_start_req(Sg_request *srp, unsigned char *cmd) { int res; + struct request *rq; Sg_fd *sfp = srp->parentfp; sg_io_hdr_t *hp = &srp->header; int dxfer_len = (int) hp->dxfer_len; int dxfer_dir = hp->dxfer_direction; + unsigned int iov_count = hp->iovec_count; Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; + struct request_queue *q = sfp->parentdp->device->request_queue; + struct rq_map_data *md, map_data; + int rw = hp->dxfer_direction == SG_DXFER_TO_DEV ? WRITE : READ; + + SCSI_LOG_TIMEOUT(4, printk(KERN_INFO "sg_start_req: dxfer_len=%d\n", + dxfer_len)); + + rq = blk_get_request(q, rw, GFP_ATOMIC); + if (!rq) + return -ENOMEM; + + memcpy(rq->cmd, cmd, hp->cmd_len); + + rq->cmd_len = hp->cmd_len; + rq->cmd_type = REQ_TYPE_BLOCK_PC; + + srp->rq = rq; + rq->end_io_data = srp; + rq->sense = srp->sense_b; + rq->retries = SG_DEFAULT_RETRIES; - SCSI_LOG_TIMEOUT(4, printk("sg_start_req: dxfer_len=%d\n", dxfer_len)); if ((dxfer_len <= 0) || (dxfer_dir == SG_DXFER_NONE)) return 0; - if (sg_allow_dio && (hp->flags & SG_FLAG_DIRECT_IO) && - (dxfer_dir != SG_DXFER_UNKNOWN) && (0 == hp->iovec_count) && - (!sfp->parentdp->device->host->unchecked_isa_dma)) { - res = sg_build_direct(srp, sfp, dxfer_len); - if (res <= 0) /* -ve -> error, 0 -> done, 1 -> try indirect */ - return res; - } - if ((!sg_res_in_use(sfp)) && (dxfer_len <= rsv_schp->bufflen)) - sg_link_reserve(sfp, srp, dxfer_len); - else { - res = sg_build_indirect(req_schp, sfp, dxfer_len); - if (res) { - sg_remove_scat(req_schp); - return res; + + if (sg_allow_dio && hp->flags & SG_FLAG_DIRECT_IO && + dxfer_dir != SG_DXFER_UNKNOWN && !iov_count && + !sfp->parentdp->device->host->unchecked_isa_dma && + blk_rq_aligned(q, hp->dxferp, dxfer_len)) + md = NULL; + else + md = &map_data; + + if (md) { + if (!sg_res_in_use(sfp) && dxfer_len <= rsv_schp->bufflen) + sg_link_reserve(sfp, srp, dxfer_len); + else { + res = sg_build_indirect(req_schp, sfp, dxfer_len); + if (res) + return res; } + + md->pages = req_schp->pages; + md->page_order = req_schp->page_order; + md->nr_entries = req_schp->k_use_sg; } - return 0; + + if (iov_count) + res = blk_rq_map_user_iov(q, rq, md, hp->dxferp, iov_count, + hp->dxfer_len, GFP_ATOMIC); + else + res = blk_rq_map_user(q, rq, md, hp->dxferp, + hp->dxfer_len, GFP_ATOMIC); + + if (!res) { + srp->bio = rq->bio; + + if (!md) { + req_schp->dio_in_use = 1; + hp->info |= SG_INFO_DIRECT_IO; + } + } + return res; } static void @@ -1678,186 +1703,37 @@ sg_finish_rem_req(Sg_request * srp) sg_unlink_reserve(sfp, srp); else sg_remove_scat(req_schp); + + if (srp->rq) { + if (srp->bio) + blk_rq_unmap_user(srp->bio); + + blk_put_request(srp->rq); + } + sg_remove_request(sfp, srp); } static int sg_build_sgat(Sg_scatter_hold * schp, const Sg_fd * sfp, int tablesize) { - int sg_bufflen = tablesize * sizeof(struct scatterlist); + int sg_bufflen = tablesize * sizeof(struct page *); gfp_t gfp_flags = GFP_ATOMIC | __GFP_NOWARN; - /* - * TODO: test without low_dma, we should not need it since - * the block layer will bounce the buffer for us - * - * XXX(hch): we shouldn't need GFP_DMA for the actual S/G list. - */ - if (sfp->low_dma) - gfp_flags |= GFP_DMA; - schp->buffer = kzalloc(sg_bufflen, gfp_flags); - if (!schp->buffer) + schp->pages = kzalloc(sg_bufflen, gfp_flags); + if (!schp->pages) return -ENOMEM; - sg_init_table(schp->buffer, tablesize); schp->sglist_len = sg_bufflen; return tablesize; /* number of scat_gath elements allocated */ } -#ifdef SG_ALLOW_DIO_CODE -/* vvvvvvvv following code borrowed from st driver's direct IO vvvvvvvvv */ - /* TODO: hopefully we can use the generic block layer code */ - -/* Pin down user pages and put them into a scatter gather list. Returns <= 0 if - - mapping of all pages not successful - (i.e., either completely successful or fails) -*/ -static int -st_map_user_pages(struct scatterlist *sgl, const unsigned int max_pages, - unsigned long uaddr, size_t count, int rw) -{ - unsigned long end = (uaddr + count + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - int res, i, j; - struct page **pages; - - /* User attempted Overflow! */ - if ((uaddr + count) < uaddr) - return -EINVAL; - - /* Too big */ - if (nr_pages > max_pages) - return -ENOMEM; - - /* Hmm? */ - if (count == 0) - return 0; - - if ((pages = kmalloc(max_pages * sizeof(*pages), GFP_ATOMIC)) == NULL) - return -ENOMEM; - - /* Try to fault in all of the necessary pages */ - down_read(¤t->mm->mmap_sem); - /* rw==READ means read from drive, write into memory area */ - res = get_user_pages( - current, - current->mm, - uaddr, - nr_pages, - rw == READ, - 0, /* don't force */ - pages, - NULL); - up_read(¤t->mm->mmap_sem); - - /* Errors and no page mapped should return here */ - if (res < nr_pages) - goto out_unmap; - - for (i=0; i < nr_pages; i++) { - /* FIXME: flush superflous for rw==READ, - * probably wrong function for rw==WRITE - */ - flush_dcache_page(pages[i]); - /* ?? Is locking needed? I don't think so */ - /* if (!trylock_page(pages[i])) - goto out_unlock; */ - } - - sg_set_page(sgl, pages[0], 0, uaddr & ~PAGE_MASK); - if (nr_pages > 1) { - sgl[0].length = PAGE_SIZE - sgl[0].offset; - count -= sgl[0].length; - for (i=1; i < nr_pages ; i++) - sg_set_page(&sgl[i], pages[i], count < PAGE_SIZE ? count : PAGE_SIZE, 0); - } - else { - sgl[0].length = count; - } - - kfree(pages); - return nr_pages; - - out_unmap: - if (res > 0) { - for (j=0; j < res; j++) - page_cache_release(pages[j]); - res = 0; - } - kfree(pages); - return res; -} - - -/* And unmap them... */ -static int -st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages, - int dirtied) -{ - int i; - - for (i=0; i < nr_pages; i++) { - struct page *page = sg_page(&sgl[i]); - - if (dirtied) - SetPageDirty(page); - /* unlock_page(page); */ - /* FIXME: cache flush missing for rw==READ - * FIXME: call the correct reference counting function - */ - page_cache_release(page); - } - - return 0; -} - -/* ^^^^^^^^ above code borrowed from st driver's direct IO ^^^^^^^^^ */ -#endif - - -/* Returns: -ve -> error, 0 -> done, 1 -> try indirect */ -static int -sg_build_direct(Sg_request * srp, Sg_fd * sfp, int dxfer_len) -{ -#ifdef SG_ALLOW_DIO_CODE - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - int sg_tablesize = sfp->parentdp->sg_tablesize; - int mx_sc_elems, res; - struct scsi_device *sdev = sfp->parentdp->device; - - if (((unsigned long)hp->dxferp & - queue_dma_alignment(sdev->request_queue)) != 0) - return 1; - - mx_sc_elems = sg_build_sgat(schp, sfp, sg_tablesize); - if (mx_sc_elems <= 0) { - return 1; - } - res = st_map_user_pages(schp->buffer, mx_sc_elems, - (unsigned long)hp->dxferp, dxfer_len, - (SG_DXFER_TO_DEV == hp->dxfer_direction) ? 1 : 0); - if (res <= 0) { - sg_remove_scat(schp); - return 1; - } - schp->k_use_sg = res; - schp->dio_in_use = 1; - hp->info |= SG_INFO_DIRECT_IO; - return 0; -#else - return 1; -#endif -} - static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) { - struct scatterlist *sg; - int ret_sz = 0, k, rem_sz, num, mx_sc_elems; + int ret_sz = 0, i, k, rem_sz, num, mx_sc_elems; int sg_tablesize = sfp->parentdp->sg_tablesize; - int blk_size = buff_size; - struct page *p = NULL; + int blk_size = buff_size, order; + gfp_t gfp_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; if (blk_size < 0) return -EFAULT; @@ -1881,15 +1757,26 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) } else scatter_elem_sz_prev = num; } - for (k = 0, sg = schp->buffer, rem_sz = blk_size; - (rem_sz > 0) && (k < mx_sc_elems); - ++k, rem_sz -= ret_sz, sg = sg_next(sg)) { - + + if (sfp->low_dma) + gfp_mask |= GFP_DMA; + + if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) + gfp_mask |= __GFP_ZERO; + + order = get_order(num); +retry: + ret_sz = 1 << (PAGE_SHIFT + order); + + for (k = 0, rem_sz = blk_size; rem_sz > 0 && k < mx_sc_elems; + k++, rem_sz -= ret_sz) { + num = (rem_sz > scatter_elem_sz_prev) ? - scatter_elem_sz_prev : rem_sz; - p = sg_page_malloc(num, sfp->low_dma, &ret_sz); - if (!p) - return -ENOMEM; + scatter_elem_sz_prev : rem_sz; + + schp->pages[k] = alloc_pages(gfp_mask, order); + if (!schp->pages[k]) + goto out; if (num == scatter_elem_sz_prev) { if (unlikely(ret_sz > scatter_elem_sz_prev)) { @@ -1897,12 +1784,12 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) scatter_elem_sz_prev = ret_sz; } } - sg_set_page(sg, p, (ret_sz > num) ? num : ret_sz, 0); SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k=%d, num=%d, " "ret_sz=%d\n", k, num, ret_sz)); } /* end of for loop */ + schp->page_order = order; schp->k_use_sg = k; SCSI_LOG_TIMEOUT(5, printk("sg_build_indirect: k_use_sg=%d, " "rem_sz=%d\n", k, rem_sz)); @@ -1910,223 +1797,42 @@ sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size) schp->bufflen = blk_size; if (rem_sz > 0) /* must have failed */ return -ENOMEM; - return 0; -} - -static int -sg_write_xfer(Sg_request * srp) -{ - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; - int num_xfer = 0; - int j, k, onum, usglen, ksglen, res; - int iovec_count = (int) hp->iovec_count; - int dxfer_dir = hp->dxfer_direction; - unsigned char *p; - unsigned char __user *up; - int new_interface = ('\0' == hp->interface_id) ? 0 : 1; - - if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_TO_DEV == dxfer_dir) || - (SG_DXFER_TO_FROM_DEV == dxfer_dir)) { - num_xfer = (int) (new_interface ? hp->dxfer_len : hp->flags); - if (schp->bufflen < num_xfer) - num_xfer = schp->bufflen; - } - if ((num_xfer <= 0) || (schp->dio_in_use) || - (new_interface - && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) - return 0; - - SCSI_LOG_TIMEOUT(4, printk("sg_write_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", - num_xfer, iovec_count, schp->k_use_sg)); - if (iovec_count) { - onum = iovec_count; - if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum)) - return -EFAULT; - } else - onum = 1; - - ksglen = sg->length; - p = page_address(sg_page(sg)); - for (j = 0, k = 0; j < onum; ++j) { - res = sg_u_iovec(hp, iovec_count, j, 1, &usglen, &up); - if (res) - return res; - - for (; p; sg = sg_next(sg), ksglen = sg->length, - p = page_address(sg_page(sg))) { - if (usglen <= 0) - break; - if (ksglen > usglen) { - if (usglen >= num_xfer) { - if (__copy_from_user(p, up, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_from_user(p, up, usglen)) - return -EFAULT; - p += usglen; - ksglen -= usglen; - break; - } else { - if (ksglen >= num_xfer) { - if (__copy_from_user(p, up, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_from_user(p, up, ksglen)) - return -EFAULT; - up += ksglen; - usglen -= ksglen; - } - ++k; - if (k >= schp->k_use_sg) - return 0; - } - } - - return 0; -} +out: + for (i = 0; i < k; i++) + __free_pages(schp->pages[k], order); -static int -sg_u_iovec(sg_io_hdr_t * hp, int sg_num, int ind, - int wr_xf, int *countp, unsigned char __user **up) -{ - int num_xfer = (int) hp->dxfer_len; - unsigned char __user *p = hp->dxferp; - int count; + if (--order >= 0) + goto retry; - if (0 == sg_num) { - if (wr_xf && ('\0' == hp->interface_id)) - count = (int) hp->flags; /* holds "old" input_size */ - else - count = num_xfer; - } else { - sg_iovec_t iovec; - if (__copy_from_user(&iovec, p + ind*SZ_SG_IOVEC, SZ_SG_IOVEC)) - return -EFAULT; - p = iovec.iov_base; - count = (int) iovec.iov_len; - } - if (!access_ok(wr_xf ? VERIFY_READ : VERIFY_WRITE, p, count)) - return -EFAULT; - if (up) - *up = p; - if (countp) - *countp = count; - return 0; + return -ENOMEM; } static void sg_remove_scat(Sg_scatter_hold * schp) { SCSI_LOG_TIMEOUT(4, printk("sg_remove_scat: k_use_sg=%d\n", schp->k_use_sg)); - if (schp->buffer && (schp->sglist_len > 0)) { - struct scatterlist *sg = schp->buffer; - - if (schp->dio_in_use) { -#ifdef SG_ALLOW_DIO_CODE - st_unmap_user_pages(sg, schp->k_use_sg, TRUE); -#endif - } else { + if (schp->pages && schp->sglist_len > 0) { + if (!schp->dio_in_use) { int k; - for (k = 0; (k < schp->k_use_sg) && sg_page(sg); - ++k, sg = sg_next(sg)) { + for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { SCSI_LOG_TIMEOUT(5, printk( - "sg_remove_scat: k=%d, pg=0x%p, len=%d\n", - k, sg_page(sg), sg->length)); - sg_page_free(sg_page(sg), sg->length); + "sg_remove_scat: k=%d, pg=0x%p\n", + k, schp->pages[k])); + __free_pages(schp->pages[k], schp->page_order); } - } - kfree(schp->buffer); - } - memset(schp, 0, sizeof (*schp)); -} -static int -sg_read_xfer(Sg_request * srp) -{ - sg_io_hdr_t *hp = &srp->header; - Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; - int num_xfer = 0; - int j, k, onum, usglen, ksglen, res; - int iovec_count = (int) hp->iovec_count; - int dxfer_dir = hp->dxfer_direction; - unsigned char *p; - unsigned char __user *up; - int new_interface = ('\0' == hp->interface_id) ? 0 : 1; - - if ((SG_DXFER_UNKNOWN == dxfer_dir) || (SG_DXFER_FROM_DEV == dxfer_dir) - || (SG_DXFER_TO_FROM_DEV == dxfer_dir)) { - num_xfer = hp->dxfer_len; - if (schp->bufflen < num_xfer) - num_xfer = schp->bufflen; - } - if ((num_xfer <= 0) || (schp->dio_in_use) || - (new_interface - && ((SG_FLAG_NO_DXFER | SG_FLAG_MMAP_IO) & hp->flags))) - return 0; - - SCSI_LOG_TIMEOUT(4, printk("sg_read_xfer: num_xfer=%d, iovec_count=%d, k_use_sg=%d\n", - num_xfer, iovec_count, schp->k_use_sg)); - if (iovec_count) { - onum = iovec_count; - if (!access_ok(VERIFY_READ, hp->dxferp, SZ_SG_IOVEC * onum)) - return -EFAULT; - } else - onum = 1; - - p = page_address(sg_page(sg)); - ksglen = sg->length; - for (j = 0, k = 0; j < onum; ++j) { - res = sg_u_iovec(hp, iovec_count, j, 0, &usglen, &up); - if (res) - return res; - - for (; p; sg = sg_next(sg), ksglen = sg->length, - p = page_address(sg_page(sg))) { - if (usglen <= 0) - break; - if (ksglen > usglen) { - if (usglen >= num_xfer) { - if (__copy_to_user(up, p, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_to_user(up, p, usglen)) - return -EFAULT; - p += usglen; - ksglen -= usglen; - break; - } else { - if (ksglen >= num_xfer) { - if (__copy_to_user(up, p, num_xfer)) - return -EFAULT; - return 0; - } - if (__copy_to_user(up, p, ksglen)) - return -EFAULT; - up += ksglen; - usglen -= ksglen; - } - ++k; - if (k >= schp->k_use_sg) - return 0; + kfree(schp->pages); } } - - return 0; + memset(schp, 0, sizeof (*schp)); } static int sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) { Sg_scatter_hold *schp = &srp->data; - struct scatterlist *sg = schp->buffer; int k, num; SCSI_LOG_TIMEOUT(4, printk("sg_read_oxfer: num_read_xfer=%d\n", @@ -2134,15 +1840,15 @@ sg_read_oxfer(Sg_request * srp, char __user *outp, int num_read_xfer) if ((!outp) || (num_read_xfer <= 0)) return 0; - for (k = 0; (k < schp->k_use_sg) && sg_page(sg); ++k, sg = sg_next(sg)) { - num = sg->length; + num = 1 << (PAGE_SHIFT + schp->page_order); + for (k = 0; k < schp->k_use_sg && schp->pages[k]; k++) { if (num > num_read_xfer) { - if (__copy_to_user(outp, page_address(sg_page(sg)), + if (__copy_to_user(outp, page_address(schp->pages[k]), num_read_xfer)) return -EFAULT; break; } else { - if (__copy_to_user(outp, page_address(sg_page(sg)), + if (__copy_to_user(outp, page_address(schp->pages[k]), num)) return -EFAULT; num_read_xfer -= num; @@ -2177,24 +1883,21 @@ sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size) { Sg_scatter_hold *req_schp = &srp->data; Sg_scatter_hold *rsv_schp = &sfp->reserve; - struct scatterlist *sg = rsv_schp->buffer; int k, num, rem; srp->res_used = 1; SCSI_LOG_TIMEOUT(4, printk("sg_link_reserve: size=%d\n", size)); rem = size; - for (k = 0; k < rsv_schp->k_use_sg; ++k, sg = sg_next(sg)) { - num = sg->length; + num = 1 << (PAGE_SHIFT + rsv_schp->page_order); + for (k = 0; k < rsv_schp->k_use_sg; k++) { if (rem <= num) { - sfp->save_scat_len = num; - sg->length = rem; req_schp->k_use_sg = k + 1; req_schp->sglist_len = rsv_schp->sglist_len; - req_schp->buffer = rsv_schp->buffer; + req_schp->pages = rsv_schp->pages; req_schp->bufflen = size; - req_schp->b_malloc_len = rsv_schp->b_malloc_len; + req_schp->page_order = rsv_schp->page_order; break; } else rem -= num; @@ -2208,22 +1911,13 @@ static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) { Sg_scatter_hold *req_schp = &srp->data; - Sg_scatter_hold *rsv_schp = &sfp->reserve; SCSI_LOG_TIMEOUT(4, printk("sg_unlink_reserve: req->k_use_sg=%d\n", (int) req_schp->k_use_sg)); - if ((rsv_schp->k_use_sg > 0) && (req_schp->k_use_sg > 0)) { - struct scatterlist *sg = rsv_schp->buffer; - - if (sfp->save_scat_len > 0) - (sg + (req_schp->k_use_sg - 1))->length = - (unsigned) sfp->save_scat_len; - else - SCSI_LOG_TIMEOUT(1, printk ("sg_unlink_reserve: BAD save_scat_len\n")); - } req_schp->k_use_sg = 0; req_schp->bufflen = 0; - req_schp->buffer = NULL; + req_schp->pages = NULL; + req_schp->page_order = 0; req_schp->sglist_len = 0; sfp->save_scat_len = 0; srp->res_used = 0; @@ -2481,53 +2175,6 @@ sg_res_in_use(Sg_fd * sfp) return srp ? 1 : 0; } -/* The size fetched (value output via retSzp) set when non-NULL return */ -static struct page * -sg_page_malloc(int rqSz, int lowDma, int *retSzp) -{ - struct page *resp = NULL; - gfp_t page_mask; - int order, a_size; - int resSz; - - if ((rqSz <= 0) || (NULL == retSzp)) - return resp; - - if (lowDma) - page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN; - else - page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN; - - for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; - order++, a_size <<= 1) ; - resSz = a_size; /* rounded up if necessary */ - resp = alloc_pages(page_mask, order); - while ((!resp) && order) { - --order; - a_size >>= 1; /* divide by 2, until PAGE_SIZE */ - resp = alloc_pages(page_mask, order); /* try half */ - resSz = a_size; - } - if (resp) { - if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) - memset(page_address(resp), 0, resSz); - *retSzp = resSz; - } - return resp; -} - -static void -sg_page_free(struct page *page, int size) -{ - int order, a_size; - - if (!page) - return; - for (order = 0, a_size = PAGE_SIZE; a_size < size; - order++, a_size <<= 1) ; - __free_pages(page, order); -} - #ifdef CONFIG_SCSI_PROC_FS static int sg_idr_max_id(int id, void *p, void *data) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 27f5bfd..0f17009 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -331,7 +331,7 @@ static int sr_done(struct scsi_cmnd *SCpnt) static int sr_prep_fn(struct request_queue *q, struct request *rq) { - int block=0, this_count, s_size, timeout = SR_TIMEOUT; + int block = 0, this_count, s_size; struct scsi_cd *cd; struct scsi_cmnd *SCpnt; struct scsi_device *sdp = q->queuedata; @@ -461,7 +461,6 @@ static int sr_prep_fn(struct request_queue *q, struct request *rq) SCpnt->transfersize = cd->device->sector_size; SCpnt->underflow = this_count << 9; SCpnt->allowed = MAX_RETRIES; - SCpnt->timeout_per_command = timeout; /* * This indicates that the command is ready from our end to be @@ -620,6 +619,8 @@ static int sr_probe(struct device *dev) disk->fops = &sr_bdops; disk->flags = GENHD_FL_CD; + blk_queue_rq_timeout(sdev->request_queue, SR_TIMEOUT); + cd->device = sdev; cd->disk = disk; cd->driver = &sr_template; @@ -878,7 +879,7 @@ static void sr_kref_release(struct kref *kref) struct gendisk *disk = cd->disk; spin_lock(&sr_index_lock); - clear_bit(disk->first_minor, sr_index_bits); + clear_bit(MINOR(disk_devt(disk)), sr_index_bits); spin_unlock(&sr_index_lock); unregister_cdrom(&cd->cdi); diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index d39107b..f4e6cde 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -519,8 +519,8 @@ static int sym53c8xx_queue_command(struct scsi_cmnd *cmd, * Shorten our settle_time if needed for * this command not to time out. */ - if (np->s.settle_time_valid && cmd->timeout_per_command) { - unsigned long tlimit = jiffies + cmd->timeout_per_command; + if (np->s.settle_time_valid && cmd->request->timeout) { + unsigned long tlimit = jiffies + cmd->request->timeout; tlimit -= SYM_CONF_TIMER_INTERVAL*2; if (time_after(np->s.settle_time, tlimit)) { np->s.settle_time = tlimit; diff --git a/drivers/spi/orion_spi.c b/drivers/spi/orion_spi.c index c4eaacd..b872bfa 100644 --- a/drivers/spi/orion_spi.c +++ b/drivers/spi/orion_spi.c @@ -427,7 +427,7 @@ static int orion_spi_transfer(struct spi_device *spi, struct spi_message *m) goto msg_rejected; } - if (t->speed_hz < orion_spi->min_speed) { + if (t->speed_hz && t->speed_hz < orion_spi->min_speed) { dev_err(&spi->dev, "message rejected : " "device min speed (%d Hz) exceeds " diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index c6299e8..9cbff84 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -2400,11 +2400,15 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch) if (!fbcon_is_inactive(vc, info)) { if (ops->blank_state != blank) { + int ret = 1; + ops->blank_state = blank; fbcon_cursor(vc, blank ? CM_ERASE : CM_DRAW); ops->cursor_flash = (!blank); - if (fb_blank(info, blank)) + if (info->fbops->fb_blank) + ret = info->fbops->fb_blank(blank, info); + if (ret) fbcon_generic_blank(vc, info, blank); } diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index c3e174b..19caf7c 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) BUG_ON(bip == NULL); /* A cloned bio doesn't own the integrity metadata */ - if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) + if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) + && bip->bip_buf != NULL) kfree(bip->bip_buf); mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); @@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, } EXPORT_SYMBOL(bio_integrity_add_page); +static int bdev_integrity_enabled(struct block_device *bdev, int rw) +{ + struct blk_integrity *bi = bdev_get_integrity(bdev); + + if (bi == NULL) + return 0; + + if (rw == READ && bi->verify_fn != NULL && + (bi->flags & INTEGRITY_FLAG_READ)) + return 1; + + if (rw == WRITE && bi->generate_fn != NULL && + (bi->flags & INTEGRITY_FLAG_WRITE)) + return 1; + + return 0; +} + /** * bio_integrity_enabled - Check whether integrity can be passed * @bio: bio to check @@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio) } } +static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) +{ + if (bi) + return bi->tuple_size; + + return 0; +} + /** * bio_integrity_prep - Prepare bio for integrity I/O * @bio: bio to prepare @@ -30,7 +30,7 @@ static struct kmem_cache *bio_slab __read_mostly; -mempool_t *bio_split_pool __read_mostly; +static mempool_t *bio_split_pool __read_mostly; /* * if you change this list, also change bvec_alloc or things will @@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct struct bio_vec *bvl; /* - * see comment near bvec_array define! + * If 'bs' is given, lookup the pool and do the mempool alloc. + * If not, this is a bio_kmalloc() allocation and just do a + * kzalloc() for the exact number of vecs right away. */ - switch (nr) { - case 1 : *idx = 0; break; - case 2 ... 4: *idx = 1; break; - case 5 ... 16: *idx = 2; break; - case 17 ... 64: *idx = 3; break; - case 65 ... 128: *idx = 4; break; - case 129 ... BIO_MAX_PAGES: *idx = 5; break; + if (bs) { + /* + * see comment near bvec_array define! + */ + switch (nr) { + case 1: + *idx = 0; + break; + case 2 ... 4: + *idx = 1; + break; + case 5 ... 16: + *idx = 2; + break; + case 17 ... 64: + *idx = 3; + break; + case 65 ... 128: + *idx = 4; + break; + case 129 ... BIO_MAX_PAGES: + *idx = 5; + break; default: return NULL; - } - /* - * idx now points to the pool we want to allocate from - */ + } - bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); - if (bvl) - memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); + /* + * idx now points to the pool we want to allocate from + */ + bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); + if (bvl) + memset(bvl, 0, + bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); + } else + bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); return bvl; } @@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio) bio_free(bio, fs_bio_set); } +static void bio_kmalloc_destructor(struct bio *bio) +{ + kfree(bio->bi_io_vec); + kfree(bio); +} + void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); bio->bi_flags = 1 << BIO_UPTODATE; + bio->bi_comp_cpu = -1; atomic_set(&bio->bi_cnt, 1); } @@ -118,19 +146,25 @@ void bio_init(struct bio *bio) * bio_alloc_bioset - allocate a bio for I/O * @gfp_mask: the GFP_ mask given to the slab allocator * @nr_iovecs: number of iovecs to pre-allocate - * @bs: the bio_set to allocate from + * @bs: the bio_set to allocate from. If %NULL, just use kmalloc * * Description: - * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. + * bio_alloc_bioset will first try its own mempool to satisfy the allocation. * If %__GFP_WAIT is set then we will block on the internal pool waiting - * for a &struct bio to become free. + * for a &struct bio to become free. If a %NULL @bs is passed in, we will + * fall back to just using @kmalloc to allocate the required memory. * * allocate bio and iovecs from the memory pools specified by the - * bio_set structure. + * bio_set structure, or @kmalloc if none given. **/ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) { - struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); + struct bio *bio; + + if (bs) + bio = mempool_alloc(bs->bio_pool, gfp_mask); + else + bio = kmalloc(sizeof(*bio), gfp_mask); if (likely(bio)) { struct bio_vec *bvl = NULL; @@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); if (unlikely(!bvl)) { - mempool_free(bio, bs->bio_pool); + if (bs) + mempool_free(bio, bs->bio_pool); + else + kfree(bio); bio = NULL; goto out; } @@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) return bio; } +/* + * Like bio_alloc(), but doesn't use a mempool backing. This means that + * it CAN fail, but while bio_alloc() can only be used for allocations + * that have a short (finite) life span, bio_kmalloc() should be used + * for more permanent bio allocations (like allocating some bio's for + * initalization or setup purposes). + */ +struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) +{ + struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); + + if (bio) + bio->bi_destructor = bio_kmalloc_destructor; + + return bio; +} + void zero_fill_bio(struct bio *bio) { unsigned long flags; @@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) return bio->bi_phys_segments; } -inline int bio_hw_segments(struct request_queue *q, struct bio *bio) -{ - if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) - blk_recount_segments(q, bio); - - return bio->bi_hw_segments; -} - /** * __bio_clone - clone a bio * @bio: destination bio @@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page */ while (bio->bi_phys_segments >= q->max_phys_segments - || bio->bi_hw_segments >= q->max_hw_segments - || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { + || bio->bi_phys_segments >= q->max_hw_segments) { if (retried_segments) return 0; @@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page } /* If we may be able to merge these biovecs, force a recount */ - if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || - BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) + if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) bio->bi_flags &= ~(1 << BIO_SEG_VALID); bio->bi_vcnt++; bio->bi_phys_segments++; - bio->bi_hw_segments++; done: bio->bi_size += len; return len; @@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, struct bio_map_data { struct bio_vec *iovecs; - int nr_sgvecs; struct sg_iovec *sgvecs; + int nr_sgvecs; + int is_our_pages; }; static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, - struct sg_iovec *iov, int iov_count) + struct sg_iovec *iov, int iov_count, + int is_our_pages) { memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); bmd->nr_sgvecs = iov_count; + bmd->is_our_pages = is_our_pages; bio->bi_private = bmd; } @@ -493,7 +539,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, } static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, - struct sg_iovec *iov, int iov_count, int uncopy) + struct sg_iovec *iov, int iov_count, int uncopy, + int do_free_page) { int ret = 0, i; struct bio_vec *bvec; @@ -536,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, } } - if (uncopy) + if (do_free_page) __free_page(bvec->bv_page); } @@ -553,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - int ret; - - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); + int ret = 0; + if (!bio_flagged(bio, BIO_NULL_MAPPED)) + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, + bmd->nr_sgvecs, 1, bmd->is_our_pages); bio_free_map_data(bmd); bio_put(bio); return ret; @@ -565,16 +613,20 @@ int bio_uncopy_user(struct bio *bio) /** * bio_copy_user_iov - copy user data to bio * @q: destination block queue + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @iov: the iovec. * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Prepares and returns a bio for indirect user io, bouncing data * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ -struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, - int iov_count, int write_to_vm) +struct bio *bio_copy_user_iov(struct request_queue *q, + struct rq_map_data *map_data, + struct sg_iovec *iov, int iov_count, + int write_to_vm, gfp_t gfp_mask) { struct bio_map_data *bmd; struct bio_vec *bvec; @@ -597,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, len += iov[i].iov_len; } - bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); + bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); ret = -ENOMEM; - bio = bio_alloc(GFP_KERNEL, nr_pages); + bio = bio_alloc(gfp_mask, nr_pages); if (!bio) goto out_bmd; bio->bi_rw |= (!write_to_vm << BIO_RW); ret = 0; + i = 0; while (len) { - unsigned int bytes = PAGE_SIZE; + unsigned int bytes; + + if (map_data) + bytes = 1U << (PAGE_SHIFT + map_data->page_order); + else + bytes = PAGE_SIZE; if (bytes > len) bytes = len; - page = alloc_page(q->bounce_gfp | GFP_KERNEL); + if (map_data) { + if (i == map_data->nr_entries) { + ret = -ENOMEM; + break; + } + page = map_data->pages[i++]; + } else + page = alloc_page(q->bounce_gfp | gfp_mask); if (!page) { ret = -ENOMEM; break; @@ -634,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, * success */ if (!write_to_vm) { - ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); if (ret) goto cleanup; } - bio_set_map_data(bmd, bio, iov, iov_count); + bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); return bio; cleanup: - bio_for_each_segment(bvec, bio, i) - __free_page(bvec->bv_page); + if (!map_data) + bio_for_each_segment(bvec, bio, i) + __free_page(bvec->bv_page); bio_put(bio); out_bmd: @@ -654,29 +720,32 @@ out_bmd: /** * bio_copy_user - copy user data to bio * @q: destination block queue + * @map_data: pointer to the rq_map_data holding pages (if necessary) * @uaddr: start of user address * @len: length in bytes * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Prepares and returns a bio for indirect user io, bouncing data * to/from kernel pages as necessary. Must be paired with * call bio_uncopy_user() on io completion. */ -struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, - unsigned int len, int write_to_vm) +struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, + unsigned long uaddr, unsigned int len, + int write_to_vm, gfp_t gfp_mask) { struct sg_iovec iov; iov.iov_base = (void __user *)uaddr; iov.iov_len = len; - return bio_copy_user_iov(q, &iov, 1, write_to_vm); + return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); } static struct bio *__bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, - int write_to_vm) + int write_to_vm, gfp_t gfp_mask) { int i, j; int nr_pages = 0; @@ -702,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, if (!nr_pages) return ERR_PTR(-EINVAL); - bio = bio_alloc(GFP_KERNEL, nr_pages); + bio = bio_alloc(gfp_mask, nr_pages); if (!bio) return ERR_PTR(-ENOMEM); ret = -ENOMEM; - pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); + pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); if (!pages) goto out; @@ -786,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, * @uaddr: start of user address * @len: length in bytes * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Map the user space address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, - unsigned long uaddr, unsigned int len, int write_to_vm) + unsigned long uaddr, unsigned int len, int write_to_vm, + gfp_t gfp_mask) { struct sg_iovec iov; iov.iov_base = (void __user *)uaddr; iov.iov_len = len; - return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); + return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); } /** @@ -808,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, * @iov: the iovec. * @iov_count: number of elements in the iovec * @write_to_vm: bool indicating writing to pages or not + * @gfp_mask: memory allocation flags * * Map the user space address into a bio suitable for io to a block * device. Returns an error pointer in case of error. */ struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, struct sg_iovec *iov, int iov_count, - int write_to_vm) + int write_to_vm, gfp_t gfp_mask) { struct bio *bio; - bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); - + bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, + gfp_mask); if (IS_ERR(bio)) return bio; @@ -976,48 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err) struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, gfp_t gfp_mask, int reading) { - unsigned long kaddr = (unsigned long)data; - unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = kaddr >> PAGE_SHIFT; - const int nr_pages = end - start; struct bio *bio; struct bio_vec *bvec; - struct bio_map_data *bmd; - int i, ret; - struct sg_iovec iov; - - iov.iov_base = data; - iov.iov_len = len; - - bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); - if (!bmd) - return ERR_PTR(-ENOMEM); - - ret = -ENOMEM; - bio = bio_alloc(gfp_mask, nr_pages); - if (!bio) - goto out_bmd; - - while (len) { - struct page *page; - unsigned int bytes = PAGE_SIZE; - - if (bytes > len) - bytes = len; - - page = alloc_page(q->bounce_gfp | gfp_mask); - if (!page) { - ret = -ENOMEM; - goto cleanup; - } - - if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { - ret = -EINVAL; - goto cleanup; - } + int i; - len -= bytes; - } + bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); + if (IS_ERR(bio)) + return bio; if (!reading) { void *p = data; @@ -1030,20 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, } } - bio->bi_private = bmd; bio->bi_end_io = bio_copy_kern_endio; - bio_set_map_data(bmd, bio, &iov, 1); return bio; -cleanup: - bio_for_each_segment(bvec, bio, i) - __free_page(bvec->bv_page); - - bio_put(bio); -out_bmd: - bio_free_map_data(bmd); - - return ERR_PTR(ret); } /* @@ -1230,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) * split a bio - only worry about a bio with a single page * in it's iovec */ -struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) +struct bio_pair *bio_split(struct bio *bi, int first_sectors) { - struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); + struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); if (!bp) return bp; @@ -1266,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) bp->bio2.bi_end_io = bio_pair_end_2; bp->bio1.bi_private = bi; - bp->bio2.bi_private = pool; + bp->bio2.bi_private = bio_split_pool; if (bio_integrity(bi)) bio_integrity_split(bi, bp, first_sectors); @@ -1274,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) return bp; } +/** + * bio_sector_offset - Find hardware sector offset in bio + * @bio: bio to inspect + * @index: bio_vec index + * @offset: offset in bv_page + * + * Return the number of hardware sectors between beginning of bio + * and an end point indicated by a bio_vec index and an offset + * within that vector's page. + */ +sector_t bio_sector_offset(struct bio *bio, unsigned short index, + unsigned int offset) +{ + unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); + struct bio_vec *bv; + sector_t sectors; + int i; + + sectors = 0; + + if (index >= bio->bi_idx) + index = bio->bi_vcnt - 1; + + __bio_for_each_segment(bv, bio, i, 0) { + if (i == index) { + if (offset > bv->bv_offset) + sectors += (offset - bv->bv_offset) / sector_sz; + break; + } + + sectors += bv->bv_len / sector_sz; + } + + return sectors; +} +EXPORT_SYMBOL(bio_sector_offset); /* * create memory pools for biovec's in a bio_set. @@ -1376,6 +1438,7 @@ static int __init init_bio(void) subsys_initcall(init_bio); EXPORT_SYMBOL(bio_alloc); +EXPORT_SYMBOL(bio_kmalloc); EXPORT_SYMBOL(bio_put); EXPORT_SYMBOL(bio_free); EXPORT_SYMBOL(bio_endio); @@ -1383,7 +1446,6 @@ EXPORT_SYMBOL(bio_init); EXPORT_SYMBOL(__bio_clone); EXPORT_SYMBOL(bio_clone); EXPORT_SYMBOL(bio_phys_segments); -EXPORT_SYMBOL(bio_hw_segments); EXPORT_SYMBOL(bio_add_page); EXPORT_SYMBOL(bio_add_pc_page); EXPORT_SYMBOL(bio_get_nr_vecs); @@ -1393,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); EXPORT_SYMBOL(bio_copy_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); -EXPORT_SYMBOL(bio_split_pool); EXPORT_SYMBOL(bio_copy_user); EXPORT_SYMBOL(bio_uncopy_user); EXPORT_SYMBOL(bioset_create); diff --git a/fs/block_dev.c b/fs/block_dev.c index aff5421..d84f0469 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release); * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 */ -static struct kobject *bdev_get_kobj(struct block_device *bdev) -{ - if (bdev->bd_contains != bdev) - return kobject_get(&bdev->bd_part->dev.kobj); - else - return kobject_get(&bdev->bd_disk->dev.kobj); -} - -static struct kobject *bdev_get_holder(struct block_device *bdev) -{ - if (bdev->bd_contains != bdev) - return kobject_get(bdev->bd_part->holder_dir); - else - return kobject_get(bdev->bd_disk->holder_dir); -} - static int add_symlink(struct kobject *from, struct kobject *to) { if (!from || !to) @@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev, if (!bo->hdev) goto fail_put_sdir; - bo->sdev = bdev_get_kobj(bdev); + bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); if (!bo->sdev) goto fail_put_hdev; - bo->hdir = bdev_get_holder(bdev); + bo->hdir = kobject_get(bdev->bd_part->holder_dir); if (!bo->hdir) goto fail_put_sdev; @@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) EXPORT_SYMBOL(open_by_devnum); +/** + * flush_disk - invalidates all buffer-cache entries on a disk + * + * @bdev: struct block device to be flushed + * + * Invalidates all buffer-cache entries on a disk. It should be called + * when a disk has been changed -- either by a media change or online + * resize. + */ +static void flush_disk(struct block_device *bdev) +{ + if (__invalidate_device(bdev)) { + char name[BDEVNAME_SIZE] = ""; + + if (bdev->bd_disk) + disk_name(bdev->bd_disk, 0, name); + printk(KERN_WARNING "VFS: busy inodes on changed media or " + "resized disk %s\n", name); + } + + if (!bdev->bd_disk) + return; + if (disk_partitionable(bdev->bd_disk)) + bdev->bd_invalidated = 1; +} + +/** + * check_disk_size_change - checks for disk size change and adjusts bdev size. + * @disk: struct gendisk to check + * @bdev: struct bdev to adjust. + * + * This routine checks to see if the bdev size does not match the disk size + * and adjusts it if it differs. + */ +void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) +{ + loff_t disk_size, bdev_size; + + disk_size = (loff_t)get_capacity(disk) << 9; + bdev_size = i_size_read(bdev->bd_inode); + if (disk_size != bdev_size) { + char name[BDEVNAME_SIZE]; + + disk_name(disk, 0, name); + printk(KERN_INFO + "%s: detected capacity change from %lld to %lld\n", + name, bdev_size, disk_size); + i_size_write(bdev->bd_inode, disk_size); + flush_disk(bdev); + } +} +EXPORT_SYMBOL(check_disk_size_change); + +/** + * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back + * @disk: struct gendisk to be revalidated + * + * This routine is a wrapper for lower-level driver's revalidate_disk + * call-backs. It is used to do common pre and post operations needed + * for all revalidate_disk operations. + */ +int revalidate_disk(struct gendisk *disk) +{ + struct block_device *bdev; + int ret = 0; + + if (disk->fops->revalidate_disk) + ret = disk->fops->revalidate_disk(disk); + + bdev = bdget_disk(disk, 0); + if (!bdev) + return ret; + + mutex_lock(&bdev->bd_mutex); + check_disk_size_change(disk, bdev); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); + return ret; +} +EXPORT_SYMBOL(revalidate_disk); + /* * This routine checks whether a removable media has been changed, * and invalidates all buffer-cache-entries in that case. This @@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev) if (!bdops->media_changed(bdev->bd_disk)) return 0; - if (__invalidate_device(bdev)) - printk("VFS: busy inodes on changed media.\n"); - + flush_disk(bdev); if (bdops->revalidate_disk) bdops->revalidate_disk(bdev->bd_disk); - if (bdev->bd_disk->minors > 1) - bdev->bd_invalidated = 1; return 1; } @@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part); static int do_open(struct block_device *bdev, struct file *file, int for_part) { - struct module *owner = NULL; struct gendisk *disk; + struct hd_struct *part = NULL; int ret; - int part; + int partno; int perm = 0; if (file->f_mode & FMODE_READ) @@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; + lock_kernel(); - disk = get_gendisk(bdev->bd_dev, &part); - if (!disk) { - unlock_kernel(); - bdput(bdev); - return ret; - } - owner = disk->fops->owner; + + disk = get_gendisk(bdev->bd_dev, &partno); + if (!disk) + goto out_unlock_kernel; + part = disk_get_part(disk, partno); + if (!part) + goto out_unlock_kernel; mutex_lock_nested(&bdev->bd_mutex, for_part); if (!bdev->bd_openers) { bdev->bd_disk = disk; + bdev->bd_part = part; bdev->bd_contains = bdev; - if (!part) { + if (!partno) { struct backing_dev_info *bdi; if (disk->fops->open) { ret = disk->fops->open(bdev->bd_inode, file); if (ret) - goto out_first; + goto out_clear; } if (!bdev->bd_openers) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); @@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (bdev->bd_invalidated) rescan_partitions(disk, bdev); } else { - struct hd_struct *p; struct block_device *whole; whole = bdget_disk(disk, 0); ret = -ENOMEM; if (!whole) - goto out_first; + goto out_clear; BUG_ON(for_part); ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); if (ret) - goto out_first; + goto out_clear; bdev->bd_contains = whole; - p = disk->part[part - 1]; bdev->bd_inode->i_data.backing_dev_info = whole->bd_inode->i_data.backing_dev_info; - if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { + if (!(disk->flags & GENHD_FL_UP) || + !part || !part->nr_sects) { ret = -ENXIO; - goto out_first; + goto out_clear; } - kobject_get(&p->dev.kobj); - bdev->bd_part = p; - bd_set_size(bdev, (loff_t) p->nr_sects << 9); + bd_set_size(bdev, (loff_t)part->nr_sects << 9); } } else { + disk_put_part(part); put_disk(disk); - module_put(owner); + module_put(disk->fops->owner); + part = NULL; + disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); if (ret) - goto out; + goto out_unlock_bdev; } if (bdev->bd_invalidated) rescan_partitions(bdev->bd_disk, bdev); @@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) unlock_kernel(); return 0; -out_first: + out_clear: bdev->bd_disk = NULL; + bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) __blkdev_put(bdev->bd_contains, 1); bdev->bd_contains = NULL; - put_disk(disk); - module_put(owner); -out: + out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); + out_unlock_kernel: unlock_kernel(); - if (ret) - bdput(bdev); + + disk_put_part(part); + if (disk) + module_put(disk->fops->owner); + put_disk(disk); + bdput(bdev); + return ret; } @@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) put_disk(disk); module_put(owner); - - if (bdev->bd_contains != bdev) { - kobject_put(&bdev->bd_part->dev.kobj); - bdev->bd_part = NULL; - } + disk_put_part(bdev->bd_part); + bdev->bd_part = NULL; bdev->bd_disk = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) @@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev); /** * lookup_bdev - lookup a struct block_device by name + * @pathname: special file representing the block device * - * @path: special file representing the block device - * - * Get a reference to the blockdevice at @path in the current + * Get a reference to the blockdevice at @pathname in the current * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 302e95c..fb98b3d 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -6,6 +6,7 @@ #include <linux/module.h> #include <linux/fs.h> #include <linux/msdos_fs.h> +#include <linux/blkdev.h> struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); @@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster) struct fat_entry fatent; struct buffer_head *bhs[MAX_BUF_PER_PAGE]; int i, err, nr_bhs; + int first_cl = cluster; nr_bhs = 0; fatent_init(&fatent); @@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster) goto error; } + /* + * Issue discard for the sectors we no longer care about, + * batching contiguous clusters into one request + */ + if (cluster != fatent.entry + 1) { + int nr_clus = fatent.entry - first_cl + 1; + + sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), + nr_clus * sbi->sec_per_clus); + first_cl = cluster; + } + ops->ent_put(&fatent, FAT_ENT_FREE); if (sbi->free_clusters != -1) { sbi->free_clusters++; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index ecc3330..7408227 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = * a pointer to that same buffer (for convenience). */ -char *disk_name(struct gendisk *hd, int part, char *buf) +char *disk_name(struct gendisk *hd, int partno, char *buf) { - if (!part) + if (!partno) snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) - snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); + snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); else - snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); + snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); return buf; } const char *bdevname(struct block_device *bdev, char *buf) { - int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; - return disk_name(bdev->bd_disk, part, buf); + return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); } EXPORT_SYMBOL(bdevname); @@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) if (isdigit(state->name[strlen(state->name)-1])) sprintf(state->name, "p"); - state->limit = hd->minors; + state->limit = disk_max_parts(hd); i = res = err = 0; while (!res && check_part[i]) { memset(&state->parts, 0, sizeof(state->parts)); @@ -204,21 +203,22 @@ static ssize_t part_start_show(struct device *dev, return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); } -static ssize_t part_size_show(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t part_size_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); } -static ssize_t part_stat_show(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); + int cpu; - preempt_disable(); - part_round_stats(p); - preempt_enable(); + cpu = part_stat_lock(); + part_round_stats(cpu, p); + part_stat_unlock(); return sprintf(buf, "%8lu %8lu %8llu %8u " "%8lu %8lu %8llu %8u " @@ -238,17 +238,17 @@ static ssize_t part_stat_show(struct device *dev, } #ifdef CONFIG_FAIL_MAKE_REQUEST -static ssize_t part_fail_show(struct device *dev, - struct device_attribute *attr, char *buf) +ssize_t part_fail_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%d\n", p->make_it_fail); } -static ssize_t part_fail_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) +ssize_t part_fail_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) { struct hd_struct *p = dev_to_part(dev); int i; @@ -300,40 +300,34 @@ struct device_type part_type = { .release = part_release, }; -static inline void partition_sysfs_add_subdir(struct hd_struct *p) -{ - struct kobject *k; - - k = kobject_get(&p->dev.kobj); - p->holder_dir = kobject_create_and_add("holders", k); - kobject_put(k); -} - -static inline void disk_sysfs_add_subdirs(struct gendisk *disk) +static void delete_partition_rcu_cb(struct rcu_head *head) { - struct kobject *k; + struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); - k = kobject_get(&disk->dev.kobj); - disk->holder_dir = kobject_create_and_add("holders", k); - disk->slave_dir = kobject_create_and_add("slaves", k); - kobject_put(k); + part->start_sect = 0; + part->nr_sects = 0; + part_stat_set_all(part, 0); + put_device(part_to_dev(part)); } -void delete_partition(struct gendisk *disk, int part) +void delete_partition(struct gendisk *disk, int partno) { - struct hd_struct *p = disk->part[part-1]; + struct disk_part_tbl *ptbl = disk->part_tbl; + struct hd_struct *part; - if (!p) + if (partno >= ptbl->len) return; - if (!p->nr_sects) + + part = ptbl->part[partno]; + if (!part) return; - disk->part[part-1] = NULL; - p->start_sect = 0; - p->nr_sects = 0; - part_stat_set_all(p, 0); - kobject_put(p->holder_dir); - device_del(&p->dev); - put_device(&p->dev); + + blk_free_devt(part_devt(part)); + rcu_assign_pointer(ptbl->part[partno], NULL); + kobject_put(part->holder_dir); + device_del(part_to_dev(part)); + + call_rcu(&part->rcu_head, delete_partition_rcu_cb); } static ssize_t whole_disk_show(struct device *dev, @@ -344,102 +338,132 @@ static ssize_t whole_disk_show(struct device *dev, static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, whole_disk_show, NULL); -int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) +int add_partition(struct gendisk *disk, int partno, + sector_t start, sector_t len, int flags) { struct hd_struct *p; + dev_t devt = MKDEV(0, 0); + struct device *ddev = disk_to_dev(disk); + struct device *pdev; + struct disk_part_tbl *ptbl; + const char *dname; int err; + err = disk_expand_part_tbl(disk, partno); + if (err) + return err; + ptbl = disk->part_tbl; + + if (ptbl->part[partno]) + return -EBUSY; + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; if (!init_part_stats(p)) { err = -ENOMEM; - goto out0; + goto out_free; } + pdev = part_to_dev(p); + p->start_sect = start; p->nr_sects = len; - p->partno = part; - p->policy = disk->policy; + p->partno = partno; + p->policy = get_disk_ro(disk); - if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) - snprintf(p->dev.bus_id, BUS_ID_SIZE, - "%sp%d", disk->dev.bus_id, part); + dname = dev_name(ddev); + if (isdigit(dname[strlen(dname) - 1])) + snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); else - snprintf(p->dev.bus_id, BUS_ID_SIZE, - "%s%d", disk->dev.bus_id, part); + snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); - device_initialize(&p->dev); - p->dev.devt = MKDEV(disk->major, disk->first_minor + part); - p->dev.class = &block_class; - p->dev.type = &part_type; - p->dev.parent = &disk->dev; - disk->part[part-1] = p; + device_initialize(pdev); + pdev->class = &block_class; + pdev->type = &part_type; + pdev->parent = ddev; + + err = blk_alloc_devt(p, &devt); + if (err) + goto out_free; + pdev->devt = devt; /* delay uevent until 'holders' subdir is created */ - p->dev.uevent_suppress = 1; - err = device_add(&p->dev); + pdev->uevent_suppress = 1; + err = device_add(pdev); if (err) - goto out1; - partition_sysfs_add_subdir(p); - p->dev.uevent_suppress = 0; + goto out_put; + + err = -ENOMEM; + p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); + if (!p->holder_dir) + goto out_del; + + pdev->uevent_suppress = 0; if (flags & ADDPART_FLAG_WHOLEDISK) { - err = device_create_file(&p->dev, &dev_attr_whole_disk); + err = device_create_file(pdev, &dev_attr_whole_disk); if (err) - goto out2; + goto out_del; } + /* everything is up and running, commence */ + INIT_RCU_HEAD(&p->rcu_head); + rcu_assign_pointer(ptbl->part[partno], p); + /* suppress uevent if the disk supresses it */ - if (!disk->dev.uevent_suppress) - kobject_uevent(&p->dev.kobj, KOBJ_ADD); + if (!ddev->uevent_suppress) + kobject_uevent(&pdev->kobj, KOBJ_ADD); return 0; -out2: - device_del(&p->dev); -out1: - put_device(&p->dev); - free_part_stats(p); -out0: +out_free: kfree(p); return err; +out_del: + kobject_put(p->holder_dir); + device_del(pdev); +out_put: + put_device(pdev); + blk_free_devt(devt); + return err; } /* Not exported, helper to add_disk(). */ void register_disk(struct gendisk *disk) { + struct device *ddev = disk_to_dev(disk); struct block_device *bdev; + struct disk_part_iter piter; + struct hd_struct *part; char *s; - int i; - struct hd_struct *p; int err; - disk->dev.parent = disk->driverfs_dev; - disk->dev.devt = MKDEV(disk->major, disk->first_minor); + ddev->parent = disk->driverfs_dev; - strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); + strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); /* ewww... some of these buggers have / in the name... */ - s = strchr(disk->dev.bus_id, '/'); + s = strchr(ddev->bus_id, '/'); if (s) *s = '!'; /* delay uevents, until we scanned partition table */ - disk->dev.uevent_suppress = 1; + ddev->uevent_suppress = 1; - if (device_add(&disk->dev)) + if (device_add(ddev)) return; #ifndef CONFIG_SYSFS_DEPRECATED - err = sysfs_create_link(block_depr, &disk->dev.kobj, - kobject_name(&disk->dev.kobj)); + err = sysfs_create_link(block_depr, &ddev->kobj, + kobject_name(&ddev->kobj)); if (err) { - device_del(&disk->dev); + device_del(ddev); return; } #endif - disk_sysfs_add_subdirs(disk); + disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); + disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); /* No minors to use for partitions */ - if (disk->minors == 1) + if (!disk_partitionable(disk)) goto exit; /* No such device (e.g., media were just removed) */ @@ -458,41 +482,57 @@ void register_disk(struct gendisk *disk) exit: /* announce disk after possible partitions are created */ - disk->dev.uevent_suppress = 0; - kobject_uevent(&disk->dev.kobj, KOBJ_ADD); + ddev->uevent_suppress = 0; + kobject_uevent(&ddev->kobj, KOBJ_ADD); /* announce possible partitions */ - for (i = 1; i < disk->minors; i++) { - p = disk->part[i-1]; - if (!p || !p->nr_sects) - continue; - kobject_uevent(&p->dev.kobj, KOBJ_ADD); - } + disk_part_iter_init(&piter, disk, 0); + while ((part = disk_part_iter_next(&piter))) + kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); + disk_part_iter_exit(&piter); } int rescan_partitions(struct gendisk *disk, struct block_device *bdev) { + struct disk_part_iter piter; + struct hd_struct *part; struct parsed_partitions *state; - int p, res; + int p, highest, res; if (bdev->bd_part_count) return -EBUSY; res = invalidate_partition(disk, 0); if (res) return res; - bdev->bd_invalidated = 0; - for (p = 1; p < disk->minors; p++) - delete_partition(disk, p); + + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) + delete_partition(disk, part->partno); + disk_part_iter_exit(&piter); + if (disk->fops->revalidate_disk) disk->fops->revalidate_disk(disk); + check_disk_size_change(disk, bdev); + bdev->bd_invalidated = 0; if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) return 0; if (IS_ERR(state)) /* I/O error reading the partition table */ return -EIO; /* tell userspace that the media / partition table may have changed */ - kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); + kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); + /* Detect the highest partition number and preallocate + * disk->part_tbl. This is an optimization and not strictly + * necessary. + */ + for (p = 1, highest = 0; p < state->limit; p++) + if (state->parts[p].size) + highest = p; + + disk_expand_part_tbl(disk, highest); + + /* add partitions */ for (p = 1; p < state->limit; p++) { sector_t size = state->parts[p].size; sector_t from = state->parts[p].from; @@ -541,25 +581,31 @@ EXPORT_SYMBOL(read_dev_sector); void del_gendisk(struct gendisk *disk) { - int p; + struct disk_part_iter piter; + struct hd_struct *part; /* invalidate stuff */ - for (p = disk->minors - 1; p > 0; p--) { - invalidate_partition(disk, p); - delete_partition(disk, p); + disk_part_iter_init(&piter, disk, + DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); + while ((part = disk_part_iter_next(&piter))) { + invalidate_partition(disk, part->partno); + delete_partition(disk, part->partno); } + disk_part_iter_exit(&piter); + invalidate_partition(disk, 0); - disk->capacity = 0; + blk_free_devt(disk_to_dev(disk)->devt); + set_capacity(disk, 0); disk->flags &= ~GENHD_FL_UP; unlink_gendisk(disk); - disk_stat_set_all(disk, 0); - disk->stamp = 0; + part_stat_set_all(&disk->part0, 0); + disk->part0.stamp = 0; - kobject_put(disk->holder_dir); + kobject_put(disk->part0.holder_dir); kobject_put(disk->slave_dir); disk->driverfs_dev = NULL; #ifndef CONFIG_SYSFS_DEPRECATED - sysfs_remove_link(block_depr, disk->dev.bus_id); + sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); #endif - device_del(&disk->dev); + device_del(disk_to_dev(disk)); } diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 17ae8ec..98dbe1a 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h @@ -5,15 +5,13 @@ * add_gd_partition adds a partitions details to the devices partition * description. */ -enum { MAX_PART = 256 }; - struct parsed_partitions { char name[BDEVNAME_SIZE]; struct { sector_t from; sector_t size; int flags; - } parts[MAX_PART]; + } parts[DISK_MAX_PARTS]; int next; int limit; }; diff --git a/fs/splice.c b/fs/splice.c index 1bbc6f4..a1e701c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -898,6 +898,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, if (unlikely(!(out->f_mode & FMODE_WRITE))) return -EBADF; + if (unlikely(out->f_flags & O_APPEND)) + return -EINVAL; + ret = rw_verify_area(WRITE, out, ppos, len); if (unlikely(ret < 0)) return ret; diff --git a/include/asm-mips/cevt-r4k.h b/include/asm-mips/cevt-r4k.h new file mode 100644 index 0000000..fa4328f --- /dev/null +++ b/include/asm-mips/cevt-r4k.h @@ -0,0 +1,46 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2008 Kevin D. Kissell + */ + +/* + * Definitions used for common event timer implementation + * for MIPS 4K-type processors and their MIPS MT variants. + * Avoids unsightly extern declarations in C files. + */ +#ifndef __ASM_CEVT_R4K_H +#define __ASM_CEVT_R4K_H + +DECLARE_PER_CPU(struct clock_event_device, mips_clockevent_device); + +void mips_event_handler(struct clock_event_device *dev); +int c0_compare_int_usable(void); +void mips_set_clock_mode(enum clock_event_mode, struct clock_event_device *); +irqreturn_t c0_compare_interrupt(int, void *); + +extern struct irqaction c0_compare_irqaction; +extern int cp0_timer_irq_installed; + +/* + * Possibly handle a performance counter interrupt. + * Return true if the timer interrupt should not be checked + */ + +static inline int handle_perf_irq(int r2) +{ + /* + * The performance counter overflow interrupt may be shared with the + * timer interrupt (cp0_perfcount_irq < 0). If it is and a + * performance counter has overflowed (perf_irq() == IRQ_HANDLED) + * and we can't reliably determine if a counter interrupt has also + * happened (!r2) then don't check for a timer interrupt. + */ + return (cp0_perfcount_irq < 0) && + perf_irq() == IRQ_HANDLED && + !r2; +} + +#endif /* __ASM_CEVT_R4K_H */ diff --git a/include/asm-mips/irqflags.h b/include/asm-mips/irqflags.h index 881e886..701ec0b 100644 --- a/include/asm-mips/irqflags.h +++ b/include/asm-mips/irqflags.h @@ -38,8 +38,17 @@ __asm__( " .set pop \n" " .endm"); +extern void smtc_ipi_replay(void); + static inline void raw_local_irq_enable(void) { +#ifdef CONFIG_MIPS_MT_SMTC + /* + * SMTC kernel needs to do a software replay of queued + * IPIs, at the cost of call overhead on each local_irq_enable() + */ + smtc_ipi_replay(); +#endif __asm__ __volatile__( "raw_local_irq_enable" : /* no outputs */ @@ -47,6 +56,7 @@ static inline void raw_local_irq_enable(void) : "memory"); } + /* * For cli() we have to insert nops to make sure that the new value * has actually arrived in the status register before the end of this @@ -185,15 +195,14 @@ __asm__( " .set pop \n" " .endm \n"); -extern void smtc_ipi_replay(void); static inline void raw_local_irq_restore(unsigned long flags) { unsigned long __tmp1; -#ifdef CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY +#ifdef CONFIG_MIPS_MT_SMTC /* - * CONFIG_MIPS_MT_SMTC_INSTANT_REPLAY does prompt replay of deferred + * SMTC kernel needs to do a software replay of queued * IPIs, at the cost of branch and call overhead on each * local_irq_restore() */ @@ -208,6 +217,17 @@ static inline void raw_local_irq_restore(unsigned long flags) : "memory"); } +static inline void __raw_local_irq_restore(unsigned long flags) +{ + unsigned long __tmp1; + + __asm__ __volatile__( + "raw_local_irq_restore\t%0" + : "=r" (__tmp1) + : "0" (flags) + : "memory"); +} + static inline int raw_irqs_disabled_flags(unsigned long flags) { #ifdef CONFIG_MIPS_MT_SMTC diff --git a/include/asm-mips/mipsregs.h b/include/asm-mips/mipsregs.h index a46f8e2..9798660 100644 --- a/include/asm-mips/mipsregs.h +++ b/include/asm-mips/mipsregs.h @@ -1462,7 +1462,7 @@ set_c0_##name(unsigned int set) \ { \ unsigned int res; \ unsigned int omt; \ - unsigned int flags; \ + unsigned long flags; \ \ local_irq_save(flags); \ omt = __dmt(); \ @@ -1480,7 +1480,7 @@ clear_c0_##name(unsigned int clear) \ { \ unsigned int res; \ unsigned int omt; \ - unsigned int flags; \ + unsigned long flags; \ \ local_irq_save(flags); \ omt = __dmt(); \ @@ -1498,7 +1498,7 @@ change_c0_##name(unsigned int change, unsigned int new) \ { \ unsigned int res; \ unsigned int omt; \ - unsigned int flags; \ + unsigned long flags; \ \ local_irq_save(flags); \ \ diff --git a/include/asm-mips/smtc.h b/include/asm-mips/smtc.h index 3639b28..ea60bf0 100644 --- a/include/asm-mips/smtc.h +++ b/include/asm-mips/smtc.h @@ -6,6 +6,7 @@ */ #include <asm/mips_mt.h> +#include <asm/smtc_ipi.h> /* * System-wide SMTC status information @@ -38,14 +39,15 @@ struct mm_struct; struct task_struct; void smtc_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu); - +void self_ipi(struct smtc_ipi *); void smtc_flush_tlb_asid(unsigned long asid); -extern int mipsmt_build_cpu_map(int startslot); -extern void mipsmt_prepare_cpus(void); +extern int smtc_build_cpu_map(int startslot); +extern void smtc_prepare_cpus(int cpus); extern void smtc_smp_finish(void); extern void smtc_boot_secondary(int cpu, struct task_struct *t); extern void smtc_cpus_done(void); + /* * Sharing the TLB between multiple VPEs means that the * "random" index selection function is not allowed to diff --git a/include/asm-mips/sn/mapped_kernel.h b/include/asm-mips/sn/mapped_kernel.h index c3dd5d0..721496a 100644 --- a/include/asm-mips/sn/mapped_kernel.h +++ b/include/asm-mips/sn/mapped_kernel.h @@ -5,6 +5,8 @@ #ifndef __ASM_SN_MAPPED_KERNEL_H #define __ASM_SN_MAPPED_KERNEL_H +#include <linux/mmzone.h> + /* * Note on how mapped kernels work: the text and data section is * compiled at cksseg segment (LOADADDR = 0xc001c000), and the @@ -29,10 +31,8 @@ #define MAPPED_ADDR_RO_TO_PHYS(x) (x - REP_BASE) #define MAPPED_ADDR_RW_TO_PHYS(x) (x - REP_BASE - 16777216) -#define MAPPED_KERN_RO_PHYSBASE(n) \ - (PLAT_NODE_DATA(n)->kern_vars.kv_ro_baseaddr) -#define MAPPED_KERN_RW_PHYSBASE(n) \ - (PLAT_NODE_DATA(n)->kern_vars.kv_rw_baseaddr) +#define MAPPED_KERN_RO_PHYSBASE(n) (hub_data(n)->kern_vars.kv_ro_baseaddr) +#define MAPPED_KERN_RW_PHYSBASE(n) (hub_data(n)->kern_vars.kv_rw_baseaddr) #define MAPPED_KERN_RO_TO_PHYS(x) \ ((unsigned long)MAPPED_ADDR_RO_TO_PHYS(x) | \ diff --git a/include/asm-mips/stackframe.h b/include/asm-mips/stackframe.h index 051e1af..4c37c4e5 100644 --- a/include/asm-mips/stackframe.h +++ b/include/asm-mips/stackframe.h @@ -297,14 +297,31 @@ #ifdef CONFIG_MIPS_MT_SMTC .set mips32r2 /* - * This may not really be necessary if ints are already - * inhibited here. + * We need to make sure the read-modify-write + * of Status below isn't perturbed by an interrupt + * or cross-TC access, so we need to do at least a DMT, + * protected by an interrupt-inhibit. But setting IXMT + * also creates a few-cycle window where an IPI could + * be queued and not be detected before potentially + * returning to a WAIT or user-mode loop. It must be + * replayed. + * + * We're in the middle of a context switch, and + * we can't dispatch it directly without trashing + * some registers, so we'll try to detect this unlikely + * case and program a software interrupt in the VPE, + * as would be done for a cross-VPE IPI. To accomodate + * the handling of that case, we're doing a DVPE instead + * of just a DMT here to protect against other threads. + * This is a lot of cruft to cover a tiny window. + * If you can find a better design, implement it! + * */ mfc0 v0, CP0_TCSTATUS ori v0, TCSTATUS_IXMT mtc0 v0, CP0_TCSTATUS _ehb - DMT 5 # dmt a1 + DVPE 5 # dvpe a1 jal mips_ihb #endif /* CONFIG_MIPS_MT_SMTC */ mfc0 a0, CP0_STATUS @@ -325,17 +342,50 @@ */ LONG_L v1, PT_TCSTATUS(sp) _ehb - mfc0 v0, CP0_TCSTATUS + mfc0 a0, CP0_TCSTATUS andi v1, TCSTATUS_IXMT - /* We know that TCStatua.IXMT should be set from above */ - xori v0, v0, TCSTATUS_IXMT - or v0, v0, v1 - mtc0 v0, CP0_TCSTATUS - _ehb - andi a1, a1, VPECONTROL_TE + bnez v1, 0f + +/* + * We'd like to detect any IPIs queued in the tiny window + * above and request an software interrupt to service them + * when we ERET. + * + * Computing the offset into the IPIQ array of the executing + * TC's IPI queue in-line would be tedious. We use part of + * the TCContext register to hold 16 bits of offset that we + * can add in-line to find the queue head. + */ + mfc0 v0, CP0_TCCONTEXT + la a2, IPIQ + srl v0, v0, 16 + addu a2, a2, v0 + LONG_L v0, 0(a2) + beqz v0, 0f +/* + * If we have a queue, provoke dispatch within the VPE by setting C_SW1 + */ + mfc0 v0, CP0_CAUSE + ori v0, v0, C_SW1 + mtc0 v0, CP0_CAUSE +0: + /* + * This test should really never branch but + * let's be prudent here. Having atomized + * the shared register modifications, we can + * now EVPE, and must do so before interrupts + * are potentially re-enabled. + */ + andi a1, a1, MVPCONTROL_EVP beqz a1, 1f - emt + evpe 1: + /* We know that TCStatua.IXMT should be set from above */ + xori a0, a0, TCSTATUS_IXMT + or a0, a0, v1 + mtc0 a0, CP0_TCSTATUS + _ehb + .set mips0 #endif /* CONFIG_MIPS_MT_SMTC */ LONG_L v1, PT_EPC(sp) diff --git a/include/asm-x86/a.out-core.h b/include/asm-x86/a.out-core.h index 714207a..f570576 100644 --- a/include/asm-x86/a.out-core.h +++ b/include/asm-x86/a.out-core.h @@ -9,8 +9,8 @@ * 2 of the Licence, or (at your option) any later version. */ -#ifndef _ASM_A_OUT_CORE_H -#define _ASM_A_OUT_CORE_H +#ifndef ASM_X86__A_OUT_CORE_H +#define ASM_X86__A_OUT_CORE_H #ifdef __KERNEL__ #ifdef CONFIG_X86_32 @@ -70,4 +70,4 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) #endif /* CONFIG_X86_32 */ #endif /* __KERNEL__ */ -#endif /* _ASM_A_OUT_CORE_H */ +#endif /* ASM_X86__A_OUT_CORE_H */ diff --git a/include/asm-x86/a.out.h b/include/asm-x86/a.out.h index 4684f97..0948748 100644 --- a/include/asm-x86/a.out.h +++ b/include/asm-x86/a.out.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_A_OUT_H -#define _ASM_X86_A_OUT_H +#ifndef ASM_X86__A_OUT_H +#define ASM_X86__A_OUT_H struct exec { @@ -17,4 +17,4 @@ struct exec #define N_DRSIZE(a) ((a).a_drsize) #define N_SYMSIZE(a) ((a).a_syms) -#endif /* _ASM_X86_A_OUT_H */ +#endif /* ASM_X86__A_OUT_H */ diff --git a/include/asm-x86/acpi.h b/include/asm-x86/acpi.h index 35d1743..392e173 100644 --- a/include/asm-x86/acpi.h +++ b/include/asm-x86/acpi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_ACPI_H -#define _ASM_X86_ACPI_H +#ifndef ASM_X86__ACPI_H +#define ASM_X86__ACPI_H /* * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> @@ -175,4 +175,4 @@ static inline void acpi_fake_nodes(const struct bootnode *fake_nodes, #define acpi_unlazy_tlb(x) leave_mm(x) -#endif /*__X86_ASM_ACPI_H*/ +#endif /* ASM_X86__ACPI_H */ diff --git a/include/asm-x86/agp.h b/include/asm-x86/agp.h index e4004a9..3617fd4 100644 --- a/include/asm-x86/agp.h +++ b/include/asm-x86/agp.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_AGP_H -#define _ASM_X86_AGP_H +#ifndef ASM_X86__AGP_H +#define ASM_X86__AGP_H #include <asm/pgtable.h> #include <asm/cacheflush.h> @@ -32,4 +32,4 @@ #define free_gatt_pages(table, order) \ free_pages((unsigned long)(table), (order)) -#endif +#endif /* ASM_X86__AGP_H */ diff --git a/include/asm-x86/alternative.h b/include/asm-x86/alternative.h index f6aa18e..22d3c98 100644 --- a/include/asm-x86/alternative.h +++ b/include/asm-x86/alternative.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_ALTERNATIVE_H -#define _ASM_X86_ALTERNATIVE_H +#ifndef ASM_X86__ALTERNATIVE_H +#define ASM_X86__ALTERNATIVE_H #include <linux/types.h> #include <linux/stddef.h> @@ -180,4 +180,4 @@ extern void add_nops(void *insns, unsigned int len); extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_early(void *addr, const void *opcode, size_t len); -#endif /* _ASM_X86_ALTERNATIVE_H */ +#endif /* ASM_X86__ALTERNATIVE_H */ diff --git a/include/asm-x86/amd_iommu.h b/include/asm-x86/amd_iommu.h index 30a1204..783f43e 100644 --- a/include/asm-x86/amd_iommu.h +++ b/include/asm-x86/amd_iommu.h @@ -17,8 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _ASM_X86_AMD_IOMMU_H -#define _ASM_X86_AMD_IOMMU_H +#ifndef ASM_X86__AMD_IOMMU_H +#define ASM_X86__AMD_IOMMU_H #ifdef CONFIG_AMD_IOMMU extern int amd_iommu_init(void); @@ -29,4 +29,4 @@ static inline int amd_iommu_init(void) { return -ENODEV; } static inline void amd_iommu_detect(void) { } #endif -#endif +#endif /* ASM_X86__AMD_IOMMU_H */ diff --git a/include/asm-x86/amd_iommu_types.h b/include/asm-x86/amd_iommu_types.h index dcc8120..1ffa4e5 100644 --- a/include/asm-x86/amd_iommu_types.h +++ b/include/asm-x86/amd_iommu_types.h @@ -17,8 +17,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef __AMD_IOMMU_TYPES_H__ -#define __AMD_IOMMU_TYPES_H__ +#ifndef ASM_X86__AMD_IOMMU_TYPES_H +#define ASM_X86__AMD_IOMMU_TYPES_H #include <linux/types.h> #include <linux/list.h> @@ -341,4 +341,4 @@ static inline u16 calc_devid(u8 bus, u8 devfn) return (((u16)bus) << 8) | devfn; } -#endif +#endif /* ASM_X86__AMD_IOMMU_TYPES_H */ diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h index 133c998..65590c9 100644 --- a/include/asm-x86/apic.h +++ b/include/asm-x86/apic.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_APIC_H -#define _ASM_X86_APIC_H +#ifndef ASM_X86__APIC_H +#define ASM_X86__APIC_H #include <linux/pm.h> #include <linux/delay.h> @@ -54,6 +54,11 @@ extern int disable_apic; #endif extern int is_vsmp_box(void); +extern void xapic_wait_icr_idle(void); +extern u32 safe_xapic_wait_icr_idle(void); +extern u64 xapic_icr_read(void); +extern void xapic_icr_write(u32, u32); +extern int setup_profiling_timer(unsigned int); static inline void native_apic_write(unsigned long reg, u32 v) { @@ -76,9 +81,7 @@ extern int get_physical_broadcast(void); static inline void ack_APIC_irq(void) { /* - * ack_APIC_irq() actually gets compiled as a single instruction: - * - a single rmw on Pentium/82489DX - * - a single write on P6+ cores (CONFIG_X86_GOOD_APIC) + * ack_APIC_irq() actually gets compiled as a single instruction * ... yummie. */ @@ -128,4 +131,4 @@ static inline void init_apic_mappings(void) { } #endif /* !CONFIG_X86_LOCAL_APIC */ -#endif /* __ASM_APIC_H */ +#endif /* ASM_X86__APIC_H */ diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h index 6b9008c..c40687d 100644 --- a/include/asm-x86/apicdef.h +++ b/include/asm-x86/apicdef.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_APICDEF_H -#define _ASM_X86_APICDEF_H +#ifndef ASM_X86__APICDEF_H +#define ASM_X86__APICDEF_H /* * Constants for various Intel APICs. (local APIC, IOAPIC, etc.) @@ -411,4 +411,4 @@ struct local_apic { #else #define BAD_APICID 0xFFFFu #endif -#endif +#endif /* ASM_X86__APICDEF_H */ diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h index 8411750..72adc3a 100644 --- a/include/asm-x86/arch_hooks.h +++ b/include/asm-x86/arch_hooks.h @@ -1,5 +1,5 @@ -#ifndef _ASM_ARCH_HOOKS_H -#define _ASM_ARCH_HOOKS_H +#ifndef ASM_X86__ARCH_HOOKS_H +#define ASM_X86__ARCH_HOOKS_H #include <linux/interrupt.h> @@ -25,4 +25,4 @@ extern void pre_time_init_hook(void); extern void time_init_hook(void); extern void mca_nmi_hook(void); -#endif +#endif /* ASM_X86__ARCH_HOOKS_H */ diff --git a/include/asm-x86/asm.h b/include/asm-x86/asm.h index 9722032..e1355f4 100644 --- a/include/asm-x86/asm.h +++ b/include/asm-x86/asm.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_ASM_H -#define _ASM_X86_ASM_H +#ifndef ASM_X86__ASM_H +#define ASM_X86__ASM_H #ifdef __ASSEMBLY__ # define __ASM_FORM(x) x @@ -20,17 +20,22 @@ #define _ASM_PTR __ASM_SEL(.long, .quad) #define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) -#define _ASM_MOV_UL __ASM_SIZE(mov) +#define _ASM_MOV __ASM_SIZE(mov) #define _ASM_INC __ASM_SIZE(inc) #define _ASM_DEC __ASM_SIZE(dec) #define _ASM_ADD __ASM_SIZE(add) #define _ASM_SUB __ASM_SIZE(sub) #define _ASM_XADD __ASM_SIZE(xadd) + #define _ASM_AX __ASM_REG(ax) #define _ASM_BX __ASM_REG(bx) #define _ASM_CX __ASM_REG(cx) #define _ASM_DX __ASM_REG(dx) +#define _ASM_SP __ASM_REG(sp) +#define _ASM_BP __ASM_REG(bp) +#define _ASM_SI __ASM_REG(si) +#define _ASM_DI __ASM_REG(di) /* Exception table entry */ # define _ASM_EXTABLE(from,to) \ @@ -39,4 +44,4 @@ _ASM_PTR #from "," #to "\n" \ " .previous\n" -#endif /* _ASM_X86_ASM_H */ +#endif /* ASM_X86__ASM_H */ diff --git a/include/asm-x86/atomic_32.h b/include/asm-x86/atomic_32.h index 21a4825..14d3f0b 100644 --- a/include/asm-x86/atomic_32.h +++ b/include/asm-x86/atomic_32.h @@ -1,5 +1,5 @@ -#ifndef __ARCH_I386_ATOMIC__ -#define __ARCH_I386_ATOMIC__ +#ifndef ASM_X86__ATOMIC_32_H +#define ASM_X86__ATOMIC_32_H #include <linux/compiler.h> #include <asm/processor.h> @@ -256,4 +256,4 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__after_atomic_inc() barrier() #include <asm-generic/atomic.h> -#endif +#endif /* ASM_X86__ATOMIC_32_H */ diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h index 91c7d03..2cb218c 100644 --- a/include/asm-x86/atomic_64.h +++ b/include/asm-x86/atomic_64.h @@ -1,5 +1,5 @@ -#ifndef __ARCH_X86_64_ATOMIC__ -#define __ARCH_X86_64_ATOMIC__ +#ifndef ASM_X86__ATOMIC_64_H +#define ASM_X86__ATOMIC_64_H #include <asm/alternative.h> #include <asm/cmpxchg.h> @@ -470,4 +470,4 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2) #define smp_mb__after_atomic_inc() barrier() #include <asm-generic/atomic.h> -#endif +#endif /* ASM_X86__ATOMIC_64_H */ diff --git a/include/asm-x86/auxvec.h b/include/asm-x86/auxvec.h index 87f5e6d..12c7cac 100644 --- a/include/asm-x86/auxvec.h +++ b/include/asm-x86/auxvec.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_AUXVEC_H -#define _ASM_X86_AUXVEC_H +#ifndef ASM_X86__AUXVEC_H +#define ASM_X86__AUXVEC_H /* * Architecture-neutral AT_ values in 0-17, leave some room * for more of them, start the x86-specific ones at 32. @@ -9,4 +9,4 @@ #endif #define AT_SYSINFO_EHDR 33 -#endif +#endif /* ASM_X86__AUXVEC_H */ diff --git a/include/asm-x86/bios_ebda.h b/include/asm-x86/bios_ebda.h index 0033e50..ec42ed8 100644 --- a/include/asm-x86/bios_ebda.h +++ b/include/asm-x86/bios_ebda.h @@ -1,5 +1,5 @@ -#ifndef _MACH_BIOS_EBDA_H -#define _MACH_BIOS_EBDA_H +#ifndef ASM_X86__BIOS_EBDA_H +#define ASM_X86__BIOS_EBDA_H #include <asm/io.h> @@ -16,4 +16,4 @@ static inline unsigned int get_bios_ebda(void) void reserve_ebda_region(void); -#endif /* _MACH_BIOS_EBDA_H */ +#endif /* ASM_X86__BIOS_EBDA_H */ diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index cfb2b64..61989b9 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_BITOPS_H -#define _ASM_X86_BITOPS_H +#ifndef ASM_X86__BITOPS_H +#define ASM_X86__BITOPS_H /* * Copyright 1992, Linus Torvalds. @@ -458,4 +458,4 @@ static inline void set_bit_string(unsigned long *bitmap, #include <asm-generic/bitops/minix.h> #endif /* __KERNEL__ */ -#endif /* _ASM_X86_BITOPS_H */ +#endif /* ASM_X86__BITOPS_H */ diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h index 2faed7e..825de5d 100644 --- a/include/asm-x86/boot.h +++ b/include/asm-x86/boot.h @@ -1,5 +1,5 @@ -#ifndef _ASM_BOOT_H -#define _ASM_BOOT_H +#ifndef ASM_X86__BOOT_H +#define ASM_X86__BOOT_H /* Don't touch these, unless you really know what you're doing. */ #define DEF_INITSEG 0x9000 @@ -25,4 +25,4 @@ #define BOOT_STACK_SIZE 0x1000 #endif -#endif /* _ASM_BOOT_H */ +#endif /* ASM_X86__BOOT_H */ diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h index ae22bdf..ccf027e 100644 --- a/include/asm-x86/bootparam.h +++ b/include/asm-x86/bootparam.h @@ -1,5 +1,5 @@ -#ifndef _ASM_BOOTPARAM_H -#define _ASM_BOOTPARAM_H +#ifndef ASM_X86__BOOTPARAM_H +#define ASM_X86__BOOTPARAM_H #include <linux/types.h> #include <linux/screen_info.h> @@ -108,4 +108,4 @@ struct boot_params { __u8 _pad9[276]; /* 0xeec */ } __attribute__((packed)); -#endif /* _ASM_BOOTPARAM_H */ +#endif /* ASM_X86__BOOTPARAM_H */ diff --git a/include/asm-x86/bug.h b/include/asm-x86/bug.h index b69aa64..91ad43a 100644 --- a/include/asm-x86/bug.h +++ b/include/asm-x86/bug.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_BUG_H -#define _ASM_X86_BUG_H +#ifndef ASM_X86__BUG_H +#define ASM_X86__BUG_H #ifdef CONFIG_BUG #define HAVE_ARCH_BUG @@ -36,4 +36,4 @@ do { \ #endif /* !CONFIG_BUG */ #include <asm-generic/bug.h> -#endif +#endif /* ASM_X86__BUG_H */ diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h index 021cbdd..4761c46 100644 --- a/include/asm-x86/bugs.h +++ b/include/asm-x86/bugs.h @@ -1,7 +1,7 @@ -#ifndef _ASM_X86_BUGS_H -#define _ASM_X86_BUGS_H +#ifndef ASM_X86__BUGS_H +#define ASM_X86__BUGS_H extern void check_bugs(void); int ppro_with_ram_bug(void); -#endif /* _ASM_X86_BUGS_H */ +#endif /* ASM_X86__BUGS_H */ diff --git a/include/asm-x86/byteorder.h b/include/asm-x86/byteorder.h index e02ae2d8..722f27d 100644 --- a/include/asm-x86/byteorder.h +++ b/include/asm-x86/byteorder.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_BYTEORDER_H -#define _ASM_X86_BYTEORDER_H +#ifndef ASM_X86__BYTEORDER_H +#define ASM_X86__BYTEORDER_H #include <asm/types.h> #include <linux/compiler.h> @@ -78,4 +78,4 @@ static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) #include <linux/byteorder/little_endian.h> -#endif /* _ASM_X86_BYTEORDER_H */ +#endif /* ASM_X86__BYTEORDER_H */ diff --git a/include/asm-x86/cache.h b/include/asm-x86/cache.h index 1e0bac8..ea3f1cc 100644 --- a/include/asm-x86/cache.h +++ b/include/asm-x86/cache.h @@ -1,5 +1,5 @@ -#ifndef _ARCH_X86_CACHE_H -#define _ARCH_X86_CACHE_H +#ifndef ASM_X86__CACHE_H +#define ASM_X86__CACHE_H /* L1 cache line size */ #define L1_CACHE_SHIFT (CONFIG_X86_L1_CACHE_SHIFT) @@ -17,4 +17,4 @@ #endif #endif -#endif +#endif /* ASM_X86__CACHE_H */ diff --git a/include/asm-x86/cacheflush.h b/include/asm-x86/cacheflush.h index f4c0ab5..59859cb 100644 --- a/include/asm-x86/cacheflush.h +++ b/include/asm-x86/cacheflush.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_CACHEFLUSH_H -#define _ASM_X86_CACHEFLUSH_H +#ifndef ASM_X86__CACHEFLUSH_H +#define ASM_X86__CACHEFLUSH_H /* Keep includes the same across arches. */ #include <linux/mm.h> @@ -112,4 +112,4 @@ static inline int rodata_test(void) } #endif -#endif +#endif /* ASM_X86__CACHEFLUSH_H */ diff --git a/include/asm-x86/calgary.h b/include/asm-x86/calgary.h index 67f6040..933fd27 100644 --- a/include/asm-x86/calgary.h +++ b/include/asm-x86/calgary.h @@ -21,8 +21,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _ASM_X86_64_CALGARY_H -#define _ASM_X86_64_CALGARY_H +#ifndef ASM_X86__CALGARY_H +#define ASM_X86__CALGARY_H #include <linux/spinlock.h> #include <linux/device.h> @@ -69,4 +69,4 @@ static inline int calgary_iommu_init(void) { return 1; } static inline void detect_calgary(void) { return; } #endif -#endif /* _ASM_X86_64_CALGARY_H */ +#endif /* ASM_X86__CALGARY_H */ diff --git a/include/asm-x86/checksum_32.h b/include/asm-x86/checksum_32.h index 52bbb0d..d041e8c 100644 --- a/include/asm-x86/checksum_32.h +++ b/include/asm-x86/checksum_32.h @@ -1,5 +1,5 @@ -#ifndef _I386_CHECKSUM_H -#define _I386_CHECKSUM_H +#ifndef ASM_X86__CHECKSUM_32_H +#define ASM_X86__CHECKSUM_32_H #include <linux/in6.h> @@ -186,4 +186,4 @@ static inline __wsum csum_and_copy_to_user(const void *src, return (__force __wsum)-1; /* invalid checksum */ } -#endif +#endif /* ASM_X86__CHECKSUM_32_H */ diff --git a/include/asm-x86/checksum_64.h b/include/asm-x86/checksum_64.h index 8bd861c..110f403 100644 --- a/include/asm-x86/checksum_64.h +++ b/include/asm-x86/checksum_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_CHECKSUM_H -#define _X86_64_CHECKSUM_H +#ifndef ASM_X86__CHECKSUM_64_H +#define ASM_X86__CHECKSUM_64_H /* * Checksums for x86-64 @@ -188,4 +188,4 @@ static inline unsigned add32_with_carry(unsigned a, unsigned b) return a; } -#endif +#endif /* ASM_X86__CHECKSUM_64_H */ diff --git a/include/asm-x86/cmpxchg_32.h b/include/asm-x86/cmpxchg_32.h index bf5a69d..0622e45 100644 --- a/include/asm-x86/cmpxchg_32.h +++ b/include/asm-x86/cmpxchg_32.h @@ -1,5 +1,5 @@ -#ifndef __ASM_CMPXCHG_H -#define __ASM_CMPXCHG_H +#ifndef ASM_X86__CMPXCHG_32_H +#define ASM_X86__CMPXCHG_32_H #include <linux/bitops.h> /* for LOCK_PREFIX */ @@ -341,4 +341,4 @@ extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); #endif -#endif +#endif /* ASM_X86__CMPXCHG_32_H */ diff --git a/include/asm-x86/cmpxchg_64.h b/include/asm-x86/cmpxchg_64.h index 17463cc..63c1a5e 100644 --- a/include/asm-x86/cmpxchg_64.h +++ b/include/asm-x86/cmpxchg_64.h @@ -1,5 +1,5 @@ -#ifndef __ASM_CMPXCHG_H -#define __ASM_CMPXCHG_H +#ifndef ASM_X86__CMPXCHG_64_H +#define ASM_X86__CMPXCHG_64_H #include <asm/alternative.h> /* Provides LOCK_PREFIX */ @@ -182,4 +182,4 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr, cmpxchg_local((ptr), (o), (n)); \ }) -#endif +#endif /* ASM_X86__CMPXCHG_64_H */ diff --git a/include/asm-x86/compat.h b/include/asm-x86/compat.h index 1793ac3..6732b15 100644 --- a/include/asm-x86/compat.h +++ b/include/asm-x86/compat.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_COMPAT_H -#define _ASM_X86_64_COMPAT_H +#ifndef ASM_X86__COMPAT_H +#define ASM_X86__COMPAT_H /* * Architecture specific compatibility types @@ -215,4 +215,4 @@ static inline int is_compat_task(void) return current_thread_info()->status & TS_COMPAT; } -#endif /* _ASM_X86_64_COMPAT_H */ +#endif /* ASM_X86__COMPAT_H */ diff --git a/include/asm-x86/cpu.h b/include/asm-x86/cpu.h index 73f2ea8..83a1150 100644 --- a/include/asm-x86/cpu.h +++ b/include/asm-x86/cpu.h @@ -1,5 +1,5 @@ -#ifndef _ASM_I386_CPU_H_ -#define _ASM_I386_CPU_H_ +#ifndef ASM_X86__CPU_H +#define ASM_X86__CPU_H #include <linux/device.h> #include <linux/cpu.h> @@ -17,4 +17,4 @@ extern void arch_unregister_cpu(int); #endif DECLARE_PER_CPU(int, cpu_state); -#endif /* _ASM_I386_CPU_H_ */ +#endif /* ASM_X86__CPU_H */ diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h index cfcfb0a..250fa0c 100644 --- a/include/asm-x86/cpufeature.h +++ b/include/asm-x86/cpufeature.h @@ -1,8 +1,8 @@ /* * Defines x86 CPU feature bits */ -#ifndef _ASM_X86_CPUFEATURE_H -#define _ASM_X86_CPUFEATURE_H +#ifndef ASM_X86__CPUFEATURE_H +#define ASM_X86__CPUFEATURE_H #include <asm/required-features.h> @@ -224,4 +224,4 @@ extern const char * const x86_power_flags[32]; #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ -#endif /* _ASM_X86_CPUFEATURE_H */ +#endif /* ASM_X86__CPUFEATURE_H */ diff --git a/include/asm-x86/current.h b/include/asm-x86/current.h index 7515c19..a863ead 100644 --- a/include/asm-x86/current.h +++ b/include/asm-x86/current.h @@ -1,5 +1,5 @@ -#ifndef _X86_CURRENT_H -#define _X86_CURRENT_H +#ifndef ASM_X86__CURRENT_H +#define ASM_X86__CURRENT_H #ifdef CONFIG_X86_32 #include <linux/compiler.h> @@ -36,4 +36,4 @@ static __always_inline struct task_struct *get_current(void) #define current get_current() -#endif /* X86_CURRENT_H */ +#endif /* ASM_X86__CURRENT_H */ diff --git a/include/asm-x86/debugreg.h b/include/asm-x86/debugreg.h index c6344d5..ecb6907 100644 --- a/include/asm-x86/debugreg.h +++ b/include/asm-x86/debugreg.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DEBUGREG_H -#define _ASM_X86_DEBUGREG_H +#ifndef ASM_X86__DEBUGREG_H +#define ASM_X86__DEBUGREG_H /* Indicate the register numbers for a number of the specific @@ -67,4 +67,4 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ -#endif +#endif /* ASM_X86__DEBUGREG_H */ diff --git a/include/asm-x86/delay.h b/include/asm-x86/delay.h index 409a649..8a0da95 100644 --- a/include/asm-x86/delay.h +++ b/include/asm-x86/delay.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DELAY_H -#define _ASM_X86_DELAY_H +#ifndef ASM_X86__DELAY_H +#define ASM_X86__DELAY_H /* * Copyright (C) 1993 Linus Torvalds @@ -28,4 +28,4 @@ extern void __delay(unsigned long loops); void use_tsc_delay(void); -#endif /* _ASM_X86_DELAY_H */ +#endif /* ASM_X86__DELAY_H */ diff --git a/include/asm-x86/desc.h b/include/asm-x86/desc.h index a44c4dc..b73fea5 100644 --- a/include/asm-x86/desc.h +++ b/include/asm-x86/desc.h @@ -1,5 +1,5 @@ -#ifndef _ASM_DESC_H_ -#define _ASM_DESC_H_ +#ifndef ASM_X86__DESC_H +#define ASM_X86__DESC_H #ifndef __ASSEMBLY__ #include <asm/desc_defs.h> @@ -397,4 +397,4 @@ static inline void set_system_gate_ist(int n, void *addr, unsigned ist) #endif /* __ASSEMBLY__ */ -#endif +#endif /* ASM_X86__DESC_H */ diff --git a/include/asm-x86/desc_defs.h b/include/asm-x86/desc_defs.h index f7bacf3..b881db6 100644 --- a/include/asm-x86/desc_defs.h +++ b/include/asm-x86/desc_defs.h @@ -1,6 +1,6 @@ /* Written 2000 by Andi Kleen */ -#ifndef __ARCH_DESC_DEFS_H -#define __ARCH_DESC_DEFS_H +#ifndef ASM_X86__DESC_DEFS_H +#define ASM_X86__DESC_DEFS_H /* * Segment descriptor structure definitions, usable from both x86_64 and i386 @@ -92,4 +92,4 @@ struct desc_ptr { #endif /* !__ASSEMBLY__ */ -#endif +#endif /* ASM_X86__DESC_DEFS_H */ diff --git a/include/asm-x86/device.h b/include/asm-x86/device.h index 3c034f4..1bece04 100644 --- a/include/asm-x86/device.h +++ b/include/asm-x86/device.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DEVICE_H -#define _ASM_X86_DEVICE_H +#ifndef ASM_X86__DEVICE_H +#define ASM_X86__DEVICE_H struct dev_archdata { #ifdef CONFIG_ACPI @@ -13,4 +13,4 @@ struct dma_mapping_ops *dma_ops; #endif }; -#endif /* _ASM_X86_DEVICE_H */ +#endif /* ASM_X86__DEVICE_H */ diff --git a/include/asm-x86/div64.h b/include/asm-x86/div64.h index 9a2d644..f9530f2 100644 --- a/include/asm-x86/div64.h +++ b/include/asm-x86/div64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DIV64_H -#define _ASM_X86_DIV64_H +#ifndef ASM_X86__DIV64_H +#define ASM_X86__DIV64_H #ifdef CONFIG_X86_32 @@ -57,4 +57,4 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) # include <asm-generic/div64.h> #endif /* CONFIG_X86_32 */ -#endif /* _ASM_X86_DIV64_H */ +#endif /* ASM_X86__DIV64_H */ diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index ad9cd6d..5d200e7 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -1,5 +1,5 @@ -#ifndef _ASM_DMA_MAPPING_H_ -#define _ASM_DMA_MAPPING_H_ +#ifndef ASM_X86__DMA_MAPPING_H +#define ASM_X86__DMA_MAPPING_H /* * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for @@ -250,4 +250,4 @@ static inline int dma_get_cache_alignment(void) #define dma_is_consistent(d, h) (1) #include <asm-generic/dma-coherent.h> -#endif +#endif /* ASM_X86__DMA_MAPPING_H */ diff --git a/include/asm-x86/dma.h b/include/asm-x86/dma.h index ca1098a..c9f7a4e 100644 --- a/include/asm-x86/dma.h +++ b/include/asm-x86/dma.h @@ -5,8 +5,8 @@ * and John Boyd, Nov. 1992. */ -#ifndef _ASM_X86_DMA_H -#define _ASM_X86_DMA_H +#ifndef ASM_X86__DMA_H +#define ASM_X86__DMA_H #include <linux/spinlock.h> /* And spinlocks */ #include <asm/io.h> /* need byte IO */ @@ -315,4 +315,4 @@ extern int isa_dma_bridge_buggy; #define isa_dma_bridge_buggy (0) #endif -#endif /* _ASM_X86_DMA_H */ +#endif /* ASM_X86__DMA_H */ diff --git a/include/asm-x86/dmi.h b/include/asm-x86/dmi.h index 58a8657..1cff6fe 100644 --- a/include/asm-x86/dmi.h +++ b/include/asm-x86/dmi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_DMI_H -#define _ASM_X86_DMI_H +#ifndef ASM_X86__DMI_H +#define ASM_X86__DMI_H #include <asm/io.h> @@ -23,4 +23,4 @@ static inline void *dmi_alloc(unsigned len) #define dmi_ioremap early_ioremap #define dmi_iounmap early_iounmap -#endif +#endif /* ASM_X86__DMI_H */ diff --git a/include/asm-x86/ds.h b/include/asm-x86/ds.h index 7881368..c3c953a 100644 --- a/include/asm-x86/ds.h +++ b/include/asm-x86/ds.h @@ -2,71 +2,237 @@ * Debug Store (DS) support * * This provides a low-level interface to the hardware's Debug Store - * feature that is used for last branch recording (LBR) and + * feature that is used for branch trace store (BTS) and * precise-event based sampling (PEBS). * - * Different architectures use a different DS layout/pointer size. - * The below functions therefore work on a void*. + * It manages: + * - per-thread and per-cpu allocation of BTS and PEBS + * - buffer memory allocation (optional) + * - buffer overflow handling + * - buffer access * + * It assumes: + * - get_task_struct on all parameter tasks + * - current is allowed to trace parameter tasks * - * Since there is no user for PEBS, yet, only LBR (or branch - * trace store, BTS) is supported. * - * - * Copyright (C) 2007 Intel Corporation. - * Markus Metzger <markus.t.metzger@intel.com>, Dec 2007 + * Copyright (C) 2007-2008 Intel Corporation. + * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 */ -#ifndef _ASM_X86_DS_H -#define _ASM_X86_DS_H +#ifndef ASM_X86__DS_H +#define ASM_X86__DS_H + +#ifdef CONFIG_X86_DS #include <linux/types.h> #include <linux/init.h> -struct cpuinfo_x86; +struct task_struct; -/* a branch trace record entry +/* + * Request BTS or PEBS + * + * Due to alignement constraints, the actual buffer may be slightly + * smaller than the requested or provided buffer. * - * In order to unify the interface between various processor versions, - * we use the below data structure for all processors. + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to request recording for; + * NULL for per-cpu recording on the current cpu + * base: the base pointer for the (non-pageable) buffer; + * NULL if buffer allocation requested + * size: the size of the requested or provided buffer + * ovfl: pointer to a function to be called on buffer overflow; + * NULL if cyclic buffer requested */ -enum bts_qualifier { - BTS_INVALID = 0, - BTS_BRANCH, - BTS_TASK_ARRIVES, - BTS_TASK_DEPARTS -}; +typedef void (*ds_ovfl_callback_t)(struct task_struct *); +extern int ds_request_bts(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); +extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, + ds_ovfl_callback_t ovfl); -struct bts_struct { - u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - u64 from_ip; - u64 to_ip; - } lbr; - /* BTS_TASK_ARRIVES or - BTS_TASK_DEPARTS */ - u64 jiffies; - } variant; +/* + * Release BTS or PEBS resources + * + * Frees buffers allocated on ds_request. + * + * Returns 0 on success; -Eerrno otherwise + * + * task: the task to release resources for; + * NULL to release resources for the current cpu + */ +extern int ds_release_bts(struct task_struct *task); +extern int ds_release_pebs(struct task_struct *task); + +/* + * Return the (array) index of the write pointer. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_index(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); + +/* + * Return the (array) index one record beyond the end of the array. + * (assuming an array of BTS/PEBS records) + * + * Returns -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * pos (out): if not NULL, will hold the result + */ +extern int ds_get_bts_end(struct task_struct *task, size_t *pos); +extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); + +/* + * Provide a pointer to the BTS/PEBS record at parameter index. + * (assuming an array of BTS/PEBS records) + * + * The pointer points directly into the buffer. The user is + * responsible for copying the record. + * + * Returns the size of a single record on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * index: the index of the requested record + * record (out): pointer to the requested record + */ +extern int ds_access_bts(struct task_struct *task, + size_t index, const void **record); +extern int ds_access_pebs(struct task_struct *task, + size_t index, const void **record); + +/* + * Write one or more BTS/PEBS records at the write pointer index and + * advance the write pointer. + * + * If size is not a multiple of the record size, trailing bytes are + * zeroed out. + * + * May result in one or more overflow notifications. + * + * If called during overflow handling, that is, with index >= + * interrupt threshold, the write will wrap around. + * + * An overflow notification is given if and when the interrupt + * threshold is reached during or after the write. + * + * Returns the number of bytes written or -Eerrno. + * + * task: the task to access; + * NULL to access the current cpu + * buffer: the buffer to write + * size: the size of the buffer + */ +extern int ds_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Same as ds_write_bts/pebs, but omit ownership checks. + * + * This is needed to have some other task than the owner of the + * BTS/PEBS buffer or the parameter task itself write into the + * respective buffer. + */ +extern int ds_unchecked_write_bts(struct task_struct *task, + const void *buffer, size_t size); +extern int ds_unchecked_write_pebs(struct task_struct *task, + const void *buffer, size_t size); + +/* + * Reset the write pointer of the BTS/PEBS buffer. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_reset_bts(struct task_struct *task); +extern int ds_reset_pebs(struct task_struct *task); + +/* + * Clear the BTS/PEBS buffer and reset the write pointer. + * The entire buffer will be zeroed out. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + */ +extern int ds_clear_bts(struct task_struct *task); +extern int ds_clear_pebs(struct task_struct *task); + +/* + * Provide the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value (out): the counter reset value + */ +extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); + +/* + * Set the PEBS counter reset value. + * + * Returns 0 on success; -Eerrno on error + * + * task: the task to access; + * NULL to access the current cpu + * value: the new counter reset value + */ +extern int ds_set_pebs_reset(struct task_struct *task, u64 value); + +/* + * Initialization + */ +struct cpuinfo_x86; +extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); + + + +/* + * The DS context - part of struct thread_struct. + */ +struct ds_context { + /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ + unsigned char *ds; + /* the owner of the BTS and PEBS configuration, respectively */ + struct task_struct *owner[2]; + /* buffer overflow notification function for BTS and PEBS */ + ds_ovfl_callback_t callback[2]; + /* the original buffer address */ + void *buffer[2]; + /* the number of allocated pages for on-request allocated buffers */ + unsigned int pages[2]; + /* use count */ + unsigned long count; + /* a pointer to the context location inside the thread_struct + * or the per_cpu context array */ + struct ds_context **this; + /* a pointer to the task owning this context, or NULL, if the + * context is owned by a cpu */ + struct task_struct *task; }; -/* Overflow handling mechanisms */ -#define DS_O_SIGNAL 1 /* send overflow signal */ -#define DS_O_WRAP 2 /* wrap around */ - -extern int ds_allocate(void **, size_t); -extern int ds_free(void **); -extern int ds_get_bts_size(void *); -extern int ds_get_bts_end(void *); -extern int ds_get_bts_index(void *); -extern int ds_set_overflow(void *, int); -extern int ds_get_overflow(void *); -extern int ds_clear(void *); -extern int ds_read_bts(void *, int, struct bts_struct *); -extern int ds_write_bts(void *, const struct bts_struct *); -extern unsigned long ds_debugctl_mask(void); -extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *c); - -#endif /* _ASM_X86_DS_H */ +/* called by exit_thread() to free leftover contexts */ +extern void ds_free(struct ds_context *context); + +#else /* CONFIG_X86_DS */ + +#define ds_init_intel(config) do {} while (0) + +#endif /* CONFIG_X86_DS */ +#endif /* ASM_X86__DS_H */ diff --git a/include/asm-x86/dwarf2.h b/include/asm-x86/dwarf2.h index 738bb9f..21d1bc3 100644 --- a/include/asm-x86/dwarf2.h +++ b/include/asm-x86/dwarf2.h @@ -1,5 +1,5 @@ -#ifndef _DWARF2_H -#define _DWARF2_H +#ifndef ASM_X86__DWARF2_H +#define ASM_X86__DWARF2_H #ifndef __ASSEMBLY__ #warning "asm/dwarf2.h should be only included in pure assembly files" @@ -58,4 +58,4 @@ #endif -#endif +#endif /* ASM_X86__DWARF2_H */ diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h index 16a31e2..f52daf1 100644 --- a/include/asm-x86/e820.h +++ b/include/asm-x86/e820.h @@ -1,5 +1,5 @@ -#ifndef __ASM_E820_H -#define __ASM_E820_H +#ifndef ASM_X86__E820_H +#define ASM_X86__E820_H #define E820MAP 0x2d0 /* our map */ #define E820MAX 128 /* number of entries in E820MAP */ @@ -64,6 +64,7 @@ struct e820map { extern struct e820map e820; extern struct e820map e820_saved; +extern unsigned long pci_mem_start; extern int e820_any_mapped(u64 start, u64 end, unsigned type); extern int e820_all_mapped(u64 start, u64 end, unsigned type); extern void e820_add_region(u64 start, u64 size, int type); @@ -140,4 +141,4 @@ extern char *memory_setup(void); #define HIGH_MEMORY (1024*1024) #endif /* __KERNEL__ */ -#endif /* __ASM_E820_H */ +#endif /* ASM_X86__E820_H */ diff --git a/include/asm-x86/edac.h b/include/asm-x86/edac.h index a8088f6..9493c5b 100644 --- a/include/asm-x86/edac.h +++ b/include/asm-x86/edac.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_EDAC_H -#define _ASM_X86_EDAC_H +#ifndef ASM_X86__EDAC_H +#define ASM_X86__EDAC_H /* ECC atomic, DMA, SMP and interrupt safe scrub function */ @@ -15,4 +15,4 @@ static inline void atomic_scrub(void *va, u32 size) asm volatile("lock; addl $0, %0"::"m" (*virt_addr)); } -#endif +#endif /* ASM_X86__EDAC_H */ diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h index d4f2b0a..ed2de22 100644 --- a/include/asm-x86/efi.h +++ b/include/asm-x86/efi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_EFI_H -#define _ASM_X86_EFI_H +#ifndef ASM_X86__EFI_H +#define ASM_X86__EFI_H #ifdef CONFIG_X86_32 @@ -94,4 +94,4 @@ extern void efi_reserve_early(void); extern void efi_call_phys_prelog(void); extern void efi_call_phys_epilog(void); -#endif +#endif /* ASM_X86__EFI_H */ diff --git a/include/asm-x86/elf.h b/include/asm-x86/elf.h index 7be4733..5c4745b 100644 --- a/include/asm-x86/elf.h +++ b/include/asm-x86/elf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_ELF_H -#define _ASM_X86_ELF_H +#ifndef ASM_X86__ELF_H +#define ASM_X86__ELF_H /* * ELF register definitions.. @@ -148,8 +148,9 @@ do { \ static inline void start_ia32_thread(struct pt_regs *regs, u32 ip, u32 sp) { - asm volatile("movl %0,%%fs" :: "r" (0)); - asm volatile("movl %0,%%es; movl %0,%%ds" : : "r" (__USER32_DS)); + loadsegment(fs, 0); + loadsegment(ds, __USER32_DS); + loadsegment(es, __USER32_DS); load_gs_index(0); regs->ip = ip; regs->sp = sp; @@ -332,4 +333,4 @@ extern int syscall32_setup_pages(struct linux_binprm *, int exstack); extern unsigned long arch_randomize_brk(struct mm_struct *mm); #define arch_randomize_brk arch_randomize_brk -#endif +#endif /* ASM_X86__ELF_H */ diff --git a/include/asm-x86/emergency-restart.h b/include/asm-x86/emergency-restart.h index 8e6aef1..190d0d8 100644 --- a/include/asm-x86/emergency-restart.h +++ b/include/asm-x86/emergency-restart.h @@ -1,5 +1,5 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H +#ifndef ASM_X86__EMERGENCY_RESTART_H +#define ASM_X86__EMERGENCY_RESTART_H enum reboot_type { BOOT_TRIPLE = 't', @@ -15,4 +15,4 @@ extern enum reboot_type reboot_type; extern void machine_emergency_restart(void); -#endif /* _ASM_EMERGENCY_RESTART_H */ +#endif /* ASM_X86__EMERGENCY_RESTART_H */ diff --git a/include/asm-x86/fb.h b/include/asm-x86/fb.h index 5301846..aca38dbd 100644 --- a/include/asm-x86/fb.h +++ b/include/asm-x86/fb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_FB_H -#define _ASM_X86_FB_H +#ifndef ASM_X86__FB_H +#define ASM_X86__FB_H #include <linux/fb.h> #include <linux/fs.h> @@ -18,4 +18,4 @@ extern int fb_is_primary_device(struct fb_info *info); static inline int fb_is_primary_device(struct fb_info *info) { return 0; } #endif -#endif /* _ASM_X86_FB_H */ +#endif /* ASM_X86__FB_H */ diff --git a/include/asm-x86/fixmap.h b/include/asm-x86/fixmap.h index 44d4f82..78e33a1 100644 --- a/include/asm-x86/fixmap.h +++ b/include/asm-x86/fixmap.h @@ -1,5 +1,5 @@ -#ifndef _ASM_FIXMAP_H -#define _ASM_FIXMAP_H +#ifndef ASM_X86__FIXMAP_H +#define ASM_X86__FIXMAP_H #ifdef CONFIG_X86_32 # include "fixmap_32.h" @@ -65,4 +65,4 @@ static inline unsigned long virt_to_fix(const unsigned long vaddr) BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); return __virt_to_fix(vaddr); } -#endif +#endif /* ASM_X86__FIXMAP_H */ diff --git a/include/asm-x86/fixmap_32.h b/include/asm-x86/fixmap_32.h index f1ac2b2..784e3e7 100644 --- a/include/asm-x86/fixmap_32.h +++ b/include/asm-x86/fixmap_32.h @@ -10,8 +10,8 @@ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 */ -#ifndef _ASM_FIXMAP_32_H -#define _ASM_FIXMAP_32_H +#ifndef ASM_X86__FIXMAP_32_H +#define ASM_X86__FIXMAP_32_H /* used by vmalloc.c, vsyscall.lds.S. @@ -120,4 +120,4 @@ extern void reserve_top_address(unsigned long reserve); #define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) #endif /* !__ASSEMBLY__ */ -#endif +#endif /* ASM_X86__FIXMAP_32_H */ diff --git a/include/asm-x86/fixmap_64.h b/include/asm-x86/fixmap_64.h index 00f3d74..dafb24b 100644 --- a/include/asm-x86/fixmap_64.h +++ b/include/asm-x86/fixmap_64.h @@ -8,8 +8,8 @@ * Copyright (C) 1998 Ingo Molnar */ -#ifndef _ASM_FIXMAP_64_H -#define _ASM_FIXMAP_64_H +#ifndef ASM_X86__FIXMAP_64_H +#define ASM_X86__FIXMAP_64_H #include <linux/kernel.h> #include <asm/acpi.h> @@ -80,4 +80,4 @@ enum fixed_addresses { #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) #define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) -#endif +#endif /* ASM_X86__FIXMAP_64_H */ diff --git a/include/asm-x86/floppy.h b/include/asm-x86/floppy.h index dbe82a5..7d83a3a 100644 --- a/include/asm-x86/floppy.h +++ b/include/asm-x86/floppy.h @@ -7,8 +7,8 @@ * * Copyright (C) 1995 */ -#ifndef _ASM_X86_FLOPPY_H -#define _ASM_X86_FLOPPY_H +#ifndef ASM_X86__FLOPPY_H +#define ASM_X86__FLOPPY_H #include <linux/vmalloc.h> @@ -278,4 +278,4 @@ static int FDC2 = -1; #define EXTRA_FLOPPY_PARAMS -#endif /* _ASM_X86_FLOPPY_H */ +#endif /* ASM_X86__FLOPPY_H */ diff --git a/include/asm-x86/ftrace.h b/include/asm-x86/ftrace.h index 5c68b32..be0e004 100644 --- a/include/asm-x86/ftrace.h +++ b/include/asm-x86/ftrace.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_FTRACE -#define _ASM_X86_FTRACE +#ifndef ASM_X86__FTRACE_H +#define ASM_X86__FTRACE_H #ifdef CONFIG_FTRACE #define MCOUNT_ADDR ((long)(mcount)) @@ -11,4 +11,4 @@ extern void mcount(void); #endif /* CONFIG_FTRACE */ -#endif /* _ASM_X86_FTRACE */ +#endif /* ASM_X86__FTRACE_H */ diff --git a/include/asm-x86/futex.h b/include/asm-x86/futex.h index e7a76b3..06b924e 100644 --- a/include/asm-x86/futex.h +++ b/include/asm-x86/futex.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_FUTEX_H -#define _ASM_X86_FUTEX_H +#ifndef ASM_X86__FUTEX_H +#define ASM_X86__FUTEX_H #ifdef __KERNEL__ @@ -25,7 +25,7 @@ asm volatile("1:\tmovl %2, %0\n" \ "\tmovl\t%0, %3\n" \ "\t" insn "\n" \ - "2:\tlock; cmpxchgl %3, %2\n" \ + "2:\t" LOCK_PREFIX "cmpxchgl %3, %2\n" \ "\tjnz\t1b\n" \ "3:\t.section .fixup,\"ax\"\n" \ "4:\tmov\t%5, %1\n" \ @@ -64,7 +64,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_ADD: - __futex_atomic_op1("lock; xaddl %0, %2", ret, oldval, + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, uaddr, oparg); break; case FUTEX_OP_OR: @@ -122,7 +122,7 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; - asm volatile("1:\tlock; cmpxchgl %3, %1\n" + asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" "2:\t.section .fixup, \"ax\"\n" "3:\tmov %2, %0\n" "\tjmp 2b\n" @@ -137,4 +137,4 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, } #endif -#endif +#endif /* ASM_X86__FUTEX_H */ diff --git a/include/asm-x86/gart.h b/include/asm-x86/gart.h index 3f62a83..baa54fa 100644 --- a/include/asm-x86/gart.h +++ b/include/asm-x86/gart.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X8664_GART_H -#define _ASM_X8664_GART_H 1 +#ifndef ASM_X86__GART_H +#define ASM_X86__GART_H #include <asm/e820.h> @@ -52,15 +52,15 @@ static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) return 0; if (aper_base + aper_size > 0x100000000ULL) { - printk(KERN_ERR "Aperture beyond 4GB. Ignoring.\n"); + printk(KERN_INFO "Aperture beyond 4GB. Ignoring.\n"); return 0; } if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk(KERN_ERR "Aperture pointing to e820 RAM. Ignoring.\n"); + printk(KERN_INFO "Aperture pointing to e820 RAM. Ignoring.\n"); return 0; } if (aper_size < min_size) { - printk(KERN_ERR "Aperture too small (%d MB) than (%d MB)\n", + printk(KERN_INFO "Aperture too small (%d MB) than (%d MB)\n", aper_size>>20, min_size>>20); return 0; } @@ -68,4 +68,4 @@ static inline int aperture_valid(u64 aper_base, u32 aper_size, u32 min_size) return 1; } -#endif +#endif /* ASM_X86__GART_H */ diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h index 754d635..34280f0 100644 --- a/include/asm-x86/genapic_32.h +++ b/include/asm-x86/genapic_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_GENAPIC_H -#define _ASM_GENAPIC_H 1 +#ifndef ASM_X86__GENAPIC_32_H +#define ASM_X86__GENAPIC_32_H #include <asm/mpspec.h> @@ -121,4 +121,4 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; #define uv_system_init() do {} while (0) -#endif +#endif /* ASM_X86__GENAPIC_32_H */ diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index a47d631..25097a8 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_GENAPIC_H -#define _ASM_GENAPIC_H 1 +#ifndef ASM_X86__GENAPIC_64_H +#define ASM_X86__GENAPIC_64_H /* * Copyright 2004 James Cleverdon, IBM. @@ -47,4 +47,4 @@ extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); extern void setup_apic_routing(void); -#endif +#endif /* ASM_X86__GENAPIC_64_H */ diff --git a/include/asm-x86/geode.h b/include/asm-x86/geode.h index 2c1cda0..3f3444b 100644 --- a/include/asm-x86/geode.h +++ b/include/asm-x86/geode.h @@ -7,8 +7,8 @@ * as published by the Free Software Foundation. */ -#ifndef _ASM_GEODE_H_ -#define _ASM_GEODE_H_ +#ifndef ASM_X86__GEODE_H +#define ASM_X86__GEODE_H #include <asm/processor.h> #include <linux/io.h> @@ -250,4 +250,4 @@ extern int __init mfgpt_timer_setup(void); static inline int mfgpt_timer_setup(void) { return 0; } #endif -#endif +#endif /* ASM_X86__GEODE_H */ diff --git a/include/asm-x86/gpio.h b/include/asm-x86/gpio.h index c4c91b3..497fb98 100644 --- a/include/asm-x86/gpio.h +++ b/include/asm-x86/gpio.h @@ -53,4 +53,4 @@ static inline int irq_to_gpio(unsigned int irq) #endif /* CONFIG_GPIOLIB */ -#endif /* _ASM_I386_GPIO_H */ +#endif /* ASM_X86__GPIO_H */ diff --git a/include/asm-x86/hardirq_32.h b/include/asm-x86/hardirq_32.h index 4f85f0f..700fe23 100644 --- a/include/asm-x86/hardirq_32.h +++ b/include/asm-x86/hardirq_32.h @@ -1,5 +1,5 @@ -#ifndef __ASM_HARDIRQ_H -#define __ASM_HARDIRQ_H +#ifndef ASM_X86__HARDIRQ_32_H +#define ASM_X86__HARDIRQ_32_H #include <linux/threads.h> #include <linux/irq.h> @@ -25,4 +25,4 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); void ack_bad_irq(unsigned int irq); #include <linux/irq_cpustat.h> -#endif /* __ASM_HARDIRQ_H */ +#endif /* ASM_X86__HARDIRQ_32_H */ diff --git a/include/asm-x86/hardirq_64.h b/include/asm-x86/hardirq_64.h index 95d5e09..f8bd291 100644 --- a/include/asm-x86/hardirq_64.h +++ b/include/asm-x86/hardirq_64.h @@ -1,5 +1,5 @@ -#ifndef __ASM_HARDIRQ_H -#define __ASM_HARDIRQ_H +#ifndef ASM_X86__HARDIRQ_64_H +#define ASM_X86__HARDIRQ_64_H #include <linux/threads.h> #include <linux/irq.h> @@ -20,4 +20,4 @@ extern void ack_bad_irq(unsigned int irq); -#endif /* __ASM_HARDIRQ_H */ +#endif /* ASM_X86__HARDIRQ_64_H */ diff --git a/include/asm-x86/highmem.h b/include/asm-x86/highmem.h index 4514b16..bc3f6a2 100644 --- a/include/asm-x86/highmem.h +++ b/include/asm-x86/highmem.h @@ -15,8 +15,8 @@ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> */ -#ifndef _ASM_HIGHMEM_H -#define _ASM_HIGHMEM_H +#ifndef ASM_X86__HIGHMEM_H +#define ASM_X86__HIGHMEM_H #ifdef __KERNEL__ @@ -79,4 +79,4 @@ extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, #endif /* __KERNEL__ */ -#endif /* _ASM_HIGHMEM_H */ +#endif /* ASM_X86__HIGHMEM_H */ diff --git a/include/asm-x86/hpet.h b/include/asm-x86/hpet.h index 82f1ac6..cbbbb6d 100644 --- a/include/asm-x86/hpet.h +++ b/include/asm-x86/hpet.h @@ -1,5 +1,5 @@ -#ifndef ASM_X86_HPET_H -#define ASM_X86_HPET_H +#ifndef ASM_X86__HPET_H +#define ASM_X86__HPET_H #ifdef CONFIG_HPET_TIMER @@ -90,4 +90,4 @@ static inline int is_hpet_enabled(void) { return 0; } #define hpet_readl(a) 0 #endif -#endif /* ASM_X86_HPET_H */ +#endif /* ASM_X86__HPET_H */ diff --git a/include/asm-x86/hugetlb.h b/include/asm-x86/hugetlb.h index 439a9ac..0b7ec5d 100644 --- a/include/asm-x86/hugetlb.h +++ b/include/asm-x86/hugetlb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_HUGETLB_H -#define _ASM_X86_HUGETLB_H +#ifndef ASM_X86__HUGETLB_H +#define ASM_X86__HUGETLB_H #include <asm/page.h> @@ -90,4 +90,4 @@ static inline void arch_release_hugepage(struct page *page) { } -#endif /* _ASM_X86_HUGETLB_H */ +#endif /* ASM_X86__HUGETLB_H */ diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h index edd0b95..65997b1 100644 --- a/include/asm-x86/hw_irq.h +++ b/include/asm-x86/hw_irq.h @@ -1,5 +1,5 @@ -#ifndef _ASM_HW_IRQ_H -#define _ASM_HW_IRQ_H +#ifndef ASM_X86__HW_IRQ_H +#define ASM_X86__HW_IRQ_H /* * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar @@ -93,6 +93,26 @@ extern asmlinkage void qic_reschedule_interrupt(void); extern asmlinkage void qic_enable_irq_interrupt(void); extern asmlinkage void qic_call_function_interrupt(void); +/* SMP */ +extern void smp_apic_timer_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_32 +extern void smp_spurious_interrupt(struct pt_regs *); +extern void smp_error_interrupt(struct pt_regs *); +#else +extern asmlinkage void smp_spurious_interrupt(void); +extern asmlinkage void smp_error_interrupt(void); +#endif +#ifdef CONFIG_X86_SMP +extern void smp_reschedule_interrupt(struct pt_regs *); +extern void smp_call_function_interrupt(struct pt_regs *); +extern void smp_call_function_single_interrupt(struct pt_regs *); +#ifdef CONFIG_X86_32 +extern void smp_invalidate_interrupt(struct pt_regs *); +#else +extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); +#endif +#endif + #ifdef CONFIG_X86_32 extern void (*const interrupt[NR_IRQS])(void); #else @@ -112,4 +132,4 @@ static inline void __setup_vector_irq(int cpu) {} #endif /* !ASSEMBLY_ */ -#endif +#endif /* ASM_X86__HW_IRQ_H */ diff --git a/include/asm-x86/hypertransport.h b/include/asm-x86/hypertransport.h index d2bbd23..cc011a3 100644 --- a/include/asm-x86/hypertransport.h +++ b/include/asm-x86/hypertransport.h @@ -1,5 +1,5 @@ -#ifndef ASM_HYPERTRANSPORT_H -#define ASM_HYPERTRANSPORT_H +#ifndef ASM_X86__HYPERTRANSPORT_H +#define ASM_X86__HYPERTRANSPORT_H /* * Constants for x86 Hypertransport Interrupts. @@ -42,4 +42,4 @@ #define HT_IRQ_HIGH_DEST_ID(v) \ ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK) -#endif /* ASM_HYPERTRANSPORT_H */ +#endif /* ASM_X86__HYPERTRANSPORT_H */ diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 56d00e3..1ecdc3e 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -7,8 +7,8 @@ * x86-64 work by Andi Kleen 2002 */ -#ifndef _ASM_X86_I387_H -#define _ASM_X86_I387_H +#ifndef ASM_X86__I387_H +#define ASM_X86__I387_H #include <linux/sched.h> #include <linux/kernel_stat.h> @@ -25,6 +25,7 @@ extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); extern asmlinkage void math_state_restore(void); extern void init_thread_xstate(void); +extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; @@ -336,4 +337,4 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -#endif /* _ASM_X86_I387_H */ +#endif /* ASM_X86__I387_H */ diff --git a/include/asm-x86/i8253.h b/include/asm-x86/i8253.h index b51c048..15a5b53 100644 --- a/include/asm-x86/i8253.h +++ b/include/asm-x86/i8253.h @@ -1,5 +1,5 @@ -#ifndef __ASM_I8253_H__ -#define __ASM_I8253_H__ +#ifndef ASM_X86__I8253_H +#define ASM_X86__I8253_H /* i8253A PIT registers */ #define PIT_MODE 0x43 @@ -15,4 +15,4 @@ extern void setup_pit_timer(void); #define inb_pit inb_p #define outb_pit outb_p -#endif /* __ASM_I8253_H__ */ +#endif /* ASM_X86__I8253_H */ diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h index 2f98df9..c586559 100644 --- a/include/asm-x86/i8259.h +++ b/include/asm-x86/i8259.h @@ -1,5 +1,5 @@ -#ifndef __ASM_I8259_H__ -#define __ASM_I8259_H__ +#ifndef ASM_X86__I8259_H +#define ASM_X86__I8259_H #include <linux/delay.h> @@ -57,4 +57,4 @@ static inline void outb_pic(unsigned char value, unsigned int port) extern struct irq_chip i8259A_chip; -#endif /* __ASM_I8259_H__ */ +#endif /* ASM_X86__I8259_H */ diff --git a/include/asm-x86/ia32.h b/include/asm-x86/ia32.h index 55d3abe..f932f7a 100644 --- a/include/asm-x86/ia32.h +++ b/include/asm-x86/ia32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_IA32_H -#define _ASM_X86_64_IA32_H +#ifndef ASM_X86__IA32_H +#define ASM_X86__IA32_H #ifdef CONFIG_IA32_EMULATION @@ -167,4 +167,4 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm); #endif /* !CONFIG_IA32_SUPPORT */ -#endif +#endif /* ASM_X86__IA32_H */ diff --git a/include/asm-x86/ia32_unistd.h b/include/asm-x86/ia32_unistd.h index 61cea9e..dbd887d 100644 --- a/include/asm-x86/ia32_unistd.h +++ b/include/asm-x86/ia32_unistd.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_IA32_UNISTD_H_ -#define _ASM_X86_64_IA32_UNISTD_H_ +#ifndef ASM_X86__IA32_UNISTD_H +#define ASM_X86__IA32_UNISTD_H /* * This file contains the system call numbers of the ia32 port, @@ -15,4 +15,4 @@ #define __NR_ia32_sigreturn 119 #define __NR_ia32_rt_sigreturn 173 -#endif /* _ASM_X86_64_IA32_UNISTD_H_ */ +#endif /* ASM_X86__IA32_UNISTD_H */ diff --git a/include/asm-x86/idle.h b/include/asm-x86/idle.h index cbb6491..baa3f783 100644 --- a/include/asm-x86/idle.h +++ b/include/asm-x86/idle.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_IDLE_H -#define _ASM_X86_64_IDLE_H 1 +#ifndef ASM_X86__IDLE_H +#define ASM_X86__IDLE_H #define IDLE_START 1 #define IDLE_END 2 @@ -12,4 +12,4 @@ void exit_idle(void); void c1e_remove_cpu(int cpu); -#endif +#endif /* ASM_X86__IDLE_H */ diff --git a/include/asm-x86/intel_arch_perfmon.h b/include/asm-x86/intel_arch_perfmon.h index fa0fd06..07c03c6 100644 --- a/include/asm-x86/intel_arch_perfmon.h +++ b/include/asm-x86/intel_arch_perfmon.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H -#define _ASM_X86_INTEL_ARCH_PERFMON_H +#ifndef ASM_X86__INTEL_ARCH_PERFMON_H +#define ASM_X86__INTEL_ARCH_PERFMON_H #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 @@ -28,4 +28,4 @@ union cpuid10_eax { unsigned int full; }; -#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ +#endif /* ASM_X86__INTEL_ARCH_PERFMON_H */ diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index 0f954dc..72b7719 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_IO_H -#define _ASM_X86_IO_H +#ifndef ASM_X86__IO_H +#define ASM_X86__IO_H #define ARCH_HAS_IOREMAP_WC @@ -73,6 +73,8 @@ build_mmio_write(__writeq, "q", unsigned long, "r", ) #define writeq writeq #endif +extern int iommu_bio_merge; + #ifdef CONFIG_X86_32 # include "io_32.h" #else @@ -99,4 +101,4 @@ extern void early_iounmap(void *addr, unsigned long size); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); -#endif /* _ASM_X86_IO_H */ +#endif /* ASM_X86__IO_H */ diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index e876d89..4f7d878 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H +#ifndef ASM_X86__IO_32_H +#define ASM_X86__IO_32_H #include <linux/string.h> #include <linux/compiler.h> @@ -281,4 +281,4 @@ BUILDIO(b, b, char) BUILDIO(w, w, short) BUILDIO(l, , int) -#endif +#endif /* ASM_X86__IO_32_H */ diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index 22995c5..64429e9 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IO_H -#define _ASM_IO_H +#ifndef ASM_X86__IO_64_H +#define ASM_X86__IO_64_H /* @@ -235,7 +235,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c); #define flush_write_buffers() -extern int iommu_bio_merge; #define BIO_VMERGE_BOUNDARY iommu_bio_merge /* @@ -245,4 +244,4 @@ extern int iommu_bio_merge; #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__IO_64_H */ diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h index 14f82bb..be62847 100644 --- a/include/asm-x86/io_apic.h +++ b/include/asm-x86/io_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_IO_APIC_H -#define __ASM_IO_APIC_H +#ifndef ASM_X86__IO_APIC_H +#define ASM_X86__IO_APIC_H #include <linux/types.h> #include <asm/mpspec.h> @@ -189,4 +189,4 @@ static const int timer_through_8259 = 0; static inline void ioapic_init_mappings(void) { } #endif -#endif +#endif /* ASM_X86__IO_APIC_H */ diff --git a/include/asm-x86/ioctls.h b/include/asm-x86/ioctls.h index c0c338b..3366035 100644 --- a/include/asm-x86/ioctls.h +++ b/include/asm-x86/ioctls.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_IOCTLS_H -#define _ASM_X86_IOCTLS_H +#ifndef ASM_X86__IOCTLS_H +#define ASM_X86__IOCTLS_H #include <asm/ioctl.h> @@ -85,4 +85,4 @@ #define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#endif +#endif /* ASM_X86__IOCTLS_H */ diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index 5f888cc..e86f441 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X8664_IOMMU_H -#define _ASM_X8664_IOMMU_H 1 +#ifndef ASM_X86__IOMMU_H +#define ASM_X86__IOMMU_H extern void pci_iommu_shutdown(void); extern void no_iommu_init(void); @@ -42,4 +42,4 @@ static inline void gart_iommu_hole_init(void) } #endif -#endif +#endif /* ASM_X86__IOMMU_H */ diff --git a/include/asm-x86/ipcbuf.h b/include/asm-x86/ipcbuf.h index ee678fd..910304f 100644 --- a/include/asm-x86/ipcbuf.h +++ b/include/asm-x86/ipcbuf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_IPCBUF_H -#define _ASM_X86_IPCBUF_H +#ifndef ASM_X86__IPCBUF_H +#define ASM_X86__IPCBUF_H /* * The ipc64_perm structure for x86 architecture. @@ -25,4 +25,4 @@ struct ipc64_perm { unsigned long __unused2; }; -#endif /* _ASM_X86_IPCBUF_H */ +#endif /* ASM_X86__IPCBUF_H */ diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h index bb1c09f..c1b2267 100644 --- a/include/asm-x86/ipi.h +++ b/include/asm-x86/ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_IPI_H -#define __ASM_IPI_H +#ifndef ASM_X86__IPI_H +#define ASM_X86__IPI_H /* * Copyright 2004 James Cleverdon, IBM. @@ -129,4 +129,4 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) local_irq_restore(flags); } -#endif /* __ASM_IPI_H */ +#endif /* ASM_X86__IPI_H */ diff --git a/include/asm-x86/irq.h b/include/asm-x86/irq.h index 1a29257..1e5f290 100644 --- a/include/asm-x86/irq.h +++ b/include/asm-x86/irq.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_H -#define _ASM_IRQ_H +#ifndef ASM_X86__IRQ_H +#define ASM_X86__IRQ_H /* * (C) 1992, 1993 Linus Torvalds, (C) 1997 Ingo Molnar * @@ -47,4 +47,4 @@ extern void native_init_IRQ(void); /* Interrupt vector management */ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); -#endif /* _ASM_IRQ_H */ +#endif /* ASM_X86__IRQ_H */ diff --git a/include/asm-x86/irq_regs_32.h b/include/asm-x86/irq_regs_32.h index 3368b20..316a3b2 100644 --- a/include/asm-x86/irq_regs_32.h +++ b/include/asm-x86/irq_regs_32.h @@ -4,8 +4,8 @@ * * Jeremy Fitzhardinge <jeremy@goop.org> */ -#ifndef _ASM_I386_IRQ_REGS_H -#define _ASM_I386_IRQ_REGS_H +#ifndef ASM_X86__IRQ_REGS_32_H +#define ASM_X86__IRQ_REGS_32_H #include <asm/percpu.h> @@ -26,4 +26,4 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) return old_regs; } -#endif /* _ASM_I386_IRQ_REGS_H */ +#endif /* ASM_X86__IRQ_REGS_32_H */ diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index a48c7f2..c5d2d76 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_VECTORS_H -#define _ASM_IRQ_VECTORS_H +#ifndef ASM_X86__IRQ_VECTORS_H +#define ASM_X86__IRQ_VECTORS_H #include <linux/threads.h> @@ -179,4 +179,4 @@ #define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) -#endif /* _ASM_IRQ_VECTORS_H */ +#endif /* ASM_X86__IRQ_VECTORS_H */ diff --git a/include/asm-x86/ist.h b/include/asm-x86/ist.h index 6ec6cee..35a2fe9 100644 --- a/include/asm-x86/ist.h +++ b/include/asm-x86/ist.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IST_H -#define _ASM_IST_H +#ifndef ASM_X86__IST_H +#define ASM_X86__IST_H /* * Include file for the interface to IST BIOS @@ -31,4 +31,4 @@ struct ist_info { extern struct ist_info ist_info; #endif /* __KERNEL__ */ -#endif /* _ASM_IST_H */ +#endif /* ASM_X86__IST_H */ diff --git a/include/asm-x86/k8.h b/include/asm-x86/k8.h index 452e2b6..2bbaf43 100644 --- a/include/asm-x86/k8.h +++ b/include/asm-x86/k8.h @@ -1,5 +1,5 @@ -#ifndef _ASM_K8_H -#define _ASM_K8_H 1 +#ifndef ASM_X86__K8_H +#define ASM_X86__K8_H #include <linux/pci.h> @@ -12,4 +12,4 @@ extern int cache_k8_northbridges(void); extern void k8_flush_garts(void); extern int k8_scan_nodes(unsigned long start, unsigned long end); -#endif +#endif /* ASM_X86__K8_H */ diff --git a/include/asm-x86/kdebug.h b/include/asm-x86/kdebug.h index 96651bb..5ec3ad3 100644 --- a/include/asm-x86/kdebug.h +++ b/include/asm-x86/kdebug.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_KDEBUG_H -#define _ASM_X86_KDEBUG_H +#ifndef ASM_X86__KDEBUG_H +#define ASM_X86__KDEBUG_H #include <linux/notifier.h> @@ -35,4 +35,4 @@ extern void show_regs(struct pt_regs *regs); extern unsigned long oops_begin(void); extern void oops_end(unsigned long, struct pt_regs *, int signr); -#endif +#endif /* ASM_X86__KDEBUG_H */ diff --git a/include/asm-x86/kexec.h b/include/asm-x86/kexec.h index 4246ab7..ea09600 100644 --- a/include/asm-x86/kexec.h +++ b/include/asm-x86/kexec.h @@ -1,5 +1,5 @@ -#ifndef _KEXEC_H -#define _KEXEC_H +#ifndef ASM_X86__KEXEC_H +#define ASM_X86__KEXEC_H #ifdef CONFIG_X86_32 # define PA_CONTROL_PAGE 0 @@ -172,4 +172,4 @@ relocate_kernel(unsigned long indirection_page, #endif /* __ASSEMBLY__ */ -#endif /* _KEXEC_H */ +#endif /* ASM_X86__KEXEC_H */ diff --git a/include/asm-x86/kgdb.h b/include/asm-x86/kgdb.h index 94d63db..d283863 100644 --- a/include/asm-x86/kgdb.h +++ b/include/asm-x86/kgdb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_KGDB_H_ -#define _ASM_KGDB_H_ +#ifndef ASM_X86__KGDB_H +#define ASM_X86__KGDB_H /* * Copyright (C) 2001-2004 Amit S. Kale @@ -76,4 +76,4 @@ static inline void arch_kgdb_breakpoint(void) #define BREAK_INSTR_SIZE 1 #define CACHE_FLUSH_IS_SAFE 1 -#endif /* _ASM_KGDB_H_ */ +#endif /* ASM_X86__KGDB_H */ diff --git a/include/asm-x86/kmap_types.h b/include/asm-x86/kmap_types.h index 5f41741..89f4449 100644 --- a/include/asm-x86/kmap_types.h +++ b/include/asm-x86/kmap_types.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_KMAP_TYPES_H -#define _ASM_X86_KMAP_TYPES_H +#ifndef ASM_X86__KMAP_TYPES_H +#define ASM_X86__KMAP_TYPES_H #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) # define D(n) __KM_FENCE_##n , @@ -26,4 +26,4 @@ D(13) KM_TYPE_NR #undef D -#endif +#endif /* ASM_X86__KMAP_TYPES_H */ diff --git a/include/asm-x86/kprobes.h b/include/asm-x86/kprobes.h index 54980b0..bd840786 100644 --- a/include/asm-x86/kprobes.h +++ b/include/asm-x86/kprobes.h @@ -1,5 +1,5 @@ -#ifndef _ASM_KPROBES_H -#define _ASM_KPROBES_H +#ifndef ASM_X86__KPROBES_H +#define ASM_X86__KPROBES_H /* * Kernel Probes (KProbes) * @@ -94,4 +94,4 @@ static inline void restore_interrupts(struct pt_regs *regs) extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -#endif /* _ASM_KPROBES_H */ +#endif /* ASM_X86__KPROBES_H */ diff --git a/include/asm-x86/kvm.h b/include/asm-x86/kvm.h index 6f18408..78e954d 100644 --- a/include/asm-x86/kvm.h +++ b/include/asm-x86/kvm.h @@ -1,5 +1,5 @@ -#ifndef __LINUX_KVM_X86_H -#define __LINUX_KVM_X86_H +#ifndef ASM_X86__KVM_H +#define ASM_X86__KVM_H /* * KVM x86 specific structures and definitions @@ -230,4 +230,4 @@ struct kvm_pit_state { #define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14) #define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15) -#endif +#endif /* ASM_X86__KVM_H */ diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index c2e34c2..6979454 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -1,4 +1,4 @@ -#/* +/* * Kernel-based Virtual Machine driver for Linux * * This header defines architecture specific interfaces, x86 version @@ -8,8 +8,8 @@ * */ -#ifndef ASM_KVM_HOST_H -#define ASM_KVM_HOST_H +#ifndef ASM_X86__KVM_HOST_H +#define ASM_X86__KVM_HOST_H #include <linux/types.h> #include <linux/mm.h> @@ -735,4 +735,4 @@ asmlinkage void kvm_handle_fault_on_reboot(void); int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_age_hva(struct kvm *kvm, unsigned long hva); -#endif +#endif /* ASM_X86__KVM_HOST_H */ diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h index 76f3921..30054fd 100644 --- a/include/asm-x86/kvm_para.h +++ b/include/asm-x86/kvm_para.h @@ -1,5 +1,5 @@ -#ifndef __X86_KVM_PARA_H -#define __X86_KVM_PARA_H +#ifndef ASM_X86__KVM_PARA_H +#define ASM_X86__KVM_PARA_H /* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It * should be used to determine that a VM is running under KVM. @@ -144,4 +144,4 @@ static inline unsigned int kvm_arch_para_features(void) #endif -#endif +#endif /* ASM_X86__KVM_PARA_H */ diff --git a/include/asm-x86/kvm_x86_emulate.h b/include/asm-x86/kvm_x86_emulate.h index 4e8c1e4..e2d9b03 100644 --- a/include/asm-x86/kvm_x86_emulate.h +++ b/include/asm-x86/kvm_x86_emulate.h @@ -8,8 +8,8 @@ * From: xen-unstable 10676:af9809f51f81a3c43f276f00c81a52ef558afda4 */ -#ifndef __X86_EMULATE_H__ -#define __X86_EMULATE_H__ +#ifndef ASM_X86__KVM_X86_EMULATE_H +#define ASM_X86__KVM_X86_EMULATE_H struct x86_emulate_ctxt; @@ -181,4 +181,4 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, int x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops); -#endif /* __X86_EMULATE_H__ */ +#endif /* ASM_X86__KVM_X86_EMULATE_H */ diff --git a/include/asm-x86/ldt.h b/include/asm-x86/ldt.h index 20c5972..a522850 100644 --- a/include/asm-x86/ldt.h +++ b/include/asm-x86/ldt.h @@ -3,8 +3,8 @@ * * Definitions of structures used with the modify_ldt system call. */ -#ifndef _ASM_X86_LDT_H -#define _ASM_X86_LDT_H +#ifndef ASM_X86__LDT_H +#define ASM_X86__LDT_H /* Maximum number of LDT entries supported. */ #define LDT_ENTRIES 8192 @@ -37,4 +37,4 @@ struct user_desc { #define MODIFY_LDT_CONTENTS_CODE 2 #endif /* !__ASSEMBLY__ */ -#endif +#endif /* ASM_X86__LDT_H */ diff --git a/include/asm-x86/lguest.h b/include/asm-x86/lguest.h index be4a724..7505e94 100644 --- a/include/asm-x86/lguest.h +++ b/include/asm-x86/lguest.h @@ -1,5 +1,5 @@ -#ifndef _X86_LGUEST_H -#define _X86_LGUEST_H +#ifndef ASM_X86__LGUEST_H +#define ASM_X86__LGUEST_H #define GDT_ENTRY_LGUEST_CS 10 #define GDT_ENTRY_LGUEST_DS 11 @@ -91,4 +91,4 @@ static inline void lguest_set_ts(void) #endif /* __ASSEMBLY__ */ -#endif +#endif /* ASM_X86__LGUEST_H */ diff --git a/include/asm-x86/lguest_hcall.h b/include/asm-x86/lguest_hcall.h index a3241f2..8f034ba 100644 --- a/include/asm-x86/lguest_hcall.h +++ b/include/asm-x86/lguest_hcall.h @@ -1,6 +1,6 @@ /* Architecture specific portion of the lguest hypercalls */ -#ifndef _X86_LGUEST_HCALL_H -#define _X86_LGUEST_HCALL_H +#ifndef ASM_X86__LGUEST_HCALL_H +#define ASM_X86__LGUEST_HCALL_H #define LHCALL_FLUSH_ASYNC 0 #define LHCALL_LGUEST_INIT 1 @@ -68,4 +68,4 @@ struct hcall_args { }; #endif /* !__ASSEMBLY__ */ -#endif /* _I386_LGUEST_HCALL_H */ +#endif /* ASM_X86__LGUEST_HCALL_H */ diff --git a/include/asm-x86/linkage.h b/include/asm-x86/linkage.h index 64e444f..42d8b62 100644 --- a/include/asm-x86/linkage.h +++ b/include/asm-x86/linkage.h @@ -1,5 +1,5 @@ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H +#ifndef ASM_X86__LINKAGE_H +#define ASM_X86__LINKAGE_H #undef notrace #define notrace __attribute__((no_instrument_function)) @@ -57,5 +57,5 @@ #define __ALIGN_STR ".align 16,0x90" #endif -#endif +#endif /* ASM_X86__LINKAGE_H */ diff --git a/include/asm-x86/local.h b/include/asm-x86/local.h index 330a724..ae91994 100644 --- a/include/asm-x86/local.h +++ b/include/asm-x86/local.h @@ -1,5 +1,5 @@ -#ifndef _ARCH_LOCAL_H -#define _ARCH_LOCAL_H +#ifndef ASM_X86__LOCAL_H +#define ASM_X86__LOCAL_H #include <linux/percpu.h> @@ -232,4 +232,4 @@ static inline long local_sub_return(long i, local_t *l) #define __cpu_local_add(i, l) cpu_local_add((i), (l)) #define __cpu_local_sub(i, l) cpu_local_sub((i), (l)) -#endif /* _ARCH_LOCAL_H */ +#endif /* ASM_X86__LOCAL_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/mach-bigsmp/mach_apic.h index c3b9dc6..05362d4 100644 --- a/include/asm-x86/mach-bigsmp/mach_apic.h +++ b/include/asm-x86/mach-bigsmp/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H +#define ASM_X86__MACH_BIGSMP__MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) #define esr_disable (1) @@ -141,4 +141,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h index a58ab5a..811935d 100644 --- a/include/asm-x86/mach-bigsmp/mach_apicdef.h +++ b/include/asm-x86/mach-bigsmp/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H +#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H #define APIC_ID_MASK (0xFF<<24) @@ -10,4 +10,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/mach-bigsmp/mach_ipi.h index 9404c53..b1b0f96 100644 --- a/include/asm-x86/mach-bigsmp/mach_ipi.h +++ b/include/asm-x86/mach-bigsmp/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H +#define ASM_X86__MACH_BIGSMP__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */ diff --git a/include/asm-x86/mach-default/apm.h b/include/asm-x86/mach-default/apm.h index 989f34c..2aa61b5 100644 --- a/include/asm-x86/mach-default/apm.h +++ b/include/asm-x86/mach-default/apm.h @@ -3,8 +3,8 @@ * Split out from apm.c by Osamu Tomita <tomita@cinet.co.jp> */ -#ifndef _ASM_APM_H -#define _ASM_APM_H +#ifndef ASM_X86__MACH_DEFAULT__APM_H +#define ASM_X86__MACH_DEFAULT__APM_H #ifdef APM_ZERO_SEGS # define APM_DO_ZERO_SEGS \ @@ -70,4 +70,4 @@ static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, return error; } -#endif /* _ASM_APM_H */ +#endif /* ASM_X86__MACH_DEFAULT__APM_H */ diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h index f3226b9..b615f40 100644 --- a/include/asm-x86/mach-default/mach_apic.h +++ b/include/asm-x86/mach-default/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_APIC_H +#define ASM_X86__MACH_DEFAULT__MACH_APIC_H #ifdef CONFIG_X86_LOCAL_APIC @@ -138,4 +138,4 @@ static inline void enable_apic_mode(void) } #endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_APIC_H */ diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h index e4b29ba..936704f 100644 --- a/include/asm-x86/mach-default/mach_apicdef.h +++ b/include/asm-x86/mach-default/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_APICDEF_H +#define ASM_X86__MACH_DEFAULT__MACH_APICDEF_H #include <asm/apic.h> @@ -21,4 +21,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) #endif -#endif +#endif /* ASM_X86__MACH_DEFAULT__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-default/mach_ipi.h b/include/asm-x86/mach-default/mach_ipi.h index be32336..674bc7e 100644 --- a/include/asm-x86/mach-default/mach_ipi.h +++ b/include/asm-x86/mach-default/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_IPI_H +#define ASM_X86__MACH_DEFAULT__MACH_IPI_H /* Avoid include hell */ #define NMI_VECTOR 0x02 @@ -61,4 +61,4 @@ static inline void send_IPI_all(int vector) } #endif -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_IPI_H */ diff --git a/include/asm-x86/mach-default/mach_mpparse.h b/include/asm-x86/mach-default/mach_mpparse.h index d141085..9c381f2 100644 --- a/include/asm-x86/mach-default/mach_mpparse.h +++ b/include/asm-x86/mach-default/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPPARSE_H -#define __ASM_MACH_MPPARSE_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H +#define ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid) @@ -14,4 +14,4 @@ static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) } -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-default/mach_mpspec.h b/include/asm-x86/mach-default/mach_mpspec.h index 51c9a97..d77646f 100644 --- a/include/asm-x86/mach-default/mach_mpspec.h +++ b/include/asm-x86/mach-default/mach_mpspec.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H +#define ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H #define MAX_IRQ_SOURCES 256 @@ -9,4 +9,4 @@ #define MAX_MP_BUSSES 32 #endif -#endif /* __ASM_MACH_MPSPEC_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-default/mach_timer.h b/include/asm-x86/mach-default/mach_timer.h index 4b76e53..990b158 100644 --- a/include/asm-x86/mach-default/mach_timer.h +++ b/include/asm-x86/mach-default/mach_timer.h @@ -10,8 +10,8 @@ * directly because of the awkward 8-bit access mechanism of the 82C54 * device. */ -#ifndef _MACH_TIMER_H -#define _MACH_TIMER_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_TIMER_H +#define ASM_X86__MACH_DEFAULT__MACH_TIMER_H #define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ #define CALIBRATE_LATCH \ @@ -45,4 +45,4 @@ static inline void mach_countup(unsigned long *count_p) *count_p = count; } -#endif /* !_MACH_TIMER_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_TIMER_H */ diff --git a/include/asm-x86/mach-default/mach_traps.h b/include/asm-x86/mach-default/mach_traps.h index 2fe7705..de9ac3f 100644 --- a/include/asm-x86/mach-default/mach_traps.h +++ b/include/asm-x86/mach-default/mach_traps.h @@ -2,8 +2,8 @@ * Machine specific NMI handling for generic. * Split out from traps.c by Osamu Tomita <tomita@cinet.co.jp> */ -#ifndef _MACH_TRAPS_H -#define _MACH_TRAPS_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_TRAPS_H +#define ASM_X86__MACH_DEFAULT__MACH_TRAPS_H #include <asm/mc146818rtc.h> @@ -36,4 +36,4 @@ static inline void reassert_nmi(void) unlock_cmos(); } -#endif /* !_MACH_TRAPS_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_TRAPS_H */ diff --git a/include/asm-x86/mach-default/mach_wakecpu.h b/include/asm-x86/mach-default/mach_wakecpu.h index 3ebb178..361b810 100644 --- a/include/asm-x86/mach-default/mach_wakecpu.h +++ b/include/asm-x86/mach-default/mach_wakecpu.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_WAKECPU_H -#define __ASM_MACH_WAKECPU_H +#ifndef ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H +#define ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H /* * This file copes with machines that wakeup secondary CPUs by the @@ -39,4 +39,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} #endif -#endif /* __ASM_MACH_WAKECPU_H */ +#endif /* ASM_X86__MACH_DEFAULT__MACH_WAKECPU_H */ diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/mach-es7000/mach_apic.h index 0a3fdf9..c1f6f68 100644 --- a/include/asm-x86/mach-es7000/mach_apic.h +++ b/include/asm-x86/mach-es7000/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H +#define ASM_X86__MACH_ES7000__MACH_APIC_H #define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) #define esr_disable (1) @@ -191,4 +191,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */ diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h index a58ab5a..a07e567 100644 --- a/include/asm-x86/mach-es7000/mach_apicdef.h +++ b/include/asm-x86/mach-es7000/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H +#define ASM_X86__MACH_ES7000__MACH_APICDEF_H #define APIC_ID_MASK (0xFF<<24) @@ -10,4 +10,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/mach-es7000/mach_ipi.h index 5e61bd2..3a21240 100644 --- a/include/asm-x86/mach-es7000/mach_ipi.h +++ b/include/asm-x86/mach-es7000/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H +#define ASM_X86__MACH_ES7000__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -21,4 +21,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */ diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/mach-es7000/mach_mpparse.h index ef26d35..befde24 100644 --- a/include/asm-x86/mach-es7000/mach_mpparse.h +++ b/include/asm-x86/mach-es7000/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPPARSE_H -#define __ASM_MACH_MPPARSE_H +#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H +#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H #include <linux/acpi.h> @@ -26,4 +26,4 @@ static inline int es7000_check_dsdt(void) } #endif -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/mach-es7000/mach_wakecpu.h index 84ff583..97c776c 100644 --- a/include/asm-x86/mach-es7000/mach_wakecpu.h +++ b/include/asm-x86/mach-es7000/mach_wakecpu.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_WAKECPU_H -#define __ASM_MACH_WAKECPU_H +#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H +#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H /* * This file copes with machines that wakeup secondary CPUs by the @@ -56,4 +56,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} #endif -#endif /* __ASM_MACH_WAKECPU_H */ +#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */ diff --git a/include/asm-x86/mach-generic/gpio.h b/include/asm-x86/mach-generic/gpio.h index 5305dcb..6ce0f77 100644 --- a/include/asm-x86/mach-generic/gpio.h +++ b/include/asm-x86/mach-generic/gpio.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_GENERIC_GPIO_H -#define __ASM_MACH_GENERIC_GPIO_H +#ifndef ASM_X86__MACH_GENERIC__GPIO_H +#define ASM_X86__MACH_GENERIC__GPIO_H int gpio_request(unsigned gpio, const char *label); void gpio_free(unsigned gpio); @@ -12,4 +12,4 @@ int irq_to_gpio(unsigned irq); #include <asm-generic/gpio.h> /* cansleep wrappers */ -#endif /* __ASM_MACH_GENERIC_GPIO_H */ +#endif /* ASM_X86__MACH_GENERIC__GPIO_H */ diff --git a/include/asm-x86/mach-generic/irq_vectors_limits.h b/include/asm-x86/mach-generic/irq_vectors_limits.h index 890ce3f..f7870e1 100644 --- a/include/asm-x86/mach-generic/irq_vectors_limits.h +++ b/include/asm-x86/mach-generic/irq_vectors_limits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H +#ifndef ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H +#define ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H /* * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, @@ -11,4 +11,4 @@ #define NR_IRQS 224 #define NR_IRQ_VECTORS 1024 -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ +#endif /* ASM_X86__MACH_GENERIC__IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-generic/mach_apic.h b/include/asm-x86/mach-generic/mach_apic.h index 6eff343..5d010c6 100644 --- a/include/asm-x86/mach-generic/mach_apic.h +++ b/include/asm-x86/mach-generic/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_GENERIC__MACH_APIC_H +#define ASM_X86__MACH_GENERIC__MACH_APIC_H #include <asm/genapic.h> @@ -29,4 +29,4 @@ extern void generic_bigsmp_probe(void); -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_GENERIC__MACH_APIC_H */ diff --git a/include/asm-x86/mach-generic/mach_apicdef.h b/include/asm-x86/mach-generic/mach_apicdef.h index 28ed989..1657f38 100644 --- a/include/asm-x86/mach-generic/mach_apicdef.h +++ b/include/asm-x86/mach-generic/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef _GENAPIC_MACH_APICDEF_H -#define _GENAPIC_MACH_APICDEF_H 1 +#ifndef ASM_X86__MACH_GENERIC__MACH_APICDEF_H +#define ASM_X86__MACH_GENERIC__MACH_APICDEF_H #ifndef APIC_DEFINITION #include <asm/genapic.h> @@ -8,4 +8,4 @@ #define APIC_ID_MASK (genapic->apic_id_mask) #endif -#endif +#endif /* ASM_X86__MACH_GENERIC__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-generic/mach_ipi.h b/include/asm-x86/mach-generic/mach_ipi.h index 441b0fe..f67433d 100644 --- a/include/asm-x86/mach-generic/mach_ipi.h +++ b/include/asm-x86/mach-generic/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef _MACH_IPI_H -#define _MACH_IPI_H 1 +#ifndef ASM_X86__MACH_GENERIC__MACH_IPI_H +#define ASM_X86__MACH_GENERIC__MACH_IPI_H #include <asm/genapic.h> @@ -7,4 +7,4 @@ #define send_IPI_allbutself (genapic->send_IPI_allbutself) #define send_IPI_all (genapic->send_IPI_all) -#endif +#endif /* ASM_X86__MACH_GENERIC__MACH_IPI_H */ diff --git a/include/asm-x86/mach-generic/mach_mpparse.h b/include/asm-x86/mach-generic/mach_mpparse.h index 586cadb..3115564 100644 --- a/include/asm-x86/mach-generic/mach_mpparse.h +++ b/include/asm-x86/mach-generic/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef _MACH_MPPARSE_H -#define _MACH_MPPARSE_H 1 +#ifndef ASM_X86__MACH_GENERIC__MACH_MPPARSE_H +#define ASM_X86__MACH_GENERIC__MACH_MPPARSE_H extern int mps_oem_check(struct mp_config_table *mpc, char *oem, @@ -7,4 +7,4 @@ extern int mps_oem_check(struct mp_config_table *mpc, char *oem, extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); -#endif +#endif /* ASM_X86__MACH_GENERIC__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-generic/mach_mpspec.h b/include/asm-x86/mach-generic/mach_mpspec.h index c83c120..6061b15 100644 --- a/include/asm-x86/mach-generic/mach_mpspec.h +++ b/include/asm-x86/mach-generic/mach_mpspec.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPSPEC_H -#define __ASM_MACH_MPSPEC_H +#ifndef ASM_X86__MACH_GENERIC__MACH_MPSPEC_H +#define ASM_X86__MACH_GENERIC__MACH_MPSPEC_H #define MAX_IRQ_SOURCES 256 @@ -9,4 +9,4 @@ extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid); -#endif /* __ASM_MACH_MPSPEC_H */ +#endif /* ASM_X86__MACH_GENERIC__MACH_MPSPEC_H */ diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/mach-numaq/mach_apic.h index d802465..7a0d39e 100644 --- a/include/asm-x86/mach-numaq/mach_apic.h +++ b/include/asm-x86/mach-numaq/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H +#define ASM_X86__MACH_NUMAQ__MACH_APIC_H #include <asm/io.h> #include <linux/mmzone.h> @@ -135,4 +135,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return cpuid_apic >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */ diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/mach-numaq/mach_apicdef.h index bf439d0..f870ec5 100644 --- a/include/asm-x86/mach-numaq/mach_apicdef.h +++ b/include/asm-x86/mach-numaq/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H +#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H #define APIC_ID_MASK (0xF<<24) @@ -11,4 +11,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/mach-numaq/mach_ipi.h index c604448..1e83582 100644 --- a/include/asm-x86/mach-numaq/mach_ipi.h +++ b/include/asm-x86/mach-numaq/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H +#define ASM_X86__MACH_NUMAQ__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */ diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h index 626aef6..74ade18 100644 --- a/include/asm-x86/mach-numaq/mach_mpparse.h +++ b/include/asm-x86/mach-numaq/mach_mpparse.h @@ -1,7 +1,7 @@ -#ifndef __ASM_MACH_MPPARSE_H -#define __ASM_MACH_MPPARSE_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H +#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid); -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */ diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/mach-numaq/mach_wakecpu.h index 0053004..0db8cea 100644 --- a/include/asm-x86/mach-numaq/mach_wakecpu.h +++ b/include/asm-x86/mach-numaq/mach_wakecpu.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_WAKECPU_H -#define __ASM_MACH_WAKECPU_H +#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H +#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H /* This file copes with machines that wakeup secondary CPUs by NMIs */ @@ -40,4 +40,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) #define inquire_remote_apic(apicid) {} -#endif /* __ASM_MACH_WAKECPU_H */ +#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */ diff --git a/include/asm-x86/mach-rdc321x/gpio.h b/include/asm-x86/mach-rdc321x/gpio.h index acce0b7..94b6cdf 100644 --- a/include/asm-x86/mach-rdc321x/gpio.h +++ b/include/asm-x86/mach-rdc321x/gpio.h @@ -1,5 +1,7 @@ -#ifndef _RDC321X_GPIO_H -#define _RDC321X_GPIO_H +#ifndef ASM_X86__MACH_RDC321X__GPIO_H +#define ASM_X86__MACH_RDC321X__GPIO_H + +#include <linux/kernel.h> extern int rdc_gpio_get_value(unsigned gpio); extern void rdc_gpio_set_value(unsigned gpio, int value); @@ -18,6 +20,7 @@ static inline int gpio_request(unsigned gpio, const char *label) static inline void gpio_free(unsigned gpio) { + might_sleep(); rdc_gpio_free(gpio); } @@ -54,4 +57,4 @@ static inline int irq_to_gpio(unsigned irq) /* For cansleep */ #include <asm-generic/gpio.h> -#endif /* _RDC321X_GPIO_H_ */ +#endif /* ASM_X86__MACH_RDC321X__GPIO_H */ diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/mach-summit/irq_vectors_limits.h index 890ce3f..22f376a 100644 --- a/include/asm-x86/mach-summit/irq_vectors_limits.h +++ b/include/asm-x86/mach-summit/irq_vectors_limits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_IRQ_VECTORS_LIMITS_H -#define _ASM_IRQ_VECTORS_LIMITS_H +#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H +#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H /* * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, @@ -11,4 +11,4 @@ #define NR_IRQS 224 #define NR_IRQ_VECTORS 1024 -#endif /* _ASM_IRQ_VECTORS_LIMITS_H */ +#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */ diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/mach-summit/mach_apic.h index c47e2ab..7a66758 100644 --- a/include/asm-x86/mach-summit/mach_apic.h +++ b/include/asm-x86/mach-summit/mach_apic.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APIC_H -#define __ASM_MACH_APIC_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H +#define ASM_X86__MACH_SUMMIT__MACH_APIC_H #include <asm/smp.h> @@ -182,4 +182,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) return hard_smp_processor_id() >> index_msb; } -#endif /* __ASM_MACH_APIC_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */ diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h index a58ab5a..d4bc859 100644 --- a/include/asm-x86/mach-summit/mach_apicdef.h +++ b/include/asm-x86/mach-summit/mach_apicdef.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_APICDEF_H -#define __ASM_MACH_APICDEF_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H +#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H #define APIC_ID_MASK (0xFF<<24) @@ -10,4 +10,4 @@ static inline unsigned get_apic_id(unsigned long x) #define GET_APIC_ID(x) get_apic_id(x) -#endif +#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */ diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/mach-summit/mach_ipi.h index 9404c53..a3b31c5 100644 --- a/include/asm-x86/mach-summit/mach_ipi.h +++ b/include/asm-x86/mach-summit/mach_ipi.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_IPI_H -#define __ASM_MACH_IPI_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H +#define ASM_X86__MACH_SUMMIT__MACH_IPI_H void send_IPI_mask_sequence(cpumask_t mask, int vector); @@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector) send_IPI_mask(cpu_online_map, vector); } -#endif /* __ASM_MACH_IPI_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */ diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/mach-summit/mach_mpparse.h index fdf5917..92396f2 100644 --- a/include/asm-x86/mach-summit/mach_mpparse.h +++ b/include/asm-x86/mach-summit/mach_mpparse.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MACH_MPPARSE_H -#define __ASM_MACH_MPPARSE_H +#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H +#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H #include <mach_apic.h> #include <asm/tsc.h> @@ -107,4 +107,4 @@ static inline int is_WPEG(struct rio_detail *rio){ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); } -#endif /* __ASM_MACH_MPPARSE_H */ +#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */ diff --git a/include/asm-x86/math_emu.h b/include/asm-x86/math_emu.h index 9bf4ae9..5768d8e 100644 --- a/include/asm-x86/math_emu.h +++ b/include/asm-x86/math_emu.h @@ -1,5 +1,5 @@ -#ifndef _I386_MATH_EMU_H -#define _I386_MATH_EMU_H +#ifndef ASM_X86__MATH_EMU_H +#define ASM_X86__MATH_EMU_H /* This structure matches the layout of the data saved to the stack following a device-not-present interrupt, part of it saved @@ -28,4 +28,4 @@ struct info { long ___vm86_fs; long ___vm86_gs; }; -#endif +#endif /* ASM_X86__MATH_EMU_H */ diff --git a/include/asm-x86/mc146818rtc.h b/include/asm-x86/mc146818rtc.h index daf1ccd..a995f33 100644 --- a/include/asm-x86/mc146818rtc.h +++ b/include/asm-x86/mc146818rtc.h @@ -1,8 +1,8 @@ /* * Machine dependent access functions for RTC registers. */ -#ifndef _ASM_MC146818RTC_H -#define _ASM_MC146818RTC_H +#ifndef ASM_X86__MC146818RTC_H +#define ASM_X86__MC146818RTC_H #include <asm/io.h> #include <asm/system.h> @@ -101,4 +101,4 @@ extern unsigned long mach_get_cmos_time(void); #define RTC_IRQ 8 -#endif /* _ASM_MC146818RTC_H */ +#endif /* ASM_X86__MC146818RTC_H */ diff --git a/include/asm-x86/mca.h b/include/asm-x86/mca.h index 09adf2e..60d1ed2 100644 --- a/include/asm-x86/mca.h +++ b/include/asm-x86/mca.h @@ -1,8 +1,8 @@ /* -*- mode: c; c-basic-offset: 8 -*- */ /* Platform specific MCA defines */ -#ifndef _ASM_MCA_H -#define _ASM_MCA_H +#ifndef ASM_X86__MCA_H +#define ASM_X86__MCA_H /* Maximal number of MCA slots - actually, some machines have less, but * they all have sufficient number of POS registers to cover 8. @@ -40,4 +40,4 @@ */ #define MCA_NUMADAPTERS (MCA_MAX_SLOT_NR+3) -#endif +#endif /* ASM_X86__MCA_H */ diff --git a/include/asm-x86/mca_dma.h b/include/asm-x86/mca_dma.h index c3dca6e..49f22be 100644 --- a/include/asm-x86/mca_dma.h +++ b/include/asm-x86/mca_dma.h @@ -1,5 +1,5 @@ -#ifndef MCA_DMA_H -#define MCA_DMA_H +#ifndef ASM_X86__MCA_DMA_H +#define ASM_X86__MCA_DMA_H #include <asm/io.h> #include <linux/ioport.h> @@ -198,4 +198,4 @@ static inline void mca_set_dma_mode(unsigned int dmanr, unsigned int mode) outb(mode, MCA_DMA_REG_EXE); } -#endif /* MCA_DMA_H */ +#endif /* ASM_X86__MCA_DMA_H */ diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h index 531eaa5..036133e 100644 --- a/include/asm-x86/mce.h +++ b/include/asm-x86/mce.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_MCE_H -#define _ASM_X86_MCE_H +#ifndef ASM_X86__MCE_H +#define ASM_X86__MCE_H #ifdef __x86_64__ @@ -127,4 +127,4 @@ extern void restart_mce(void); #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__MCE_H */ diff --git a/include/asm-x86/mman.h b/include/asm-x86/mman.h index 90bc410..4ef28e6 100644 --- a/include/asm-x86/mman.h +++ b/include/asm-x86/mman.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_MMAN_H -#define _ASM_X86_MMAN_H +#ifndef ASM_X86__MMAN_H +#define ASM_X86__MMAN_H #include <asm-generic/mman.h> @@ -17,4 +17,4 @@ #define MCL_CURRENT 1 /* lock all current mappings */ #define MCL_FUTURE 2 /* lock all future mappings */ -#endif /* _ASM_X86_MMAN_H */ +#endif /* ASM_X86__MMAN_H */ diff --git a/include/asm-x86/mmconfig.h b/include/asm-x86/mmconfig.h index e293ab8..fb79b1c 100644 --- a/include/asm-x86/mmconfig.h +++ b/include/asm-x86/mmconfig.h @@ -1,5 +1,5 @@ -#ifndef _ASM_MMCONFIG_H -#define _ASM_MMCONFIG_H +#ifndef ASM_X86__MMCONFIG_H +#define ASM_X86__MMCONFIG_H #ifdef CONFIG_PCI_MMCONFIG extern void __cpuinit fam10h_check_enable_mmcfg(void); @@ -9,4 +9,4 @@ static inline void fam10h_check_enable_mmcfg(void) { } static inline void check_enable_amd_mmconf_dmi(void) { } #endif -#endif +#endif /* ASM_X86__MMCONFIG_H */ diff --git a/include/asm-x86/mmu.h b/include/asm-x86/mmu.h index 00e8867..9d5aff1 100644 --- a/include/asm-x86/mmu.h +++ b/include/asm-x86/mmu.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_MMU_H -#define _ASM_X86_MMU_H +#ifndef ASM_X86__MMU_H +#define ASM_X86__MMU_H #include <linux/spinlock.h> #include <linux/mutex.h> @@ -7,14 +7,9 @@ /* * The x86 doesn't have a mmu context, but * we put the segment information here. - * - * cpu_vm_mask is used to optimize ldt flushing. */ typedef struct { void *ldt; -#ifdef CONFIG_X86_64 - rwlock_t ldtlock; -#endif int size; struct mutex lock; void *vdso; @@ -28,4 +23,4 @@ static inline void leave_mm(int cpu) } #endif -#endif /* _ASM_X86_MMU_H */ +#endif /* ASM_X86__MMU_H */ diff --git a/include/asm-x86/mmu_context.h b/include/asm-x86/mmu_context.h index fac5701..8ec940b 100644 --- a/include/asm-x86/mmu_context.h +++ b/include/asm-x86/mmu_context.h @@ -1,5 +1,5 @@ -#ifndef __ASM_X86_MMU_CONTEXT_H -#define __ASM_X86_MMU_CONTEXT_H +#ifndef ASM_X86__MMU_CONTEXT_H +#define ASM_X86__MMU_CONTEXT_H #include <asm/desc.h> #include <asm/atomic.h> @@ -34,4 +34,4 @@ do { \ } while (0); -#endif /* __ASM_X86_MMU_CONTEXT_H */ +#endif /* ASM_X86__MMU_CONTEXT_H */ diff --git a/include/asm-x86/mmu_context_32.h b/include/asm-x86/mmu_context_32.h index 824fc57..cce6f6e 100644 --- a/include/asm-x86/mmu_context_32.h +++ b/include/asm-x86/mmu_context_32.h @@ -1,5 +1,5 @@ -#ifndef __I386_SCHED_H -#define __I386_SCHED_H +#ifndef ASM_X86__MMU_CONTEXT_32_H +#define ASM_X86__MMU_CONTEXT_32_H static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { @@ -53,4 +53,4 @@ static inline void switch_mm(struct mm_struct *prev, #define deactivate_mm(tsk, mm) \ asm("movl %0,%%gs": :"r" (0)); -#endif +#endif /* ASM_X86__MMU_CONTEXT_32_H */ diff --git a/include/asm-x86/mmu_context_64.h b/include/asm-x86/mmu_context_64.h index c700063..2675867 100644 --- a/include/asm-x86/mmu_context_64.h +++ b/include/asm-x86/mmu_context_64.h @@ -1,5 +1,5 @@ -#ifndef __X86_64_MMU_CONTEXT_H -#define __X86_64_MMU_CONTEXT_H +#ifndef ASM_X86__MMU_CONTEXT_64_H +#define ASM_X86__MMU_CONTEXT_64_H #include <asm/pda.h> @@ -51,4 +51,4 @@ do { \ asm volatile("movl %0,%%fs"::"r"(0)); \ } while (0) -#endif +#endif /* ASM_X86__MMU_CONTEXT_64_H */ diff --git a/include/asm-x86/mmx.h b/include/asm-x86/mmx.h index 9408812..2e7299b 100644 --- a/include/asm-x86/mmx.h +++ b/include/asm-x86/mmx.h @@ -1,5 +1,5 @@ -#ifndef _ASM_MMX_H -#define _ASM_MMX_H +#ifndef ASM_X86__MMX_H +#define ASM_X86__MMX_H /* * MMX 3Dnow! helper operations @@ -11,4 +11,4 @@ extern void *_mmx_memcpy(void *to, const void *from, size_t size); extern void mmx_clear_page(void *page); extern void mmx_copy_page(void *to, void *from); -#endif +#endif /* ASM_X86__MMX_H */ diff --git a/include/asm-x86/mmzone_32.h b/include/asm-x86/mmzone_32.h index 5862e64..121b65d 100644 --- a/include/asm-x86/mmzone_32.h +++ b/include/asm-x86/mmzone_32.h @@ -3,8 +3,8 @@ * */ -#ifndef _ASM_MMZONE_H_ -#define _ASM_MMZONE_H_ +#ifndef ASM_X86__MMZONE_32_H +#define ASM_X86__MMZONE_32_H #include <asm/smp.h> @@ -131,4 +131,4 @@ static inline int pfn_valid(int pfn) }) #endif /* CONFIG_NEED_MULTIPLE_NODES */ -#endif /* _ASM_MMZONE_H_ */ +#endif /* ASM_X86__MMZONE_32_H */ diff --git a/include/asm-x86/mmzone_64.h b/include/asm-x86/mmzone_64.h index 594bd0d..626b03a 100644 --- a/include/asm-x86/mmzone_64.h +++ b/include/asm-x86/mmzone_64.h @@ -1,8 +1,8 @@ /* K8 NUMA support */ /* Copyright 2002,2003 by Andi Kleen, SuSE Labs */ /* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */ -#ifndef _ASM_X86_64_MMZONE_H -#define _ASM_X86_64_MMZONE_H 1 +#ifndef ASM_X86__MMZONE_64_H +#define ASM_X86__MMZONE_64_H #ifdef CONFIG_NUMA @@ -49,4 +49,4 @@ extern int early_pfn_to_nid(unsigned long pfn); #endif #endif -#endif +#endif /* ASM_X86__MMZONE_64_H */ diff --git a/include/asm-x86/module.h b/include/asm-x86/module.h index bfedb24..48dc3e0 100644 --- a/include/asm-x86/module.h +++ b/include/asm-x86/module.h @@ -1,5 +1,5 @@ -#ifndef _ASM_MODULE_H -#define _ASM_MODULE_H +#ifndef ASM_X86__MODULE_H +#define ASM_X86__MODULE_H /* x86_32/64 are simple */ struct mod_arch_specific {}; @@ -79,4 +79,4 @@ struct mod_arch_specific {}; # define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY MODULE_STACKSIZE #endif -#endif /* _ASM_MODULE_H */ +#endif /* ASM_X86__MODULE_H */ diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h index b6995e5..118da36 100644 --- a/include/asm-x86/mpspec.h +++ b/include/asm-x86/mpspec.h @@ -1,5 +1,5 @@ -#ifndef _AM_X86_MPSPEC_H -#define _AM_X86_MPSPEC_H +#ifndef ASM_X86__MPSPEC_H +#define ASM_X86__MPSPEC_H #include <linux/init.h> @@ -141,4 +141,4 @@ static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map) extern physid_mask_t phys_cpu_present_map; -#endif +#endif /* ASM_X86__MPSPEC_H */ diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h index 38d1e73..79166b0 100644 --- a/include/asm-x86/mpspec_def.h +++ b/include/asm-x86/mpspec_def.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MPSPEC_DEF_H -#define __ASM_MPSPEC_DEF_H +#ifndef ASM_X86__MPSPEC_DEF_H +#define ASM_X86__MPSPEC_DEF_H /* * Structure definitions for SMP machines following the @@ -177,4 +177,4 @@ enum mp_bustype { MP_BUS_PCI, MP_BUS_MCA, }; -#endif +#endif /* ASM_X86__MPSPEC_DEF_H */ diff --git a/include/asm-x86/msgbuf.h b/include/asm-x86/msgbuf.h index 7e4e948..1b538c9 100644 --- a/include/asm-x86/msgbuf.h +++ b/include/asm-x86/msgbuf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_MSGBUF_H -#define _ASM_X86_MSGBUF_H +#ifndef ASM_X86__MSGBUF_H +#define ASM_X86__MSGBUF_H /* * The msqid64_ds structure for i386 architecture. @@ -36,4 +36,4 @@ struct msqid64_ds { unsigned long __unused5; }; -#endif /* _ASM_X86_MSGBUF_H */ +#endif /* ASM_X86__MSGBUF_H */ diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h index 296f29c..3139666 100644 --- a/include/asm-x86/msidef.h +++ b/include/asm-x86/msidef.h @@ -1,5 +1,5 @@ -#ifndef ASM_MSIDEF_H -#define ASM_MSIDEF_H +#ifndef ASM_X86__MSIDEF_H +#define ASM_X86__MSIDEF_H /* * Constants for Intel APIC based MSI messages. @@ -48,4 +48,4 @@ #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ MSI_ADDR_DEST_ID_MASK) -#endif /* ASM_MSIDEF_H */ +#endif /* ASM_X86__MSIDEF_H */ diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h index 44bce77..3052f05 100644 --- a/include/asm-x86/msr-index.h +++ b/include/asm-x86/msr-index.h @@ -1,5 +1,5 @@ -#ifndef __ASM_MSR_INDEX_H -#define __ASM_MSR_INDEX_H +#ifndef ASM_X86__MSR_INDEX_H +#define ASM_X86__MSR_INDEX_H /* CPU model specific register (MSR) numbers */ @@ -310,4 +310,4 @@ /* Geode defined MSRs */ #define MSR_GEODE_BUSCONT_CONF0 0x00001900 -#endif /* __ASM_MSR_INDEX_H */ +#endif /* ASM_X86__MSR_INDEX_H */ diff --git a/include/asm-x86/msr.h b/include/asm-x86/msr.h index 2362cfd..530af1f 100644 --- a/include/asm-x86/msr.h +++ b/include/asm-x86/msr.h @@ -1,5 +1,5 @@ -#ifndef __ASM_X86_MSR_H_ -#define __ASM_X86_MSR_H_ +#ifndef ASM_X86__MSR_H +#define ASM_X86__MSR_H #include <asm/msr-index.h> @@ -63,6 +63,22 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, return EAX_EDX_VAL(val, low, high); } +static inline unsigned long long native_read_msr_amd_safe(unsigned int msr, + int *err) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("2: rdmsr ; xor %0,%0\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %3,%0 ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : "=r" (*err), EAX_EDX_RET(val, low, high) + : "c" (msr), "D" (0x9c5a203a), "i" (-EFAULT)); + return EAX_EDX_VAL(val, low, high); +} + static inline void native_write_msr(unsigned int msr, unsigned low, unsigned high) { @@ -158,6 +174,13 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) *p = native_read_msr_safe(msr, &err); return err; } +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = native_read_msr_amd_safe(msr, &err); + return err; +} #define rdtscl(low) \ ((low) = (u32)native_read_tsc()) @@ -221,4 +244,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__MSR_H */ diff --git a/include/asm-x86/mtrr.h b/include/asm-x86/mtrr.h index a69a01a..23a7f83 100644 --- a/include/asm-x86/mtrr.h +++ b/include/asm-x86/mtrr.h @@ -20,8 +20,8 @@ The postal address is: Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia. */ -#ifndef _ASM_X86_MTRR_H -#define _ASM_X86_MTRR_H +#ifndef ASM_X86__MTRR_H +#define ASM_X86__MTRR_H #include <linux/ioctl.h> #include <linux/errno.h> @@ -170,4 +170,4 @@ struct mtrr_gentry32 { #endif /* __KERNEL__ */ -#endif /* _ASM_X86_MTRR_H */ +#endif /* ASM_X86__MTRR_H */ diff --git a/include/asm-x86/mutex_32.h b/include/asm-x86/mutex_32.h index 73e928e..25c16d8 100644 --- a/include/asm-x86/mutex_32.h +++ b/include/asm-x86/mutex_32.h @@ -6,8 +6,8 @@ * * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> */ -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H +#ifndef ASM_X86__MUTEX_32_H +#define ASM_X86__MUTEX_32_H #include <asm/alternative.h> @@ -122,4 +122,4 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, #endif } -#endif +#endif /* ASM_X86__MUTEX_32_H */ diff --git a/include/asm-x86/mutex_64.h b/include/asm-x86/mutex_64.h index f3fae9b..918ba21 100644 --- a/include/asm-x86/mutex_64.h +++ b/include/asm-x86/mutex_64.h @@ -6,8 +6,8 @@ * * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> */ -#ifndef _ASM_MUTEX_H -#define _ASM_MUTEX_H +#ifndef ASM_X86__MUTEX_64_H +#define ASM_X86__MUTEX_64_H /** * __mutex_fastpath_lock - decrement and call function if negative @@ -97,4 +97,4 @@ static inline int __mutex_fastpath_trylock(atomic_t *count, return 0; } -#endif +#endif /* ASM_X86__MUTEX_64_H */ diff --git a/include/asm-x86/nmi.h b/include/asm-x86/nmi.h index 21f8d02..d5e715f 100644 --- a/include/asm-x86/nmi.h +++ b/include/asm-x86/nmi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_NMI_H_ -#define _ASM_X86_NMI_H_ +#ifndef ASM_X86__NMI_H +#define ASM_X86__NMI_H #include <linux/pm.h> #include <asm/irq.h> @@ -34,6 +34,7 @@ extern void stop_apic_nmi_watchdog(void *); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); +extern void cpu_nmi_set_wd_enabled(void); extern atomic_t nmi_active; extern unsigned int nmi_watchdog; @@ -81,4 +82,4 @@ void enable_lapic_nmi_watchdog(void); void stop_nmi(void); void restart_nmi(void); -#endif +#endif /* ASM_X86__NMI_H */ diff --git a/include/asm-x86/nops.h b/include/asm-x86/nops.h index ad0bedd..ae74272 100644 --- a/include/asm-x86/nops.h +++ b/include/asm-x86/nops.h @@ -1,5 +1,5 @@ -#ifndef _ASM_NOPS_H -#define _ASM_NOPS_H 1 +#ifndef ASM_X86__NOPS_H +#define ASM_X86__NOPS_H /* Define nops for use with alternative() */ @@ -115,4 +115,4 @@ #define ASM_NOP_MAX 8 -#endif +#endif /* ASM_X86__NOPS_H */ diff --git a/include/asm-x86/numa_32.h b/include/asm-x86/numa_32.h index 220d7b7..44cb078 100644 --- a/include/asm-x86/numa_32.h +++ b/include/asm-x86/numa_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_32_NUMA_H -#define _ASM_X86_32_NUMA_H 1 +#ifndef ASM_X86__NUMA_32_H +#define ASM_X86__NUMA_32_H extern int pxm_to_nid(int pxm); extern void numa_remove_cpu(int cpu); @@ -8,4 +8,4 @@ extern void numa_remove_cpu(int cpu); extern void set_highmem_pages_init(void); #endif -#endif /* _ASM_X86_32_NUMA_H */ +#endif /* ASM_X86__NUMA_32_H */ diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h index 3830094..15c9903 100644 --- a/include/asm-x86/numa_64.h +++ b/include/asm-x86/numa_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X8664_NUMA_H -#define _ASM_X8664_NUMA_H 1 +#ifndef ASM_X86__NUMA_64_H +#define ASM_X86__NUMA_64_H #include <linux/nodemask.h> #include <asm/apicdef.h> @@ -40,4 +40,4 @@ static inline void numa_add_cpu(int cpu, int node) { } static inline void numa_remove_cpu(int cpu) { } #endif -#endif +#endif /* ASM_X86__NUMA_64_H */ diff --git a/include/asm-x86/numaq.h b/include/asm-x86/numaq.h index 34b92d5..124bf7d 100644 --- a/include/asm-x86/numaq.h +++ b/include/asm-x86/numaq.h @@ -23,8 +23,8 @@ * Send feedback to <gone@us.ibm.com> */ -#ifndef NUMAQ_H -#define NUMAQ_H +#ifndef ASM_X86__NUMAQ_H +#define ASM_X86__NUMAQ_H #ifdef CONFIG_X86_NUMAQ @@ -165,5 +165,5 @@ static inline int get_memcfg_numaq(void) return 0; } #endif /* CONFIG_X86_NUMAQ */ -#endif /* NUMAQ_H */ +#endif /* ASM_X86__NUMAQ_H */ diff --git a/include/asm-x86/olpc.h b/include/asm-x86/olpc.h index 97d4713..d7328b1 100644 --- a/include/asm-x86/olpc.h +++ b/include/asm-x86/olpc.h @@ -1,7 +1,7 @@ /* OLPC machine specific definitions */ -#ifndef ASM_OLPC_H_ -#define ASM_OLPC_H_ +#ifndef ASM_X86__OLPC_H +#define ASM_X86__OLPC_H #include <asm/geode.h> @@ -129,4 +129,4 @@ extern int olpc_ec_mask_unset(uint8_t bits); #define OLPC_GPIO_LID geode_gpio(26) #define OLPC_GPIO_ECSCI geode_gpio(27) -#endif +#endif /* ASM_X86__OLPC_H */ diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index 4998211..79544e6 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PAGE_H -#define _ASM_X86_PAGE_H +#ifndef ASM_X86__PAGE_H +#define ASM_X86__PAGE_H #include <linux/const.h> @@ -199,4 +199,4 @@ static inline pteval_t native_pte_flags(pte_t pte) #define __HAVE_ARCH_GATE_AREA 1 #endif /* __KERNEL__ */ -#endif /* _ASM_X86_PAGE_H */ +#endif /* ASM_X86__PAGE_H */ diff --git a/include/asm-x86/page_32.h b/include/asm-x86/page_32.h index ab85287..72f7305 100644 --- a/include/asm-x86/page_32.h +++ b/include/asm-x86/page_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PAGE_32_H -#define _ASM_X86_PAGE_32_H +#ifndef ASM_X86__PAGE_32_H +#define ASM_X86__PAGE_32_H /* * This handles the memory map. @@ -89,13 +89,11 @@ extern int nx_enabled; extern unsigned int __VMALLOC_RESERVE; extern int sysctl_legacy_va_layout; -#define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) -#define MAXMEM (-__PAGE_OFFSET - __VMALLOC_RESERVE) - extern void find_low_pfn_range(void); extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); extern void initmem_init(unsigned long, unsigned long); +extern void free_initmem(void); extern void setup_bootmem_allocator(void); @@ -126,4 +124,4 @@ static inline void copy_page(void *to, void *from) #endif /* CONFIG_X86_3DNOW */ #endif /* !__ASSEMBLY__ */ -#endif /* _ASM_X86_PAGE_32_H */ +#endif /* ASM_X86__PAGE_32_H */ diff --git a/include/asm-x86/page_64.h b/include/asm-x86/page_64.h index c6916c8..5e64acf 100644 --- a/include/asm-x86/page_64.h +++ b/include/asm-x86/page_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_PAGE_H -#define _X86_64_PAGE_H +#ifndef ASM_X86__PAGE_64_H +#define ASM_X86__PAGE_64_H #define PAGETABLE_LEVELS 4 @@ -91,6 +91,7 @@ extern unsigned long init_memory_mapping(unsigned long start, unsigned long end); extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +extern void free_initmem(void); extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); @@ -102,4 +103,4 @@ extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); #endif -#endif /* _X86_64_PAGE_H */ +#endif /* ASM_X86__PAGE_64_H */ diff --git a/include/asm-x86/param.h b/include/asm-x86/param.h index 6f0d042..0009cfb 100644 --- a/include/asm-x86/param.h +++ b/include/asm-x86/param.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PARAM_H -#define _ASM_X86_PARAM_H +#ifndef ASM_X86__PARAM_H +#define ASM_X86__PARAM_H #ifdef __KERNEL__ # define HZ CONFIG_HZ /* Internal kernel timer frequency */ @@ -19,4 +19,4 @@ #define MAXHOSTNAMELEN 64 /* max length of hostname */ -#endif /* _ASM_X86_PARAM_H */ +#endif /* ASM_X86__PARAM_H */ diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index fbbde93..891971f 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -1,5 +1,5 @@ -#ifndef __ASM_PARAVIRT_H -#define __ASM_PARAVIRT_H +#ifndef ASM_X86__PARAVIRT_H +#define ASM_X86__PARAVIRT_H /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ @@ -137,6 +137,7 @@ struct pv_cpu_ops { /* MSR, PMC and TSR operations. err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ + u64 (*read_msr_amd)(unsigned int msr, int *err); u64 (*read_msr)(unsigned int msr, int *err); int (*write_msr)(unsigned int msr, unsigned low, unsigned high); @@ -257,13 +258,13 @@ struct pv_mmu_ops { * Hooks for allocating/releasing pagetable pages when they're * attached to a pagetable */ - void (*alloc_pte)(struct mm_struct *mm, u32 pfn); - void (*alloc_pmd)(struct mm_struct *mm, u32 pfn); - void (*alloc_pmd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); - void (*alloc_pud)(struct mm_struct *mm, u32 pfn); - void (*release_pte)(u32 pfn); - void (*release_pmd)(u32 pfn); - void (*release_pud)(u32 pfn); + void (*alloc_pte)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd)(struct mm_struct *mm, unsigned long pfn); + void (*alloc_pmd_clone)(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count); + void (*alloc_pud)(struct mm_struct *mm, unsigned long pfn); + void (*release_pte)(unsigned long pfn); + void (*release_pmd)(unsigned long pfn); + void (*release_pud)(unsigned long pfn); /* Pagetable manipulation functions */ void (*set_pte)(pte_t *ptep, pte_t pteval); @@ -726,6 +727,10 @@ static inline u64 paravirt_read_msr(unsigned msr, int *err) { return PVOP_CALL2(u64, pv_cpu_ops.read_msr, msr, err); } +static inline u64 paravirt_read_msr_amd(unsigned msr, int *err) +{ + return PVOP_CALL2(u64, pv_cpu_ops.read_msr_amd, msr, err); +} static inline int paravirt_write_msr(unsigned msr, unsigned low, unsigned high) { return PVOP_CALL3(int, pv_cpu_ops.write_msr, msr, low, high); @@ -771,6 +776,13 @@ static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) *p = paravirt_read_msr(msr, &err); return err; } +static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = paravirt_read_msr_amd(msr, &err); + return err; +} static inline u64 paravirt_read_tsc(void) { @@ -993,35 +1005,35 @@ static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) PVOP_VCALL2(pv_mmu_ops.pgd_free, mm, pgd); } -static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned pfn) +static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(pv_mmu_ops.alloc_pte, mm, pfn); } -static inline void paravirt_release_pte(unsigned pfn) +static inline void paravirt_release_pte(unsigned long pfn) { PVOP_VCALL1(pv_mmu_ops.release_pte, pfn); } -static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned pfn) +static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(pv_mmu_ops.alloc_pmd, mm, pfn); } -static inline void paravirt_alloc_pmd_clone(unsigned pfn, unsigned clonepfn, - unsigned start, unsigned count) +static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn, + unsigned long start, unsigned long count) { PVOP_VCALL4(pv_mmu_ops.alloc_pmd_clone, pfn, clonepfn, start, count); } -static inline void paravirt_release_pmd(unsigned pfn) +static inline void paravirt_release_pmd(unsigned long pfn) { PVOP_VCALL1(pv_mmu_ops.release_pmd, pfn); } -static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned pfn) +static inline void paravirt_alloc_pud(struct mm_struct *mm, unsigned long pfn) { PVOP_VCALL2(pv_mmu_ops.alloc_pud, mm, pfn); } -static inline void paravirt_release_pud(unsigned pfn) +static inline void paravirt_release_pud(unsigned long pfn) { PVOP_VCALL1(pv_mmu_ops.release_pud, pfn); } @@ -1634,4 +1646,4 @@ static inline unsigned long __raw_local_irq_save(void) #endif /* __ASSEMBLY__ */ #endif /* CONFIG_PARAVIRT */ -#endif /* __ASM_PARAVIRT_H */ +#endif /* ASM_X86__PARAVIRT_H */ diff --git a/include/asm-x86/parport.h b/include/asm-x86/parport.h index 3c4ffeb..2e3dda4 100644 --- a/include/asm-x86/parport.h +++ b/include/asm-x86/parport.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PARPORT_H -#define _ASM_X86_PARPORT_H +#ifndef ASM_X86__PARPORT_H +#define ASM_X86__PARPORT_H static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma); static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) @@ -7,4 +7,4 @@ static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma) return parport_pc_find_isa_ports(autoirq, autodma); } -#endif /* _ASM_X86_PARPORT_H */ +#endif /* ASM_X86__PARPORT_H */ diff --git a/include/asm-x86/pat.h b/include/asm-x86/pat.h index 7edc473..482c3e3 100644 --- a/include/asm-x86/pat.h +++ b/include/asm-x86/pat.h @@ -1,5 +1,5 @@ -#ifndef _ASM_PAT_H -#define _ASM_PAT_H +#ifndef ASM_X86__PAT_H +#define ASM_X86__PAT_H #include <linux/types.h> @@ -19,4 +19,4 @@ extern int free_memtype(u64 start, u64 end); extern void pat_disable(char *reason); -#endif +#endif /* ASM_X86__PAT_H */ diff --git a/include/asm-x86/pci-direct.h b/include/asm-x86/pci-direct.h index 80c775d..da42be0 100644 --- a/include/asm-x86/pci-direct.h +++ b/include/asm-x86/pci-direct.h @@ -1,5 +1,5 @@ -#ifndef ASM_PCI_DIRECT_H -#define ASM_PCI_DIRECT_H 1 +#ifndef ASM_X86__PCI_DIRECT_H +#define ASM_X86__PCI_DIRECT_H #include <linux/types.h> @@ -18,4 +18,4 @@ extern int early_pci_allowed(void); extern unsigned int pci_early_dump_regs; extern void early_dump_pci_device(u8 bus, u8 slot, u8 func); extern void early_dump_pci_devices(void); -#endif +#endif /* ASM_X86__PCI_DIRECT_H */ diff --git a/include/asm-x86/pci.h b/include/asm-x86/pci.h index 2db14cf..6025831 100644 --- a/include/asm-x86/pci.h +++ b/include/asm-x86/pci.h @@ -1,5 +1,5 @@ -#ifndef __x86_PCI_H -#define __x86_PCI_H +#ifndef ASM_X86__PCI_H +#define ASM_X86__PCI_H #include <linux/mm.h> /* for struct page */ #include <linux/types.h> @@ -111,4 +111,4 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) } #endif -#endif +#endif /* ASM_X86__PCI_H */ diff --git a/include/asm-x86/pci_32.h b/include/asm-x86/pci_32.h index a50d468..3f22882 100644 --- a/include/asm-x86/pci_32.h +++ b/include/asm-x86/pci_32.h @@ -1,5 +1,5 @@ -#ifndef __i386_PCI_H -#define __i386_PCI_H +#ifndef ASM_X86__PCI_32_H +#define ASM_X86__PCI_32_H #ifdef __KERNEL__ @@ -31,4 +31,4 @@ struct pci_dev; #endif /* __KERNEL__ */ -#endif /* __i386_PCI_H */ +#endif /* ASM_X86__PCI_32_H */ diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h index f330234..f72e12d 100644 --- a/include/asm-x86/pci_64.h +++ b/include/asm-x86/pci_64.h @@ -1,5 +1,5 @@ -#ifndef __x8664_PCI_H -#define __x8664_PCI_H +#ifndef ASM_X86__PCI_64_H +#define ASM_X86__PCI_64_H #ifdef __KERNEL__ @@ -63,4 +63,4 @@ extern void pci_iommu_alloc(void); #endif /* __KERNEL__ */ -#endif /* __x8664_PCI_H */ +#endif /* ASM_X86__PCI_64_H */ diff --git a/include/asm-x86/pda.h b/include/asm-x86/pda.h index b34e9a7..80860af 100644 --- a/include/asm-x86/pda.h +++ b/include/asm-x86/pda.h @@ -1,5 +1,5 @@ -#ifndef X86_64_PDA_H -#define X86_64_PDA_H +#ifndef ASM_X86__PDA_H +#define ASM_X86__PDA_H #ifndef __ASSEMBLY__ #include <linux/stddef.h> @@ -134,4 +134,4 @@ do { \ #define PDA_STACKOFFSET (5*8) -#endif +#endif /* ASM_X86__PDA_H */ diff --git a/include/asm-x86/percpu.h b/include/asm-x86/percpu.h index f643a3a..e10a1d0 100644 --- a/include/asm-x86/percpu.h +++ b/include/asm-x86/percpu.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PERCPU_H_ -#define _ASM_X86_PERCPU_H_ +#ifndef ASM_X86__PERCPU_H +#define ASM_X86__PERCPU_H #ifdef CONFIG_X86_64 #include <linux/compiler.h> @@ -215,4 +215,4 @@ do { \ #endif /* !CONFIG_SMP */ -#endif /* _ASM_X86_PERCPU_H_ */ +#endif /* ASM_X86__PERCPU_H */ diff --git a/include/asm-x86/pgalloc.h b/include/asm-x86/pgalloc.h index d63ea43..3cd23ad 100644 --- a/include/asm-x86/pgalloc.h +++ b/include/asm-x86/pgalloc.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PGALLOC_H -#define _ASM_X86_PGALLOC_H +#ifndef ASM_X86__PGALLOC_H +#define ASM_X86__PGALLOC_H #include <linux/threads.h> #include <linux/mm.h> /* for struct page */ @@ -111,4 +111,4 @@ extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); #endif /* PAGETABLE_LEVELS > 3 */ #endif /* PAGETABLE_LEVELS > 2 */ -#endif /* _ASM_X86_PGALLOC_H */ +#endif /* ASM_X86__PGALLOC_H */ diff --git a/include/asm-x86/pgtable-2level-defs.h b/include/asm-x86/pgtable-2level-defs.h index 0f71c9f..7ec48f4 100644 --- a/include/asm-x86/pgtable-2level-defs.h +++ b/include/asm-x86/pgtable-2level-defs.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_2LEVEL_DEFS_H -#define _I386_PGTABLE_2LEVEL_DEFS_H +#ifndef ASM_X86__PGTABLE_2LEVEL_DEFS_H +#define ASM_X86__PGTABLE_2LEVEL_DEFS_H #define SHARED_KERNEL_PMD 0 @@ -17,4 +17,4 @@ #define PTRS_PER_PTE 1024 -#endif /* _I386_PGTABLE_2LEVEL_DEFS_H */ +#endif /* ASM_X86__PGTABLE_2LEVEL_DEFS_H */ diff --git a/include/asm-x86/pgtable-2level.h b/include/asm-x86/pgtable-2level.h index 46bc52c..8176208 100644 --- a/include/asm-x86/pgtable-2level.h +++ b/include/asm-x86/pgtable-2level.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_2LEVEL_H -#define _I386_PGTABLE_2LEVEL_H +#ifndef ASM_X86__PGTABLE_2LEVEL_H +#define ASM_X86__PGTABLE_2LEVEL_H #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) @@ -53,9 +53,7 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) #endif -#define pte_page(x) pfn_to_page(pte_pfn(x)) #define pte_none(x) (!(x).pte_low) -#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT) /* * Bits 0, 6 and 7 are taken, split up the 29 bits of offset @@ -78,4 +76,4 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) -#endif /* _I386_PGTABLE_2LEVEL_H */ +#endif /* ASM_X86__PGTABLE_2LEVEL_H */ diff --git a/include/asm-x86/pgtable-3level-defs.h b/include/asm-x86/pgtable-3level-defs.h index 448ac95..c05fe6f 100644 --- a/include/asm-x86/pgtable-3level-defs.h +++ b/include/asm-x86/pgtable-3level-defs.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_3LEVEL_DEFS_H -#define _I386_PGTABLE_3LEVEL_DEFS_H +#ifndef ASM_X86__PGTABLE_3LEVEL_DEFS_H +#define ASM_X86__PGTABLE_3LEVEL_DEFS_H #ifdef CONFIG_PARAVIRT #define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) @@ -25,4 +25,4 @@ */ #define PTRS_PER_PTE 512 -#endif /* _I386_PGTABLE_3LEVEL_DEFS_H */ +#endif /* ASM_X86__PGTABLE_3LEVEL_DEFS_H */ diff --git a/include/asm-x86/pgtable-3level.h b/include/asm-x86/pgtable-3level.h index 105057f..75f4276 100644 --- a/include/asm-x86/pgtable-3level.h +++ b/include/asm-x86/pgtable-3level.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_3LEVEL_H -#define _I386_PGTABLE_3LEVEL_H +#ifndef ASM_X86__PGTABLE_3LEVEL_H +#define ASM_X86__PGTABLE_3LEVEL_H /* * Intel Physical Address Extension (PAE) Mode - three-level page @@ -151,18 +151,11 @@ static inline int pte_same(pte_t a, pte_t b) return a.pte_low == b.pte_low && a.pte_high == b.pte_high; } -#define pte_page(x) pfn_to_page(pte_pfn(x)) - static inline int pte_none(pte_t pte) { return !pte.pte_low && !pte.pte_high; } -static inline unsigned long pte_pfn(pte_t pte) -{ - return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; -} - /* * Bits 0, 6 and 7 are taken in the low part of the pte, * put the 32 bits of offset into the high part. @@ -179,4 +172,4 @@ static inline unsigned long pte_pfn(pte_t pte) #define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high }) #define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } }) -#endif /* _I386_PGTABLE_3LEVEL_H */ +#endif /* ASM_X86__PGTABLE_3LEVEL_H */ diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index 04caa2f..888add7 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PGTABLE_H -#define _ASM_X86_PGTABLE_H +#ifndef ASM_X86__PGTABLE_H +#define ASM_X86__PGTABLE_H #define FIRST_USER_ADDRESS 0 @@ -186,6 +186,13 @@ static inline int pte_special(pte_t pte) return pte_val(pte) & _PAGE_SPECIAL; } +static inline unsigned long pte_pfn(pte_t pte) +{ + return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; +} + +#define pte_page(pte) pfn_to_page(pte_pfn(pte)) + static inline int pmd_large(pmd_t pte) { return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == @@ -313,6 +320,8 @@ static inline void native_pagetable_setup_start(pgd_t *base) {} static inline void native_pagetable_setup_done(pgd_t *base) {} #endif +extern int arch_report_meminfo(char *page); + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT */ @@ -521,4 +530,4 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count) #include <asm-generic/pgtable.h> #endif /* __ASSEMBLY__ */ -#endif /* _ASM_X86_PGTABLE_H */ +#endif /* ASM_X86__PGTABLE_H */ diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h index 5c3b265..8de702d 100644 --- a/include/asm-x86/pgtable_32.h +++ b/include/asm-x86/pgtable_32.h @@ -1,5 +1,5 @@ -#ifndef _I386_PGTABLE_H -#define _I386_PGTABLE_H +#ifndef ASM_X86__PGTABLE_32_H +#define ASM_X86__PGTABLE_32_H /* @@ -31,6 +31,7 @@ static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } void paging_init(void); +extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); /* * The Linux x86 paging architecture is 'compile-time dual-mode', it @@ -56,8 +57,7 @@ void paging_init(void); * area for the same reason. ;) */ #define VMALLOC_OFFSET (8 * 1024 * 1024) -#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \ - & ~(VMALLOC_OFFSET - 1)) +#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) #ifdef CONFIG_X86_PAE #define LAST_PKMAP 512 #else @@ -73,6 +73,8 @@ void paging_init(void); # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) #endif +#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + /* * Define this if things work differently on an i386 and an i486: * it will (on an i486) warn about kernel memory accesses that are @@ -186,4 +188,4 @@ do { \ #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ remap_pfn_range(vma, vaddr, pfn, size, prot) -#endif /* _I386_PGTABLE_H */ +#endif /* ASM_X86__PGTABLE_32_H */ diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h index 549144d..fde9770 100644 --- a/include/asm-x86/pgtable_64.h +++ b/include/asm-x86/pgtable_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_PGTABLE_H -#define _X86_64_PGTABLE_H +#ifndef ASM_X86__PGTABLE_64_H +#define ASM_X86__PGTABLE_64_H #include <linux/const.h> #ifndef __ASSEMBLY__ @@ -175,8 +175,6 @@ static inline int pmd_bad(pmd_t pmd) #define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE)) #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ -#define pte_page(x) pfn_to_page(pte_pfn((x))) -#define pte_pfn(x) ((pte_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) /* * Macro to mark a page protection value as "uncacheable". @@ -284,4 +282,4 @@ extern void cleanup_highmap(void); #define __HAVE_ARCH_PTE_SAME #endif /* !__ASSEMBLY__ */ -#endif /* _X86_64_PGTABLE_H */ +#endif /* ASM_X86__PGTABLE_64_H */ diff --git a/include/asm-x86/posix_types_32.h b/include/asm-x86/posix_types_32.h index b031efd..70cf2bb 100644 --- a/include/asm-x86/posix_types_32.h +++ b/include/asm-x86/posix_types_32.h @@ -1,5 +1,5 @@ -#ifndef __ARCH_I386_POSIX_TYPES_H -#define __ARCH_I386_POSIX_TYPES_H +#ifndef ASM_X86__POSIX_TYPES_32_H +#define ASM_X86__POSIX_TYPES_32_H /* * This file is generally used by user-level software, so you need to @@ -82,4 +82,4 @@ do { \ #endif /* defined(__KERNEL__) */ -#endif +#endif /* ASM_X86__POSIX_TYPES_32_H */ diff --git a/include/asm-x86/posix_types_64.h b/include/asm-x86/posix_types_64.h index d6624c9..388b4e7 100644 --- a/include/asm-x86/posix_types_64.h +++ b/include/asm-x86/posix_types_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_POSIX_TYPES_H -#define _ASM_X86_64_POSIX_TYPES_H +#ifndef ASM_X86__POSIX_TYPES_64_H +#define ASM_X86__POSIX_TYPES_64_H /* * This file is generally used by user-level software, so you need to @@ -116,4 +116,4 @@ static inline void __FD_ZERO(__kernel_fd_set *p) #endif /* defined(__KERNEL__) */ -#endif +#endif /* ASM_X86__POSIX_TYPES_64_H */ diff --git a/include/asm-x86/prctl.h b/include/asm-x86/prctl.h index 52952ad..e7ae34e 100644 --- a/include/asm-x86/prctl.h +++ b/include/asm-x86/prctl.h @@ -1,5 +1,5 @@ -#ifndef X86_64_PRCTL_H -#define X86_64_PRCTL_H 1 +#ifndef ASM_X86__PRCTL_H +#define ASM_X86__PRCTL_H #define ARCH_SET_GS 0x1001 #define ARCH_SET_FS 0x1002 @@ -7,4 +7,4 @@ #define ARCH_GET_GS 0x1004 -#endif +#endif /* ASM_X86__PRCTL_H */ diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h index eff2ecd..5dd7977 100644 --- a/include/asm-x86/processor-flags.h +++ b/include/asm-x86/processor-flags.h @@ -1,5 +1,5 @@ -#ifndef __ASM_I386_PROCESSOR_FLAGS_H -#define __ASM_I386_PROCESSOR_FLAGS_H +#ifndef ASM_X86__PROCESSOR_FLAGS_H +#define ASM_X86__PROCESSOR_FLAGS_H /* Various flags defined: can be included from assembler. */ /* @@ -96,4 +96,4 @@ #endif #endif -#endif /* __ASM_I386_PROCESSOR_FLAGS_H */ +#endif /* ASM_X86__PROCESSOR_FLAGS_H */ diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 4df3e2f..5eaf9bf 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -1,5 +1,5 @@ -#ifndef __ASM_X86_PROCESSOR_H -#define __ASM_X86_PROCESSOR_H +#ifndef ASM_X86__PROCESSOR_H +#define ASM_X86__PROCESSOR_H #include <asm/processor-flags.h> @@ -20,6 +20,7 @@ struct mm_struct; #include <asm/msr.h> #include <asm/desc_defs.h> #include <asm/nops.h> +#include <asm/ds.h> #include <linux/personality.h> #include <linux/cpumask.h> @@ -140,6 +141,8 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); #define current_cpu_data boot_cpu_data #endif +extern const struct seq_operations cpuinfo_op; + static inline int hlt_works(int cpu) { #ifdef CONFIG_X86_32 @@ -153,6 +156,8 @@ static inline int hlt_works(int cpu) extern void cpu_detect(struct cpuinfo_x86 *c); +extern struct pt_regs *idle_regs(struct pt_regs *); + extern void early_cpu_init(void); extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); @@ -411,9 +416,14 @@ struct thread_struct { unsigned io_bitmap_max; /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ unsigned long debugctlmsr; -/* Debug Store - if not 0 points to a DS Save Area configuration; - * goes into MSR_IA32_DS_AREA */ - unsigned long ds_area_msr; +#ifdef CONFIG_X86_DS +/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ + struct ds_context *ds_ctx; +#endif /* CONFIG_X86_DS */ +#ifdef CONFIG_X86_PTRACE_BTS +/* the signal to send on a bts buffer overflow */ + unsigned int bts_ovfl_signal; +#endif /* CONFIG_X86_PTRACE_BTS */ }; static inline unsigned long native_get_debugreg(int regno) @@ -943,4 +953,4 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); -#endif +#endif /* ASM_X86__PROCESSOR_H */ diff --git a/include/asm-x86/proto.h b/include/asm-x86/proto.h index 3dd458c..6e89e8b 100644 --- a/include/asm-x86/proto.h +++ b/include/asm-x86/proto.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X8664_PROTO_H -#define _ASM_X8664_PROTO_H 1 +#ifndef ASM_X86__PROTO_H +#define ASM_X86__PROTO_H #include <asm/ldt.h> @@ -29,4 +29,4 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); #define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1)) #define round_down(x, y) ((x) & ~((y) - 1)) -#endif +#endif /* ASM_X86__PROTO_H */ diff --git a/include/asm-x86/ptrace-abi.h b/include/asm-x86/ptrace-abi.h index 72e7b9d..4298b88 100644 --- a/include/asm-x86/ptrace-abi.h +++ b/include/asm-x86/ptrace-abi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PTRACE_ABI_H -#define _ASM_X86_PTRACE_ABI_H +#ifndef ASM_X86__PTRACE_ABI_H +#define ASM_X86__PTRACE_ABI_H #ifdef __i386__ @@ -80,8 +80,9 @@ #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ -#ifndef __ASSEMBLY__ +#ifdef CONFIG_X86_PTRACE_BTS +#ifndef __ASSEMBLY__ #include <asm/types.h> /* configuration/status structure used in PTRACE_BTS_CONFIG and @@ -97,20 +98,20 @@ struct ptrace_bts_config { /* actual size of bts_struct in bytes */ __u32 bts_size; }; -#endif +#endif /* __ASSEMBLY__ */ #define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ #define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ #define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG<signal> on buffer overflow instead of wrapping around */ -#define PTRACE_BTS_O_CUT_SIZE 0x8 /* cut requested size to max available - instead of failing */ +#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ #define PTRACE_BTS_CONFIG 40 /* Configure branch trace recording. ADDR points to a struct ptrace_bts_config. DATA gives the size of that buffer. - A new buffer is allocated, iff the size changes. + A new buffer is allocated, if requested in the flags. + An overflow signal may only be requested for new buffers. Returns the number of bytes read. */ #define PTRACE_BTS_STATUS 41 @@ -119,7 +120,7 @@ struct ptrace_bts_config { Returns the number of bytes written. */ #define PTRACE_BTS_SIZE 42 -/* Return the number of available BTS records. +/* Return the number of available BTS records for draining. DATA and ADDR are ignored. */ #define PTRACE_BTS_GET 43 @@ -139,5 +140,6 @@ struct ptrace_bts_config { BTS records are read from oldest to newest. Returns number of BTS records drained. */ +#endif /* CONFIG_X86_PTRACE_BTS */ -#endif +#endif /* ASM_X86__PTRACE_ABI_H */ diff --git a/include/asm-x86/ptrace.h b/include/asm-x86/ptrace.h index 8a71db8..d64a610 100644 --- a/include/asm-x86/ptrace.h +++ b/include/asm-x86/ptrace.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PTRACE_H -#define _ASM_X86_PTRACE_H +#ifndef ASM_X86__PTRACE_H +#define ASM_X86__PTRACE_H #include <linux/compiler.h> /* For __user */ #include <asm/ptrace-abi.h> @@ -127,14 +127,48 @@ struct pt_regs { #endif /* __KERNEL__ */ #endif /* !__i386__ */ + +#ifdef CONFIG_X86_PTRACE_BTS +/* a branch trace record entry + * + * In order to unify the interface between various processor versions, + * we use the below data structure for all processors. + */ +enum bts_qualifier { + BTS_INVALID = 0, + BTS_BRANCH, + BTS_TASK_ARRIVES, + BTS_TASK_DEPARTS +}; + +struct bts_struct { + __u64 qualifier; + union { + /* BTS_BRANCH */ + struct { + __u64 from_ip; + __u64 to_ip; + } lbr; + /* BTS_TASK_ARRIVES or + BTS_TASK_DEPARTS */ + __u64 jiffies; + } variant; +}; +#endif /* CONFIG_X86_PTRACE_BTS */ + #ifdef __KERNEL__ -/* the DS BTS struct is used for ptrace as well */ -#include <asm/ds.h> +#include <linux/init.h> +struct cpuinfo_x86; struct task_struct; +#ifdef CONFIG_X86_PTRACE_BTS +extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); +#else +#define ptrace_bts_init_intel(config) do {} while (0) +#endif /* CONFIG_X86_PTRACE_BTS */ extern unsigned long profile_pc(struct pt_regs *regs); @@ -148,6 +182,9 @@ extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, void signal_fault(struct pt_regs *regs, void __user *frame, char *where); #endif +extern long syscall_trace_enter(struct pt_regs *); +extern void syscall_trace_leave(struct pt_regs *); + static inline unsigned long regs_return_value(struct pt_regs *regs) { return regs->ax; @@ -213,6 +250,11 @@ static inline unsigned long frame_pointer(struct pt_regs *regs) return regs->bp; } +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->sp; +} + /* * These are defined as per linux/ptrace.h, which see. */ @@ -239,4 +281,4 @@ extern int do_set_thread_area(struct task_struct *p, int idx, #endif /* !__ASSEMBLY__ */ -#endif +#endif /* ASM_X86__PTRACE_H */ diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h index 6857f84..edb3b4e 100644 --- a/include/asm-x86/pvclock-abi.h +++ b/include/asm-x86/pvclock-abi.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PVCLOCK_ABI_H_ -#define _ASM_X86_PVCLOCK_ABI_H_ +#ifndef ASM_X86__PVCLOCK_ABI_H +#define ASM_X86__PVCLOCK_ABI_H #ifndef __ASSEMBLY__ /* @@ -39,4 +39,4 @@ struct pvclock_wall_clock { } __attribute__((__packed__)); #endif /* __ASSEMBLY__ */ -#endif /* _ASM_X86_PVCLOCK_ABI_H_ */ +#endif /* ASM_X86__PVCLOCK_ABI_H */ diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h index 85b1bba..1a38f68 100644 --- a/include/asm-x86/pvclock.h +++ b/include/asm-x86/pvclock.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_PVCLOCK_H_ -#define _ASM_X86_PVCLOCK_H_ +#ifndef ASM_X86__PVCLOCK_H +#define ASM_X86__PVCLOCK_H #include <linux/clocksource.h> #include <asm/pvclock-abi.h> @@ -10,4 +10,4 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct pvclock_vcpu_time_info *vcpu, struct timespec *ts); -#endif /* _ASM_X86_PVCLOCK_H_ */ +#endif /* ASM_X86__PVCLOCK_H */ diff --git a/include/asm-x86/reboot.h b/include/asm-x86/reboot.h index 206f355..1c2f0ce 100644 --- a/include/asm-x86/reboot.h +++ b/include/asm-x86/reboot.h @@ -1,5 +1,5 @@ -#ifndef _ASM_REBOOT_H -#define _ASM_REBOOT_H +#ifndef ASM_X86__REBOOT_H +#define ASM_X86__REBOOT_H struct pt_regs; @@ -18,4 +18,4 @@ void native_machine_crash_shutdown(struct pt_regs *regs); void native_machine_shutdown(void); void machine_real_restart(const unsigned char *code, int length); -#endif /* _ASM_REBOOT_H */ +#endif /* ASM_X86__REBOOT_H */ diff --git a/include/asm-x86/reboot_fixups.h b/include/asm-x86/reboot_fixups.h index 0cb7d87..2c2987d 100644 --- a/include/asm-x86/reboot_fixups.h +++ b/include/asm-x86/reboot_fixups.h @@ -1,6 +1,6 @@ -#ifndef _LINUX_REBOOT_FIXUPS_H -#define _LINUX_REBOOT_FIXUPS_H +#ifndef ASM_X86__REBOOT_FIXUPS_H +#define ASM_X86__REBOOT_FIXUPS_H extern void mach_reboot_fixups(void); -#endif /* _LINUX_REBOOT_FIXUPS_H */ +#endif /* ASM_X86__REBOOT_FIXUPS_H */ diff --git a/include/asm-x86/required-features.h b/include/asm-x86/required-features.h index 5c2ff4b..a01c4e37 100644 --- a/include/asm-x86/required-features.h +++ b/include/asm-x86/required-features.h @@ -1,5 +1,5 @@ -#ifndef _ASM_REQUIRED_FEATURES_H -#define _ASM_REQUIRED_FEATURES_H 1 +#ifndef ASM_X86__REQUIRED_FEATURES_H +#define ASM_X86__REQUIRED_FEATURES_H /* Define minimum CPUID feature set for kernel These bits are checked really early to actually display a visible error message before the @@ -79,4 +79,4 @@ #define REQUIRED_MASK6 0 #define REQUIRED_MASK7 0 -#endif +#endif /* ASM_X86__REQUIRED_FEATURES_H */ diff --git a/include/asm-x86/resume-trace.h b/include/asm-x86/resume-trace.h index 8d9f0b4..e39376d 100644 --- a/include/asm-x86/resume-trace.h +++ b/include/asm-x86/resume-trace.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_RESUME_TRACE_H -#define _ASM_X86_RESUME_TRACE_H +#ifndef ASM_X86__RESUME_TRACE_H +#define ASM_X86__RESUME_TRACE_H #include <asm/asm.h> @@ -7,7 +7,7 @@ do { \ if (pm_trace_enabled) { \ const void *tracedata; \ - asm volatile(_ASM_MOV_UL " $1f,%0\n" \ + asm volatile(_ASM_MOV " $1f,%0\n" \ ".section .tracedata,\"a\"\n" \ "1:\t.word %c1\n\t" \ _ASM_PTR " %c2\n" \ @@ -18,4 +18,4 @@ do { \ } \ } while (0) -#endif +#endif /* ASM_X86__RESUME_TRACE_H */ diff --git a/include/asm-x86/rio.h b/include/asm-x86/rio.h index c9448bd..5e1256b 100644 --- a/include/asm-x86/rio.h +++ b/include/asm-x86/rio.h @@ -5,8 +5,8 @@ * Author: Laurent Vivier <Laurent.Vivier@bull.net> */ -#ifndef __ASM_RIO_H -#define __ASM_RIO_H +#ifndef ASM_X86__RIO_H +#define ASM_X86__RIO_H #define RIO_TABLE_VERSION 3 @@ -60,4 +60,4 @@ enum { ALT_CALGARY = 5, /* Second Planar Calgary */ }; -#endif /* __ASM_RIO_H */ +#endif /* ASM_X86__RIO_H */ diff --git a/include/asm-x86/rwlock.h b/include/asm-x86/rwlock.h index 6a8c0d6..48a3109 100644 --- a/include/asm-x86/rwlock.h +++ b/include/asm-x86/rwlock.h @@ -1,8 +1,8 @@ -#ifndef _ASM_X86_RWLOCK_H -#define _ASM_X86_RWLOCK_H +#ifndef ASM_X86__RWLOCK_H +#define ASM_X86__RWLOCK_H #define RW_LOCK_BIAS 0x01000000 /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ -#endif /* _ASM_X86_RWLOCK_H */ +#endif /* ASM_X86__RWLOCK_H */ diff --git a/include/asm-x86/rwsem.h b/include/asm-x86/rwsem.h index 750f2a3..3ff3015 100644 --- a/include/asm-x86/rwsem.h +++ b/include/asm-x86/rwsem.h @@ -29,8 +29,8 @@ * front, then they'll all be woken up, but no other readers will be. */ -#ifndef _I386_RWSEM_H -#define _I386_RWSEM_H +#ifndef ASM_X86__RWSEM_H +#define ASM_X86__RWSEM_H #ifndef _LINUX_RWSEM_H #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" @@ -262,4 +262,4 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem) } #endif /* __KERNEL__ */ -#endif /* _I386_RWSEM_H */ +#endif /* ASM_X86__RWSEM_H */ diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h index c043206..ee48f88 100644 --- a/include/asm-x86/scatterlist.h +++ b/include/asm-x86/scatterlist.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SCATTERLIST_H -#define _ASM_X86_SCATTERLIST_H +#ifndef ASM_X86__SCATTERLIST_H +#define ASM_X86__SCATTERLIST_H #include <asm/types.h> @@ -30,4 +30,4 @@ struct scatterlist { # define sg_dma_len(sg) ((sg)->dma_length) #endif -#endif +#endif /* ASM_X86__SCATTERLIST_H */ diff --git a/include/asm-x86/seccomp_32.h b/include/asm-x86/seccomp_32.h index 36e71c5..cf9ab2d 100644 --- a/include/asm-x86/seccomp_32.h +++ b/include/asm-x86/seccomp_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SECCOMP_H -#define _ASM_SECCOMP_H +#ifndef ASM_X86__SECCOMP_32_H +#define ASM_X86__SECCOMP_32_H #include <linux/thread_info.h> @@ -14,4 +14,4 @@ #define __NR_seccomp_exit __NR_exit #define __NR_seccomp_sigreturn __NR_sigreturn -#endif /* _ASM_SECCOMP_H */ +#endif /* ASM_X86__SECCOMP_32_H */ diff --git a/include/asm-x86/seccomp_64.h b/include/asm-x86/seccomp_64.h index 76cfe69..03274ce 100644 --- a/include/asm-x86/seccomp_64.h +++ b/include/asm-x86/seccomp_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SECCOMP_H -#define _ASM_SECCOMP_H +#ifndef ASM_X86__SECCOMP_64_H +#define ASM_X86__SECCOMP_64_H #include <linux/thread_info.h> @@ -22,4 +22,4 @@ #define __NR_seccomp_exit_32 __NR_ia32_exit #define __NR_seccomp_sigreturn_32 __NR_ia32_sigreturn -#endif /* _ASM_SECCOMP_H */ +#endif /* ASM_X86__SECCOMP_64_H */ diff --git a/include/asm-x86/segment.h b/include/asm-x86/segment.h index 646452e..ea5f0a8 100644 --- a/include/asm-x86/segment.h +++ b/include/asm-x86/segment.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SEGMENT_H_ -#define _ASM_X86_SEGMENT_H_ +#ifndef ASM_X86__SEGMENT_H +#define ASM_X86__SEGMENT_H /* Constructor for a conventional segment GDT (or LDT) entry */ /* This is a macro so it can be used in initializers */ @@ -212,4 +212,4 @@ extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][10]; #endif #endif -#endif +#endif /* ASM_X86__SEGMENT_H */ diff --git a/include/asm-x86/sembuf.h b/include/asm-x86/sembuf.h index ee50c80..81f06b7 100644 --- a/include/asm-x86/sembuf.h +++ b/include/asm-x86/sembuf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SEMBUF_H -#define _ASM_X86_SEMBUF_H +#ifndef ASM_X86__SEMBUF_H +#define ASM_X86__SEMBUF_H /* * The semid64_ds structure for x86 architecture. @@ -21,4 +21,4 @@ struct semid64_ds { unsigned long __unused4; }; -#endif /* _ASM_X86_SEMBUF_H */ +#endif /* ASM_X86__SEMBUF_H */ diff --git a/include/asm-x86/serial.h b/include/asm-x86/serial.h index 628c801..303660b 100644 --- a/include/asm-x86/serial.h +++ b/include/asm-x86/serial.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SERIAL_H -#define _ASM_X86_SERIAL_H +#ifndef ASM_X86__SERIAL_H +#define ASM_X86__SERIAL_H /* * This assumes you have a 1.8432 MHz clock for your UART. @@ -26,4 +26,4 @@ { 0, BASE_BAUD, 0x3E8, 4, STD_COM_FLAGS }, /* ttyS2 */ \ { 0, BASE_BAUD, 0x2E8, 3, STD_COM4_FLAGS }, /* ttyS3 */ -#endif /* _ASM_X86_SERIAL_H */ +#endif /* ASM_X86__SERIAL_H */ diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h index a07c6f1..9030cb7 100644 --- a/include/asm-x86/setup.h +++ b/include/asm-x86/setup.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SETUP_H -#define _ASM_X86_SETUP_H +#ifndef ASM_X86__SETUP_H +#define ASM_X86__SETUP_H #define COMMAND_LINE_SIZE 2048 @@ -41,6 +41,7 @@ struct x86_quirks { }; extern struct x86_quirks *x86_quirks; +extern unsigned long saved_video_mode; #ifndef CONFIG_PARAVIRT #define paravirt_post_allocator_init() do {} while (0) @@ -100,4 +101,4 @@ void __init x86_64_start_reservations(char *real_mode_data); #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif /* _ASM_X86_SETUP_H */ +#endif /* ASM_X86__SETUP_H */ diff --git a/include/asm-x86/shmbuf.h b/include/asm-x86/shmbuf.h index b51413b..f51aec2 100644 --- a/include/asm-x86/shmbuf.h +++ b/include/asm-x86/shmbuf.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SHMBUF_H -#define _ASM_X86_SHMBUF_H +#ifndef ASM_X86__SHMBUF_H +#define ASM_X86__SHMBUF_H /* * The shmid64_ds structure for x86 architecture. @@ -48,4 +48,4 @@ struct shminfo64 { unsigned long __unused4; }; -#endif /* _ASM_X86_SHMBUF_H */ +#endif /* ASM_X86__SHMBUF_H */ diff --git a/include/asm-x86/shmparam.h b/include/asm-x86/shmparam.h index 0880cf0..a83a1fd 100644 --- a/include/asm-x86/shmparam.h +++ b/include/asm-x86/shmparam.h @@ -1,6 +1,6 @@ -#ifndef _ASM_X86_SHMPARAM_H -#define _ASM_X86_SHMPARAM_H +#ifndef ASM_X86__SHMPARAM_H +#define ASM_X86__SHMPARAM_H #define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ -#endif /* _ASM_X86_SHMPARAM_H */ +#endif /* ASM_X86__SHMPARAM_H */ diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h index 2f9c884..24879c8 100644 --- a/include/asm-x86/sigcontext.h +++ b/include/asm-x86/sigcontext.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SIGCONTEXT_H -#define _ASM_X86_SIGCONTEXT_H +#ifndef ASM_X86__SIGCONTEXT_H +#define ASM_X86__SIGCONTEXT_H #include <linux/compiler.h> #include <asm/types.h> @@ -202,4 +202,4 @@ struct sigcontext { #endif /* !__i386__ */ -#endif +#endif /* ASM_X86__SIGCONTEXT_H */ diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h index 57a9686..4e2ec73 100644 --- a/include/asm-x86/sigcontext32.h +++ b/include/asm-x86/sigcontext32.h @@ -1,5 +1,5 @@ -#ifndef _SIGCONTEXT32_H -#define _SIGCONTEXT32_H 1 +#ifndef ASM_X86__SIGCONTEXT32_H +#define ASM_X86__SIGCONTEXT32_H /* signal context for 32bit programs. */ @@ -68,4 +68,4 @@ struct sigcontext_ia32 { unsigned int cr2; }; -#endif +#endif /* ASM_X86__SIGCONTEXT32_H */ diff --git a/include/asm-x86/siginfo.h b/include/asm-x86/siginfo.h index a477bea..808bdfb 100644 --- a/include/asm-x86/siginfo.h +++ b/include/asm-x86/siginfo.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SIGINFO_H -#define _ASM_X86_SIGINFO_H +#ifndef ASM_X86__SIGINFO_H +#define ASM_X86__SIGINFO_H #ifdef __x86_64__ # define __ARCH_SI_PREAMBLE_SIZE (4 * sizeof(int)) @@ -7,4 +7,4 @@ #include <asm-generic/siginfo.h> -#endif +#endif /* ASM_X86__SIGINFO_H */ diff --git a/include/asm-x86/signal.h b/include/asm-x86/signal.h index 6dac493..65acc82 100644 --- a/include/asm-x86/signal.h +++ b/include/asm-x86/signal.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SIGNAL_H -#define _ASM_X86_SIGNAL_H +#ifndef ASM_X86__SIGNAL_H +#define ASM_X86__SIGNAL_H #ifndef __ASSEMBLY__ #include <linux/types.h> @@ -140,6 +140,9 @@ struct sigaction { struct k_sigaction { struct sigaction sa; }; + +extern void do_notify_resume(struct pt_regs *, void *, __u32); + # else /* __KERNEL__ */ /* Here we must cater to libcs that poke about in kernel headers. */ @@ -256,4 +259,4 @@ struct pt_regs; #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ -#endif +#endif /* ASM_X86__SIGNAL_H */ diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h index 3c877f7..04f84f4 100644 --- a/include/asm-x86/smp.h +++ b/include/asm-x86/smp.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SMP_H_ -#define _ASM_X86_SMP_H_ +#ifndef ASM_X86__SMP_H +#define ASM_X86__SMP_H #ifndef __ASSEMBLY__ #include <linux/cpumask.h> #include <linux/init.h> @@ -34,6 +34,9 @@ extern cpumask_t cpu_initialized; DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_t, cpu_core_map); DECLARE_PER_CPU(u16, cpu_llc_id); +#ifdef CONFIG_X86_32 +DECLARE_PER_CPU(int, cpu_number); +#endif DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); @@ -142,7 +145,6 @@ extern unsigned disabled_cpus __cpuinitdata; * from the initial startup. We map APIC_BASE very early in page_setup(), * so this is correct in the x86 case. */ -DECLARE_PER_CPU(int, cpu_number); #define raw_smp_processor_id() (x86_read_percpu(cpu_number)) extern int safe_smp_processor_id(void); @@ -205,4 +207,4 @@ extern void cpu_uninit(void); #endif #endif /* __ASSEMBLY__ */ -#endif +#endif /* ASM_X86__SMP_H */ diff --git a/include/asm-x86/socket.h b/include/asm-x86/socket.h index 80af9c4..db73274 100644 --- a/include/asm-x86/socket.h +++ b/include/asm-x86/socket.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SOCKET_H -#define _ASM_SOCKET_H +#ifndef ASM_X86__SOCKET_H +#define ASM_X86__SOCKET_H #include <asm/sockios.h> @@ -54,4 +54,4 @@ #define SO_MARK 36 -#endif /* _ASM_SOCKET_H */ +#endif /* ASM_X86__SOCKET_H */ diff --git a/include/asm-x86/sockios.h b/include/asm-x86/sockios.h index 49cc72b..a006704 100644 --- a/include/asm-x86/sockios.h +++ b/include/asm-x86/sockios.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SOCKIOS_H -#define _ASM_X86_SOCKIOS_H +#ifndef ASM_X86__SOCKIOS_H +#define ASM_X86__SOCKIOS_H /* Socket-level I/O control calls. */ #define FIOSETOWN 0x8901 @@ -10,4 +10,4 @@ #define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */ #define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */ -#endif /* _ASM_X86_SOCKIOS_H */ +#endif /* ASM_X86__SOCKIOS_H */ diff --git a/include/asm-x86/sparsemem.h b/include/asm-x86/sparsemem.h index 9bd48b0..38f8e6b 100644 --- a/include/asm-x86/sparsemem.h +++ b/include/asm-x86/sparsemem.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SPARSEMEM_H -#define _ASM_X86_SPARSEMEM_H +#ifndef ASM_X86__SPARSEMEM_H +#define ASM_X86__SPARSEMEM_H #ifdef CONFIG_SPARSEMEM /* @@ -31,4 +31,4 @@ #endif #endif /* CONFIG_SPARSEMEM */ -#endif +#endif /* ASM_X86__SPARSEMEM_H */ diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index e39c790..93adae3 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h @@ -1,5 +1,5 @@ -#ifndef _X86_SPINLOCK_H_ -#define _X86_SPINLOCK_H_ +#ifndef ASM_X86__SPINLOCK_H +#define ASM_X86__SPINLOCK_H #include <asm/atomic.h> #include <asm/rwlock.h> @@ -97,7 +97,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) "jne 1f\n\t" "movw %w0,%w1\n\t" "incb %h1\n\t" - "lock ; cmpxchgw %w1,%2\n\t" + LOCK_PREFIX "cmpxchgw %w1,%2\n\t" "1:" "sete %b1\n\t" "movzbl %b1,%0\n\t" @@ -135,7 +135,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) int inc = 0x00010000; int tmp; - asm volatile("lock ; xaddl %0, %1\n" + asm volatile(LOCK_PREFIX "xaddl %0, %1\n" "movzwl %w0, %2\n\t" "shrl $16, %0\n\t" "1:\t" @@ -162,7 +162,7 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) "cmpl %0,%1\n\t" "jne 1f\n\t" "addl $0x00010000, %1\n\t" - "lock ; cmpxchgl %1,%2\n\t" + LOCK_PREFIX "cmpxchgl %1,%2\n\t" "1:" "sete %b1\n\t" "movzbl %b1,%0\n\t" @@ -366,4 +366,4 @@ static inline void __raw_write_unlock(raw_rwlock_t *rw) #define _raw_read_relax(lock) cpu_relax() #define _raw_write_relax(lock) cpu_relax() -#endif +#endif /* ASM_X86__SPINLOCK_H */ diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h index 06c071c..6aa9b56 100644 --- a/include/asm-x86/spinlock_types.h +++ b/include/asm-x86/spinlock_types.h @@ -1,5 +1,5 @@ -#ifndef __ASM_SPINLOCK_TYPES_H -#define __ASM_SPINLOCK_TYPES_H +#ifndef ASM_X86__SPINLOCK_TYPES_H +#define ASM_X86__SPINLOCK_TYPES_H #ifndef __LINUX_SPINLOCK_TYPES_H # error "please don't include this file directly" @@ -17,4 +17,4 @@ typedef struct { #define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } -#endif +#endif /* ASM_X86__SPINLOCK_TYPES_H */ diff --git a/include/asm-x86/srat.h b/include/asm-x86/srat.h index 774c919..5363e4f 100644 --- a/include/asm-x86/srat.h +++ b/include/asm-x86/srat.h @@ -24,8 +24,8 @@ * Send feedback to Pat Gaughen <gone@us.ibm.com> */ -#ifndef _ASM_SRAT_H_ -#define _ASM_SRAT_H_ +#ifndef ASM_X86__SRAT_H +#define ASM_X86__SRAT_H #ifdef CONFIG_ACPI_NUMA extern int get_memcfg_from_srat(void); @@ -36,4 +36,4 @@ static inline int get_memcfg_from_srat(void) } #endif -#endif /* _ASM_SRAT_H_ */ +#endif /* ASM_X86__SRAT_H */ diff --git a/include/asm-x86/stacktrace.h b/include/asm-x86/stacktrace.h index 30f8252..f43517e 100644 --- a/include/asm-x86/stacktrace.h +++ b/include/asm-x86/stacktrace.h @@ -1,5 +1,5 @@ -#ifndef _ASM_STACKTRACE_H -#define _ASM_STACKTRACE_H 1 +#ifndef ASM_X86__STACKTRACE_H +#define ASM_X86__STACKTRACE_H extern int kstack_depth_to_print; @@ -18,4 +18,4 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, unsigned long bp, const struct stacktrace_ops *ops, void *data); -#endif +#endif /* ASM_X86__STACKTRACE_H */ diff --git a/include/asm-x86/stat.h b/include/asm-x86/stat.h index 5c22dcb..1e120f6 100644 --- a/include/asm-x86/stat.h +++ b/include/asm-x86/stat.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_STAT_H -#define _ASM_X86_STAT_H +#ifndef ASM_X86__STAT_H +#define ASM_X86__STAT_H #define STAT_HAVE_NSEC 1 @@ -111,4 +111,4 @@ struct __old_kernel_stat { #endif }; -#endif +#endif /* ASM_X86__STAT_H */ diff --git a/include/asm-x86/statfs.h b/include/asm-x86/statfs.h index 7c651aa..3f005bc 100644 --- a/include/asm-x86/statfs.h +++ b/include/asm-x86/statfs.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_STATFS_H -#define _ASM_X86_STATFS_H +#ifndef ASM_X86__STATFS_H +#define ASM_X86__STATFS_H #ifdef __i386__ #include <asm-generic/statfs.h> @@ -60,4 +60,4 @@ struct compat_statfs64 { } __attribute__((packed)); #endif /* !__i386__ */ -#endif +#endif /* ASM_X86__STATFS_H */ diff --git a/include/asm-x86/string_32.h b/include/asm-x86/string_32.h index 193578c..487843e 100644 --- a/include/asm-x86/string_32.h +++ b/include/asm-x86/string_32.h @@ -1,5 +1,5 @@ -#ifndef _I386_STRING_H_ -#define _I386_STRING_H_ +#ifndef ASM_X86__STRING_32_H +#define ASM_X86__STRING_32_H #ifdef __KERNEL__ @@ -323,4 +323,4 @@ extern void *memscan(void *addr, int c, size_t size); #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__STRING_32_H */ diff --git a/include/asm-x86/string_64.h b/include/asm-x86/string_64.h index 52b5ab3..a2add11d 100644 --- a/include/asm-x86/string_64.h +++ b/include/asm-x86/string_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_STRING_H_ -#define _X86_64_STRING_H_ +#ifndef ASM_X86__STRING_64_H +#define ASM_X86__STRING_64_H #ifdef __KERNEL__ @@ -57,4 +57,4 @@ int strcmp(const char *cs, const char *ct); #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__STRING_64_H */ diff --git a/include/asm-x86/suspend_32.h b/include/asm-x86/suspend_32.h index 8675c67..acb6d4d 100644 --- a/include/asm-x86/suspend_32.h +++ b/include/asm-x86/suspend_32.h @@ -3,8 +3,8 @@ * Based on code * Copyright 2001 Patrick Mochel <mochel@osdl.org> */ -#ifndef __ASM_X86_32_SUSPEND_H -#define __ASM_X86_32_SUSPEND_H +#ifndef ASM_X86__SUSPEND_32_H +#define ASM_X86__SUSPEND_32_H #include <asm/desc.h> #include <asm/i387.h> @@ -48,4 +48,4 @@ static inline void acpi_save_register_state(unsigned long return_point) extern int acpi_save_state_mem(void); #endif -#endif /* __ASM_X86_32_SUSPEND_H */ +#endif /* ASM_X86__SUSPEND_32_H */ diff --git a/include/asm-x86/suspend_64.h b/include/asm-x86/suspend_64.h index dc3262b..cf821dd 100644 --- a/include/asm-x86/suspend_64.h +++ b/include/asm-x86/suspend_64.h @@ -3,8 +3,8 @@ * Based on code * Copyright 2001 Patrick Mochel <mochel@osdl.org> */ -#ifndef __ASM_X86_64_SUSPEND_H -#define __ASM_X86_64_SUSPEND_H +#ifndef ASM_X86__SUSPEND_64_H +#define ASM_X86__SUSPEND_64_H #include <asm/desc.h> #include <asm/i387.h> @@ -49,4 +49,4 @@ extern int acpi_save_state_mem(void); extern char core_restore_code; extern char restore_registers; -#endif /* __ASM_X86_64_SUSPEND_H */ +#endif /* ASM_X86__SUSPEND_64_H */ diff --git a/include/asm-x86/swiotlb.h b/include/asm-x86/swiotlb.h index 2730b35..1e20adb 100644 --- a/include/asm-x86/swiotlb.h +++ b/include/asm-x86/swiotlb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_SWIOTLB_H -#define _ASM_SWIOTLB_H 1 +#ifndef ASM_X86__SWIOTLB_H +#define ASM_X86__SWIOTLB_H #include <asm/dma-mapping.h> @@ -55,4 +55,4 @@ static inline void pci_swiotlb_init(void) static inline void dma_mark_clean(void *addr, size_t size) {} -#endif /* _ASM_SWIOTLB_H */ +#endif /* ASM_X86__SWIOTLB_H */ diff --git a/include/asm-x86/sync_bitops.h b/include/asm-x86/sync_bitops.h index b47a1d0..b689bee 100644 --- a/include/asm-x86/sync_bitops.h +++ b/include/asm-x86/sync_bitops.h @@ -1,5 +1,5 @@ -#ifndef _I386_SYNC_BITOPS_H -#define _I386_SYNC_BITOPS_H +#ifndef ASM_X86__SYNC_BITOPS_H +#define ASM_X86__SYNC_BITOPS_H /* * Copyright 1992, Linus Torvalds. @@ -127,4 +127,4 @@ static inline int sync_test_and_change_bit(int nr, volatile unsigned long *addr) #undef ADDR -#endif /* _I386_SYNC_BITOPS_H */ +#endif /* ASM_X86__SYNC_BITOPS_H */ diff --git a/include/asm-x86/syscall.h b/include/asm-x86/syscall.h new file mode 100644 index 0000000..04c47dc --- /dev/null +++ b/include/asm-x86/syscall.h @@ -0,0 +1,211 @@ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008 Red Hat, Inc. All rights reserved. + * + * This copyrighted material is made available to anyone wishing to use, + * modify, copy, or redistribute it subject to the terms and conditions + * of the GNU General Public License v.2. + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include <linux/sched.h> +#include <linux/err.h> + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + /* + * We always sign-extend a -1 value being set here, + * so this is always either -1L or a syscall number. + */ + return regs->orig_ax; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + regs->ax = regs->orig_ax; +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + unsigned long error = regs->ax; +#ifdef CONFIG_IA32_EMULATION + /* + * TS_COMPAT is set for 32-bit syscall entries and then + * remains set until we return to user mode. + */ + if (task_thread_info(task)->status & TS_COMPAT) + /* + * Sign-extend the value so (int)-EFOO becomes (long)-EFOO + * and will match correctly in comparisons. + */ + error = (long) (int) error; +#endif + return IS_ERR_VALUE(error) ? error : 0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->ax; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + regs->ax = (long) error ?: val; +} + +#ifdef CONFIG_X86_32 + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ + BUG_ON(i + n > 6); + memcpy(args, ®s->bx + i, n * sizeof(args[0])); +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ + BUG_ON(i + n > 6); + memcpy(®s->bx + i, args, n * sizeof(args[0])); +} + +#else /* CONFIG_X86_64 */ + +static inline void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + unsigned long *args) +{ +# ifdef CONFIG_IA32_EMULATION + if (task_thread_info(task)->status & TS_COMPAT) + switch (i + n) { + case 6: + if (!n--) break; + *args++ = regs->bp; + case 5: + if (!n--) break; + *args++ = regs->di; + case 4: + if (!n--) break; + *args++ = regs->si; + case 3: + if (!n--) break; + *args++ = regs->dx; + case 2: + if (!n--) break; + *args++ = regs->cx; + case 1: + if (!n--) break; + *args++ = regs->bx; + case 0: + if (!n--) break; + default: + BUG(); + break; + } + else +# endif + switch (i + n) { + case 6: + if (!n--) break; + *args++ = regs->r9; + case 5: + if (!n--) break; + *args++ = regs->r8; + case 4: + if (!n--) break; + *args++ = regs->r10; + case 3: + if (!n--) break; + *args++ = regs->dx; + case 2: + if (!n--) break; + *args++ = regs->si; + case 1: + if (!n--) break; + *args++ = regs->di; + case 0: + if (!n--) break; + default: + BUG(); + break; + } +} + +static inline void syscall_set_arguments(struct task_struct *task, + struct pt_regs *regs, + unsigned int i, unsigned int n, + const unsigned long *args) +{ +# ifdef CONFIG_IA32_EMULATION + if (task_thread_info(task)->status & TS_COMPAT) + switch (i + n) { + case 6: + if (!n--) break; + regs->bp = *args++; + case 5: + if (!n--) break; + regs->di = *args++; + case 4: + if (!n--) break; + regs->si = *args++; + case 3: + if (!n--) break; + regs->dx = *args++; + case 2: + if (!n--) break; + regs->cx = *args++; + case 1: + if (!n--) break; + regs->bx = *args++; + case 0: + if (!n--) break; + default: + BUG(); + } + else +# endif + switch (i + n) { + case 6: + if (!n--) break; + regs->r9 = *args++; + case 5: + if (!n--) break; + regs->r8 = *args++; + case 4: + if (!n--) break; + regs->r10 = *args++; + case 3: + if (!n--) break; + regs->dx = *args++; + case 2: + if (!n--) break; + regs->si = *args++; + case 1: + if (!n--) break; + regs->di = *args++; + case 0: + if (!n--) break; + default: + BUG(); + } +} + +#endif /* CONFIG_X86_32 */ + +#endif /* _ASM_SYSCALL_H */ diff --git a/include/asm-x86/syscalls.h b/include/asm-x86/syscalls.h new file mode 100644 index 0000000..87803da --- /dev/null +++ b/include/asm-x86/syscalls.h @@ -0,0 +1,93 @@ +/* + * syscalls.h - Linux syscall interfaces (arch-specific) + * + * Copyright (c) 2008 Jaswinder Singh + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + */ + +#ifndef _ASM_X86_SYSCALLS_H +#define _ASM_X86_SYSCALLS_H + +#include <linux/compiler.h> +#include <linux/linkage.h> +#include <linux/types.h> +#include <linux/signal.h> + +/* Common in X86_32 and X86_64 */ +/* kernel/ioport.c */ +asmlinkage long sys_ioperm(unsigned long, unsigned long, int); + +/* X86_32 only */ +#ifdef CONFIG_X86_32 +/* kernel/process_32.c */ +asmlinkage int sys_fork(struct pt_regs); +asmlinkage int sys_clone(struct pt_regs); +asmlinkage int sys_vfork(struct pt_regs); +asmlinkage int sys_execve(struct pt_regs); + +/* kernel/signal_32.c */ +asmlinkage int sys_sigsuspend(int, int, old_sigset_t); +asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, + struct old_sigaction __user *); +asmlinkage int sys_sigaltstack(unsigned long); +asmlinkage unsigned long sys_sigreturn(unsigned long); +asmlinkage int sys_rt_sigreturn(unsigned long); + +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned long); + +/* kernel/ldt.c */ +asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); + +/* kernel/sys_i386_32.c */ +asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +struct mmap_arg_struct; +asmlinkage int old_mmap(struct mmap_arg_struct __user *); +struct sel_arg_struct; +asmlinkage int old_select(struct sel_arg_struct __user *); +asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); +struct old_utsname; +asmlinkage int sys_uname(struct old_utsname __user *); +struct oldold_utsname; +asmlinkage int sys_olduname(struct oldold_utsname __user *); + +/* kernel/tls.c */ +asmlinkage int sys_set_thread_area(struct user_desc __user *); +asmlinkage int sys_get_thread_area(struct user_desc __user *); + +/* kernel/vm86_32.c */ +asmlinkage int sys_vm86old(struct pt_regs); +asmlinkage int sys_vm86(struct pt_regs); + +#else /* CONFIG_X86_32 */ + +/* X86_64 only */ +/* kernel/process_64.c */ +asmlinkage long sys_fork(struct pt_regs *); +asmlinkage long sys_clone(unsigned long, unsigned long, + void __user *, void __user *, + struct pt_regs *); +asmlinkage long sys_vfork(struct pt_regs *); +asmlinkage long sys_execve(char __user *, char __user * __user *, + char __user * __user *, + struct pt_regs *); + +/* kernel/ioport.c */ +asmlinkage long sys_iopl(unsigned int, struct pt_regs *); + +/* kernel/signal_64.c */ +asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); +asmlinkage long sys_rt_sigreturn(struct pt_regs *); + +/* kernel/sys_x86_64.c */ +asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, + unsigned long, unsigned long, unsigned long); +struct new_utsname; +asmlinkage long sys_uname(struct new_utsname __user *); + +#endif /* CONFIG_X86_32 */ +#endif /* _ASM_X86_SYSCALLS_H */ diff --git a/include/asm-x86/system.h b/include/asm-x86/system.h index 983ce37..34505dd 100644 --- a/include/asm-x86/system.h +++ b/include/asm-x86/system.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_SYSTEM_H_ -#define _ASM_X86_SYSTEM_H_ +#ifndef ASM_X86__SYSTEM_H +#define ASM_X86__SYSTEM_H #include <asm/asm.h> #include <asm/segment.h> @@ -419,4 +419,4 @@ static inline void rdtsc_barrier(void) alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); } -#endif +#endif /* ASM_X86__SYSTEM_H */ diff --git a/include/asm-x86/system_64.h b/include/asm-x86/system_64.h index 97fa251..5aedb8b 100644 --- a/include/asm-x86/system_64.h +++ b/include/asm-x86/system_64.h @@ -1,5 +1,5 @@ -#ifndef __ASM_SYSTEM_H -#define __ASM_SYSTEM_H +#ifndef ASM_X86__SYSTEM_64_H +#define ASM_X86__SYSTEM_64_H #include <asm/segment.h> #include <asm/cmpxchg.h> @@ -19,4 +19,4 @@ static inline void write_cr8(unsigned long val) #include <linux/irqflags.h> -#endif +#endif /* ASM_X86__SYSTEM_64_H */ diff --git a/include/asm-x86/tce.h b/include/asm-x86/tce.h index b1a4ea0..e7932d7 100644 --- a/include/asm-x86/tce.h +++ b/include/asm-x86/tce.h @@ -21,8 +21,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#ifndef _ASM_X86_64_TCE_H -#define _ASM_X86_64_TCE_H +#ifndef ASM_X86__TCE_H +#define ASM_X86__TCE_H extern unsigned int specified_table_size; struct iommu_table; @@ -45,4 +45,4 @@ extern void * __init alloc_tce_table(void); extern void __init free_tce_table(void *tbl); extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar); -#endif /* _ASM_X86_64_TCE_H */ +#endif /* ASM_X86__TCE_H */ diff --git a/include/asm-x86/termbits.h b/include/asm-x86/termbits.h index af1b70e..3d00dc5 100644 --- a/include/asm-x86/termbits.h +++ b/include/asm-x86/termbits.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TERMBITS_H -#define _ASM_X86_TERMBITS_H +#ifndef ASM_X86__TERMBITS_H +#define ASM_X86__TERMBITS_H #include <linux/posix_types.h> @@ -195,4 +195,4 @@ struct ktermios { #define TCSADRAIN 1 #define TCSAFLUSH 2 -#endif /* _ASM_X86_TERMBITS_H */ +#endif /* ASM_X86__TERMBITS_H */ diff --git a/include/asm-x86/termios.h b/include/asm-x86/termios.h index f729563..e235db2 100644 --- a/include/asm-x86/termios.h +++ b/include/asm-x86/termios.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TERMIOS_H -#define _ASM_X86_TERMIOS_H +#ifndef ASM_X86__TERMIOS_H +#define ASM_X86__TERMIOS_H #include <asm/termbits.h> #include <asm/ioctls.h> @@ -110,4 +110,4 @@ static inline int kernel_termios_to_user_termios_1(struct termios __user *u, #endif /* __KERNEL__ */ -#endif /* _ASM_X86_TERMIOS_H */ +#endif /* ASM_X86__TERMIOS_H */ diff --git a/include/asm-x86/therm_throt.h b/include/asm-x86/therm_throt.h index 399bf60..1c7f57b 100644 --- a/include/asm-x86/therm_throt.h +++ b/include/asm-x86/therm_throt.h @@ -1,9 +1,9 @@ -#ifndef __ASM_I386_THERM_THROT_H__ -#define __ASM_I386_THERM_THROT_H__ 1 +#ifndef ASM_X86__THERM_THROT_H +#define ASM_X86__THERM_THROT_H #include <asm/atomic.h> extern atomic_t therm_throt_en; int therm_throt_process(int curr); -#endif /* __ASM_I386_THERM_THROT_H__ */ +#endif /* ASM_X86__THERM_THROT_H */ diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index da0a675..4db0066 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -4,8 +4,8 @@ * - Incorporating suggestions made by Linus Torvalds and Dave Miller */ -#ifndef _ASM_X86_THREAD_INFO_H -#define _ASM_X86_THREAD_INFO_H +#ifndef ASM_X86__THREAD_INFO_H +#define ASM_X86__THREAD_INFO_H #include <linux/compiler.h> #include <asm/page.h> @@ -71,6 +71,7 @@ struct thread_info { * Warning: layout of LSW is hardcoded in entry.S */ #define TIF_SYSCALL_TRACE 0 /* syscall trace active */ +#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */ #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ @@ -93,6 +94,7 @@ struct thread_info { #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) @@ -133,7 +135,7 @@ struct thread_info { /* Only used for 64 bit */ #define _TIF_DO_NOTIFY_MASK \ - (_TIF_SIGPENDING|_TIF_MCE_NOTIFY) + (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ @@ -258,4 +260,4 @@ extern void free_thread_info(struct thread_info *ti); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #define arch_task_cache_init arch_task_cache_init #endif -#endif /* _ASM_X86_THREAD_INFO_H */ +#endif /* ASM_X86__THREAD_INFO_H */ diff --git a/include/asm-x86/time.h b/include/asm-x86/time.h index a17fa47..3e724ee 100644 --- a/include/asm-x86/time.h +++ b/include/asm-x86/time.h @@ -1,5 +1,5 @@ -#ifndef _ASMX86_TIME_H -#define _ASMX86_TIME_H +#ifndef ASM_X86__TIME_H +#define ASM_X86__TIME_H extern void hpet_time_init(void); @@ -46,6 +46,8 @@ static inline int native_set_wallclock(unsigned long nowtime) #endif +extern void time_init(void); + #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else /* !CONFIG_PARAVIRT */ @@ -58,4 +60,4 @@ static inline int native_set_wallclock(unsigned long nowtime) extern unsigned long __init calibrate_cpu(void); -#endif +#endif /* ASM_X86__TIME_H */ diff --git a/include/asm-x86/timer.h b/include/asm-x86/timer.h index fb2a4dd..d0babce 100644 --- a/include/asm-x86/timer.h +++ b/include/asm-x86/timer.h @@ -1,5 +1,5 @@ -#ifndef _ASMi386_TIMER_H -#define _ASMi386_TIMER_H +#ifndef ASM_X86__TIMER_H +#define ASM_X86__TIMER_H #include <linux/init.h> #include <linux/pm.h> #include <linux/percpu.h> @@ -9,9 +9,12 @@ unsigned long long native_sched_clock(void); unsigned long native_calibrate_tsc(void); +#ifdef CONFIG_X86_32 extern int timer_ack; -extern int no_timer_check; extern int recalibrate_cpu_khz(void); +#endif /* CONFIG_X86_32 */ + +extern int no_timer_check; #ifndef CONFIG_PARAVIRT #define calibrate_tsc() native_calibrate_tsc() @@ -60,4 +63,4 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc) return ns; } -#endif +#endif /* ASM_X86__TIMER_H */ diff --git a/include/asm-x86/timex.h b/include/asm-x86/timex.h index 43e5a78..d1ce241 100644 --- a/include/asm-x86/timex.h +++ b/include/asm-x86/timex.h @@ -1,6 +1,6 @@ /* x86 architecture timex specifications */ -#ifndef _ASM_X86_TIMEX_H -#define _ASM_X86_TIMEX_H +#ifndef ASM_X86__TIMEX_H +#define ASM_X86__TIMEX_H #include <asm/processor.h> #include <asm/tsc.h> @@ -16,4 +16,4 @@ #define ARCH_HAS_READ_CURRENT_TIMER -#endif +#endif /* ASM_X86__TIMEX_H */ diff --git a/include/asm-x86/tlb.h b/include/asm-x86/tlb.h index e4e9e2d..db36e9e 100644 --- a/include/asm-x86/tlb.h +++ b/include/asm-x86/tlb.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TLB_H -#define _ASM_X86_TLB_H +#ifndef ASM_X86__TLB_H +#define ASM_X86__TLB_H #define tlb_start_vma(tlb, vma) do { } while (0) #define tlb_end_vma(tlb, vma) do { } while (0) @@ -8,4 +8,4 @@ #include <asm-generic/tlb.h> -#endif +#endif /* ASM_X86__TLB_H */ diff --git a/include/asm-x86/tlbflush.h b/include/asm-x86/tlbflush.h index 35c76ce..ef68b76 100644 --- a/include/asm-x86/tlbflush.h +++ b/include/asm-x86/tlbflush.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TLBFLUSH_H -#define _ASM_X86_TLBFLUSH_H +#ifndef ASM_X86__TLBFLUSH_H +#define ASM_X86__TLBFLUSH_H #include <linux/mm.h> #include <linux/sched.h> @@ -165,4 +165,4 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } -#endif /* _ASM_X86_TLBFLUSH_H */ +#endif /* ASM_X86__TLBFLUSH_H */ diff --git a/include/asm-x86/topology.h b/include/asm-x86/topology.h index 90ac771..7eca9bc 100644 --- a/include/asm-x86/topology.h +++ b/include/asm-x86/topology.h @@ -22,8 +22,8 @@ * * Send feedback to <colpatch@us.ibm.com> */ -#ifndef _ASM_X86_TOPOLOGY_H -#define _ASM_X86_TOPOLOGY_H +#ifndef ASM_X86__TOPOLOGY_H +#define ASM_X86__TOPOLOGY_H #ifdef CONFIG_X86_32 # ifdef CONFIG_X86_HT @@ -255,4 +255,4 @@ static inline void set_mp_bus_to_node(int busnum, int node) } #endif -#endif /* _ASM_X86_TOPOLOGY_H */ +#endif /* ASM_X86__TOPOLOGY_H */ diff --git a/include/asm-x86/trampoline.h b/include/asm-x86/trampoline.h index b156b08..0406bbd 100644 --- a/include/asm-x86/trampoline.h +++ b/include/asm-x86/trampoline.h @@ -1,5 +1,5 @@ -#ifndef __TRAMPOLINE_HEADER -#define __TRAMPOLINE_HEADER +#ifndef ASM_X86__TRAMPOLINE_H +#define ASM_X86__TRAMPOLINE_H #ifndef __ASSEMBLY__ @@ -18,4 +18,4 @@ extern unsigned long setup_trampoline(void); #endif /* __ASSEMBLY__ */ -#endif /* __TRAMPOLINE_HEADER */ +#endif /* ASM_X86__TRAMPOLINE_H */ diff --git a/include/asm-x86/traps.h b/include/asm-x86/traps.h index a4b65a7..2ccebc6 100644 --- a/include/asm-x86/traps.h +++ b/include/asm-x86/traps.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TRAPS_H -#define _ASM_X86_TRAPS_H +#ifndef ASM_X86__TRAPS_H +#define ASM_X86__TRAPS_H /* Common in X86_32 and X86_64 */ asmlinkage void divide_error(void); @@ -51,6 +51,8 @@ void do_spurious_interrupt_bug(struct pt_regs *, long); unsigned long patch_espfix_desc(unsigned long, unsigned long); asmlinkage void math_emulate(long); +void do_page_fault(struct pt_regs *regs, unsigned long error_code); + #else /* CONFIG_X86_32 */ asmlinkage void double_fault(void); @@ -62,5 +64,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *); asmlinkage void do_simd_coprocessor_error(struct pt_regs *); asmlinkage void do_spurious_interrupt_bug(struct pt_regs *); +asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code); + #endif /* CONFIG_X86_32 */ -#endif /* _ASM_X86_TRAPS_H */ +#endif /* ASM_X86__TRAPS_H */ diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index cb6f6ee..ad0f5c4 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -1,8 +1,8 @@ /* * x86 TSC related functions */ -#ifndef _ASM_X86_TSC_H -#define _ASM_X86_TSC_H +#ifndef ASM_X86__TSC_H +#define ASM_X86__TSC_H #include <asm/processor.h> @@ -59,4 +59,4 @@ extern void check_tsc_sync_target(void); extern int notsc_setup(char *); -#endif +#endif /* ASM_X86__TSC_H */ diff --git a/include/asm-x86/types.h b/include/asm-x86/types.h index 1ac80cd..e78b52e 100644 --- a/include/asm-x86/types.h +++ b/include/asm-x86/types.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_TYPES_H -#define _ASM_X86_TYPES_H +#ifndef ASM_X86__TYPES_H +#define ASM_X86__TYPES_H #include <asm-generic/int-ll64.h> @@ -33,4 +33,4 @@ typedef u32 dma_addr_t; #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__TYPES_H */ diff --git a/include/asm-x86/uaccess.h b/include/asm-x86/uaccess.h index 5f702d1..48ebc0a 100644 --- a/include/asm-x86/uaccess.h +++ b/include/asm-x86/uaccess.h @@ -1,5 +1,5 @@ -#ifndef _ASM_UACCES_H_ -#define _ASM_UACCES_H_ +#ifndef ASM_X86__UACCESS_H +#define ASM_X86__UACCESS_H /* * User space memory access functions */ @@ -450,5 +450,5 @@ extern struct movsl_mask { # include "uaccess_64.h" #endif -#endif +#endif /* ASM_X86__UACCESS_H */ diff --git a/include/asm-x86/uaccess_32.h b/include/asm-x86/uaccess_32.h index 6fdef39..6b5b57d 100644 --- a/include/asm-x86/uaccess_32.h +++ b/include/asm-x86/uaccess_32.h @@ -1,5 +1,5 @@ -#ifndef __i386_UACCESS_H -#define __i386_UACCESS_H +#ifndef ASM_X86__UACCESS_32_H +#define ASM_X86__UACCESS_32_H /* * User space memory access functions @@ -215,4 +215,4 @@ long strnlen_user(const char __user *str, long n); unsigned long __must_check clear_user(void __user *mem, unsigned long len); unsigned long __must_check __clear_user(void __user *mem, unsigned long len); -#endif /* __i386_UACCESS_H */ +#endif /* ASM_X86__UACCESS_32_H */ diff --git a/include/asm-x86/uaccess_64.h b/include/asm-x86/uaccess_64.h index 45806d6..c96c1f5 100644 --- a/include/asm-x86/uaccess_64.h +++ b/include/asm-x86/uaccess_64.h @@ -1,5 +1,5 @@ -#ifndef __X86_64_UACCESS_H -#define __X86_64_UACCESS_H +#ifndef ASM_X86__UACCESS_64_H +#define ASM_X86__UACCESS_64_H /* * User space memory access functions @@ -199,4 +199,4 @@ static inline int __copy_from_user_inatomic_nocache(void *dst, unsigned long copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest); -#endif /* __X86_64_UACCESS_H */ +#endif /* ASM_X86__UACCESS_64_H */ diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h index 50a79f7..9948dd3 100644 --- a/include/asm-x86/ucontext.h +++ b/include/asm-x86/ucontext.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UCONTEXT_H -#define _ASM_X86_UCONTEXT_H +#ifndef ASM_X86__UCONTEXT_H +#define ASM_X86__UCONTEXT_H struct ucontext { unsigned long uc_flags; @@ -9,4 +9,4 @@ struct ucontext { sigset_t uc_sigmask; /* mask last for extensibility */ }; -#endif /* _ASM_X86_UCONTEXT_H */ +#endif /* ASM_X86__UCONTEXT_H */ diff --git a/include/asm-x86/unaligned.h b/include/asm-x86/unaligned.h index a7bd416..59dcdec 100644 --- a/include/asm-x86/unaligned.h +++ b/include/asm-x86/unaligned.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNALIGNED_H -#define _ASM_X86_UNALIGNED_H +#ifndef ASM_X86__UNALIGNED_H +#define ASM_X86__UNALIGNED_H /* * The x86 can do unaligned accesses itself. @@ -11,4 +11,4 @@ #define get_unaligned __get_unaligned_le #define put_unaligned __put_unaligned_le -#endif /* _ASM_X86_UNALIGNED_H */ +#endif /* ASM_X86__UNALIGNED_H */ diff --git a/include/asm-x86/unistd_32.h b/include/asm-x86/unistd_32.h index d739467..017f4a8 100644 --- a/include/asm-x86/unistd_32.h +++ b/include/asm-x86/unistd_32.h @@ -1,5 +1,5 @@ -#ifndef _ASM_I386_UNISTD_H_ -#define _ASM_I386_UNISTD_H_ +#ifndef ASM_X86__UNISTD_32_H +#define ASM_X86__UNISTD_32_H /* * This file contains the system call numbers. @@ -376,4 +376,4 @@ #endif #endif /* __KERNEL__ */ -#endif /* _ASM_I386_UNISTD_H_ */ +#endif /* ASM_X86__UNISTD_32_H */ diff --git a/include/asm-x86/unistd_64.h b/include/asm-x86/unistd_64.h index 3a341d7..ace83f1 100644 --- a/include/asm-x86/unistd_64.h +++ b/include/asm-x86/unistd_64.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_UNISTD_H_ -#define _ASM_X86_64_UNISTD_H_ +#ifndef ASM_X86__UNISTD_64_H +#define ASM_X86__UNISTD_64_H #ifndef __SYSCALL #define __SYSCALL(a, b) @@ -690,4 +690,4 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) #define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall") #endif /* __KERNEL__ */ -#endif /* _ASM_X86_64_UNISTD_H_ */ +#endif /* ASM_X86__UNISTD_64_H */ diff --git a/include/asm-x86/unwind.h b/include/asm-x86/unwind.h index 8b064bd..a215156 100644 --- a/include/asm-x86/unwind.h +++ b/include/asm-x86/unwind.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_UNWIND_H -#define _ASM_X86_UNWIND_H +#ifndef ASM_X86__UNWIND_H +#define ASM_X86__UNWIND_H #define UNW_PC(frame) ((void)(frame), 0UL) #define UNW_SP(frame) ((void)(frame), 0UL) @@ -10,4 +10,4 @@ static inline int arch_unw_user_mode(const void *info) return 0; } -#endif /* _ASM_X86_UNWIND_H */ +#endif /* ASM_X86__UNWIND_H */ diff --git a/include/asm-x86/user32.h b/include/asm-x86/user32.h index a3d9100..aa66c18 100644 --- a/include/asm-x86/user32.h +++ b/include/asm-x86/user32.h @@ -1,5 +1,5 @@ -#ifndef USER32_H -#define USER32_H 1 +#ifndef ASM_X86__USER32_H +#define ASM_X86__USER32_H /* IA32 compatible user structures for ptrace. * These should be used for 32bit coredumps too. */ @@ -67,4 +67,4 @@ struct user32 { }; -#endif +#endif /* ASM_X86__USER32_H */ diff --git a/include/asm-x86/user_32.h b/include/asm-x86/user_32.h index d6e51ed..e0fe2f5 100644 --- a/include/asm-x86/user_32.h +++ b/include/asm-x86/user_32.h @@ -1,5 +1,5 @@ -#ifndef _I386_USER_H -#define _I386_USER_H +#ifndef ASM_X86__USER_32_H +#define ASM_X86__USER_32_H #include <asm/page.h> /* Core file format: The core file is written in such a way that gdb @@ -128,4 +128,4 @@ struct user{ #define HOST_TEXT_START_ADDR (u.start_code) #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) -#endif /* _I386_USER_H */ +#endif /* ASM_X86__USER_32_H */ diff --git a/include/asm-x86/user_64.h b/include/asm-x86/user_64.h index 6037b63..38b5799 100644 --- a/include/asm-x86/user_64.h +++ b/include/asm-x86/user_64.h @@ -1,5 +1,5 @@ -#ifndef _X86_64_USER_H -#define _X86_64_USER_H +#ifndef ASM_X86__USER_64_H +#define ASM_X86__USER_64_H #include <asm/types.h> #include <asm/page.h> @@ -134,4 +134,4 @@ struct user { #define HOST_TEXT_START_ADDR (u.start_code) #define HOST_STACK_END_ADDR (u.start_stack + u.u_ssize * NBPG) -#endif /* _X86_64_USER_H */ +#endif /* ASM_X86__USER_64_H */ diff --git a/include/asm-x86/uv/bios.h b/include/asm-x86/uv/bios.h index aa73362..7cd6d7e 100644 --- a/include/asm-x86/uv/bios.h +++ b/include/asm-x86/uv/bios.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_BIOS_H -#define _ASM_X86_BIOS_H +#ifndef ASM_X86__UV__BIOS_H +#define ASM_X86__UV__BIOS_H /* * BIOS layer definitions. @@ -65,4 +65,4 @@ x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, unsigned long *drift_info); extern const char *x86_bios_strerror(long status); -#endif /* _ASM_X86_BIOS_H */ +#endif /* ASM_X86__UV__BIOS_H */ diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h index 610b6b3..77153fb 100644 --- a/include/asm-x86/uv/uv_bau.h +++ b/include/asm-x86/uv/uv_bau.h @@ -8,8 +8,8 @@ * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. */ -#ifndef __ASM_X86_UV_BAU__ -#define __ASM_X86_UV_BAU__ +#ifndef ASM_X86__UV__UV_BAU_H +#define ASM_X86__UV__UV_BAU_H #include <linux/bitmap.h> #define BITSPERBYTE 8 @@ -329,4 +329,4 @@ extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); -#endif /* __ASM_X86_UV_BAU__ */ +#endif /* ASM_X86__UV__UV_BAU_H */ diff --git a/include/asm-x86/uv/uv_hub.h b/include/asm-x86/uv/uv_hub.h index a4ef26e..bdb5b01 100644 --- a/include/asm-x86/uv/uv_hub.h +++ b/include/asm-x86/uv/uv_hub.h @@ -8,8 +8,8 @@ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ -#ifndef __ASM_X86_UV_HUB_H__ -#define __ASM_X86_UV_HUB_H__ +#ifndef ASM_X86__UV__UV_HUB_H +#define ASM_X86__UV__UV_HUB_H #include <linux/numa.h> #include <linux/percpu.h> @@ -350,5 +350,5 @@ static inline int uv_num_possible_blades(void) return uv_possible_blades; } -#endif /* __ASM_X86_UV_HUB__ */ +#endif /* ASM_X86__UV__UV_HUB_H */ diff --git a/include/asm-x86/uv/uv_mmrs.h b/include/asm-x86/uv/uv_mmrs.h index 151fd7f..8b03d89 100644 --- a/include/asm-x86/uv/uv_mmrs.h +++ b/include/asm-x86/uv/uv_mmrs.h @@ -8,8 +8,8 @@ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. */ -#ifndef __ASM_X86_UV_MMRS__ -#define __ASM_X86_UV_MMRS__ +#ifndef ASM_X86__UV__UV_MMRS_H +#define ASM_X86__UV__UV_MMRS_H #define UV_MMR_ENABLE (1UL << 63) @@ -1292,4 +1292,4 @@ union uvh_si_alias2_overlay_config_u { }; -#endif /* __ASM_X86_UV_MMRS__ */ +#endif /* ASM_X86__UV__UV_MMRS_H */ diff --git a/include/asm-x86/vdso.h b/include/asm-x86/vdso.h index 8e18fb8..4ab3209 100644 --- a/include/asm-x86/vdso.h +++ b/include/asm-x86/vdso.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_VDSO_H -#define _ASM_X86_VDSO_H 1 +#ifndef ASM_X86__VDSO_H +#define ASM_X86__VDSO_H #ifdef CONFIG_X86_64 extern const char VDSO64_PRELINK[]; @@ -44,4 +44,4 @@ extern const char vdso32_int80_start, vdso32_int80_end; extern const char vdso32_syscall_start, vdso32_syscall_end; extern const char vdso32_sysenter_start, vdso32_sysenter_end; -#endif /* asm-x86/vdso.h */ +#endif /* ASM_X86__VDSO_H */ diff --git a/include/asm-x86/vga.h b/include/asm-x86/vga.h index 0ccf804..b9e493d 100644 --- a/include/asm-x86/vga.h +++ b/include/asm-x86/vga.h @@ -4,8 +4,8 @@ * (c) 1998 Martin Mares <mj@ucw.cz> */ -#ifndef _LINUX_ASM_VGA_H_ -#define _LINUX_ASM_VGA_H_ +#ifndef ASM_X86__VGA_H +#define ASM_X86__VGA_H /* * On the PC, we can just recalculate addresses and then @@ -17,4 +17,4 @@ #define vga_readb(x) (*(x)) #define vga_writeb(x, y) (*(y) = (x)) -#endif +#endif /* ASM_X86__VGA_H */ diff --git a/include/asm-x86/vgtod.h b/include/asm-x86/vgtod.h index 3301f09..38fd133 100644 --- a/include/asm-x86/vgtod.h +++ b/include/asm-x86/vgtod.h @@ -1,5 +1,5 @@ -#ifndef _ASM_VGTOD_H -#define _ASM_VGTOD_H 1 +#ifndef ASM_X86__VGTOD_H +#define ASM_X86__VGTOD_H #include <asm/vsyscall.h> #include <linux/clocksource.h> @@ -26,4 +26,4 @@ extern struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data; extern struct vsyscall_gtod_data vsyscall_gtod_data; -#endif +#endif /* ASM_X86__VGTOD_H */ diff --git a/include/asm-x86/visws/cobalt.h b/include/asm-x86/visws/cobalt.h index 9952588..9627a8f 100644 --- a/include/asm-x86/visws/cobalt.h +++ b/include/asm-x86/visws/cobalt.h @@ -1,5 +1,5 @@ -#ifndef __I386_SGI_COBALT_H -#define __I386_SGI_COBALT_H +#ifndef ASM_X86__VISWS__COBALT_H +#define ASM_X86__VISWS__COBALT_H #include <asm/fixmap.h> @@ -122,4 +122,4 @@ extern char visws_board_type; extern char visws_board_rev; -#endif /* __I386_SGI_COBALT_H */ +#endif /* ASM_X86__VISWS__COBALT_H */ diff --git a/include/asm-x86/visws/lithium.h b/include/asm-x86/visws/lithium.h index dfcd4f0..b36d3b3 100644 --- a/include/asm-x86/visws/lithium.h +++ b/include/asm-x86/visws/lithium.h @@ -1,5 +1,5 @@ -#ifndef __I386_SGI_LITHIUM_H -#define __I386_SGI_LITHIUM_H +#ifndef ASM_X86__VISWS__LITHIUM_H +#define ASM_X86__VISWS__LITHIUM_H #include <asm/fixmap.h> @@ -49,5 +49,5 @@ static inline unsigned short li_pcib_read16(unsigned long reg) return *((volatile unsigned short *)(LI_PCIB_VADDR+reg)); } -#endif +#endif /* ASM_X86__VISWS__LITHIUM_H */ diff --git a/include/asm-x86/visws/piix4.h b/include/asm-x86/visws/piix4.h index 83ea4f4..61c9380 100644 --- a/include/asm-x86/visws/piix4.h +++ b/include/asm-x86/visws/piix4.h @@ -1,5 +1,5 @@ -#ifndef __I386_SGI_PIIX_H -#define __I386_SGI_PIIX_H +#ifndef ASM_X86__VISWS__PIIX4_H +#define ASM_X86__VISWS__PIIX4_H /* * PIIX4 as used on SGI Visual Workstations @@ -104,4 +104,4 @@ */ #define PIIX_GPI_STPCLK 0x4 // STPCLK signal routed back in -#endif +#endif /* ASM_X86__VISWS__PIIX4_H */ diff --git a/include/asm-x86/vm86.h b/include/asm-x86/vm86.h index 5ce3513..998bd18 100644 --- a/include/asm-x86/vm86.h +++ b/include/asm-x86/vm86.h @@ -1,5 +1,5 @@ -#ifndef _LINUX_VM86_H -#define _LINUX_VM86_H +#ifndef ASM_X86__VM86_H +#define ASM_X86__VM86_H /* * I'm guessing at the VIF/VIP flag usage, but hope that this is how @@ -205,4 +205,4 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) #endif /* __KERNEL__ */ -#endif +#endif /* ASM_X86__VM86_H */ diff --git a/include/asm-x86/vmi_time.h b/include/asm-x86/vmi_time.h index c3118c3..b2d39e6 100644 --- a/include/asm-x86/vmi_time.h +++ b/include/asm-x86/vmi_time.h @@ -22,8 +22,8 @@ * */ -#ifndef __VMI_TIME_H -#define __VMI_TIME_H +#ifndef ASM_X86__VMI_TIME_H +#define ASM_X86__VMI_TIME_H /* * Raw VMI call indices for timer functions @@ -95,4 +95,4 @@ extern void __devinit vmi_time_ap_init(void); #define CONFIG_VMI_ALARM_HZ 100 -#endif +#endif /* ASM_X86__VMI_TIME_H */ diff --git a/include/asm-x86/vsyscall.h b/include/asm-x86/vsyscall.h index 6b66ff9..dcd4682 100644 --- a/include/asm-x86/vsyscall.h +++ b/include/asm-x86/vsyscall.h @@ -1,5 +1,5 @@ -#ifndef _ASM_X86_64_VSYSCALL_H_ -#define _ASM_X86_64_VSYSCALL_H_ +#ifndef ASM_X86__VSYSCALL_H +#define ASM_X86__VSYSCALL_H enum vsyscall_num { __NR_vgettimeofday, @@ -41,4 +41,4 @@ extern void map_vsyscall(void); #endif /* __KERNEL__ */ -#endif /* _ASM_X86_64_VSYSCALL_H_ */ +#endif /* ASM_X86__VSYSCALL_H */ diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h index 8ded747..8151f5b 100644 --- a/include/asm-x86/xen/events.h +++ b/include/asm-x86/xen/events.h @@ -1,5 +1,5 @@ -#ifndef __XEN_EVENTS_H -#define __XEN_EVENTS_H +#ifndef ASM_X86__XEN__EVENTS_H +#define ASM_X86__XEN__EVENTS_H enum ipi_vector { XEN_RESCHEDULE_VECTOR, @@ -21,4 +21,4 @@ static inline void xen_do_IRQ(int irq, struct pt_regs *regs) do_IRQ(regs); } -#endif /* __XEN_EVENTS_H */ +#endif /* ASM_X86__XEN__EVENTS_H */ diff --git a/include/asm-x86/xen/grant_table.h b/include/asm-x86/xen/grant_table.h index 2444d45..c4baab4 100644 --- a/include/asm-x86/xen/grant_table.h +++ b/include/asm-x86/xen/grant_table.h @@ -1,7 +1,7 @@ -#ifndef __XEN_GRANT_TABLE_H -#define __XEN_GRANT_TABLE_H +#ifndef ASM_X86__XEN__GRANT_TABLE_H +#define ASM_X86__XEN__GRANT_TABLE_H #define xen_alloc_vm_area(size) alloc_vm_area(size) #define xen_free_vm_area(area) free_vm_area(area) -#endif /* __XEN_GRANT_TABLE_H */ +#endif /* ASM_X86__XEN__GRANT_TABLE_H */ diff --git a/include/asm-x86/xen/hypercall.h b/include/asm-x86/xen/hypercall.h index 91cb7fd..44f4259 100644 --- a/include/asm-x86/xen/hypercall.h +++ b/include/asm-x86/xen/hypercall.h @@ -30,8 +30,8 @@ * IN THE SOFTWARE. */ -#ifndef __HYPERCALL_H__ -#define __HYPERCALL_H__ +#ifndef ASM_X86__XEN__HYPERCALL_H +#define ASM_X86__XEN__HYPERCALL_H #include <linux/errno.h> #include <linux/string.h> @@ -524,4 +524,4 @@ MULTI_stack_switch(struct multicall_entry *mcl, mcl->args[1] = esp; } -#endif /* __HYPERCALL_H__ */ +#endif /* ASM_X86__XEN__HYPERCALL_H */ diff --git a/include/asm-x86/xen/hypervisor.h b/include/asm-x86/xen/hypervisor.h index 04ee061..0ef3a88 100644 --- a/include/asm-x86/xen/hypervisor.h +++ b/include/asm-x86/xen/hypervisor.h @@ -30,8 +30,8 @@ * IN THE SOFTWARE. */ -#ifndef __HYPERVISOR_H__ -#define __HYPERVISOR_H__ +#ifndef ASM_X86__XEN__HYPERVISOR_H +#define ASM_X86__XEN__HYPERVISOR_H #include <linux/types.h> #include <linux/kernel.h> @@ -69,4 +69,4 @@ u64 jiffies_to_st(unsigned long jiffies); #define is_running_on_xen() (xen_start_info ? 1 : 0) -#endif /* __HYPERVISOR_H__ */ +#endif /* ASM_X86__XEN__HYPERVISOR_H */ diff --git a/include/asm-x86/xen/interface.h b/include/asm-x86/xen/interface.h index 9d810f2..d077bba 100644 --- a/include/asm-x86/xen/interface.h +++ b/include/asm-x86/xen/interface.h @@ -6,8 +6,8 @@ * Copyright (c) 2004, K A Fraser */ -#ifndef __ASM_X86_XEN_INTERFACE_H -#define __ASM_X86_XEN_INTERFACE_H +#ifndef ASM_X86__XEN__INTERFACE_H +#define ASM_X86__XEN__INTERFACE_H #ifdef __XEN__ #define __DEFINE_GUEST_HANDLE(name, type) \ @@ -172,4 +172,4 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_guest_context); #define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" #endif -#endif /* __ASM_X86_XEN_INTERFACE_H */ +#endif /* ASM_X86__XEN__INTERFACE_H */ diff --git a/include/asm-x86/xen/interface_32.h b/include/asm-x86/xen/interface_32.h index d8ac41d..08167e1 100644 --- a/include/asm-x86/xen/interface_32.h +++ b/include/asm-x86/xen/interface_32.h @@ -6,8 +6,8 @@ * Copyright (c) 2004, K A Fraser */ -#ifndef __ASM_X86_XEN_INTERFACE_32_H -#define __ASM_X86_XEN_INTERFACE_32_H +#ifndef ASM_X86__XEN__INTERFACE_32_H +#define ASM_X86__XEN__INTERFACE_32_H /* @@ -94,4 +94,4 @@ typedef struct xen_callback xen_callback_t; #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) -#endif /* __ASM_X86_XEN_INTERFACE_32_H */ +#endif /* ASM_X86__XEN__INTERFACE_32_H */ diff --git a/include/asm-x86/xen/interface_64.h b/include/asm-x86/xen/interface_64.h index 842266c..046c0f1 100644 --- a/include/asm-x86/xen/interface_64.h +++ b/include/asm-x86/xen/interface_64.h @@ -1,5 +1,5 @@ -#ifndef __ASM_X86_XEN_INTERFACE_64_H -#define __ASM_X86_XEN_INTERFACE_64_H +#ifndef ASM_X86__XEN__INTERFACE_64_H +#define ASM_X86__XEN__INTERFACE_64_H /* * 64-bit segment selectors @@ -156,4 +156,4 @@ typedef unsigned long xen_callback_t; #endif /* !__ASSEMBLY__ */ -#endif /* __ASM_X86_XEN_INTERFACE_64_H */ +#endif /* ASM_X86__XEN__INTERFACE_64_H */ diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h index 7b3835d..c50185d 100644 --- a/include/asm-x86/xen/page.h +++ b/include/asm-x86/xen/page.h @@ -1,5 +1,5 @@ -#ifndef __XEN_PAGE_H -#define __XEN_PAGE_H +#ifndef ASM_X86__XEN__PAGE_H +#define ASM_X86__XEN__PAGE_H #include <linux/pfn.h> @@ -162,4 +162,4 @@ xmaddr_t arbitrary_virt_to_machine(void *address); void make_lowmem_page_readonly(void *vaddr); void make_lowmem_page_readwrite(void *vaddr); -#endif /* __XEN_PAGE_H */ +#endif /* ASM_X86__XEN__PAGE_H */ diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b68ec09..31474e8 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -180,6 +180,7 @@ unifdef-y += audit.h unifdef-y += auto_fs.h unifdef-y += auxvec.h unifdef-y += binfmts.h +unifdef-y += blktrace_api.h unifdef-y += capability.h unifdef-y += capi.h unifdef-y += cciss_ioctl.h diff --git a/include/linux/ata.h b/include/linux/ata.h index 8a12d71..be00973d 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -88,6 +88,7 @@ enum { ATA_ID_DLF = 128, ATA_ID_CSFO = 129, ATA_ID_CFA_POWER = 160, + ATA_ID_ROT_SPEED = 217, ATA_ID_PIO4 = (1 << 1), ATA_ID_SERNO_LEN = 20, @@ -667,6 +668,15 @@ static inline int ata_id_has_dword_io(const u16 *id) return 0; } +static inline int ata_id_has_unload(const u16 *id) +{ + if (ata_id_major_version(id) >= 7 && + (id[ATA_ID_CFSSE] & 0xC000) == 0x4000 && + id[ATA_ID_CFSSE] & (1 << 13)) + return 1; + return 0; +} + static inline int ata_id_current_chs_valid(const u16 *id) { /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command @@ -691,6 +701,11 @@ static inline int ata_id_is_cfa(const u16 *id) return 0; } +static inline int ata_id_is_ssd(const u16 *id) +{ + return id[ATA_ID_ROT_SPEED] == 0x01; +} + static inline int ata_drive_40wire(const u16 *dev_id) { if (ata_id_is_sata(dev_id)) diff --git a/include/linux/bio.h b/include/linux/bio.h index 0933a14..ff5b4cf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -26,21 +26,8 @@ #ifdef CONFIG_BLOCK -/* Platforms may set this to teach the BIO layer about IOMMU hardware. */ #include <asm/io.h> -#if defined(BIO_VMERGE_MAX_SIZE) && defined(BIO_VMERGE_BOUNDARY) -#define BIOVEC_VIRT_START_SIZE(x) (bvec_to_phys(x) & (BIO_VMERGE_BOUNDARY - 1)) -#define BIOVEC_VIRT_OVERSIZE(x) ((x) > BIO_VMERGE_MAX_SIZE) -#else -#define BIOVEC_VIRT_START_SIZE(x) 0 -#define BIOVEC_VIRT_OVERSIZE(x) 0 -#endif - -#ifndef BIO_VMERGE_BOUNDARY -#define BIO_VMERGE_BOUNDARY 0 -#endif - #define BIO_DEBUG #ifdef BIO_DEBUG @@ -88,25 +75,14 @@ struct bio { /* Number of segments in this BIO after * physical address coalescing is performed. */ - unsigned short bi_phys_segments; - - /* Number of segments after physical and DMA remapping - * hardware coalescing is performed. - */ - unsigned short bi_hw_segments; + unsigned int bi_phys_segments; unsigned int bi_size; /* residual I/O count */ - /* - * To keep track of the max hw size, we account for the - * sizes of the first and last virtually mergeable segments - * in this bio - */ - unsigned int bi_hw_front_size; - unsigned int bi_hw_back_size; - unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ + unsigned int bi_comp_cpu; /* completion CPU */ + struct bio_vec *bi_io_vec; /* the actual vec list */ bio_end_io_t *bi_end_io; @@ -126,11 +102,14 @@ struct bio { #define BIO_UPTODATE 0 /* ok after I/O completion */ #define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ #define BIO_EOF 2 /* out-out-bounds error */ -#define BIO_SEG_VALID 3 /* nr_hw_seg valid */ +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ #define BIO_CLONED 4 /* doesn't own data */ #define BIO_BOUNCED 5 /* bio is a bounce bio */ #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ +#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ +#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ +#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) /* @@ -144,18 +123,31 @@ struct bio { /* * bio bi_rw flags * - * bit 0 -- read (not set) or write (set) + * bit 0 -- data direction + * If not set, bio is a read from device. If set, it's a write to device. * bit 1 -- rw-ahead when set * bit 2 -- barrier + * Insert a serialization point in the IO queue, forcing previously + * submitted IO to be completed before this oen is issued. * bit 3 -- fail fast, don't want low level driver retries * bit 4 -- synchronous I/O hint: the block layer will unplug immediately + * Note that this does NOT indicate that the IO itself is sync, just + * that the block layer will not postpone issue of this IO by plugging. + * bit 5 -- metadata request + * Used for tracing to differentiate metadata and data IO. May also + * get some preferential treatment in the IO scheduler + * bit 6 -- discard sectors + * Informs the lower level device that this range of sectors is no longer + * used by the file system and may thus be freed by the device. Used + * for flash based storage. */ -#define BIO_RW 0 -#define BIO_RW_AHEAD 1 +#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ +#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ #define BIO_RW_BARRIER 2 #define BIO_RW_FAILFAST 3 #define BIO_RW_SYNC 4 #define BIO_RW_META 5 +#define BIO_RW_DISCARD 6 /* * upper 16 bits of bi_rw define the io priority of this bio @@ -185,14 +177,15 @@ struct bio { #define bio_failfast(bio) ((bio)->bi_rw & (1 << BIO_RW_FAILFAST)) #define bio_rw_ahead(bio) ((bio)->bi_rw & (1 << BIO_RW_AHEAD)) #define bio_rw_meta(bio) ((bio)->bi_rw & (1 << BIO_RW_META)) -#define bio_empty_barrier(bio) (bio_barrier(bio) && !(bio)->bi_size) +#define bio_discard(bio) ((bio)->bi_rw & (1 << BIO_RW_DISCARD)) +#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) static inline unsigned int bio_cur_sectors(struct bio *bio) { if (bio->bi_vcnt) return bio_iovec(bio)->bv_len >> 9; - - return 0; + else /* dataless requests such as discard */ + return bio->bi_size >> 9; } static inline void *bio_data(struct bio *bio) @@ -236,8 +229,6 @@ static inline void *bio_data(struct bio *bio) ((bvec_to_phys((vec1)) + (vec1)->bv_len) == bvec_to_phys((vec2))) #endif -#define BIOVEC_VIRT_MERGEABLE(vec1, vec2) \ - ((((bvec_to_phys((vec1)) + (vec1)->bv_len) | bvec_to_phys((vec2))) & (BIO_VMERGE_BOUNDARY - 1)) == 0) #define __BIO_SEG_BOUNDARY(addr1, addr2, mask) \ (((addr1) | (mask)) == (((addr2) - 1) | (mask))) #define BIOVEC_SEG_BOUNDARY(q, b1, b2) \ @@ -319,15 +310,14 @@ struct bio_pair { atomic_t cnt; int error; }; -extern struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, - int first_sectors); -extern mempool_t *bio_split_pool; +extern struct bio_pair *bio_split(struct bio *bi, int first_sectors); extern void bio_pair_release(struct bio_pair *dbio); extern struct bio_set *bioset_create(int, int); extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(gfp_t, int); +extern struct bio *bio_kmalloc(gfp_t, int); extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); extern void bio_put(struct bio *); extern void bio_free(struct bio *, struct bio_set *); @@ -335,7 +325,6 @@ extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); -extern int bio_hw_segments(struct request_queue *, struct bio *); extern void __bio_clone(struct bio *, struct bio *); extern struct bio *bio_clone(struct bio *, gfp_t); @@ -346,12 +335,14 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); extern int bio_get_nr_vecs(struct block_device *); +extern sector_t bio_sector_offset(struct bio *, unsigned short, unsigned int); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, - unsigned long, unsigned int, int); + unsigned long, unsigned int, int, gfp_t); struct sg_iovec; +struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, - struct sg_iovec *, int, int); + struct sg_iovec *, int, int, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -359,15 +350,25 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int, gfp_t, int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); -extern struct bio *bio_copy_user_iov(struct request_queue *, struct sg_iovec *, - int, int); +extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, + unsigned long, unsigned int, int, gfp_t); +extern struct bio *bio_copy_user_iov(struct request_queue *, + struct rq_map_data *, struct sg_iovec *, + int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); extern struct bio_vec *bvec_alloc_bs(gfp_t, int, unsigned long *, struct bio_set *); extern unsigned int bvec_nr_vecs(unsigned short idx); /* + * Allow queuer to specify a completion CPU for this bio + */ +static inline void bio_set_completion_cpu(struct bio *bio, unsigned int cpu) +{ + bio->bi_comp_cpu = cpu; +} + +/* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. * These memory pools in turn all allocate from the bio_slab @@ -445,6 +446,14 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) #define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) +/* + * Check whether this bio carries any data or not. A NULL bio is allowed. + */ +static inline int bio_has_data(struct bio *bio) +{ + return bio && bio->bi_io_vec != NULL; +} + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) @@ -458,14 +467,7 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, #define bip_for_each_vec(bvl, bip, i) \ __bip_for_each_vec(bvl, bip, i, (bip)->bip_idx) -static inline int bio_integrity(struct bio *bio) -{ -#if defined(CONFIG_BLK_DEV_INTEGRITY) - return bio->bi_integrity != NULL; -#else - return 0; -#endif -} +#define bio_integrity(bio) (bio->bi_integrity != NULL) extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 53ea933..a92d9e4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -16,7 +16,9 @@ #include <linux/bio.h> #include <linux/module.h> #include <linux/stringify.h> +#include <linux/gfp.h> #include <linux/bsg.h> +#include <linux/smp.h> #include <asm/scatterlist.h> @@ -54,7 +56,6 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_SUSPEND, /* suspend request */ REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ - REQ_TYPE_FLUSH, /* flush request */ REQ_TYPE_SPECIAL, /* driver defined type */ REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ /* @@ -76,19 +77,18 @@ enum rq_cmd_type_bits { * */ enum { - /* - * just examples for now - */ REQ_LB_OP_EJECT = 0x40, /* eject request */ - REQ_LB_OP_FLUSH = 0x41, /* flush device */ + REQ_LB_OP_FLUSH = 0x41, /* flush request */ + REQ_LB_OP_DISCARD = 0x42, /* discard sectors */ }; /* - * request type modified bits. first three bits match BIO_RW* bits, important + * request type modified bits. first two bits match BIO_RW* bits, important */ enum rq_flag_bits { __REQ_RW, /* not set, read. set, write */ __REQ_FAILFAST, /* no low level driver retries */ + __REQ_DISCARD, /* request to discard sectors */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ __REQ_HARDBARRIER, /* may not be passed by drive either */ @@ -111,6 +111,7 @@ enum rq_flag_bits { }; #define REQ_RW (1 << __REQ_RW) +#define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_FAILFAST (1 << __REQ_FAILFAST) #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) @@ -140,12 +141,14 @@ enum rq_flag_bits { */ struct request { struct list_head queuelist; - struct list_head donelist; + struct call_single_data csd; + int cpu; struct request_queue *q; unsigned int cmd_flags; enum rq_cmd_type_bits cmd_type; + unsigned long atomic_flags; /* Maintain bio traversal state for part by part I/O submission. * hard_* are block layer internals, no driver should touch them! @@ -190,13 +193,6 @@ struct request { */ unsigned short nr_phys_segments; - /* Number of scatter-gather addr+len pairs after - * physical and DMA remapping hardware coalescing is performed. - * This is the number of scatter-gather entries the driver - * will actually have to deal with after DMA mapping is done. - */ - unsigned short nr_hw_segments; - unsigned short ioprio; void *special; @@ -220,6 +216,8 @@ struct request { void *data; void *sense; + unsigned long deadline; + struct list_head timeout_list; unsigned int timeout; int retries; @@ -233,6 +231,11 @@ struct request { struct request *next_rq; }; +static inline unsigned short req_get_ioprio(struct request *req) +{ + return req->ioprio; +} + /* * State information carried for REQ_TYPE_PM_SUSPEND and REQ_TYPE_PM_RESUME * requests. Some step values could eventually be made generic. @@ -252,6 +255,7 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); +typedef int (prepare_discard_fn) (struct request_queue *, struct request *); struct bio_vec; struct bvec_merge_data { @@ -265,6 +269,15 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); +typedef int (lld_busy_fn) (struct request_queue *q); + +enum blk_eh_timer_return { + BLK_EH_NOT_HANDLED, + BLK_EH_HANDLED, + BLK_EH_RESET_TIMER, +}; + +typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); enum blk_queue_state { Queue_down, @@ -307,10 +320,13 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unplug_fn *unplug_fn; + prepare_discard_fn *prepare_discard_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; + rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; + lld_busy_fn *lld_busy_fn; /* * Dispatch queue sorting @@ -385,6 +401,10 @@ struct request_queue unsigned int nr_sorted; unsigned int in_flight; + unsigned int rq_timeout; + struct timer_list timeout; + struct list_head timeout_list; + /* * sg stuff */ @@ -421,6 +441,10 @@ struct request_queue #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ #define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ static inline int queue_is_locked(struct request_queue *q) { @@ -526,7 +550,10 @@ enum { #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) +#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) +#define blk_queue_stackable(q) \ + test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) #define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) @@ -536,16 +563,18 @@ enum { #define blk_noretry_request(rq) ((rq)->cmd_flags & REQ_FAILFAST) #define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) -#define blk_account_rq(rq) (blk_rq_started(rq) && blk_fs_request(rq)) +#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) #define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) #define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) #define blk_pm_request(rq) \ (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) +#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) #define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) +#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define blk_empty_barrier(rq) (blk_barrier_rq(rq) && blk_fs_request(rq) && !(rq)->hard_nr_sectors) /* rq->queuelist of dequeued request must be list_empty() */ @@ -592,7 +621,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int rw) #define RQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ - (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && blk_fs_request((rq))) + (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ + (blk_discard_rq(rq) || blk_fs_request((rq)))) /* * q->prep_rq_fn return values @@ -637,6 +667,12 @@ static inline void blk_queue_bounce(struct request_queue *q, struct bio **bio) } #endif /* CONFIG_MMU */ +struct rq_map_data { + struct page **pages; + int page_order; + int nr_entries; +}; + struct req_iterator { int i; struct bio *bio; @@ -664,6 +700,10 @@ extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); +extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); +extern int blk_lld_busy(struct request_queue *q); +extern int blk_insert_cloned_request(struct request_queue *q, + struct request *rq); extern void blk_plug_device(struct request_queue *); extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); @@ -705,11 +745,14 @@ extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *); extern void blk_run_queue(struct request_queue *); extern void blk_start_queueing(struct request_queue *); -extern int blk_rq_map_user(struct request_queue *, struct request *, void __user *, unsigned long); +extern int blk_rq_map_user(struct request_queue *, struct request *, + struct rq_map_data *, void __user *, unsigned long, + gfp_t); extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct sg_iovec *, int, unsigned int); + struct rq_map_data *, struct sg_iovec *, int, + unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, @@ -750,12 +793,15 @@ extern int __blk_end_request(struct request *rq, int error, extern int blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes); extern void end_request(struct request *, int); -extern void end_queued_request(struct request *, int); -extern void end_dequeued_request(struct request *, int); extern int blk_end_request_callback(struct request *rq, int error, unsigned int nr_bytes, int (drv_callback)(struct request *)); extern void blk_complete_request(struct request *); +extern void __blk_complete_request(struct request *); +extern void blk_abort_request(struct request *); +extern void blk_abort_queue(struct request_queue *); +extern void blk_update_request(struct request *rq, int error, + unsigned int nr_bytes); /* * blk_end_request() takes bytes instead of sectors as a complete size. @@ -790,12 +836,16 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); +extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); +extern void blk_queue_set_discard(struct request_queue *, prepare_discard_fn *); +extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); +extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); extern int blk_do_ordered(struct request_queue *, struct request **); @@ -837,6 +887,16 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } extern int blkdev_issue_flush(struct block_device *, sector_t *); +extern int blkdev_issue_discard(struct block_device *, + sector_t sector, sector_t nr_sects, gfp_t); + +static inline int sb_issue_discard(struct super_block *sb, + sector_t block, sector_t nr_blocks) +{ + block <<= (sb->s_blocksize_bits - 9); + nr_blocks <<= (sb->s_blocksize_bits - 9); + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); +} /* * command filter functions @@ -874,6 +934,13 @@ static inline int queue_dma_alignment(struct request_queue *q) return q ? q->dma_alignment : 511; } +static inline int blk_rq_aligned(struct request_queue *q, void *addr, + unsigned int len) +{ + unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; + return !((unsigned long)addr & alignment) && !(len & alignment); +} + /* assumes size > 256 */ static inline unsigned int blksize_bits(unsigned int size) { @@ -900,7 +967,7 @@ static inline void put_dev_sector(Sector p) } struct work_struct; -int kblockd_schedule_work(struct work_struct *work); +int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); void kblockd_flush_work(struct work_struct *work); #define MODULE_ALIAS_BLOCKDEV(major,minor) \ @@ -945,49 +1012,19 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); -extern int blk_integrity_compare(struct block_device *, struct block_device *); +extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request *); -static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) -{ - if (bi) - return bi->tuple_size; - - return 0; -} - -static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) +static inline +struct blk_integrity *bdev_get_integrity(struct block_device *bdev) { return bdev->bd_disk->integrity; } -static inline unsigned int bdev_get_tag_size(struct block_device *bdev) +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { - struct blk_integrity *bi = bdev_get_integrity(bdev); - - if (bi) - return bi->tag_size; - - return 0; -} - -static inline int bdev_integrity_enabled(struct block_device *bdev, int rw) -{ - struct blk_integrity *bi = bdev_get_integrity(bdev); - - if (bi == NULL) - return 0; - - if (rw == READ && bi->verify_fn != NULL && - (bi->flags & INTEGRITY_FLAG_READ)) - return 1; - - if (rw == WRITE && bi->generate_fn != NULL && - (bi->flags & INTEGRITY_FLAG_WRITE)) - return 1; - - return 0; + return disk->integrity; } static inline int blk_integrity_rq(struct request *rq) @@ -1004,7 +1041,7 @@ static inline int blk_integrity_rq(struct request *rq) #define blk_rq_count_integrity_sg(a) (0) #define blk_rq_map_integrity_sg(a, b) (0) #define bdev_get_integrity(a) (0) -#define bdev_get_tag_size(a) (0) +#define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index d084b8d..3a31eb5 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -1,8 +1,10 @@ #ifndef BLKTRACE_H #define BLKTRACE_H +#ifdef __KERNEL__ #include <linux/blkdev.h> #include <linux/relay.h> +#endif /* * Trace categories @@ -21,6 +23,7 @@ enum blktrace_cat { BLK_TC_NOTIFY = 1 << 10, /* special message */ BLK_TC_AHEAD = 1 << 11, /* readahead */ BLK_TC_META = 1 << 12, /* metadata */ + BLK_TC_DISCARD = 1 << 13, /* discard requests */ BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ }; @@ -47,6 +50,7 @@ enum blktrace_act { __BLK_TA_SPLIT, /* bio was split */ __BLK_TA_BOUNCE, /* bio was bounced */ __BLK_TA_REMAP, /* bio was remapped */ + __BLK_TA_ABORT, /* request aborted */ }; /* @@ -77,6 +81,7 @@ enum blktrace_notify { #define BLK_TA_SPLIT (__BLK_TA_SPLIT) #define BLK_TA_BOUNCE (__BLK_TA_BOUNCE) #define BLK_TA_REMAP (__BLK_TA_REMAP | BLK_TC_ACT(BLK_TC_QUEUE)) +#define BLK_TA_ABORT (__BLK_TA_ABORT | BLK_TC_ACT(BLK_TC_QUEUE)) #define BLK_TN_PROCESS (__BLK_TN_PROCESS | BLK_TC_ACT(BLK_TC_NOTIFY)) #define BLK_TN_TIMESTAMP (__BLK_TN_TIMESTAMP | BLK_TC_ACT(BLK_TC_NOTIFY)) @@ -89,17 +94,17 @@ enum blktrace_notify { * The trace itself */ struct blk_io_trace { - u32 magic; /* MAGIC << 8 | version */ - u32 sequence; /* event number */ - u64 time; /* in microseconds */ - u64 sector; /* disk offset */ - u32 bytes; /* transfer length */ - u32 action; /* what happened */ - u32 pid; /* who did it */ - u32 device; /* device number */ - u32 cpu; /* on what cpu did it happen */ - u16 error; /* completion error */ - u16 pdu_len; /* length of data after this trace */ + __u32 magic; /* MAGIC << 8 | version */ + __u32 sequence; /* event number */ + __u64 time; /* in microseconds */ + __u64 sector; /* disk offset */ + __u32 bytes; /* transfer length */ + __u32 action; /* what happened */ + __u32 pid; /* who did it */ + __u32 device; /* device number */ + __u32 cpu; /* on what cpu did it happen */ + __u16 error; /* completion error */ + __u16 pdu_len; /* length of data after this trace */ }; /* @@ -117,6 +122,23 @@ enum { Blktrace_stopped, }; +#define BLKTRACE_BDEV_SIZE 32 + +/* + * User setup structure passed with BLKTRACESTART + */ +struct blk_user_trace_setup { + char name[BLKTRACE_BDEV_SIZE]; /* output */ + __u16 act_mask; /* input */ + __u32 buf_size; /* input */ + __u32 buf_nr; /* input */ + __u64 start_lba; + __u64 end_lba; + __u32 pid; +}; + +#ifdef __KERNEL__ +#if defined(CONFIG_BLK_DEV_IO_TRACE) struct blk_trace { int trace_state; struct rchan *rchan; @@ -133,21 +155,6 @@ struct blk_trace { atomic_t dropped; }; -/* - * User setup structure passed with BLKTRACESTART - */ -struct blk_user_trace_setup { - char name[BDEVNAME_SIZE]; /* output */ - u16 act_mask; /* input */ - u32 buf_size; /* input */ - u32 buf_nr; /* input */ - u64 start_lba; - u64 end_lba; - u32 pid; -}; - -#ifdef __KERNEL__ -#if defined(CONFIG_BLK_DEV_IO_TRACE) extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); @@ -195,6 +202,9 @@ static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, if (likely(!bt)) return; + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + if (blk_pc_request(rq)) { what |= BLK_TC_ACT(BLK_TC_PC); __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); diff --git a/include/linux/device.h b/include/linux/device.h index 4d8372d..246937c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -199,6 +199,11 @@ struct class { struct class_private *p; }; +struct class_dev_iter { + struct klist_iter ki; + const struct device_type *type; +}; + extern struct kobject *sysfs_dev_block_kobj; extern struct kobject *sysfs_dev_char_kobj; extern int __must_check __class_register(struct class *class, @@ -213,6 +218,13 @@ extern void class_unregister(struct class *class); __class_register(class, &__key); \ }) +extern void class_dev_iter_init(struct class_dev_iter *iter, + struct class *class, + struct device *start, + const struct device_type *type); +extern struct device *class_dev_iter_next(struct class_dev_iter *iter); +extern void class_dev_iter_exit(struct class_dev_iter *iter); + extern int class_for_each_device(struct class *class, struct device *start, void *data, int (*fn)(struct device *dev, void *data)); @@ -396,7 +408,7 @@ struct device { spinlock_t devres_lock; struct list_head devres_head; - struct list_head node; + struct klist_node knode_class; struct class *class; dev_t devt; /* dev_t, creates the sysfs "dev" */ struct attribute_group **groups; /* optional groups */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 639624b..92f6f63 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -112,6 +112,7 @@ extern struct request *elv_latter_request(struct request_queue *, struct request extern int elv_register_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q); extern int elv_may_queue(struct request_queue *, int); +extern void elv_abort_queue(struct request_queue *); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *, struct request *, gfp_t); extern void elv_put_request(struct request_queue *, struct request *); @@ -173,15 +174,15 @@ enum { #define rb_entry_rq(node) rb_entry((node), struct request, rb_node) /* - * Hack to reuse the donelist list_head as the fifo time holder while + * Hack to reuse the csd.list list_head as the fifo time holder while * the request is in the io scheduler. Saves an unsigned long in rq. */ -#define rq_fifo_time(rq) ((unsigned long) (rq)->donelist.next) -#define rq_set_fifo_time(rq,exp) ((rq)->donelist.next = (void *) (exp)) +#define rq_fifo_time(rq) ((unsigned long) (rq)->csd.list.next) +#define rq_set_fifo_time(rq,exp) ((rq)->csd.list.next = (void *) (exp)) #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) do { \ list_del_init(&(rq)->queuelist); \ - INIT_LIST_HEAD(&(rq)->donelist); \ + INIT_LIST_HEAD(&(rq)->csd.list); \ } while (0) /* diff --git a/include/linux/fd.h b/include/linux/fd.h index b6bd41d..f5d194a 100644 --- a/include/linux/fd.h +++ b/include/linux/fd.h @@ -15,10 +15,16 @@ struct floppy_struct { sect, /* sectors per track */ head, /* nr of heads */ track, /* nr of tracks */ - stretch; /* !=0 means double track steps */ + stretch; /* bit 0 !=0 means double track steps */ + /* bit 1 != 0 means swap sides */ + /* bits 2..9 give the first sector */ + /* number (the LSB is flipped) */ #define FD_STRETCH 1 #define FD_SWAPSIDES 2 #define FD_ZEROBASED 4 +#define FD_SECTBASEMASK 0x3FC +#define FD_MKSECTBASE(s) (((s) ^ 1) << 2) +#define FD_SECTBASE(floppy) ((((floppy)->stretch & FD_SECTBASEMASK) >> 2) ^ 1) unsigned char gap, /* gap1 size */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 580b513..32477e8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -86,7 +86,9 @@ extern int dir_notify_enable; #define READ_META (READ | (1 << BIO_RW_META)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) #define SWRITE_SYNC (SWRITE | (1 << BIO_RW_SYNC)) -#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) +#define WRITE_BARRIER (WRITE | (1 << BIO_RW_BARRIER)) +#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) +#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) #define SEL_IN 1 #define SEL_OUT 2 @@ -222,6 +224,7 @@ extern int dir_notify_enable; #define BLKTRACESTART _IO(0x12,116) #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ @@ -1682,6 +1685,7 @@ extern void chrdev_show(struct seq_file *,off_t); /* fs/block_dev.c */ #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ +#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ #ifdef CONFIG_BLOCK #define BLKDEV_MAJOR_HASH_SIZE 255 @@ -1718,6 +1722,9 @@ extern int fs_may_remount_ro(struct super_block *); */ #define bio_data_dir(bio) ((bio)->bi_rw & 1) +extern void check_disk_size_change(struct gendisk *disk, + struct block_device *bdev); +extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index be4f5e5..206cdf9 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -11,12 +11,15 @@ #include <linux/types.h> #include <linux/kdev_t.h> +#include <linux/rcupdate.h> #ifdef CONFIG_BLOCK -#define kobj_to_dev(k) container_of(k, struct device, kobj) -#define dev_to_disk(device) container_of(device, struct gendisk, dev) -#define dev_to_part(device) container_of(device, struct hd_struct, dev) +#define kobj_to_dev(k) container_of((k), struct device, kobj) +#define dev_to_disk(device) container_of((device), struct gendisk, part0.__dev) +#define dev_to_part(device) container_of((device), struct hd_struct, __dev) +#define disk_to_dev(disk) (&(disk)->part0.__dev) +#define part_to_dev(part) (&((part)->__dev)) extern struct device_type part_type; extern struct kobject *block_depr; @@ -55,6 +58,9 @@ enum { UNIXWARE_PARTITION = 0x63, /* Same as GNU_HURD and SCO Unix */ }; +#define DISK_MAX_PARTS 256 +#define DISK_NAME_LEN 32 + #include <linux/major.h> #include <linux/device.h> #include <linux/smp.h> @@ -87,7 +93,7 @@ struct disk_stats { struct hd_struct { sector_t start_sect; sector_t nr_sects; - struct device dev; + struct device __dev; struct kobject *holder_dir; int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST @@ -100,6 +106,7 @@ struct hd_struct { #else struct disk_stats dkstats; #endif + struct rcu_head rcu_head; }; #define GENHD_FL_REMOVABLE 1 @@ -108,100 +115,148 @@ struct hd_struct { #define GENHD_FL_CD 8 #define GENHD_FL_UP 16 #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 -#define GENHD_FL_FAIL 64 +#define GENHD_FL_EXT_DEVT 64 /* allow extended devt */ + +#define BLK_SCSI_MAX_CMDS (256) +#define BLK_SCSI_CMD_PER_LONG (BLK_SCSI_MAX_CMDS / (sizeof(long) * 8)) + +struct blk_scsi_cmd_filter { + unsigned long read_ok[BLK_SCSI_CMD_PER_LONG]; + unsigned long write_ok[BLK_SCSI_CMD_PER_LONG]; + struct kobject kobj; +}; + +struct disk_part_tbl { + struct rcu_head rcu_head; + int len; + struct hd_struct *part[]; +}; struct gendisk { + /* major, first_minor and minors are input parameters only, + * don't use directly. Use disk_devt() and disk_max_parts(). + */ int major; /* major number of driver */ int first_minor; int minors; /* maximum number of minors, =1 for * disks that can't be partitioned. */ - char disk_name[32]; /* name of major driver */ - struct hd_struct **part; /* [indexed by minor] */ + + char disk_name[DISK_NAME_LEN]; /* name of major driver */ + + /* Array of pointers to partitions indexed by partno. + * Protected with matching bdev lock but stat and other + * non-critical accesses use RCU. Always access through + * helpers. + */ + struct disk_part_tbl *part_tbl; + struct hd_struct part0; + struct block_device_operations *fops; struct request_queue *queue; void *private_data; - sector_t capacity; int flags; struct device *driverfs_dev; // FIXME: remove - struct device dev; - struct kobject *holder_dir; struct kobject *slave_dir; struct timer_rand_state *random; - int policy; atomic_t sync_io; /* RAID */ - unsigned long stamp; - int in_flight; -#ifdef CONFIG_SMP - struct disk_stats *dkstats; -#else - struct disk_stats dkstats; -#endif struct work_struct async_notify; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; #endif + int node_id; }; -/* - * Macros to operate on percpu disk statistics: - * - * The __ variants should only be called in critical sections. The full - * variants disable/enable preemption. - */ -static inline struct hd_struct *get_part(struct gendisk *gendiskp, - sector_t sector) +static inline struct gendisk *part_to_disk(struct hd_struct *part) { - struct hd_struct *part; - int i; - for (i = 0; i < gendiskp->minors - 1; i++) { - part = gendiskp->part[i]; - if (part && part->start_sect <= sector - && sector < part->start_sect + part->nr_sects) - return part; + if (likely(part)) { + if (part->partno) + return dev_to_disk(part_to_dev(part)->parent); + else + return dev_to_disk(part_to_dev(part)); } return NULL; } -#ifdef CONFIG_SMP -#define __disk_stat_add(gendiskp, field, addnd) \ - (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) +static inline int disk_max_parts(struct gendisk *disk) +{ + if (disk->flags & GENHD_FL_EXT_DEVT) + return DISK_MAX_PARTS; + return disk->minors; +} -#define disk_stat_read(gendiskp, field) \ -({ \ - typeof(gendiskp->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr(gendiskp->dkstats, i)->field; \ - res; \ -}) +static inline bool disk_partitionable(struct gendisk *disk) +{ + return disk_max_parts(disk) > 1; +} -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { - int i; +static inline dev_t disk_devt(struct gendisk *disk) +{ + return disk_to_dev(disk)->devt; +} - for_each_possible_cpu(i) - memset(per_cpu_ptr(gendiskp->dkstats, i), value, - sizeof(struct disk_stats)); -} +static inline dev_t part_devt(struct hd_struct *part) +{ + return part_to_dev(part)->devt; +} -#define __part_stat_add(part, field, addnd) \ - (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) +extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - __part_stat_add(part, field, addnd); \ - __disk_stat_add(gendiskp, field, addnd); \ -}) +static inline void disk_put_part(struct hd_struct *part) +{ + if (likely(part)) + put_device(part_to_dev(part)); +} + +/* + * Smarter partition iterator without context limits. + */ +#define DISK_PITER_REVERSE (1 << 0) /* iterate in the reverse direction */ +#define DISK_PITER_INCL_EMPTY (1 << 1) /* include 0-sized parts */ +#define DISK_PITER_INCL_PART0 (1 << 2) /* include partition 0 */ + +struct disk_part_iter { + struct gendisk *disk; + struct hd_struct *part; + int idx; + unsigned int flags; +}; + +extern void disk_part_iter_init(struct disk_part_iter *piter, + struct gendisk *disk, unsigned int flags); +extern struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter); +extern void disk_part_iter_exit(struct disk_part_iter *piter); + +extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, + sector_t sector); + +/* + * Macros to operate on percpu disk statistics: + * + * {disk|part|all}_stat_{add|sub|inc|dec}() modify the stat counters + * and should be called between disk_stat_lock() and + * disk_stat_unlock(). + * + * part_stat_read() can be called at any time. + * + * part_stat_{add|set_all}() and {init|free}_part_stats are for + * internal use only. + */ +#ifdef CONFIG_SMP +#define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) +#define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) + +#define __part_stat_add(cpu, part, field, addnd) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) #define part_stat_read(part, field) \ ({ \ - typeof(part->dkstats->field) res = 0; \ + typeof((part)->dkstats->field) res = 0; \ int i; \ for_each_possible_cpu(i) \ - res += per_cpu_ptr(part->dkstats, i)->field; \ + res += per_cpu_ptr((part)->dkstats, i)->field; \ res; \ }) @@ -213,171 +268,107 @@ static inline void part_stat_set_all(struct hd_struct *part, int value) memset(per_cpu_ptr(part->dkstats, i), value, sizeof(struct disk_stats)); } - -#else /* !CONFIG_SMP */ -#define __disk_stat_add(gendiskp, field, addnd) \ - (gendiskp->dkstats.field += addnd) -#define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +static inline int init_part_stats(struct hd_struct *part) { - memset(&gendiskp->dkstats, value, sizeof (struct disk_stats)); + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; } -#define __part_stat_add(part, field, addnd) \ - (part->dkstats.field += addnd) - -#define __all_stat_add(gendiskp, part, field, addnd, sector) \ -({ \ - if (part) \ - part->dkstats.field += addnd; \ - __disk_stat_add(gendiskp, field, addnd); \ -}) - -#define part_stat_read(part, field) (part->dkstats.field) - -static inline void part_stat_set_all(struct hd_struct *part, int value) +static inline void free_part_stats(struct hd_struct *part) { - memset(&part->dkstats, value, sizeof(struct disk_stats)); + free_percpu(part->dkstats); } -#endif /* CONFIG_SMP */ +#else /* !CONFIG_SMP */ +#define part_stat_lock() ({ rcu_read_lock(); 0; }) +#define part_stat_unlock() rcu_read_unlock() -#define disk_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __disk_stat_add(gendiskp, field, addnd); \ - preempt_enable(); \ - } while (0) - -#define __disk_stat_dec(gendiskp, field) __disk_stat_add(gendiskp, field, -1) -#define disk_stat_dec(gendiskp, field) disk_stat_add(gendiskp, field, -1) - -#define __disk_stat_inc(gendiskp, field) __disk_stat_add(gendiskp, field, 1) -#define disk_stat_inc(gendiskp, field) disk_stat_add(gendiskp, field, 1) - -#define __disk_stat_sub(gendiskp, field, subnd) \ - __disk_stat_add(gendiskp, field, -subnd) -#define disk_stat_sub(gendiskp, field, subnd) \ - disk_stat_add(gendiskp, field, -subnd) - -#define part_stat_add(gendiskp, field, addnd) \ - do { \ - preempt_disable(); \ - __part_stat_add(gendiskp, field, addnd);\ - preempt_enable(); \ - } while (0) - -#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) -#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) - -#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) -#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) - -#define __part_stat_sub(gendiskp, field, subnd) \ - __part_stat_add(gendiskp, field, -subnd) -#define part_stat_sub(gendiskp, field, subnd) \ - part_stat_add(gendiskp, field, -subnd) - -#define all_stat_add(gendiskp, part, field, addnd, sector) \ - do { \ - preempt_disable(); \ - __all_stat_add(gendiskp, part, field, addnd, sector); \ - preempt_enable(); \ - } while (0) - -#define __all_stat_dec(gendiskp, field, sector) \ - __all_stat_add(gendiskp, field, -1, sector) -#define all_stat_dec(gendiskp, field, sector) \ - all_stat_add(gendiskp, field, -1, sector) - -#define __all_stat_inc(gendiskp, part, field, sector) \ - __all_stat_add(gendiskp, part, field, 1, sector) -#define all_stat_inc(gendiskp, part, field, sector) \ - all_stat_add(gendiskp, part, field, 1, sector) - -#define __all_stat_sub(gendiskp, part, field, subnd, sector) \ - __all_stat_add(gendiskp, part, field, -subnd, sector) -#define all_stat_sub(gendiskp, part, field, subnd, sector) \ - all_stat_add(gendiskp, part, field, -subnd, sector) - -/* Inlines to alloc and free disk stats in struct gendisk */ -#ifdef CONFIG_SMP -static inline int init_disk_stats(struct gendisk *disk) -{ - disk->dkstats = alloc_percpu(struct disk_stats); - if (!disk->dkstats) - return 0; - return 1; -} +#define __part_stat_add(cpu, part, field, addnd) \ + ((part)->dkstats.field += addnd) + +#define part_stat_read(part, field) ((part)->dkstats.field) -static inline void free_disk_stats(struct gendisk *disk) +static inline void part_stat_set_all(struct hd_struct *part, int value) { - free_percpu(disk->dkstats); + memset(&part->dkstats, value, sizeof(struct disk_stats)); } static inline int init_part_stats(struct hd_struct *part) { - part->dkstats = alloc_percpu(struct disk_stats); - if (!part->dkstats) - return 0; return 1; } static inline void free_part_stats(struct hd_struct *part) { - free_percpu(part->dkstats); -} - -#else /* CONFIG_SMP */ -static inline int init_disk_stats(struct gendisk *disk) -{ - return 1; } -static inline void free_disk_stats(struct gendisk *disk) -{ -} +#endif /* CONFIG_SMP */ -static inline int init_part_stats(struct hd_struct *part) +#define part_stat_add(cpu, part, field, addnd) do { \ + __part_stat_add((cpu), (part), field, addnd); \ + if ((part)->partno) \ + __part_stat_add((cpu), &part_to_disk((part))->part0, \ + field, addnd); \ +} while (0) + +#define part_stat_dec(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, -1) +#define part_stat_inc(cpu, gendiskp, field) \ + part_stat_add(cpu, gendiskp, field, 1) +#define part_stat_sub(cpu, gendiskp, field, subnd) \ + part_stat_add(cpu, gendiskp, field, -subnd) + +static inline void part_inc_in_flight(struct hd_struct *part) { - return 1; + part->in_flight++; + if (part->partno) + part_to_disk(part)->part0.in_flight++; } -static inline void free_part_stats(struct hd_struct *part) +static inline void part_dec_in_flight(struct hd_struct *part) { + part->in_flight--; + if (part->partno) + part_to_disk(part)->part0.in_flight--; } -#endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -extern void disk_round_stats(struct gendisk *disk); -extern void part_round_stats(struct hd_struct *part); +extern void part_round_stats(int cpu, struct hd_struct *part); /* drivers/block/genhd.c */ extern int get_blkdev_list(char *, int); extern void add_disk(struct gendisk *disk); extern void del_gendisk(struct gendisk *gp); extern void unlink_gendisk(struct gendisk *gp); -extern struct gendisk *get_gendisk(dev_t dev, int *part); +extern struct gendisk *get_gendisk(dev_t dev, int *partno); +extern struct block_device *bdget_disk(struct gendisk *disk, int partno); extern void set_device_ro(struct block_device *bdev, int flag); extern void set_disk_ro(struct gendisk *disk, int flag); +static inline int get_disk_ro(struct gendisk *disk) +{ + return disk->part0.policy; +} + /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk); extern void rand_initialize_disk(struct gendisk *disk); static inline sector_t get_start_sect(struct block_device *bdev) { - return bdev->bd_contains == bdev ? 0 : bdev->bd_part->start_sect; + return bdev->bd_part->start_sect; } static inline sector_t get_capacity(struct gendisk *disk) { - return disk->capacity; + return disk->part0.nr_sects; } static inline void set_capacity(struct gendisk *disk, sector_t size) { - disk->capacity = size; + disk->part0.nr_sects = size; } #ifdef CONFIG_SOLARIS_X86_PARTITION @@ -527,9 +518,12 @@ struct unixware_disklabel { #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 -extern dev_t blk_lookup_devt(const char *name, int part); -extern char *disk_name (struct gendisk *hd, int part, char *buf); +extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); +extern void blk_free_devt(dev_t devt); +extern dev_t blk_lookup_devt(const char *name, int partno); +extern char *disk_name (struct gendisk *hd, int partno, char *buf); +extern int disk_expand_part_tbl(struct gendisk *disk, int target); extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev); extern int __must_check add_partition(struct gendisk *, int, sector_t, sector_t, int); extern void delete_partition(struct gendisk *, int); @@ -546,16 +540,23 @@ extern void blk_register_region(dev_t devt, unsigned long range, void *data); extern void blk_unregister_region(dev_t devt, unsigned long range); -static inline struct block_device *bdget_disk(struct gendisk *disk, int index) -{ - return bdget(MKDEV(disk->major, disk->first_minor) + index); -} +extern ssize_t part_size_show(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf); +#ifdef CONFIG_FAIL_MAKE_REQUEST +extern ssize_t part_fail_show(struct device *dev, + struct device_attribute *attr, char *buf); +extern ssize_t part_fail_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count); +#endif /* CONFIG_FAIL_MAKE_REQUEST */ #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -static inline dev_t blk_lookup_devt(const char *name, int part) +static inline dev_t blk_lookup_devt(const char *name, int partno) { dev_t devt = MKDEV(0, 0); return devt; diff --git a/include/linux/ide.h b/include/linux/ide.h index 1524829..6514db8 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -366,7 +366,9 @@ enum { /* Currently on a filemark */ IDE_AFLAG_FILEMARK = (1 << 25), /* 0 = no tape is loaded, so we don't rewind after ejecting */ - IDE_AFLAG_MEDIUM_PRESENT = (1 << 26) + IDE_AFLAG_MEDIUM_PRESENT = (1 << 26), + + IDE_AFLAG_NO_AUTOCLOSE = (1 << 27), }; struct ide_drive_s { diff --git a/include/linux/klist.h b/include/linux/klist.h index 06c338e..8ea98db 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -38,7 +38,7 @@ extern void klist_init(struct klist *k, void (*get)(struct klist_node *), void (*put)(struct klist_node *)); struct klist_node { - struct klist *n_klist; + void *n_klist; /* never access directly */ struct list_head n_node; struct kref n_ref; struct completion n_removed; @@ -57,7 +57,6 @@ extern int klist_node_attached(struct klist_node *n); struct klist_iter { struct klist *i_klist; - struct list_head *i_head; struct klist_node *i_cur; }; diff --git a/include/linux/libata.h b/include/linux/libata.h index 225bfc5..947cf84 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -146,6 +146,7 @@ enum { ATA_DFLAG_SPUNDOWN = (1 << 14), /* XXX: for spindown_compat */ ATA_DFLAG_SLEEPING = (1 << 15), /* device is sleeping */ ATA_DFLAG_DUBIOUS_XFER = (1 << 16), /* data transfer not verified */ + ATA_DFLAG_NO_UNLOAD = (1 << 17), /* device doesn't support unload */ ATA_DFLAG_INIT_MASK = (1 << 24) - 1, ATA_DFLAG_DETACH = (1 << 24), @@ -244,6 +245,7 @@ enum { ATA_TMOUT_BOOT = 30000, /* heuristic */ ATA_TMOUT_BOOT_QUICK = 7000, /* heuristic */ ATA_TMOUT_INTERNAL_QUICK = 5000, + ATA_TMOUT_MAX_PARK = 30000, /* FIXME: GoVault needs 2s but we can't afford that without * parallel probing. 800ms is enough for iVDR disk @@ -319,8 +321,11 @@ enum { ATA_EH_RESET = ATA_EH_SOFTRESET | ATA_EH_HARDRESET, ATA_EH_ENABLE_LINK = (1 << 3), ATA_EH_LPM = (1 << 4), /* link power management action */ + ATA_EH_PARK = (1 << 5), /* unload heads and stop I/O */ - ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE, + ATA_EH_PERDEV_MASK = ATA_EH_REVALIDATE | ATA_EH_PARK, + ATA_EH_ALL_ACTIONS = ATA_EH_REVALIDATE | ATA_EH_RESET | + ATA_EH_ENABLE_LINK | ATA_EH_LPM, /* ata_eh_info->flags */ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ @@ -452,6 +457,7 @@ enum link_pm { MEDIUM_POWER, }; extern struct device_attribute dev_attr_link_power_management_policy; +extern struct device_attribute dev_attr_unload_heads; extern struct device_attribute dev_attr_em_message_type; extern struct device_attribute dev_attr_em_message; extern struct device_attribute dev_attr_sw_activity; @@ -554,8 +560,8 @@ struct ata_ering { struct ata_device { struct ata_link *link; unsigned int devno; /* 0 or 1 */ - unsigned long flags; /* ATA_DFLAG_xxx */ unsigned int horkage; /* List of broken features */ + unsigned long flags; /* ATA_DFLAG_xxx */ struct scsi_device *sdev; /* attached SCSI device */ #ifdef CONFIG_ATA_ACPI acpi_handle acpi_handle; @@ -564,6 +570,7 @@ struct ata_device { /* n_sector is used as CLEAR_OFFSET, read comment above CLEAR_OFFSET */ u64 n_sectors; /* size of device, if ATA */ unsigned int class; /* ATA_DEV_xxx */ + unsigned long unpark_deadline; u8 pio_mode; u8 dma_mode; @@ -621,6 +628,7 @@ struct ata_eh_context { [ATA_EH_CMD_TIMEOUT_TABLE_SIZE]; unsigned int classes[ATA_MAX_DEVICES]; unsigned int did_probe_mask; + unsigned int unloaded_mask; unsigned int saved_ncq_enabled; u8 saved_xfer_mode[ATA_MAX_DEVICES]; /* timestamp for the last reset attempt or success */ @@ -688,7 +696,8 @@ struct ata_port { unsigned int qc_active; int nr_active_links; /* #links with active qcs */ - struct ata_link link; /* host default link */ + struct ata_link link; /* host default link */ + struct ata_link *slave_link; /* see ata_slave_link_init() */ int nr_pmp_links; /* nr of available PMP links */ struct ata_link *pmp_link; /* array of PMP links */ @@ -709,6 +718,7 @@ struct ata_port { struct list_head eh_done_q; wait_queue_head_t eh_wait_q; int eh_tries; + struct completion park_req_pending; pm_message_t pm_mesg; int *pm_result; @@ -772,8 +782,8 @@ struct ata_port_operations { /* * Optional features */ - int (*scr_read)(struct ata_port *ap, unsigned int sc_reg, u32 *val); - int (*scr_write)(struct ata_port *ap, unsigned int sc_reg, u32 val); + int (*scr_read)(struct ata_link *link, unsigned int sc_reg, u32 *val); + int (*scr_write)(struct ata_link *link, unsigned int sc_reg, u32 val); void (*pmp_attach)(struct ata_port *ap); void (*pmp_detach)(struct ata_port *ap); int (*enable_pm)(struct ata_port *ap, enum link_pm policy); @@ -895,6 +905,7 @@ extern void ata_port_disable(struct ata_port *); extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports); extern struct ata_host *ata_host_alloc_pinfo(struct device *dev, const struct ata_port_info * const * ppi, int n_ports); +extern int ata_slave_link_init(struct ata_port *ap); extern int ata_host_start(struct ata_host *host); extern int ata_host_register(struct ata_host *host, struct scsi_host_template *sht); @@ -920,8 +931,8 @@ extern int sata_scr_valid(struct ata_link *link); extern int sata_scr_read(struct ata_link *link, int reg, u32 *val); extern int sata_scr_write(struct ata_link *link, int reg, u32 val); extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val); -extern int ata_link_online(struct ata_link *link); -extern int ata_link_offline(struct ata_link *link); +extern bool ata_link_online(struct ata_link *link); +extern bool ata_link_offline(struct ata_link *link); #ifdef CONFIG_PM extern int ata_host_suspend(struct ata_host *host, pm_message_t mesg); extern void ata_host_resume(struct ata_host *host); @@ -1098,6 +1109,7 @@ extern void ata_std_error_handler(struct ata_port *ap); */ extern const struct ata_port_operations ata_base_port_ops; extern const struct ata_port_operations sata_port_ops; +extern struct device_attribute *ata_common_sdev_attrs[]; #define ATA_BASE_SHT(drv_name) \ .module = THIS_MODULE, \ @@ -1112,7 +1124,8 @@ extern const struct ata_port_operations sata_port_ops; .proc_name = drv_name, \ .slave_configure = ata_scsi_slave_config, \ .slave_destroy = ata_scsi_slave_destroy, \ - .bios_param = ata_std_bios_param + .bios_param = ata_std_bios_param, \ + .sdev_attrs = ata_common_sdev_attrs #define ATA_NCQ_SHT(drv_name) \ ATA_BASE_SHT(drv_name), \ @@ -1134,7 +1147,7 @@ static inline bool sata_pmp_attached(struct ata_port *ap) static inline int ata_is_host_link(const struct ata_link *link) { - return link == &link->ap->link; + return link == &link->ap->link || link == link->ap->slave_link; } #else /* CONFIG_SATA_PMP */ static inline bool sata_pmp_supported(struct ata_port *ap) @@ -1167,7 +1180,7 @@ static inline int sata_srst_pmp(struct ata_link *link) printk("%sata%u: "fmt, lv, (ap)->print_id , ##args) #define ata_link_printk(link, lv, fmt, args...) do { \ - if (sata_pmp_attached((link)->ap)) \ + if (sata_pmp_attached((link)->ap) || (link)->ap->slave_link) \ printk("%sata%u.%02u: "fmt, lv, (link)->ap->print_id, \ (link)->pmp , ##args); \ else \ @@ -1265,34 +1278,17 @@ static inline int ata_link_active(struct ata_link *link) return ata_tag_valid(link->active_tag) || link->sactive; } -static inline struct ata_link *ata_port_first_link(struct ata_port *ap) -{ - if (sata_pmp_attached(ap)) - return ap->pmp_link; - return &ap->link; -} - -static inline struct ata_link *ata_port_next_link(struct ata_link *link) -{ - struct ata_port *ap = link->ap; - - if (ata_is_host_link(link)) { - if (!sata_pmp_attached(ap)) - return NULL; - return ap->pmp_link; - } - - if (++link < ap->nr_pmp_links + ap->pmp_link) - return link; - return NULL; -} +extern struct ata_link *__ata_port_next_link(struct ata_port *ap, + struct ata_link *link, + bool dev_only); -#define __ata_port_for_each_link(lk, ap) \ - for ((lk) = &(ap)->link; (lk); (lk) = ata_port_next_link(lk)) +#define __ata_port_for_each_link(link, ap) \ + for ((link) = __ata_port_next_link((ap), NULL, false); (link); \ + (link) = __ata_port_next_link((ap), (link), false)) #define ata_port_for_each_link(link, ap) \ - for ((link) = ata_port_first_link(ap); (link); \ - (link) = ata_port_next_link(link)) + for ((link) = __ata_port_next_link((ap), NULL, true); (link); \ + (link) = __ata_port_next_link((ap), (link), true)) #define ata_link_for_each_dev(dev, link) \ for ((dev) = (link)->device; \ diff --git a/include/linux/major.h b/include/linux/major.h index 53d5faf..8824945 100644 --- a/include/linux/major.h +++ b/include/linux/major.h @@ -170,4 +170,6 @@ #define VIOTAPE_MAJOR 230 +#define BLOCK_EXT_MAJOR 259 + #endif diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h index 310e616..8b4aa05 100644 --- a/include/linux/mtd/blktrans.h +++ b/include/linux/mtd/blktrans.h @@ -41,6 +41,8 @@ struct mtd_blktrans_ops { unsigned long block, char *buffer); int (*writesect)(struct mtd_blktrans_dev *dev, unsigned long block, char *buffer); + int (*discard)(struct mtd_blktrans_dev *dev, + unsigned long block, unsigned nr_blocks); /* Block layer ioctls */ int (*getgeo)(struct mtd_blktrans_dev *dev, struct hd_geometry *geo); diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 5da9794..b106fd8 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -1,6 +1,8 @@ #ifndef __LINUX_STACKTRACE_H #define __LINUX_STACKTRACE_H +struct task_struct; + #ifdef CONFIG_STACKTRACE struct stack_trace { unsigned int nr_entries, max_entries; diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index f9f6e79..855bf95 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -75,7 +75,6 @@ struct scsi_cmnd { int retries; int allowed; - int timeout_per_command; unsigned char prot_op; unsigned char prot_type; @@ -86,7 +85,6 @@ struct scsi_cmnd { /* These elements define the operation we are about to perform */ unsigned char *cmnd; - struct timer_list eh_timeout; /* Used to time out the command. */ /* These elements define the operation we ultimately want to perform */ struct scsi_data_buffer sdb; @@ -139,7 +137,6 @@ extern void scsi_put_command(struct scsi_cmnd *); extern void __scsi_put_command(struct Scsi_Host *, struct scsi_cmnd *, struct device *); extern void scsi_finish_command(struct scsi_cmnd *cmd); -extern void scsi_req_abort_cmd(struct scsi_cmnd *cmd); extern void *scsi_kmap_atomic_sg(struct scatterlist *sg, int sg_count, size_t *offset, size_t *len); diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 44a55d1..d123ca8 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -43,13 +43,6 @@ struct blk_queue_tags; #define DISABLE_CLUSTERING 0 #define ENABLE_CLUSTERING 1 -enum scsi_eh_timer_return { - EH_NOT_HANDLED, - EH_HANDLED, - EH_RESET_TIMER, -}; - - struct scsi_host_template { struct module *module; const char *name; @@ -347,7 +340,7 @@ struct scsi_host_template { * * Status: OPTIONAL */ - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); /* * Name of proc directory diff --git a/include/scsi/scsi_transport.h b/include/scsi/scsi_transport.h index 490bd13..0de32cd 100644 --- a/include/scsi/scsi_transport.h +++ b/include/scsi/scsi_transport.h @@ -21,6 +21,7 @@ #define SCSI_TRANSPORT_H #include <linux/transport_class.h> +#include <linux/blkdev.h> #include <scsi/scsi_host.h> #include <scsi/scsi_device.h> @@ -64,7 +65,7 @@ struct scsi_transport_template { * begin counting again * EH_NOT_HANDLED Begin normal error recovery */ - enum scsi_eh_timer_return (* eh_timed_out)(struct scsi_cmnd *); + enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *); /* * Used as callback for the completion of i_t_nexus request diff --git a/init/do_mounts.c b/init/do_mounts.c index 3715feb..d055b19 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -263,6 +263,10 @@ retry: printk("Please append a correct \"root=\" boot option; here are the available partitions:\n"); printk_all_partitions(); +#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT + printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify " + "explicit textual name for \"root=\" boot option.\n"); +#endif panic("VFS: Unable to mount root fs on %s", b); } diff --git a/init/main.c b/init/main.c index f6f7042..3820323 100644 --- a/init/main.c +++ b/init/main.c @@ -708,7 +708,7 @@ int do_one_initcall(initcall_t fn) int result; if (initcall_debug) { - print_fn_descriptor_symbol("calling %s\n", fn); + printk("calling %pF\n", fn); t0 = ktime_get(); } @@ -718,8 +718,8 @@ int do_one_initcall(initcall_t fn) t1 = ktime_get(); delta = ktime_sub(t1, t0); - print_fn_descriptor_symbol("initcall %s", fn); - printk(" returned %d after %Ld msecs\n", result, + printk("initcall %pF returned %d after %Ld msecs\n", + fn, result, (unsigned long long) delta.tv64 >> 20); } @@ -737,8 +737,7 @@ int do_one_initcall(initcall_t fn) local_irq_enable(); } if (msgbuf[0]) { - print_fn_descriptor_symbol(KERN_WARNING "initcall %s", fn); - printk(" returned with %s\n", msgbuf); + printk("initcall %pF returned with %s\n", fn, msgbuf); } return result; diff --git a/kernel/kgdb.c b/kernel/kgdb.c index 25d955d..e4dcfb2 100644 --- a/kernel/kgdb.c +++ b/kernel/kgdb.c @@ -590,6 +590,7 @@ static void kgdb_wait(struct pt_regs *regs) /* Signal the primary CPU that we are done: */ atomic_set(&cpu_in_kgdb[cpu], 0); + touch_softlockup_watchdog(); clocksource_touch_watchdog(); local_irq_restore(flags); } @@ -1432,6 +1433,7 @@ acquirelock: atomic_read(&kgdb_cpu_doing_single_step) != cpu) { atomic_set(&kgdb_active, -1); + touch_softlockup_watchdog(); clocksource_touch_watchdog(); local_irq_restore(flags); @@ -1524,6 +1526,7 @@ acquirelock: kgdb_restore: /* Free kgdb_active */ atomic_set(&kgdb_active, -1); + touch_softlockup_watchdog(); clocksource_touch_watchdog(); local_irq_restore(flags); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index bd70345..cb01cd8 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -235,7 +235,8 @@ static void tick_do_broadcast_on_off(void *why) case CLOCK_EVT_NOTIFY_BROADCAST_FORCE: if (!cpu_isset(cpu, tick_broadcast_mask)) { cpu_set(cpu, tick_broadcast_mask); - if (bc->mode == TICKDEV_MODE_PERIODIC) + if (tick_broadcast_device.mode == + TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE) @@ -245,7 +246,8 @@ static void tick_do_broadcast_on_off(void *why) if (!tick_broadcast_force && cpu_isset(cpu, tick_broadcast_mask)) { cpu_clear(cpu, tick_broadcast_mask); - if (bc->mode == TICKDEV_MODE_PERIODIC) + if (tick_broadcast_device.mode == + TICKDEV_MODE_PERIODIC) tick_setup_periodic(dev, 0); } break; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0b50481..7d7a31d 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -624,6 +624,28 @@ config BACKTRACE_SELF_TEST Say N if you are unsure. +config DEBUG_BLOCK_EXT_DEVT + bool "Force extended block device numbers and spread them" + depends on DEBUG_KERNEL + depends on BLOCK + default n + help + Conventionally, block device numbers are allocated from + predetermined contiguous area. However, extended block area + may introduce non-contiguous block device numbers. This + option forces most block device numbers to be allocated from + the extended space and spreads them to discover kernel or + userland code paths which assume predetermined contiguous + device number allocation. + + Note that turning on this debug option shuffles all the + device numbers for all IDE and SCSI devices including libata + ones, so root partition specified using device number + directly (via rdev or root=MAJ:MIN) won't work anymore. + Textual device names (root=/dev/sdXn) will continue to work. + + Say N if you are unsure. + config LKDTM tristate "Linux Kernel Dump Test Tool Module" depends on DEBUG_KERNEL @@ -661,10 +683,21 @@ config FAIL_PAGE_ALLOC config FAIL_MAKE_REQUEST bool "Fault-injection capability for disk IO" - depends on FAULT_INJECTION + depends on FAULT_INJECTION && BLOCK help Provide fault-injection capability for disk IO. +config FAIL_IO_TIMEOUT + bool "Faul-injection capability for faking disk interrupts" + depends on FAULT_INJECTION && BLOCK + help + Provide fault-injection capability on end IO handling. This + will make the block layer "forget" an interrupt as configured, + thus exercising the error handling. + + Only works with drivers that use the generic timeout handling, + for others it wont do anything. + config FAULT_INJECTION_DEBUG_FS bool "Debugfs entries for fault-injection capabilities" depends on FAULT_INJECTION && SYSFS && DEBUG_FS diff --git a/lib/klist.c b/lib/klist.c index cca37f9..bbdd301 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -37,6 +37,37 @@ #include <linux/klist.h> #include <linux/module.h> +/* + * Use the lowest bit of n_klist to mark deleted nodes and exclude + * dead ones from iteration. + */ +#define KNODE_DEAD 1LU +#define KNODE_KLIST_MASK ~KNODE_DEAD + +static struct klist *knode_klist(struct klist_node *knode) +{ + return (struct klist *) + ((unsigned long)knode->n_klist & KNODE_KLIST_MASK); +} + +static bool knode_dead(struct klist_node *knode) +{ + return (unsigned long)knode->n_klist & KNODE_DEAD; +} + +static void knode_set_klist(struct klist_node *knode, struct klist *klist) +{ + knode->n_klist = klist; + /* no knode deserves to start its life dead */ + WARN_ON(knode_dead(knode)); +} + +static void knode_kill(struct klist_node *knode) +{ + /* and no knode should die twice ever either, see we're very humane */ + WARN_ON(knode_dead(knode)); + *(unsigned long *)&knode->n_klist |= KNODE_DEAD; +} /** * klist_init - Initialize a klist structure. @@ -79,7 +110,7 @@ static void klist_node_init(struct klist *k, struct klist_node *n) INIT_LIST_HEAD(&n->n_node); init_completion(&n->n_removed); kref_init(&n->n_ref); - n->n_klist = k; + knode_set_klist(n, k); if (k->get) k->get(n); } @@ -115,7 +146,7 @@ EXPORT_SYMBOL_GPL(klist_add_tail); */ void klist_add_after(struct klist_node *n, struct klist_node *pos) { - struct klist *k = pos->n_klist; + struct klist *k = knode_klist(pos); klist_node_init(k, n); spin_lock(&k->k_lock); @@ -131,7 +162,7 @@ EXPORT_SYMBOL_GPL(klist_add_after); */ void klist_add_before(struct klist_node *n, struct klist_node *pos) { - struct klist *k = pos->n_klist; + struct klist *k = knode_klist(pos); klist_node_init(k, n); spin_lock(&k->k_lock); @@ -144,9 +175,10 @@ static void klist_release(struct kref *kref) { struct klist_node *n = container_of(kref, struct klist_node, n_ref); + WARN_ON(!knode_dead(n)); list_del(&n->n_node); complete(&n->n_removed); - n->n_klist = NULL; + knode_set_klist(n, NULL); } static int klist_dec_and_del(struct klist_node *n) @@ -154,22 +186,29 @@ static int klist_dec_and_del(struct klist_node *n) return kref_put(&n->n_ref, klist_release); } -/** - * klist_del - Decrement the reference count of node and try to remove. - * @n: node we're deleting. - */ -void klist_del(struct klist_node *n) +static void klist_put(struct klist_node *n, bool kill) { - struct klist *k = n->n_klist; + struct klist *k = knode_klist(n); void (*put)(struct klist_node *) = k->put; spin_lock(&k->k_lock); + if (kill) + knode_kill(n); if (!klist_dec_and_del(n)) put = NULL; spin_unlock(&k->k_lock); if (put) put(n); } + +/** + * klist_del - Decrement the reference count of node and try to remove. + * @n: node we're deleting. + */ +void klist_del(struct klist_node *n) +{ + klist_put(n, true); +} EXPORT_SYMBOL_GPL(klist_del); /** @@ -206,7 +245,6 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i, struct klist_node *n) { i->i_klist = k; - i->i_head = &k->k_list; i->i_cur = n; if (n) kref_get(&n->n_ref); @@ -237,7 +275,7 @@ EXPORT_SYMBOL_GPL(klist_iter_init); void klist_iter_exit(struct klist_iter *i) { if (i->i_cur) { - klist_del(i->i_cur); + klist_put(i->i_cur, false); i->i_cur = NULL; } } @@ -258,27 +296,33 @@ static struct klist_node *to_klist_node(struct list_head *n) */ struct klist_node *klist_next(struct klist_iter *i) { - struct list_head *next; - struct klist_node *lnode = i->i_cur; - struct klist_node *knode = NULL; void (*put)(struct klist_node *) = i->i_klist->put; + struct klist_node *last = i->i_cur; + struct klist_node *next; spin_lock(&i->i_klist->k_lock); - if (lnode) { - next = lnode->n_node.next; - if (!klist_dec_and_del(lnode)) + + if (last) { + next = to_klist_node(last->n_node.next); + if (!klist_dec_and_del(last)) put = NULL; } else - next = i->i_head->next; + next = to_klist_node(i->i_klist->k_list.next); - if (next != i->i_head) { - knode = to_klist_node(next); - kref_get(&knode->n_ref); + i->i_cur = NULL; + while (next != to_klist_node(&i->i_klist->k_list)) { + if (likely(!knode_dead(next))) { + kref_get(&next->n_ref); + i->i_cur = next; + break; + } + next = to_klist_node(next->n_node.next); } - i->i_cur = knode; + spin_unlock(&i->i_klist->k_lock); - if (put && lnode) - put(lnode); - return knode; + + if (put && last) + put(last); + return i->i_cur; } EXPORT_SYMBOL_GPL(klist_next); diff --git a/mm/bounce.c b/mm/bounce.c index b6d2d0f..06722c4 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -267,7 +267,7 @@ void blk_queue_bounce(struct request_queue *q, struct bio **bio_orig) /* * Data-less bio, nothing to bounce */ - if (bio_empty_barrier(*bio_orig)) + if (!bio_has_data(*bio_orig)) return; /* @@ -514,9 +514,11 @@ size_t ksize(const void *block) return 0; sp = (struct slob_page *)virt_to_page(block); - if (slob_page(sp)) - return ((slob_t *)block - 1)->units + SLOB_UNIT; - else + if (slob_page(sp)) { + int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); + unsigned int *m = (unsigned int *)(block - align); + return SLOB_UNITS(*m) * SLOB_UNIT; + } else return sp->page.private; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 01c83e2..28c7157 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -317,6 +317,9 @@ void ax25_destroy_socket(ax25_cb *ax25) /* Queue the unaccepted socket for death */ sock_orphan(skb->sk); + /* 9A4GL: hack to release unaccepted sockets */ + skb->sk->sk_state = TCP_LISTEN; + ax25_start_heartbeat(sax25); sax25->state = AX25_STATE_0; } diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index cdc7e75..96e4b92 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -39,9 +39,11 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: - if (!sk || - sock_flag(sk, SOCK_DESTROY) || - sock_flag(sk, SOCK_DEAD)) { + /* Magic here: If we listen() and a new link dies before it + is accepted() it isn't 'dead' so doesn't get removed. */ + if (!sk || sock_flag(sk, SOCK_DESTROY) || + (sk->sk_state == TCP_LISTEN && + sock_flag(sk, SOCK_DEAD))) { if (sk) { sock_hold(sk); ax25_destroy_socket(ax25); diff --git a/net/core/dev.c b/net/core/dev.c index e8eb2b4..0ae08d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2918,6 +2918,12 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } +static void dev_change_rx_flags(struct net_device *dev, int flags) +{ + if (dev->flags & IFF_UP && dev->change_rx_flags) + dev->change_rx_flags(dev, flags); +} + static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -2955,8 +2961,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) current->uid, current->gid, audit_get_sessionid(current)); - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_PROMISC); + dev_change_rx_flags(dev, IFF_PROMISC); } return 0; } @@ -3022,8 +3027,7 @@ int dev_set_allmulti(struct net_device *dev, int inc) } } if (dev->flags ^ old_flags) { - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } return 0; @@ -3347,8 +3351,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST) - dev->change_rx_flags(dev, IFF_MULTICAST); + if ((old_flags ^ flags) & IFF_MULTICAST) + dev_change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); @@ -3808,14 +3812,11 @@ static int dev_new_index(struct net *net) } /* Delayed registration/unregisteration */ -static DEFINE_SPINLOCK(net_todo_list_lock); static LIST_HEAD(net_todo_list); static void net_set_todo(struct net_device *dev) { - spin_lock(&net_todo_list_lock); list_add_tail(&dev->todo_list, &net_todo_list); - spin_unlock(&net_todo_list_lock); } static void rollback_registered(struct net_device *dev) @@ -4142,33 +4143,24 @@ static void netdev_wait_allrefs(struct net_device *dev) * free_netdev(y1); * free_netdev(y2); * - * We are invoked by rtnl_unlock() after it drops the semaphore. + * We are invoked by rtnl_unlock(). * This allows us to deal with problems: * 1) We can delete sysfs objects which invoke hotplug * without deadlocking with linkwatch via keventd. * 2) Since we run with the RTNL semaphore not held, we can sleep * safely in order to wait for the netdev refcnt to drop to zero. + * + * We must not return until all unregister events added during + * the interval the lock was held have been completed. */ -static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { struct list_head list; - /* Need to guard against multiple cpu's getting out of order. */ - mutex_lock(&net_todo_run_mutex); - - /* Not safe to do outside the semaphore. We must not return - * until all unregister events invoked by the local processor - * have been completed (either by this todo run, or one on - * another cpu). - */ - if (list_empty(&net_todo_list)) - goto out; - /* Snapshot list, allow later requests */ - spin_lock(&net_todo_list_lock); list_replace_init(&net_todo_list, &list); - spin_unlock(&net_todo_list_lock); + + __rtnl_unlock(); while (!list_empty(&list)) { struct net_device *dev @@ -4200,9 +4192,6 @@ void netdev_run_todo(void) /* Free network device */ kobject_put(&dev->dev.kobj); } - -out: - mutex_unlock(&net_todo_run_mutex); } static struct net_device_stats *internal_stats(struct net_device *dev) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 71edb8b..d6381c2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -73,7 +73,7 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { - mutex_unlock(&rtnl_mutex); + /* This fellow will unlock it for us. */ netdev_run_todo(); } diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index bfcbd14..c209e05 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -150,7 +150,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) ca->snd_cwnd_cents -= 128; tp->snd_cwnd_cnt = 0; } - + /* check when cwnd has not been incremented for a while */ + if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) { + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } /* clamp down slowstart cwnd to ssthresh value. */ if (is_slowstart) tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67ccce2..7abc6b8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4879,7 +4879,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, goto no_ack; } - __tcp_ack_snd_check(sk, 0); + if (!copied_early || tp->rcv_nxt != tp->rcv_wup) + __tcp_ack_snd_check(sk, 0); no_ack: #ifdef CONFIG_NET_DMA if (copied_early) diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 532e4fa..9f1ea4a 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -525,6 +525,7 @@ static int nr_release(struct socket *sock) if (sk == NULL) return 0; sock_hold(sk); + sock_orphan(sk); lock_sock(sk); nr = nr_sk(sk); @@ -548,7 +549,6 @@ static int nr_release(struct socket *sock) sk->sk_state = TCP_CLOSE; sk->sk_shutdown |= SEND_SHUTDOWN; sk->sk_state_change(sk); - sock_orphan(sk); sock_set_flag(sk, SOCK_DESTROY); break; diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 418cd7d..8e0de6a 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1986,11 +1986,13 @@ static void read_markers(const char *fname) mod = find_module(modname); if (!mod) { - if (is_vmlinux(modname)) - have_vmlinux = 1; mod = new_module(NOFAIL(strdup(modname))); mod->skip = 1; } + if (is_vmlinux(modname)) { + have_vmlinux = 1; + mod->skip = 0; + } if (!mod->skip) add_marker(mod, marker, fmt); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index d11a815..8551952 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2737,6 +2737,7 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, if (ctx == NULL) goto netlbl_secattr_to_sid_return; + context_init(&ctx_new); ctx_new.user = ctx->user; ctx_new.role = ctx->role; ctx_new.type = ctx->type; @@ -2745,13 +2746,9 @@ int security_netlbl_secattr_to_sid(struct netlbl_lsm_secattr *secattr, if (ebitmap_netlbl_import(&ctx_new.range.level[0].cat, secattr->attr.mls.cat) != 0) goto netlbl_secattr_to_sid_return; - ctx_new.range.level[1].cat.highbit = - ctx_new.range.level[0].cat.highbit; - ctx_new.range.level[1].cat.node = - ctx_new.range.level[0].cat.node; - } else { - ebitmap_init(&ctx_new.range.level[0].cat); - ebitmap_init(&ctx_new.range.level[1].cat); + memcpy(&ctx_new.range.level[1].cat, + &ctx_new.range.level[0].cat, + sizeof(ctx_new.range.level[0].cat)); } if (mls_context_isvalid(&policydb, &ctx_new) != 1) goto netlbl_secattr_to_sid_return_cleanup; |