diff options
417 files changed, 7810 insertions, 3938 deletions
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt new file mode 100644 index 0000000..ae8af16 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt @@ -0,0 +1,93 @@ +Pinctrl-based I2C Bus Mux + +This binding describes an I2C bus multiplexer that uses pin multiplexing to +route the I2C signals, and represents the pin multiplexing configuration +using the pinctrl device tree bindings. + + +-----+ +-----+ + | dev | | dev | + +------------------------+ +-----+ +-----+ + | SoC | | | + | /----|------+--------+ + | +---+ +------+ | child bus A, on first set of pins + | |I2C|---|Pinmux| | + | +---+ +------+ | child bus B, on second set of pins + | \----|------+--------+--------+ + | | | | | + +------------------------+ +-----+ +-----+ +-----+ + | dev | | dev | | dev | + +-----+ +-----+ +-----+ + +Required properties: +- compatible: i2c-mux-pinctrl +- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side + port is connected to. + +Also required are: + +* Standard pinctrl properties that specify the pin mux state for each child + bus. See ../pinctrl/pinctrl-bindings.txt. + +* Standard I2C mux properties. See mux.txt in this directory. + +* I2C child bus nodes. See mux.txt in this directory. + +For each named state defined in the pinctrl-names property, an I2C child bus +will be created. I2C child bus numbers are assigned based on the index into +the pinctrl-names property. + +The only exception is that no bus will be created for a state named "idle". If +such a state is defined, it must be the last entry in pinctrl-names. For +example: + + pinctrl-names = "ddc", "pta", "idle" -> ddc = bus 0, pta = bus 1 + pinctrl-names = "ddc", "idle", "pta" -> Invalid ("idle" not last) + pinctrl-names = "idle", "ddc", "pta" -> Invalid ("idle" not last) + +Whenever an access is made to a device on a child bus, the relevant pinctrl +state will be programmed into hardware. + +If an idle state is defined, whenever an access is not being made to a device +on a child bus, the idle pinctrl state will be programmed into hardware. + +If an idle state is not defined, the most recently used pinctrl state will be +left programmed into hardware whenever no access is being made of a device on +a child bus. + +Example: + + i2cmux { + compatible = "i2c-mux-pinctrl"; + #address-cells = <1>; + #size-cells = <0>; + + i2c-parent = <&i2c1>; + + pinctrl-names = "ddc", "pta", "idle"; + pinctrl-0 = <&state_i2cmux_ddc>; + pinctrl-1 = <&state_i2cmux_pta>; + pinctrl-2 = <&state_i2cmux_idle>; + + i2c@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + eeprom { + compatible = "eeprom"; + reg = <0x50>; + }; + }; + + i2c@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + eeprom { + compatible = "eeprom"; + reg = <0x50>; + }; + }; + }; + diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index c45513d..a92c5eb 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2543,6 +2543,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. sched_debug [KNL] Enables verbose scheduler debug messages. + skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate + xtime_lock contention on larger systems, and/or RCU lock + contention on all systems with CONFIG_MAXSMP set. + Format: { "0" | "1" } + 0 -- disable. (may be 1 via CONFIG_CMDLINE="skew_tick=1" + 1 -- enable. + Note: increases power consumption, thus should only be + enabled if running jitter sensitive (HPC/RT) workloads. + security= [SECURITY] Choose a security module to enable at boot. If this boot parameter is not specified, only the first security module asking for security registration will be diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index 75a5923..8f3ae4a 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -211,6 +211,11 @@ The debug output can be changed at runtime using the file will enable debug messages for when routes change. +Counters for different types of packets entering and leaving the +batman-adv module are available through ethtool: + +# ethtool --statistics bat0 + BATCTL ------ diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt new file mode 100644 index 0000000..37067cf --- /dev/null +++ b/Documentation/vm/frontswap.txt @@ -0,0 +1,278 @@ +Frontswap provides a "transcendent memory" interface for swap pages. +In some environments, dramatic performance savings may be obtained because +swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk. + +(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends" +and the only necessary changes to the core kernel for transcendent memory; +all other supporting code -- the "backends" -- is implemented as drivers. +See the LWN.net article "Transcendent memory in a nutshell" for a detailed +overview of frontswap and related kernel parts: +https://lwn.net/Articles/454795/ ) + +Frontswap is so named because it can be thought of as the opposite of +a "backing" store for a swap device. The storage is assumed to be +a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming +to the requirements of transcendent memory (such as Xen's "tmem", or +in-kernel compressed memory, aka "zcache", or future RAM-like devices); +this pseudo-RAM device is not directly accessible or addressable by the +kernel and is of unknown and possibly time-varying size. The driver +links itself to frontswap by calling frontswap_register_ops to set the +frontswap_ops funcs appropriately and the functions it provides must +conform to certain policies as follows: + +An "init" prepares the device to receive frontswap pages associated +with the specified swap device number (aka "type"). A "store" will +copy the page to transcendent memory and associate it with the type and +offset associated with the page. A "load" will copy the page, if found, +from transcendent memory into kernel memory, but will NOT remove the page +from from transcendent memory. An "invalidate_page" will remove the page +from transcendent memory and an "invalidate_area" will remove ALL pages +associated with the swap type (e.g., like swapoff) and notify the "device" +to refuse further stores with that swap type. + +Once a page is successfully stored, a matching load on the page will normally +succeed. So when the kernel finds itself in a situation where it needs +to swap out a page, it first attempts to use frontswap. If the store returns +success, the data has been successfully saved to transcendent memory and +a disk write and, if the data is later read back, a disk read are avoided. +If a store returns failure, transcendent memory has rejected the data, and the +page can be written to swap as usual. + +If a backend chooses, frontswap can be configured as a "writethrough +cache" by calling frontswap_writethrough(). In this mode, the reduction +in swap device writes is lost (and also a non-trivial performance advantage) +in order to allow the backend to arbitrarily "reclaim" space used to +store frontswap pages to more completely manage its memory usage. + +Note that if a page is stored and the page already exists in transcendent memory +(a "duplicate" store), either the store succeeds and the data is overwritten, +or the store fails AND the page is invalidated. This ensures stale data may +never be obtained from frontswap. + +If properly configured, monitoring of frontswap is done via debugfs in +the /sys/kernel/debug/frontswap directory. The effectiveness of +frontswap can be measured (across all swap devices) with: + +failed_stores - how many store attempts have failed +loads - how many loads were attempted (all should succeed) +succ_stores - how many store attempts have succeeded +invalidates - how many invalidates were attempted + +A backend implementation may provide additional metrics. + +FAQ + +1) Where's the value? + +When a workload starts swapping, performance falls through the floor. +Frontswap significantly increases performance in many such workloads by +providing a clean, dynamic interface to read and write swap pages to +"transcendent memory" that is otherwise not directly addressable to the kernel. +This interface is ideal when data is transformed to a different form +and size (such as with compression) or secretly moved (as might be +useful for write-balancing for some RAM-like devices). Swap pages (and +evicted page-cache pages) are a great use for this kind of slower-than-RAM- +but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and +cleancache) interface to transcendent memory provides a nice way to read +and write -- and indirectly "name" -- the pages. + +Frontswap -- and cleancache -- with a fairly small impact on the kernel, +provides a huge amount of flexibility for more dynamic, flexible RAM +utilization in various system configurations: + +In the single kernel case, aka "zcache", pages are compressed and +stored in local memory, thus increasing the total anonymous pages +that can be safely kept in RAM. Zcache essentially trades off CPU +cycles used in compression/decompression for better memory utilization. +Benchmarks have shown little or no impact when memory pressure is +low while providing a significant performance improvement (25%+) +on some workloads under high memory pressure. + +"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory +support for clustered systems. Frontswap pages are locally compressed +as in zcache, but then "remotified" to another system's RAM. This +allows RAM to be dynamically load-balanced back-and-forth as needed, +i.e. when system A is overcommitted, it can swap to system B, and +vice versa. RAMster can also be configured as a memory server so +many servers in a cluster can swap, dynamically as needed, to a single +server configured with a large amount of RAM... without pre-configuring +how much of the RAM is available for each of the clients! + +In the virtual case, the whole point of virtualization is to statistically +multiplex physical resources acrosst the varying demands of multiple +virtual machines. This is really hard to do with RAM and efforts to do +it well with no kernel changes have essentially failed (except in some +well-publicized special-case workloads). +Specifically, the Xen Transcendent Memory backend allows otherwise +"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple +virtual machines, but the pages can be compressed and deduplicated to +optimize RAM utilization. And when guest OS's are induced to surrender +underutilized RAM (e.g. with "selfballooning"), sudden unexpected +memory pressure may result in swapping; frontswap allows those pages +to be swapped to and from hypervisor RAM (if overall host system memory +conditions allow), thus mitigating the potentially awful performance impact +of unplanned swapping. + +A KVM implementation is underway and has been RFC'ed to lkml. And, +using frontswap, investigation is also underway on the use of NVM as +a memory extension technology. + +2) Sure there may be performance advantages in some situations, but + what's the space/time overhead of frontswap? + +If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into +nothingness and the only overhead is a few extra bytes per swapon'ed +swap device. If CONFIG_FRONTSWAP is enabled but no frontswap "backend" +registers, there is one extra global variable compared to zero for +every swap page read or written. If CONFIG_FRONTSWAP is enabled +AND a frontswap backend registers AND the backend fails every "store" +request (i.e. provides no memory despite claiming it might), +CPU overhead is still negligible -- and since every frontswap fail +precedes a swap page write-to-disk, the system is highly likely +to be I/O bound and using a small fraction of a percent of a CPU +will be irrelevant anyway. + +As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend +registers, one bit is allocated for every swap page for every swap +device that is swapon'd. This is added to the EIGHT bits (which +was sixteen until about 2.6.34) that the kernel already allocates +for every swap page for every swap device that is swapon'd. (Hugh +Dickins has observed that frontswap could probably steal one of +the existing eight bits, but let's worry about that minor optimization +later.) For very large swap disks (which are rare) on a standard +4K pagesize, this is 1MB per 32GB swap. + +When swap pages are stored in transcendent memory instead of written +out to disk, there is a side effect that this may create more memory +pressure that can potentially outweigh the other advantages. A +backend, such as zcache, must implement policies to carefully (but +dynamically) manage memory limits to ensure this doesn't happen. + +3) OK, how about a quick overview of what this frontswap patch does + in terms that a kernel hacker can grok? + +Let's assume that a frontswap "backend" has registered during +kernel initialization; this registration indicates that this +frontswap backend has access to some "memory" that is not directly +accessible by the kernel. Exactly how much memory it provides is +entirely dynamic and random. + +Whenever a swap-device is swapon'd frontswap_init() is called, +passing the swap device number (aka "type") as a parameter. +This notifies frontswap to expect attempts to "store" swap pages +associated with that number. + +Whenever the swap subsystem is readying a page to write to a swap +device (c.f swap_writepage()), frontswap_store is called. Frontswap +consults with the frontswap backend and if the backend says it does NOT +have room, frontswap_store returns -1 and the kernel swaps the page +to the swap device as normal. Note that the response from the frontswap +backend is unpredictable to the kernel; it may choose to never accept a +page, it could accept every ninth page, or it might accept every +page. But if the backend does accept a page, the data from the page +has already been copied and associated with the type and offset, +and the backend guarantees the persistence of the data. In this case, +frontswap sets a bit in the "frontswap_map" for the swap device +corresponding to the page offset on the swap device to which it would +otherwise have written the data. + +When the swap subsystem needs to swap-in a page (swap_readpage()), +it first calls frontswap_load() which checks the frontswap_map to +see if the page was earlier accepted by the frontswap backend. If +it was, the page of data is filled from the frontswap backend and +the swap-in is complete. If not, the normal swap-in code is +executed to obtain the page of data from the real swap device. + +So every time the frontswap backend accepts a page, a swap device read +and (potentially) a swap device write are replaced by a "frontswap backend +store" and (possibly) a "frontswap backend loads", which are presumably much +faster. + +4) Can't frontswap be configured as a "special" swap device that is + just higher priority than any real swap device (e.g. like zswap, + or maybe swap-over-nbd/NFS)? + +No. First, the existing swap subsystem doesn't allow for any kind of +swap hierarchy. Perhaps it could be rewritten to accomodate a hierarchy, +but this would require fairly drastic changes. Even if it were +rewritten, the existing swap subsystem uses the block I/O layer which +assumes a swap device is fixed size and any page in it is linearly +addressable. Frontswap barely touches the existing swap subsystem, +and works around the constraints of the block I/O subsystem to provide +a great deal of flexibility and dynamicity. + +For example, the acceptance of any swap page by the frontswap backend is +entirely unpredictable. This is critical to the definition of frontswap +backends because it grants completely dynamic discretion to the +backend. In zcache, one cannot know a priori how compressible a page is. +"Poorly" compressible pages can be rejected, and "poorly" can itself be +defined dynamically depending on current memory constraints. + +Further, frontswap is entirely synchronous whereas a real swap +device is, by definition, asynchronous and uses block I/O. The +block I/O layer is not only unnecessary, but may perform "optimizations" +that are inappropriate for a RAM-oriented device including delaying +the write of some pages for a significant amount of time. Synchrony is +required to ensure the dynamicity of the backend and to avoid thorny race +conditions that would unnecessarily and greatly complicate frontswap +and/or the block I/O subsystem. That said, only the initial "store" +and "load" operations need be synchronous. A separate asynchronous thread +is free to manipulate the pages stored by frontswap. For example, +the "remotification" thread in RAMster uses standard asynchronous +kernel sockets to move compressed frontswap pages to a remote machine. +Similarly, a KVM guest-side implementation could do in-guest compression +and use "batched" hypercalls. + +In a virtualized environment, the dynamicity allows the hypervisor +(or host OS) to do "intelligent overcommit". For example, it can +choose to accept pages only until host-swapping might be imminent, +then force guests to do their own swapping. + +There is a downside to the transcendent memory specifications for +frontswap: Since any "store" might fail, there must always be a real +slot on a real swap device to swap the page. Thus frontswap must be +implemented as a "shadow" to every swapon'd device with the potential +capability of holding every page that the swap device might have held +and the possibility that it might hold no pages at all. This means +that frontswap cannot contain more pages than the total of swapon'd +swap devices. For example, if NO swap device is configured on some +installation, frontswap is useless. Swapless portable devices +can still use frontswap but a backend for such devices must configure +some kind of "ghost" swap device and ensure that it is never used. + +5) Why this weird definition about "duplicate stores"? If a page + has been previously successfully stored, can't it always be + successfully overwritten? + +Nearly always it can, but no, sometimes it cannot. Consider an example +where data is compressed and the original 4K page has been compressed +to 1K. Now an attempt is made to overwrite the page with data that +is non-compressible and so would take the entire 4K. But the backend +has no more space. In this case, the store must be rejected. Whenever +frontswap rejects a store that would overwrite, it also must invalidate +the old data and ensure that it is no longer accessible. Since the +swap subsystem then writes the new data to the read swap device, +this is the correct course of action to ensure coherency. + +6) What is frontswap_shrink for? + +When the (non-frontswap) swap subsystem swaps out a page to a real +swap device, that page is only taking up low-value pre-allocated disk +space. But if frontswap has placed a page in transcendent memory, that +page may be taking up valuable real estate. The frontswap_shrink +routine allows code outside of the swap subsystem to force pages out +of the memory managed by frontswap and back into kernel-addressable memory. +For example, in RAMster, a "suction driver" thread will attempt +to "repatriate" pages sent to a remote machine back to the local machine; +this is driven using the frontswap_shrink mechanism when memory pressure +subsides. + +7) Why does the frontswap patch create the new include file swapfile.h? + +The frontswap code depends on some swap-subsystem-internal data +structures that have, over the years, moved back and forth between +static and global. This seemed a reasonable compromise: Define +them as global but declare them in a new include file that isn't +included by the large number of source files that include swap.h. + +Dan Magenheimer, last updated April 9, 2012 diff --git a/MAINTAINERS b/MAINTAINERS index 3075a2a..ae5a12a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1077,7 +1077,7 @@ F: drivers/media/video/s5p-fimc/ ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT M: Kyungmin Park <kyungmin.park@samsung.com> M: Kamil Debski <k.debski@samsung.com> -M: Jeongtae Park <jtp.park@samsung.com> +M: Jeongtae Park <jtp.park@samsung.com> L: linux-arm-kernel@lists.infradead.org L: linux-media@vger.kernel.org S: Maintained @@ -1743,10 +1743,10 @@ F: include/linux/can/platform/ CAPABILITIES M: Serge Hallyn <serge.hallyn@canonical.com> L: linux-security-module@vger.kernel.org -S: Supported +S: Supported F: include/linux/capability.h F: security/capability.c -F: security/commoncap.c +F: security/commoncap.c F: kernel/capability.c CELL BROADBAND ENGINE ARCHITECTURE @@ -2149,11 +2149,11 @@ S: Orphan F: drivers/net/wan/pc300* CYTTSP TOUCHSCREEN DRIVER -M: Javier Martinez Canillas <javier@dowhile0.org> -L: linux-input@vger.kernel.org -S: Maintained -F: drivers/input/touchscreen/cyttsp* -F: include/linux/input/cyttsp.h +M: Javier Martinez Canillas <javier@dowhile0.org> +L: linux-input@vger.kernel.org +S: Maintained +F: drivers/input/touchscreen/cyttsp* +F: include/linux/input/cyttsp.h DAMA SLAVE for AX.25 M: Joerg Reuter <jreuter@yaina.de> @@ -2273,7 +2273,7 @@ F: include/linux/device-mapper.h F: include/linux/dm-*.h DIOLAN U2C-12 I2C DRIVER -M: Guenter Roeck <guenter.roeck@ericsson.com> +M: Guenter Roeck <linux@roeck-us.net> L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-diolan-u2c.c @@ -2933,6 +2933,13 @@ F: Documentation/power/freezing-of-tasks.txt F: include/linux/freezer.h F: kernel/freezer.c +FRONTSWAP API +M: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> +L: linux-kernel@vger.kernel.org +S: Maintained +F: mm/frontswap.c +F: include/linux/frontswap.h + FS-CACHE: LOCAL CACHING FOR NETWORK FILESYSTEMS M: David Howells <dhowells@redhat.com> L: linux-cachefs@redhat.com @@ -3141,7 +3148,7 @@ F: drivers/tty/hvc/ HARDWARE MONITORING M: Jean Delvare <khali@linux-fr.org> -M: Guenter Roeck <guenter.roeck@ericsson.com> +M: Guenter Roeck <linux@roeck-us.net> L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ T: quilt kernel.org/pub/linux/kernel/people/jdelvare/linux-2.6/jdelvare-hwmon/ @@ -4099,6 +4106,8 @@ F: drivers/scsi/53c700* LED SUBSYSTEM M: Bryan Wu <bryan.wu@canonical.com> M: Richard Purdie <rpurdie@rpsys.net> +L: linux-leds@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/cooloney/linux-leds.git S: Maintained F: drivers/leds/ F: include/linux/leds.h @@ -4416,6 +4425,13 @@ S: Orphan F: drivers/video/matrox/matroxfb_* F: include/linux/matroxfb.h +MAX16065 HARDWARE MONITOR DRIVER +M: Guenter Roeck <linux@roeck-us.net> +L: lm-sensors@lm-sensors.org +S: Maintained +F: Documentation/hwmon/max16065 +F: drivers/hwmon/max16065.c + MAX6650 HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: "Hans J. Koch" <hjk@hansjkoch.de> L: lm-sensors@lm-sensors.org @@ -5154,7 +5170,7 @@ F: drivers/leds/leds-pca9532.c F: include/linux/leds-pca9532.h PCA9541 I2C BUS MASTER SELECTOR DRIVER -M: Guenter Roeck <guenter.roeck@ericsson.com> +M: Guenter Roeck <linux@roeck-us.net> L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/muxes/i2c-mux-pca9541.c @@ -5174,7 +5190,7 @@ S: Maintained F: drivers/firmware/pcdp.* PCI ERROR RECOVERY -M: Linas Vepstas <linasvepstas@gmail.com> +M: Linas Vepstas <linasvepstas@gmail.com> L: linux-pci@vger.kernel.org S: Supported F: Documentation/PCI/pci-error-recovery.txt @@ -5304,7 +5320,7 @@ F: drivers/video/fb-puv3.c F: drivers/rtc/rtc-puv3.c PMBUS HARDWARE MONITORING DRIVERS -M: Guenter Roeck <guenter.roeck@ericsson.com> +M: Guenter Roeck <linux@roeck-us.net> L: lm-sensors@lm-sensors.org W: http://www.lm-sensors.org/ W: http://www.roeck-us.net/linux/drivers/ @@ -7299,11 +7315,11 @@ F: Documentation/DocBook/uio-howto.tmpl F: drivers/uio/ F: include/linux/uio*.h -UTIL-LINUX-NG PACKAGE +UTIL-LINUX PACKAGE M: Karel Zak <kzak@redhat.com> -L: util-linux-ng@vger.kernel.org -W: http://kernel.org/~kzak/util-linux-ng/ -T: git git://git.kernel.org/pub/scm/utils/util-linux-ng/util-linux-ng.git +L: util-linux@vger.kernel.org +W: http://en.wikipedia.org/wiki/Util-linux +T: git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git S: Maintained UVESAFB DRIVER @@ -1,7 +1,7 @@ VERSION = 3 PATCHLEVEL = 5 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Saber-toothed Squirrel # *DOCUMENTATION* diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index b649c59..84449dd 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -7,7 +7,6 @@ config ARM select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_DMA_ATTRS select HAVE_DMA_CONTIGUOUS if (CPU_V6 || CPU_V6K || CPU_V7) - select CMA if (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_MEMBLOCK select RTC_LIB select SYS_SUPPORTS_APM_EMULATION diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 54d49dd..5fb47a1 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -271,9 +271,9 @@ static struct platform_device *create_simple_dss_pdev(const char *pdev_name, goto err; } - r = omap_device_register(pdev); + r = platform_device_add(pdev); if (r) { - pr_err("Could not register omap_device for %s\n", pdev_name); + pr_err("Could not register platform_device for %s\n", pdev_name); goto err; } diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index f31383c..df33909 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -186,6 +186,12 @@ config SH_TIMER_TMU help This enables build of the TMU timer driver. +config EM_TIMER_STI + bool "STI timer driver" + default y + help + This enables build of the STI timer driver. + endmenu config SH_CLK_CPG diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ea6b431..106c4c0 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -268,10 +268,8 @@ static int __init consistent_init(void) unsigned long base = consistent_base; unsigned long num_ptes = (CONSISTENT_END - base) >> PMD_SHIFT; -#ifndef CONFIG_ARM_DMA_USE_IOMMU - if (cpu_architecture() >= CPU_ARCH_ARMv6) + if (IS_ENABLED(CONFIG_CMA) && !IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) return 0; -#endif consistent_pte = kmalloc(num_ptes * sizeof(pte_t), GFP_KERNEL); if (!consistent_pte) { @@ -342,7 +340,7 @@ static int __init coherent_init(void) struct page *page; void *ptr; - if (cpu_architecture() < CPU_ARCH_ARMv6) + if (!IS_ENABLED(CONFIG_CMA)) return 0; ptr = __alloc_from_contiguous(NULL, size, prot, &page); @@ -704,7 +702,7 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, if (arch_is_coherent() || nommu()) addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (cpu_architecture() < CPU_ARCH_ARMv6) + else if (!IS_ENABLED(CONFIG_CMA)) addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller); else if (gfp & GFP_ATOMIC) addr = __alloc_from_pool(dev, size, &page, caller); @@ -773,7 +771,7 @@ void arm_dma_free(struct device *dev, size_t size, void *cpu_addr, if (arch_is_coherent() || nommu()) { __dma_free_buffer(page, size); - } else if (cpu_architecture() < CPU_ARCH_ARMv6) { + } else if (!IS_ENABLED(CONFIG_CMA)) { __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c index c140f9b..d552a85 100644 --- a/arch/avr32/kernel/signal.c +++ b/arch/avr32/kernel/signal.c @@ -300,7 +300,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti) if ((sysreg_read(SR) & MODE_MASK) == MODE_SUPERVISOR) syscall = 1; - if (ti->flags & _TIF_SIGPENDING)) + if (ti->flags & _TIF_SIGPENDING) do_signal(regs, syscall); if (ti->flags & _TIF_NOTIFY_RESUME) { diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 2e3994b..62bcea7 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -173,7 +173,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs) unsigned long newsp; #ifdef __ARCH_SYNC_CORE_DCACHE - if (current->rt.nr_cpus_allowed == num_possible_cpus()) + if (current->nr_cpus_allowed == num_possible_cpus()) set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id())); #endif diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index cac5b6b..1471201 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -7,6 +7,8 @@ config M68K select GENERIC_IRQ_SHOW select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS select GENERIC_CPU_DEVICES + select GENERIC_STRNCPY_FROM_USER if MMU + select GENERIC_STRNLEN_USER if MMU select FPU if MMU select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1a922fa..eafa253 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,2 +1,4 @@ include include/asm-generic/Kbuild.asm header-y += cachectl.h + +generic-y += word-at-a-time.h diff --git a/arch/m68k/include/asm/m528xsim.h b/arch/m68k/include/asm/m528xsim.h index d63b99f..497c31c 100644 --- a/arch/m68k/include/asm/m528xsim.h +++ b/arch/m68k/include/asm/m528xsim.h @@ -86,7 +86,7 @@ /* * QSPI module. */ -#define MCFQSPI_IOBASE (MCF_IPSBAR + 0x340) +#define MCFQSPI_BASE (MCF_IPSBAR + 0x340) #define MCFQSPI_SIZE 0x40 #define MCFQSPI_CS0 147 diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h index 9c80cd5..472c891 100644 --- a/arch/m68k/include/asm/uaccess_mm.h +++ b/arch/m68k/include/asm/uaccess_mm.h @@ -379,12 +379,15 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) #define copy_from_user(to, from, n) __copy_from_user(to, from, n) #define copy_to_user(to, from, n) __copy_to_user(to, from, n) -long strncpy_from_user(char *dst, const char __user *src, long count); -long strnlen_user(const char __user *src, long n); +#define user_addr_max() \ + (segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL) + +extern long strncpy_from_user(char *dst, const char __user *src, long count); +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); + unsigned long __clear_user(void __user *to, unsigned long n); #define clear_user __clear_user -#define strlen_user(str) strnlen_user(str, 32767) - #endif /* _M68K_UACCESS_H */ diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 8b4a222..1bc10e6 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -286,7 +286,7 @@ asmlinkage void syscall_trace(void) } } -#ifdef CONFIG_COLDFIRE +#if defined(CONFIG_COLDFIRE) || !defined(CONFIG_MMU) asmlinkage int syscall_trace_enter(void) { int ret = 0; diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index d7deb7f..707f057 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -85,7 +85,7 @@ void __init time_init(void) mach_sched_init(timer_interrupt); } -#ifdef CONFIG_M68KCLASSIC +#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET u32 arch_gettimeoffset(void) { @@ -108,4 +108,4 @@ static int __init rtc_init(void) module_init(rtc_init); -#endif /* CONFIG_M68KCLASSIC */ +#endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */ diff --git a/arch/m68k/lib/uaccess.c b/arch/m68k/lib/uaccess.c index 5664386..5e97f2e 100644 --- a/arch/m68k/lib/uaccess.c +++ b/arch/m68k/lib/uaccess.c @@ -104,80 +104,6 @@ unsigned long __generic_copy_to_user(void __user *to, const void *from, EXPORT_SYMBOL(__generic_copy_to_user); /* - * Copy a null terminated string from userspace. - */ -long strncpy_from_user(char *dst, const char __user *src, long count) -{ - long res; - char c; - - if (count <= 0) - return count; - - asm volatile ("\n" - "1: "MOVES".b (%2)+,%4\n" - " move.b %4,(%1)+\n" - " jeq 2f\n" - " subq.l #1,%3\n" - " jne 1b\n" - "2: sub.l %3,%0\n" - "3:\n" - " .section .fixup,\"ax\"\n" - " .even\n" - "10: move.l %5,%0\n" - " jra 3b\n" - " .previous\n" - "\n" - " .section __ex_table,\"a\"\n" - " .align 4\n" - " .long 1b,10b\n" - " .previous" - : "=d" (res), "+a" (dst), "+a" (src), "+r" (count), "=&d" (c) - : "i" (-EFAULT), "0" (count)); - - return res; -} -EXPORT_SYMBOL(strncpy_from_user); - -/* - * Return the size of a string (including the ending 0) - * - * Return 0 on exception, a value greater than N if too long - */ -long strnlen_user(const char __user *src, long n) -{ - char c; - long res; - - asm volatile ("\n" - "1: subq.l #1,%1\n" - " jmi 3f\n" - "2: "MOVES".b (%0)+,%2\n" - " tst.b %2\n" - " jne 1b\n" - " jra 4f\n" - "\n" - "3: addq.l #1,%0\n" - "4: sub.l %4,%0\n" - "5:\n" - " .section .fixup,\"ax\"\n" - " .even\n" - "20: sub.l %0,%0\n" - " jra 5b\n" - " .previous\n" - "\n" - " .section __ex_table,\"a\"\n" - " .align 4\n" - " .long 2b,20b\n" - " .previous\n" - : "=&a" (res), "+d" (n), "=&d" (c) - : "0" (src), "r" (src)); - - return res; -} -EXPORT_SYMBOL(strnlen_user); - -/* * Zero Userspace */ diff --git a/arch/m68k/platform/68328/timers.c b/arch/m68k/platform/68328/timers.c index c801c17..f4dc9b2 100644 --- a/arch/m68k/platform/68328/timers.c +++ b/arch/m68k/platform/68328/timers.c @@ -53,6 +53,7 @@ #endif static u32 m68328_tick_cnt; +static irq_handler_t timer_interrupt; /***************************************************************************/ @@ -62,7 +63,7 @@ static irqreturn_t hw_tick(int irq, void *dummy) TSTAT &= 0; m68328_tick_cnt += TICKS_PER_JIFFY; - return arch_timer_interrupt(irq, dummy); + return timer_interrupt(irq, dummy); } /***************************************************************************/ @@ -99,7 +100,7 @@ static struct clocksource m68328_clk = { /***************************************************************************/ -void hw_timer_init(void) +void hw_timer_init(irq_handler_t handler) { /* disable timer 1 */ TCTL = 0; @@ -115,6 +116,7 @@ void hw_timer_init(void) /* Enable timer 1 */ TCTL |= TCTL_TEN; clocksource_register_hz(&m68328_clk, TICKS_PER_JIFFY*HZ); + timer_interrupt = handler; } /***************************************************************************/ diff --git a/arch/m68k/platform/68360/config.c b/arch/m68k/platform/68360/config.c index 255fc03..9877cef 100644 --- a/arch/m68k/platform/68360/config.c +++ b/arch/m68k/platform/68360/config.c @@ -35,6 +35,7 @@ extern void m360_cpm_reset(void); #define OSCILLATOR (unsigned long int)33000000 #endif +static irq_handler_t timer_interrupt; unsigned long int system_clock; extern QUICC *pquicc; @@ -52,7 +53,7 @@ static irqreturn_t hw_tick(int irq, void *dummy) pquicc->timer_ter1 = 0x0002; /* clear timer event */ - return arch_timer_interrupt(irq, dummy); + return timer_interrupt(irq, dummy); } static struct irqaction m68360_timer_irq = { @@ -61,7 +62,7 @@ static struct irqaction m68360_timer_irq = { .handler = hw_tick, }; -void hw_timer_init(void) +void hw_timer_init(irq_handler_t handler) { unsigned char prescaler; unsigned short tgcr_save; @@ -94,6 +95,8 @@ void hw_timer_init(void) pquicc->timer_ter1 = 0x0003; /* clear timer events */ + timer_interrupt = handler; + /* enable timer 1 interrupt in CIMR */ setup_irq(CPMVEC_TIMER1, &m68360_timer_irq); diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index dbc3850..5707f1a 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -21,6 +21,7 @@ KBUILD_DEFCONFIG := default_defconfig NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 +LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) MACHINE := $(shell uname -m) ifeq ($(MACHINE),parisc*) @@ -79,7 +80,7 @@ kernel-y := mm/ kernel/ math-emu/ kernel-$(CONFIG_HPUX) += hpux/ core-y += $(addprefix arch/parisc/, $(kernel-y)) -libs-y += arch/parisc/lib/ `$(CC) -print-libgcc-file-name` +libs-y += arch/parisc/lib/ $(LIBGCC) drivers-$(CONFIG_OPROFILE) += arch/parisc/oprofile/ diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 19a434f..4383707 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -1,3 +1,4 @@ include include/asm-generic/Kbuild.asm header-y += pdc.h +generic-y += word-at-a-time.h diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 72cfdb0..62a3333 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -1,6 +1,8 @@ #ifndef _PARISC_BUG_H #define _PARISC_BUG_H +#include <linux/kernel.h> /* for BUGFLAG_TAINT */ + /* * Tell the user there is some problem. * The offending file and line are encoded in the __bug_table section. diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 0b6d796..2e3200c 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -176,8 +176,8 @@ int module_frob_arch_sections(Elf32_Ehdr *hdr, static inline int entry_matches(struct ppc_plt_entry *entry, Elf32_Addr val) { - if (entry->jump[0] == 0x3d600000 + ((val + 0x8000) >> 16) - && entry->jump[1] == 0x396b0000 + (val & 0xffff)) + if (entry->jump[0] == 0x3d800000 + ((val + 0x8000) >> 16) + && entry->jump[1] == 0x398c0000 + (val & 0xffff)) return 1; return 0; } @@ -204,10 +204,9 @@ static uint32_t do_plt_call(void *location, entry++; } - /* Stolen from Paul Mackerras as well... */ - entry->jump[0] = 0x3d600000+((val+0x8000)>>16); /* lis r11,sym@ha */ - entry->jump[1] = 0x396b0000 + (val&0xffff); /* addi r11,r11,sym@l*/ - entry->jump[2] = 0x7d6903a6; /* mtctr r11 */ + entry->jump[0] = 0x3d800000+((val+0x8000)>>16); /* lis r12,sym@ha */ + entry->jump[1] = 0x398c0000 + (val&0xffff); /* addi r12,r12,sym@l*/ + entry->jump[2] = 0x7d8903a6; /* mtctr r12 */ entry->jump[3] = 0x4e800420; /* bctr */ DEBUGP("Initialized plt for 0x%x at %p\n", val, entry); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 99a995c..be171ee 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -475,6 +475,7 @@ void timer_interrupt(struct pt_regs * regs) struct pt_regs *old_regs; u64 *next_tb = &__get_cpu_var(decrementers_next_tb); struct clock_event_device *evt = &__get_cpu_var(decrementers); + u64 now; /* Ensure a positive value is written to the decrementer, or else * some CPUs will continue to take decrementer exceptions. @@ -509,9 +510,16 @@ void timer_interrupt(struct pt_regs * regs) irq_work_run(); } - *next_tb = ~(u64)0; - if (evt->event_handler) - evt->event_handler(evt); + now = get_tb_or_rtc(); + if (now >= *next_tb) { + *next_tb = ~(u64)0; + if (evt->event_handler) + evt->event_handler(evt); + } else { + now = *next_tb - now; + if (now <= DECREMENTER_MAX) + set_dec((int)now); + } #ifdef CONFIG_PPC64 /* collect purr register values often, for accurate calculations */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 99bcd0e..31d9db7 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -32,6 +32,8 @@ config SUPERH select GENERIC_SMP_IDLE_THREAD select GENERIC_CLOCKEVENTS select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast diff --git a/arch/sh/Makefile b/arch/sh/Makefile index 46edf07..aed701c 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -9,6 +9,12 @@ # License. See the file "COPYING" in the main directory of this archive # for more details. # +ifneq ($(SUBARCH),$(ARCH)) + ifeq ($(CROSS_COMPILE),) + CROSS_COMPILE := $(call cc-cross-prefix, $(UTS_MACHINE)-linux- $(UTS_MACHINE)-linux-gnu- $(UTS_MACHINE)-unknown-linux-gnu-) + endif +endif + isa-y := any isa-$(CONFIG_SH_DSP) := sh isa-$(CONFIG_CPU_SH2) := sh2 @@ -106,19 +112,13 @@ LDFLAGS_vmlinux += --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \ KBUILD_DEFCONFIG := cayman_defconfig endif -ifneq ($(SUBARCH),$(ARCH)) - ifeq ($(CROSS_COMPILE),) - CROSS_COMPILE := $(call cc-cross-prefix, $(UTS_MACHINE)-linux- $(UTS_MACHINE)-linux-gnu- $(UTS_MACHINE)-unknown-linux-gnu-) - endif -endif - ifdef CONFIG_CPU_LITTLE_ENDIAN ld-bfd := elf32-$(UTS_MACHINE)-linux -LDFLAGS_vmlinux += --defsym 'jiffies=jiffies_64' --oformat $(ld-bfd) +LDFLAGS_vmlinux += --defsym jiffies=jiffies_64 --oformat $(ld-bfd) LDFLAGS += -EL else ld-bfd := elf32-$(UTS_MACHINE)big-linux -LDFLAGS_vmlinux += --defsym 'jiffies=jiffies_64+4' --oformat $(ld-bfd) +LDFLAGS_vmlinux += --defsym jiffies=jiffies_64+4 --oformat $(ld-bfd) LDFLAGS += -EB endif diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 7beb423..7b673dd 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,5 +1,39 @@ include include/asm-generic/Kbuild.asm +generic-y += bitsperlong.h +generic-y += cputime.h +generic-y += current.h +generic-y += delay.h +generic-y += div64.h +generic-y += emergency-restart.h +generic-y += errno.h +generic-y += fcntl.h +generic-y += ioctl.h +generic-y += ipcbuf.h +generic-y += irq_regs.h +generic-y += kvm_para.h +generic-y += local.h +generic-y += local64.h +generic-y += param.h +generic-y += parport.h +generic-y += percpu.h +generic-y += poll.h +generic-y += mman.h +generic-y += msgbuf.h +generic-y += resource.h +generic-y += scatterlist.h +generic-y += sembuf.h +generic-y += serial.h +generic-y += shmbuf.h +generic-y += siginfo.h +generic-y += sizes.h +generic-y += socket.h +generic-y += statfs.h +generic-y += termbits.h +generic-y += termios.h +generic-y += ucontext.h +generic-y += xor.h + header-y += cachectl.h header-y += cpu-features.h header-y += hw_breakpoint.h diff --git a/arch/sh/include/asm/bitsperlong.h b/arch/sh/include/asm/bitsperlong.h deleted file mode 100644 index 6dc0bb0..0000000 --- a/arch/sh/include/asm/bitsperlong.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/bitsperlong.h> diff --git a/arch/sh/include/asm/cputime.h b/arch/sh/include/asm/cputime.h deleted file mode 100644 index 6ca395d..0000000 --- a/arch/sh/include/asm/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SH_CPUTIME_H -#define __SH_CPUTIME_H - -#include <asm-generic/cputime.h> - -#endif /* __SH_CPUTIME_H */ diff --git a/arch/sh/include/asm/current.h b/arch/sh/include/asm/current.h deleted file mode 100644 index 4c51401..0000000 --- a/arch/sh/include/asm/current.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/current.h> diff --git a/arch/sh/include/asm/delay.h b/arch/sh/include/asm/delay.h deleted file mode 100644 index 9670e12..0000000 --- a/arch/sh/include/asm/delay.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/delay.h> diff --git a/arch/sh/include/asm/div64.h b/arch/sh/include/asm/div64.h deleted file mode 100644 index 6cd978c..0000000 --- a/arch/sh/include/asm/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/arch/sh/include/asm/emergency-restart.h b/arch/sh/include/asm/emergency-restart.h deleted file mode 100644 index 108d8c4..0000000 --- a/arch/sh/include/asm/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/sh/include/asm/errno.h b/arch/sh/include/asm/errno.h deleted file mode 100644 index 51cf6f9..0000000 --- a/arch/sh/include/asm/errno.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_ERRNO_H -#define __ASM_SH_ERRNO_H - -#include <asm-generic/errno.h> - -#endif /* __ASM_SH_ERRNO_H */ diff --git a/arch/sh/include/asm/fcntl.h b/arch/sh/include/asm/fcntl.h deleted file mode 100644 index 46ab12d..0000000 --- a/arch/sh/include/asm/fcntl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/fcntl.h> diff --git a/arch/sh/include/asm/ioctl.h b/arch/sh/include/asm/ioctl.h deleted file mode 100644 index b279fe0..0000000 --- a/arch/sh/include/asm/ioctl.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ioctl.h> diff --git a/arch/sh/include/asm/ipcbuf.h b/arch/sh/include/asm/ipcbuf.h deleted file mode 100644 index 84c7e51..0000000 --- a/arch/sh/include/asm/ipcbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipcbuf.h> diff --git a/arch/sh/include/asm/irq_regs.h b/arch/sh/include/asm/irq_regs.h deleted file mode 100644 index 3dd9c0b..0000000 --- a/arch/sh/include/asm/irq_regs.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/irq_regs.h> diff --git a/arch/sh/include/asm/kvm_para.h b/arch/sh/include/asm/kvm_para.h deleted file mode 100644 index 14fab8f..0000000 --- a/arch/sh/include/asm/kvm_para.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/kvm_para.h> diff --git a/arch/sh/include/asm/local.h b/arch/sh/include/asm/local.h deleted file mode 100644 index 9ed9b9c..0000000 --- a/arch/sh/include/asm/local.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_SH_LOCAL_H -#define __ASM_SH_LOCAL_H - -#include <asm-generic/local.h> - -#endif /* __ASM_SH_LOCAL_H */ - diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h deleted file mode 100644 index 36c93b5..0000000 --- a/arch/sh/include/asm/local64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/local64.h> diff --git a/arch/sh/include/asm/mman.h b/arch/sh/include/asm/mman.h deleted file mode 100644 index 8eebf89..0000000 --- a/arch/sh/include/asm/mman.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/mman.h> diff --git a/arch/sh/include/asm/msgbuf.h b/arch/sh/include/asm/msgbuf.h deleted file mode 100644 index 809134c..0000000 --- a/arch/sh/include/asm/msgbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/msgbuf.h> diff --git a/arch/sh/include/asm/param.h b/arch/sh/include/asm/param.h deleted file mode 100644 index 965d454..0000000 --- a/arch/sh/include/asm/param.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/param.h> diff --git a/arch/sh/include/asm/parport.h b/arch/sh/include/asm/parport.h deleted file mode 100644 index cf252af..0000000 --- a/arch/sh/include/asm/parport.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/parport.h> diff --git a/arch/sh/include/asm/percpu.h b/arch/sh/include/asm/percpu.h deleted file mode 100644 index 4db4b39..0000000 --- a/arch/sh/include/asm/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ARCH_SH_PERCPU -#define __ARCH_SH_PERCPU - -#include <asm-generic/percpu.h> - -#endif /* __ARCH_SH_PERCPU */ diff --git a/arch/sh/include/asm/poll.h b/arch/sh/include/asm/poll.h deleted file mode 100644 index c98509d..0000000 --- a/arch/sh/include/asm/poll.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/poll.h> diff --git a/arch/sh/include/asm/resource.h b/arch/sh/include/asm/resource.h deleted file mode 100644 index 9c2499a..0000000 --- a/arch/sh/include/asm/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_RESOURCE_H -#define __ASM_SH_RESOURCE_H - -#include <asm-generic/resource.h> - -#endif /* __ASM_SH_RESOURCE_H */ diff --git a/arch/sh/include/asm/scatterlist.h b/arch/sh/include/asm/scatterlist.h deleted file mode 100644 index 98dfc35..0000000 --- a/arch/sh/include/asm/scatterlist.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_SCATTERLIST_H -#define __ASM_SH_SCATTERLIST_H - -#include <asm-generic/scatterlist.h> - -#endif /* __ASM_SH_SCATTERLIST_H */ diff --git a/arch/sh/include/asm/sembuf.h b/arch/sh/include/asm/sembuf.h deleted file mode 100644 index 7673b83..0000000 --- a/arch/sh/include/asm/sembuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sembuf.h> diff --git a/arch/sh/include/asm/serial.h b/arch/sh/include/asm/serial.h deleted file mode 100644 index a0cb0caf..0000000 --- a/arch/sh/include/asm/serial.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/serial.h> diff --git a/arch/sh/include/asm/shmbuf.h b/arch/sh/include/asm/shmbuf.h deleted file mode 100644 index 83c05fc..0000000 --- a/arch/sh/include/asm/shmbuf.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/shmbuf.h> diff --git a/arch/sh/include/asm/siginfo.h b/arch/sh/include/asm/siginfo.h deleted file mode 100644 index 813040e..0000000 --- a/arch/sh/include/asm/siginfo.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_SIGINFO_H -#define __ASM_SH_SIGINFO_H - -#include <asm-generic/siginfo.h> - -#endif /* __ASM_SH_SIGINFO_H */ diff --git a/arch/sh/include/asm/sizes.h b/arch/sh/include/asm/sizes.h deleted file mode 100644 index dd248c2..0000000 --- a/arch/sh/include/asm/sizes.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/sizes.h> diff --git a/arch/sh/include/asm/socket.h b/arch/sh/include/asm/socket.h deleted file mode 100644 index 6b71384..0000000 --- a/arch/sh/include/asm/socket.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/socket.h> diff --git a/arch/sh/include/asm/statfs.h b/arch/sh/include/asm/statfs.h deleted file mode 100644 index 9202a02..0000000 --- a/arch/sh/include/asm/statfs.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_SH_STATFS_H -#define __ASM_SH_STATFS_H - -#include <asm-generic/statfs.h> - -#endif /* __ASM_SH_STATFS_H */ diff --git a/arch/sh/include/asm/termbits.h b/arch/sh/include/asm/termbits.h deleted file mode 100644 index 3935b10..0000000 --- a/arch/sh/include/asm/termbits.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termbits.h> diff --git a/arch/sh/include/asm/termios.h b/arch/sh/include/asm/termios.h deleted file mode 100644 index 280d78a..0000000 --- a/arch/sh/include/asm/termios.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/termios.h> diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h index 050f221..8698a80 100644 --- a/arch/sh/include/asm/uaccess.h +++ b/arch/sh/include/asm/uaccess.h @@ -25,6 +25,8 @@ (__chk_user_ptr(addr), \ __access_ok((unsigned long __force)(addr), (size))) +#define user_addr_max() (current_thread_info()->addr_limit.seg) + /* * Uh, these should become the main single-value transfer routines ... * They automatically use the right size if we just have the right @@ -100,6 +102,11 @@ struct __large_struct { unsigned long buf[100]; }; # include "uaccess_64.h" #endif +extern long strncpy_from_user(char *dest, const char __user *src, long count); + +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); + /* Generic arbitrary sized copy. */ /* Return the number of bytes NOT copied */ __kernel_size_t __copy_user(void *to, const void *from, __kernel_size_t n); @@ -137,37 +144,6 @@ __kernel_size_t __clear_user(void *addr, __kernel_size_t size); __cl_size; \ }) -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -#define strncpy_from_user(dest,src,count) \ -({ \ - unsigned long __sfu_src = (unsigned long)(src); \ - int __sfu_count = (int)(count); \ - long __sfu_res = -EFAULT; \ - \ - if (__access_ok(__sfu_src, __sfu_count)) \ - __sfu_res = __strncpy_from_user((unsigned long)(dest), \ - __sfu_src, __sfu_count); \ - \ - __sfu_res; \ -}) - static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) { @@ -192,43 +168,6 @@ copy_to_user(void __user *to, const void *from, unsigned long n) return __copy_size; } -/** - * strnlen_user: - Get the size of a string in user space. - * @s: The string to measure. - * @n: The maximum valid length - * - * Context: User context only. This function may sleep. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * If the string is too long, returns a value greater than @n. - */ -static inline long strnlen_user(const char __user *s, long n) -{ - if (!__addr_ok(s)) - return 0; - else - return __strnlen_user(s, n); -} - -/** - * strlen_user: - Get the size of a string in user space. - * @str: The string to measure. - * - * Context: User context only. This function may sleep. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * - * If there is a limit on the length of a valid string, you may wish to - * consider using strnlen_user() instead. - */ -#define strlen_user(str) strnlen_user(str, ~0UL >> 1) - /* * The exception table consists of pairs of addresses: the first is the * address of an instruction that is allowed to fault, and the second is diff --git a/arch/sh/include/asm/uaccess_32.h b/arch/sh/include/asm/uaccess_32.h index ae0d24f..c0de7ee 100644 --- a/arch/sh/include/asm/uaccess_32.h +++ b/arch/sh/include/asm/uaccess_32.h @@ -170,79 +170,4 @@ __asm__ __volatile__( \ extern void __put_user_unknown(void); -static inline int -__strncpy_from_user(unsigned long __dest, unsigned long __user __src, int __count) -{ - __kernel_size_t res; - unsigned long __dummy, _d, _s, _c; - - __asm__ __volatile__( - "9:\n" - "mov.b @%2+, %1\n\t" - "cmp/eq #0, %1\n\t" - "bt/s 2f\n" - "1:\n" - "mov.b %1, @%3\n\t" - "dt %4\n\t" - "bf/s 9b\n\t" - " add #1, %3\n\t" - "2:\n\t" - "sub %4, %0\n" - "3:\n" - ".section .fixup,\"ax\"\n" - "4:\n\t" - "mov.l 5f, %1\n\t" - "jmp @%1\n\t" - " mov %9, %0\n\t" - ".balign 4\n" - "5: .long 3b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .balign 4\n" - " .long 9b,4b\n" - ".previous" - : "=r" (res), "=&z" (__dummy), "=r" (_s), "=r" (_d), "=r"(_c) - : "0" (__count), "2" (__src), "3" (__dest), "4" (__count), - "i" (-EFAULT) - : "memory", "t"); - - return res; -} - -/* - * Return the size of a string (including the ending 0 even when we have - * exceeded the maximum string length). - */ -static inline long __strnlen_user(const char __user *__s, long __n) -{ - unsigned long res; - unsigned long __dummy; - - __asm__ __volatile__( - "1:\t" - "mov.b @(%0,%3), %1\n\t" - "cmp/eq %4, %0\n\t" - "bt/s 2f\n\t" - " add #1, %0\n\t" - "tst %1, %1\n\t" - "bf 1b\n\t" - "2:\n" - ".section .fixup,\"ax\"\n" - "3:\n\t" - "mov.l 4f, %1\n\t" - "jmp @%1\n\t" - " mov #0, %0\n" - ".balign 4\n" - "4: .long 2b\n" - ".previous\n" - ".section __ex_table,\"a\"\n" - " .balign 4\n" - " .long 1b,3b\n" - ".previous" - : "=z" (res), "=&r" (__dummy) - : "0" (0), "r" (__s), "r" (__n) - : "t"); - return res; -} - #endif /* __ASM_SH_UACCESS_32_H */ diff --git a/arch/sh/include/asm/uaccess_64.h b/arch/sh/include/asm/uaccess_64.h index 56fd20b..2e07e0f 100644 --- a/arch/sh/include/asm/uaccess_64.h +++ b/arch/sh/include/asm/uaccess_64.h @@ -84,8 +84,4 @@ extern long __put_user_asm_l(void *, long); extern long __put_user_asm_q(void *, long); extern void __put_user_unknown(void); -extern long __strnlen_user(const char *__s, long __n); -extern int __strncpy_from_user(unsigned long __dest, - unsigned long __user __src, int __count); - #endif /* __ASM_SH_UACCESS_64_H */ diff --git a/arch/sh/include/asm/ucontext.h b/arch/sh/include/asm/ucontext.h deleted file mode 100644 index 9bc07b9..0000000 --- a/arch/sh/include/asm/ucontext.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ucontext.h> diff --git a/arch/sh/include/asm/word-at-a-time.h b/arch/sh/include/asm/word-at-a-time.h new file mode 100644 index 0000000..6e38953 --- /dev/null +++ b/arch/sh/include/asm/word-at-a-time.h @@ -0,0 +1,53 @@ +#ifndef __ASM_SH_WORD_AT_A_TIME_H +#define __ASM_SH_WORD_AT_A_TIME_H + +#ifdef CONFIG_CPU_BIG_ENDIAN +# include <asm-generic/word-at-a-time.h> +#else +/* + * Little-endian version cribbed from x86. + */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } + +/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ +static inline long count_masked_bytes(long mask) +{ + /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */ + long a = (0x0ff0001+mask) >> 23; + /* Fix the 1 for 00 case */ + return a & mask; +} + +/* Return nonzero if it has a zero */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +/* The mask we created is directly usable as a bytemask */ +#define zero_bytemask(mask) (mask) + +static inline unsigned long find_zero(unsigned long mask) +{ + return count_masked_bytes(mask); +} +#endif + +#endif diff --git a/arch/sh/include/asm/xor.h b/arch/sh/include/asm/xor.h deleted file mode 100644 index c82eb12..0000000 --- a/arch/sh/include/asm/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/xor.h> diff --git a/arch/sh/include/cpu-sh2a/cpu/ubc.h b/arch/sh/include/cpu-sh2a/cpu/ubc.h deleted file mode 100644 index 1192e1c..0000000 --- a/arch/sh/include/cpu-sh2a/cpu/ubc.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * SH-2A UBC definitions - * - * Copyright (C) 2008 Kieran Bingham - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#ifndef __ASM_CPU_SH2A_UBC_H -#define __ASM_CPU_SH2A_UBC_H - -#define UBC_BARA 0xfffc0400 -#define UBC_BAMRA 0xfffc0404 -#define UBC_BBRA 0xfffc04a0 /* 16 bit access */ -#define UBC_BDRA 0xfffc0408 -#define UBC_BDMRA 0xfffc040c - -#define UBC_BARB 0xfffc0410 -#define UBC_BAMRB 0xfffc0414 -#define UBC_BBRB 0xfffc04b0 /* 16 bit access */ -#define UBC_BDRB 0xfffc0418 -#define UBC_BDMRB 0xfffc041c - -#define UBC_BRCR 0xfffc04c0 - -#endif /* __ASM_CPU_SH2A_UBC_H */ diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S index ff1f0e6..b7cf6a5 100644 --- a/arch/sh/kernel/cpu/sh5/entry.S +++ b/arch/sh/kernel/cpu/sh5/entry.S @@ -1569,86 +1569,6 @@ ___clear_user_exit: #endif /* CONFIG_MMU */ /* - * int __strncpy_from_user(unsigned long __dest, unsigned long __src, - * int __count) - * - * Inputs: - * (r2) target address - * (r3) source address - * (r4) maximum size in bytes - * - * Ouputs: - * (*r2) copied data - * (r2) -EFAULT (in case of faulting) - * copied data (otherwise) - */ - .global __strncpy_from_user -__strncpy_from_user: - pta ___strncpy_from_user1, tr0 - pta ___strncpy_from_user_done, tr1 - or r4, ZERO, r5 /* r5 = original count */ - beq/u r4, r63, tr1 /* early exit if r4==0 */ - movi -(EFAULT), r6 /* r6 = reply, no real fixup */ - or ZERO, ZERO, r7 /* r7 = data, clear top byte of data */ - -___strncpy_from_user1: - ld.b r3, 0, r7 /* Fault address: only in reading */ - st.b r2, 0, r7 - addi r2, 1, r2 - addi r3, 1, r3 - beq/u ZERO, r7, tr1 - addi r4, -1, r4 /* return real number of copied bytes */ - bne/l ZERO, r4, tr0 - -___strncpy_from_user_done: - sub r5, r4, r6 /* If done, return copied */ - -___strncpy_from_user_exit: - or r6, ZERO, r2 - ptabs LINK, tr0 - blink tr0, ZERO - -/* - * extern long __strnlen_user(const char *__s, long __n) - * - * Inputs: - * (r2) source address - * (r3) source size in bytes - * - * Ouputs: - * (r2) -EFAULT (in case of faulting) - * string length (otherwise) - */ - .global __strnlen_user -__strnlen_user: - pta ___strnlen_user_set_reply, tr0 - pta ___strnlen_user1, tr1 - or ZERO, ZERO, r5 /* r5 = counter */ - movi -(EFAULT), r6 /* r6 = reply, no real fixup */ - or ZERO, ZERO, r7 /* r7 = data, clear top byte of data */ - beq r3, ZERO, tr0 - -___strnlen_user1: - ldx.b r2, r5, r7 /* Fault address: only in reading */ - addi r3, -1, r3 /* No real fixup */ - addi r5, 1, r5 - beq r3, ZERO, tr0 - bne r7, ZERO, tr1 -! The line below used to be active. This meant led to a junk byte lying between each pair -! of entries in the argv & envp structures in memory. Whilst the program saw the right data -! via the argv and envp arguments to main, it meant the 'flat' representation visible through -! /proc/$pid/cmdline was corrupt, causing trouble with ps, for example. -! addi r5, 1, r5 /* Include '\0' */ - -___strnlen_user_set_reply: - or r5, ZERO, r6 /* If done, return counter */ - -___strnlen_user_exit: - or r6, ZERO, r2 - ptabs LINK, tr0 - blink tr0, ZERO - -/* * extern long __get_user_asm_?(void *val, long addr) * * Inputs: @@ -1982,8 +1902,6 @@ asm_uaccess_start: .long ___copy_user2, ___copy_user_exit .long ___clear_user1, ___clear_user_exit #endif - .long ___strncpy_from_user1, ___strncpy_from_user_exit - .long ___strnlen_user1, ___strnlen_user_exit .long ___get_user_asm_b1, ___get_user_asm_b_exit .long ___get_user_asm_w1, ___get_user_asm_w_exit .long ___get_user_asm_l1, ___get_user_asm_l_exit diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 9b7a459..055d91b 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -4,6 +4,7 @@ #include <linux/sched.h> #include <linux/export.h> #include <linux/stackprotector.h> +#include <asm/fpu.h> struct kmem_cache *task_xstate_cachep = NULL; unsigned int xstate_size; diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 4264583e..602545b 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -33,6 +33,7 @@ #include <asm/switch_to.h> struct task_struct *last_task_used_math = NULL; +struct pt_regs fake_swapper_regs = { 0, }; void show_regs(struct pt_regs *regs) { diff --git a/arch/sh/kernel/sh_ksyms_64.c b/arch/sh/kernel/sh_ksyms_64.c index 45afa5c..26a0774 100644 --- a/arch/sh/kernel/sh_ksyms_64.c +++ b/arch/sh/kernel/sh_ksyms_64.c @@ -32,8 +32,6 @@ EXPORT_SYMBOL(__get_user_asm_b); EXPORT_SYMBOL(__get_user_asm_w); EXPORT_SYMBOL(__get_user_asm_l); EXPORT_SYMBOL(__get_user_asm_q); -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(copy_page); EXPORT_SYMBOL(__copy_user); diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h index 7e1fef3..e9c670d 100644 --- a/arch/tile/include/asm/thread_info.h +++ b/arch/tile/include/asm/thread_info.h @@ -91,11 +91,6 @@ extern void smp_nap(void); /* Enable interrupts racelessly and nap forever: helper for cpu_idle(). */ extern void _cpu_idle(void); -/* Switch boot idle thread to a freshly-allocated stack and free old stack. */ -extern void cpu_idle_on_new_stack(struct thread_info *old_ti, - unsigned long new_sp, - unsigned long new_ss10); - #else /* __ASSEMBLY__ */ /* diff --git a/arch/tile/kernel/entry.S b/arch/tile/kernel/entry.S index 133c4b5..c31637b 100644 --- a/arch/tile/kernel/entry.S +++ b/arch/tile/kernel/entry.S @@ -68,20 +68,6 @@ STD_ENTRY(KBacktraceIterator_init_current) jrp lr /* keep backtracer happy */ STD_ENDPROC(KBacktraceIterator_init_current) -/* - * Reset our stack to r1/r2 (sp and ksp0+cpu respectively), then - * free the old stack (passed in r0) and re-invoke cpu_idle(). - * We update sp and ksp0 simultaneously to avoid backtracer warnings. - */ -STD_ENTRY(cpu_idle_on_new_stack) - { - move sp, r1 - mtspr SPR_SYSTEM_SAVE_K_0, r2 - } - jal free_thread_info - j cpu_idle - STD_ENDPROC(cpu_idle_on_new_stack) - /* Loop forever on a nap during SMP boot. */ STD_ENTRY(smp_nap) nap diff --git a/arch/tile/kernel/setup.c b/arch/tile/kernel/setup.c index 6098ccc..dd87f34 100644 --- a/arch/tile/kernel/setup.c +++ b/arch/tile/kernel/setup.c @@ -29,6 +29,7 @@ #include <linux/smp.h> #include <linux/timex.h> #include <linux/hugetlb.h> +#include <linux/start_kernel.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/cacheflush.h> diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 8bbea6a..efe5acf 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -94,10 +94,10 @@ bs_die: .section ".bsdata", "a" bugger_off_msg: - .ascii "Direct booting from floppy is no longer supported.\r\n" - .ascii "Please use a boot loader program instead.\r\n" + .ascii "Direct floppy boot is not supported. " + .ascii "Use a boot loader program instead.\r\n" .ascii "\n" - .ascii "Remove disk and press any key to reboot . . .\r\n" + .ascii "Remove disk and press any key to reboot ...\r\n" .byte 0 #ifdef CONFIG_EFI_STUB @@ -111,7 +111,7 @@ coff_header: #else .word 0x8664 # x86-64 #endif - .word 2 # nr_sections + .word 3 # nr_sections .long 0 # TimeDateStamp .long 0 # PointerToSymbolTable .long 1 # NumberOfSymbols @@ -158,8 +158,8 @@ extra_header_fields: #else .quad 0 # ImageBase #endif - .long 0x1000 # SectionAlignment - .long 0x200 # FileAlignment + .long 0x20 # SectionAlignment + .long 0x20 # FileAlignment .word 0 # MajorOperatingSystemVersion .word 0 # MinorOperatingSystemVersion .word 0 # MajorImageVersion @@ -200,8 +200,10 @@ extra_header_fields: # Section table section_table: - .ascii ".text" - .byte 0 + # + # The offset & size fields are filled in by build.c. + # + .ascii ".setup" .byte 0 .byte 0 .long 0 @@ -217,9 +219,8 @@ section_table: # # The EFI application loader requires a relocation section - # because EFI applications must be relocatable. But since - # we don't need the loader to fixup any relocs for us, we - # just create an empty (zero-length) .reloc section header. + # because EFI applications must be relocatable. The .reloc + # offset & size fields are filled in by build.c. # .ascii ".reloc" .byte 0 @@ -233,6 +234,25 @@ section_table: .word 0 # NumberOfRelocations .word 0 # NumberOfLineNumbers .long 0x42100040 # Characteristics (section flags) + + # + # The offset & size fields are filled in by build.c. + # + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 + .long 0 + .long 0x0 # startup_{32,64} + .long 0 # Size of initialized data + # on disk + .long 0x0 # startup_{32,64} + .long 0 # PointerToRelocations + .long 0 # PointerToLineNumbers + .word 0 # NumberOfRelocations + .word 0 # NumberOfLineNumbers + .long 0x60500020 # Characteristics (section flags) + #endif /* CONFIG_EFI_STUB */ # Kernel attributes; used by setup. This is part 1 of the diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 3f61f6e..4b8e165 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c @@ -50,6 +50,8 @@ typedef unsigned int u32; u8 buf[SETUP_SECT_MAX*512]; int is_big_kernel; +#define PECOFF_RELOC_RESERVE 0x20 + /*----------------------------------------------------------------------*/ static const u32 crctab32[] = { @@ -133,11 +135,103 @@ static void usage(void) die("Usage: build setup system [> image]"); } -int main(int argc, char ** argv) -{ #ifdef CONFIG_EFI_STUB - unsigned int file_sz, pe_header; + +static void update_pecoff_section_header(char *section_name, u32 offset, u32 size) +{ + unsigned int pe_header; + unsigned short num_sections; + u8 *section; + + pe_header = get_unaligned_le32(&buf[0x3c]); + num_sections = get_unaligned_le16(&buf[pe_header + 6]); + +#ifdef CONFIG_X86_32 + section = &buf[pe_header + 0xa8]; +#else + section = &buf[pe_header + 0xb8]; #endif + + while (num_sections > 0) { + if (strncmp((char*)section, section_name, 8) == 0) { + /* section header size field */ + put_unaligned_le32(size, section + 0x8); + + /* section header vma field */ + put_unaligned_le32(offset, section + 0xc); + + /* section header 'size of initialised data' field */ + put_unaligned_le32(size, section + 0x10); + + /* section header 'file offset' field */ + put_unaligned_le32(offset, section + 0x14); + + break; + } + section += 0x28; + num_sections--; + } +} + +static void update_pecoff_setup_and_reloc(unsigned int size) +{ + u32 setup_offset = 0x200; + u32 reloc_offset = size - PECOFF_RELOC_RESERVE; + u32 setup_size = reloc_offset - setup_offset; + + update_pecoff_section_header(".setup", setup_offset, setup_size); + update_pecoff_section_header(".reloc", reloc_offset, PECOFF_RELOC_RESERVE); + + /* + * Modify .reloc section contents with a single entry. The + * relocation is applied to offset 10 of the relocation section. + */ + put_unaligned_le32(reloc_offset + 10, &buf[reloc_offset]); + put_unaligned_le32(10, &buf[reloc_offset + 4]); +} + +static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) +{ + unsigned int pe_header; + unsigned int text_sz = file_sz - text_start; + + pe_header = get_unaligned_le32(&buf[0x3c]); + + /* Size of image */ + put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); + + /* + * Size of code: Subtract the size of the first sector (512 bytes) + * which includes the header. + */ + put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]); + +#ifdef CONFIG_X86_32 + /* + * Address of entry point. + * + * The EFI stub entry point is +16 bytes from the start of + * the .text section. + */ + put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]); +#else + /* + * Address of entry point. startup_32 is at the beginning and + * the 64-bit entry point (startup_64) is always 512 bytes + * after. The EFI stub entry point is 16 bytes after that, as + * the first instruction allows legacy loaders to jump over + * the EFI stub initialisation + */ + put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]); +#endif /* CONFIG_X86_32 */ + + update_pecoff_section_header(".text", text_start, text_sz); +} + +#endif /* CONFIG_EFI_STUB */ + +int main(int argc, char ** argv) +{ unsigned int i, sz, setup_sectors; int c; u32 sys_size; @@ -163,6 +257,12 @@ int main(int argc, char ** argv) die("Boot block hasn't got boot flag (0xAA55)"); fclose(file); +#ifdef CONFIG_EFI_STUB + /* Reserve 0x20 bytes for .reloc section */ + memset(buf+c, 0, PECOFF_RELOC_RESERVE); + c += PECOFF_RELOC_RESERVE; +#endif + /* Pad unused space with zeros */ setup_sectors = (c + 511) / 512; if (setup_sectors < SETUP_SECT_MIN) @@ -170,6 +270,10 @@ int main(int argc, char ** argv) i = setup_sectors*512; memset(buf+c, 0, i-c); +#ifdef CONFIG_EFI_STUB + update_pecoff_setup_and_reloc(i); +#endif + /* Set the default root device */ put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); @@ -194,66 +298,8 @@ int main(int argc, char ** argv) put_unaligned_le32(sys_size, &buf[0x1f4]); #ifdef CONFIG_EFI_STUB - file_sz = sz + i + ((sys_size * 16) - sz); - - pe_header = get_unaligned_le32(&buf[0x3c]); - - /* Size of image */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x50]); - - /* - * Subtract the size of the first section (512 bytes) which - * includes the header and .reloc section. The remaining size - * is that of the .text section. - */ - file_sz -= 512; - - /* Size of code */ - put_unaligned_le32(file_sz, &buf[pe_header + 0x1c]); - -#ifdef CONFIG_X86_32 - /* - * Address of entry point. - * - * The EFI stub entry point is +16 bytes from the start of - * the .text section. - */ - put_unaligned_le32(i + 16, &buf[pe_header + 0x28]); - - /* .text size */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xb0]); - - /* .text vma */ - put_unaligned_le32(0x200, &buf[pe_header + 0xb4]); - - /* .text size of initialised data */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xb8]); - - /* .text file offset */ - put_unaligned_le32(0x200, &buf[pe_header + 0xbc]); -#else - /* - * Address of entry point. startup_32 is at the beginning and - * the 64-bit entry point (startup_64) is always 512 bytes - * after. The EFI stub entry point is 16 bytes after that, as - * the first instruction allows legacy loaders to jump over - * the EFI stub initialisation - */ - put_unaligned_le32(i + 528, &buf[pe_header + 0x28]); - - /* .text size */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xc0]); - - /* .text vma */ - put_unaligned_le32(0x200, &buf[pe_header + 0xc4]); - - /* .text size of initialised data */ - put_unaligned_le32(file_sz, &buf[pe_header + 0xc8]); - - /* .text file offset */ - put_unaligned_le32(0x200, &buf[pe_header + 0xcc]); -#endif /* CONFIG_X86_32 */ -#endif /* CONFIG_EFI_STUB */ + update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); +#endif crc = partial_crc32(buf, i, crc); if (fwrite(buf, 1, i, stdout) != i) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index be6d9e3..3470624 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2460,10 +2460,12 @@ ENTRY(aesni_cbc_dec) pxor IN3, STATE4 movaps IN4, IV #else - pxor (INP), STATE2 - pxor 0x10(INP), STATE3 pxor IN1, STATE4 movaps IN2, IV + movups (INP), IN1 + pxor IN1, STATE2 + movups 0x10(INP), IN2 + pxor IN2, STATE3 #endif movups STATE1, (OUTP) movups STATE2, 0x10(OUTP) diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 0e3793b..dc580c4 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -54,6 +54,20 @@ struct nmiaction { __register_nmi_handler((t), &fn##_na); \ }) +/* + * For special handlers that register/unregister in the + * init section only. This should be considered rare. + */ +#define register_nmi_handler_initonly(t, fn, fg, n) \ +({ \ + static struct nmiaction fn##_na __initdata = { \ + .handler = (fn), \ + .name = (n), \ + .flags = (fg), \ + }; \ + __register_nmi_handler((t), &fn##_na); \ +}) + int __register_nmi_handler(unsigned int, struct nmiaction *); void unregister_nmi_handler(unsigned int, const char *); diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 04cd688..e1f3a17 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -33,9 +33,8 @@ #define segment_eq(a, b) ((a).seg == (b).seg) #define user_addr_max() (current_thread_info()->addr_limit.seg) -#define __addr_ok(addr) \ - ((unsigned long __force)(addr) < \ - (current_thread_info()->addr_limit.seg)) +#define __addr_ok(addr) \ + ((unsigned long __force)(addr) < user_addr_max()) /* * Test whether a block of memory is a valid user space address. @@ -47,14 +46,14 @@ * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... */ -#define __range_not_ok(addr, size) \ +#define __range_not_ok(addr, size, limit) \ ({ \ unsigned long flag, roksum; \ __chk_user_ptr(addr); \ asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ : "=&r" (flag), "=r" (roksum) \ : "1" (addr), "g" ((long)(size)), \ - "rm" (current_thread_info()->addr_limit.seg)); \ + "rm" (limit)); \ flag; \ }) @@ -77,7 +76,8 @@ * checks that the pointer is in the user space range - after calling * this function, memory access functions may still return -EFAULT. */ -#define access_ok(type, addr, size) (likely(__range_not_ok(addr, size) == 0)) +#define access_ok(type, addr, size) \ + (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) /* * The exception table consists of pairs of addresses relative to the diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index becf47b..6149b47 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -149,7 +149,6 @@ /* 4 bits of software ack period */ #define UV2_ACK_MASK 0x7UL #define UV2_ACK_UNITS_SHFT 3 -#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT #define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT /* diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 6e76c19..d5fd66f 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -20,7 +20,6 @@ #include <linux/bitops.h> #include <linux/ioport.h> #include <linux/suspend.h> -#include <linux/kmemleak.h> #include <asm/e820.h> #include <asm/io.h> #include <asm/iommu.h> @@ -95,11 +94,6 @@ static u32 __init allocate_aperture(void) return 0; } memblock_reserve(addr, aper_size); - /* - * Kmemleak should not scan this block as it may not be mapped via the - * kernel direct mapping. - */ - kmemleak_ignore(phys_to_virt(addr)); printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", aper_size >> 10, addr); insert_aperture_resource((u32)addr, aper_size); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ac96561..5f0ff59 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1195,7 +1195,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) BUG_ON(!cfg->vector); vector = cfg->vector; - for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) + for_each_cpu(cpu, cfg->domain) per_cpu(vector_irq, cpu)[vector] = -1; cfg->vector = 0; @@ -1203,7 +1203,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) if (likely(!cfg->move_in_progress)) return; - for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { + for_each_cpu(cpu, cfg->old_domain) { for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { if (per_cpu(vector_irq, cpu)[vector] != irq) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0a687fd..da27c5d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1274,7 +1274,7 @@ static void mce_timer_fn(unsigned long data) */ iv = __this_cpu_read(mce_next_interval); if (mce_notify_irq()) - iv = max(iv, (unsigned long) HZ/100); + iv = max(iv / 2, (unsigned long) HZ/100); else iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); __this_cpu_write(mce_next_interval, iv); @@ -1557,7 +1557,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) static void __mcheck_cpu_init_timer(void) { struct timer_list *t = &__get_cpu_var(mce_timer); - unsigned long iv = __this_cpu_read(mce_next_interval); + unsigned long iv = check_interval * HZ; setup_timer(t, mce_timer_fn, smp_processor_id()); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e049d6d..c4706cf 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1496,6 +1496,7 @@ static struct cpu_hw_events *allocate_fake_cpuc(void) if (!cpuc->shared_regs) goto error; } + cpuc->is_fake = 1; return cpuc; error: free_fake_cpuc(cpuc); @@ -1756,6 +1757,12 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); } +static inline int +valid_user_frame(const void __user *fp, unsigned long size) +{ + return (__range_not_ok(fp, size, TASK_SIZE) == 0); +} + #ifdef CONFIG_COMPAT #include <asm/compat.h> @@ -1780,7 +1787,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) if (bytes != sizeof(frame)) break; - if (fp < compat_ptr(regs->sp)) + if (!valid_user_frame(fp, sizeof(frame))) break; perf_callchain_store(entry, frame.return_address); @@ -1826,7 +1833,7 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) if (bytes != sizeof(frame)) break; - if ((unsigned long)fp < regs->sp) + if (!valid_user_frame(fp, sizeof(frame))) break; perf_callchain_store(entry, frame.return_address); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 6638aaf..7241e2f 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -117,6 +117,7 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ unsigned int group_flag; + int is_fake; /* * Intel DebugStore bits @@ -364,6 +365,7 @@ struct x86_pmu { int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; + void (*pebs_aliases)(struct perf_event *event); /* * Intel LBR diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 166546e..187c294 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1119,27 +1119,33 @@ intel_bts_constraints(struct perf_event *event) return NULL; } -static bool intel_try_alt_er(struct perf_event *event, int orig_idx) +static int intel_alt_er(int idx) { if (!(x86_pmu.er_flags & ERF_HAS_RSP_1)) - return false; + return idx; - if (event->hw.extra_reg.idx == EXTRA_REG_RSP_0) { - event->hw.config &= ~INTEL_ARCH_EVENT_MASK; - event->hw.config |= 0x01bb; - event->hw.extra_reg.idx = EXTRA_REG_RSP_1; - event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; - } else if (event->hw.extra_reg.idx == EXTRA_REG_RSP_1) { + if (idx == EXTRA_REG_RSP_0) + return EXTRA_REG_RSP_1; + + if (idx == EXTRA_REG_RSP_1) + return EXTRA_REG_RSP_0; + + return idx; +} + +static void intel_fixup_er(struct perf_event *event, int idx) +{ + event->hw.extra_reg.idx = idx; + + if (idx == EXTRA_REG_RSP_0) { event->hw.config &= ~INTEL_ARCH_EVENT_MASK; event->hw.config |= 0x01b7; - event->hw.extra_reg.idx = EXTRA_REG_RSP_0; event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0; + } else if (idx == EXTRA_REG_RSP_1) { + event->hw.config &= ~INTEL_ARCH_EVENT_MASK; + event->hw.config |= 0x01bb; + event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1; } - - if (event->hw.extra_reg.idx == orig_idx) - return false; - - return true; } /* @@ -1157,14 +1163,18 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc, struct event_constraint *c = &emptyconstraint; struct er_account *era; unsigned long flags; - int orig_idx = reg->idx; + int idx = reg->idx; - /* already allocated shared msr */ - if (reg->alloc) + /* + * reg->alloc can be set due to existing state, so for fake cpuc we + * need to ignore this, otherwise we might fail to allocate proper fake + * state for this extra reg constraint. Also see the comment below. + */ + if (reg->alloc && !cpuc->is_fake) return NULL; /* call x86_get_event_constraint() */ again: - era = &cpuc->shared_regs->regs[reg->idx]; + era = &cpuc->shared_regs->regs[idx]; /* * we use spin_lock_irqsave() to avoid lockdep issues when * passing a fake cpuc @@ -1173,6 +1183,29 @@ again: if (!atomic_read(&era->ref) || era->config == reg->config) { + /* + * If its a fake cpuc -- as per validate_{group,event}() we + * shouldn't touch event state and we can avoid doing so + * since both will only call get_event_constraints() once + * on each event, this avoids the need for reg->alloc. + * + * Not doing the ER fixup will only result in era->reg being + * wrong, but since we won't actually try and program hardware + * this isn't a problem either. + */ + if (!cpuc->is_fake) { + if (idx != reg->idx) + intel_fixup_er(event, idx); + + /* + * x86_schedule_events() can call get_event_constraints() + * multiple times on events in the case of incremental + * scheduling(). reg->alloc ensures we only do the ER + * allocation once. + */ + reg->alloc = 1; + } + /* lock in msr value */ era->config = reg->config; era->reg = reg->reg; @@ -1180,17 +1213,17 @@ again: /* one more user */ atomic_inc(&era->ref); - /* no need to reallocate during incremental event scheduling */ - reg->alloc = 1; - /* * need to call x86_get_event_constraint() * to check if associated event has constraints */ c = NULL; - } else if (intel_try_alt_er(event, orig_idx)) { - raw_spin_unlock_irqrestore(&era->lock, flags); - goto again; + } else { + idx = intel_alt_er(idx); + if (idx != reg->idx) { + raw_spin_unlock_irqrestore(&era->lock, flags); + goto again; + } } raw_spin_unlock_irqrestore(&era->lock, flags); @@ -1204,11 +1237,14 @@ __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc, struct er_account *era; /* - * only put constraint if extra reg was actually - * allocated. Also takes care of event which do - * not use an extra shared reg + * Only put constraint if extra reg was actually allocated. Also takes + * care of event which do not use an extra shared reg. + * + * Also, if this is a fake cpuc we shouldn't touch any event state + * (reg->alloc) and we don't care about leaving inconsistent cpuc state + * either since it'll be thrown out. */ - if (!reg->alloc) + if (!reg->alloc || cpuc->is_fake) return; era = &cpuc->shared_regs->regs[reg->idx]; @@ -1300,15 +1336,9 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc, intel_put_shared_regs_event_constraints(cpuc, event); } -static int intel_pmu_hw_config(struct perf_event *event) +static void intel_pebs_aliases_core2(struct perf_event *event) { - int ret = x86_pmu_hw_config(event); - - if (ret) - return ret; - - if (event->attr.precise_ip && - (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { /* * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P * (0x003c) so that we can use it with PEBS. @@ -1329,10 +1359,48 @@ static int intel_pmu_hw_config(struct perf_event *event) */ u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16); + alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); + event->hw.config = alt_config; + } +} + +static void intel_pebs_aliases_snb(struct perf_event *event) +{ + if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) { + /* + * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P + * (0x003c) so that we can use it with PEBS. + * + * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't + * PEBS capable. However we can use UOPS_RETIRED.ALL + * (0x01c2), which is a PEBS capable event, to get the same + * count. + * + * UOPS_RETIRED.ALL counts the number of cycles that retires + * CNTMASK micro-ops. By setting CNTMASK to a value (16) + * larger than the maximum number of micro-ops that can be + * retired per cycle (4) and then inverting the condition, we + * count all cycles that retire 16 or less micro-ops, which + * is every cycle. + * + * Thereby we gain a PEBS capable cycle counter. + */ + u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16); alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK); event->hw.config = alt_config; } +} + +static int intel_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + if (event->attr.precise_ip && x86_pmu.pebs_aliases) + x86_pmu.pebs_aliases(event); if (intel_pmu_needs_lbr_smpl(event)) { ret = intel_pmu_setup_lbr_filter(event); @@ -1607,6 +1675,7 @@ static __initconst const struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, .put_event_constraints = intel_put_event_constraints, + .pebs_aliases = intel_pebs_aliases_core2, .format_attrs = intel_arch3_formats_attr, @@ -1840,8 +1909,9 @@ __init int intel_pmu_init(void) break; case 42: /* SandyBridge */ - x86_add_quirk(intel_sandybridge_quirk); case 45: /* SandyBridge, "Romely-EP" */ + x86_add_quirk(intel_sandybridge_quirk); + case 58: /* IvyBridge */ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -1849,6 +1919,7 @@ __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_snb_event_constraints; x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints; + x86_pmu.pebs_aliases = intel_pebs_aliases_snb; x86_pmu.extra_regs = intel_snb_extra_regs; /* all extra regs are per-cpu when HT is on */ x86_pmu.er_flags |= ERF_HAS_RSP_1; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5a3edc2..35e2192 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -400,14 +400,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.* */ - INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ - INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ - INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ - INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ - INTEL_UEVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 086eb58..f1b42b3 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -120,11 +120,6 @@ bool kvm_check_and_clear_guest_paused(void) bool ret = false; struct pvclock_vcpu_time_info *src; - /* - * per_cpu() is safe here because this function is only called from - * timer functions where preemption is already disabled. - */ - WARN_ON(!in_atomic()); src = &__get_cpu_var(hv_clock); if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) { __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED); diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index e31bf8d..149b8d9 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -42,7 +42,7 @@ static int __init nmi_unk_cb(unsigned int val, struct pt_regs *regs) static void __init init_nmi_testsuite(void) { /* trap all the unknown NMIs we may generate */ - register_nmi_handler(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); + register_nmi_handler_initonly(NMI_UNKNOWN, nmi_unk_cb, 0, "nmi_selftest_unk"); } static void __init cleanup_nmi_testsuite(void) @@ -64,7 +64,7 @@ static void __init test_nmi_ipi(struct cpumask *mask) { unsigned long timeout; - if (register_nmi_handler(NMI_LOCAL, test_nmi_ipi_callback, + if (register_nmi_handler_initonly(NMI_LOCAL, test_nmi_ipi_callback, NMI_FLAG_FIRST, "nmi_selftest")) { nmi_fail = FAILURE; return; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 79c45af..25b48ed 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -639,9 +639,11 @@ void native_machine_shutdown(void) set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); /* - * O.K Now that I'm on the appropriate processor, - * stop all of the others. + * O.K Now that I'm on the appropriate processor, stop all of the + * others. Also disable the local irq to not receive the per-cpu + * timer interrupt which may trigger scheduler's load balance. */ + local_irq_disable(); stop_other_cpus(); #endif diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f56f96d..3fab55b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -382,6 +382,15 @@ void __cpuinit set_cpu_sibling_map(int cpu) if ((i == cpu) || (has_mc && match_llc(c, o))) link_mask(llc_shared, cpu, i); + } + + /* + * This needs a separate iteration over the cpus because we rely on all + * cpu_sibling_mask links to be set-up. + */ + for_each_cpu(i, cpu_sibling_setup_mask) { + o = &cpu_data(i); + if ((i == cpu) || (has_mc && match_mc(c, o))) { link_mask(core, cpu, i); @@ -410,15 +419,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) /* maps the cpu to the sched domain representing multi-core */ const struct cpumask *cpu_coregroup_mask(int cpu) { - struct cpuinfo_x86 *c = &cpu_data(cpu); - /* - * For perf, we return last level cache shared map. - * And for power savings, we return cpu_core_map - */ - if (!(cpu_has(c, X86_FEATURE_AMD_DCM))) - return cpu_core_mask(cpu); - else - return cpu_llc_shared_mask(cpu); + return cpu_llc_shared_mask(cpu); } static void impress_friends(void) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index f61ee67..677b1ed 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -8,6 +8,7 @@ #include <linux/module.h> #include <asm/word-at-a-time.h> +#include <linux/sched.h> /* * best effort, GUP based copy_from_user() that is NMI-safe @@ -21,6 +22,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) void *map; int ret; + if (__range_not_ok(from, n, TASK_SIZE) == 0) + return len; + do { ret = __get_user_pages_fast(addr, 1, 0, &page); if (!ret) diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 8191379..5d7e51f 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -28,7 +28,7 @@ # - (66): the last prefix is 0x66 # - (F3): the last prefix is 0xF3 # - (F2): the last prefix is 0xF2 -# +# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) Table: one byte opcode Referrer: @@ -515,12 +515,12 @@ b4: LFS Gv,Mp b5: LGS Gv,Mp b6: MOVZX Gv,Eb b7: MOVZX Gv,Ew -b8: JMPE | POPCNT Gv,Ev (F3) +b8: JMPE (!F3) | POPCNT Gv,Ev (F3) b9: Grp10 (1A) ba: Grp8 Ev,Ib (1A) bb: BTC Ev,Gv -bc: BSF Gv,Ev | TZCNT Gv,Ev (F3) -bd: BSR Gv,Ev | LZCNT Gv,Ev (F3) +bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) +bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) be: MOVSX Gv,Eb bf: MOVSX Gv,Ew # 0x0f 0xc0-0xcf diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 97141c2..bc4e9d8 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -62,7 +62,8 @@ static void __init find_early_table_space(struct map_range *mr, unsigned long en extra += PMD_SIZE; #endif /* The first 2/4M doesn't use large pages. */ - extra += mr->end - mr->start; + if (mr->start < PMD_SIZE) + extra += mr->end - mr->start; ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; } else diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 732af3a..4599c3e 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c @@ -176,6 +176,8 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) return; } + node_set(node, numa_nodes_parsed); + printk(KERN_INFO "SRAT: Node %u PXM %u [mem %#010Lx-%#010Lx]\n", node, pxm, (unsigned long long) start, (unsigned long long) end - 1); diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c index e31bcd8..fd41a92 100644 --- a/arch/x86/platform/mrst/mrst.c +++ b/arch/x86/platform/mrst/mrst.c @@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier); EXPORT_SYMBOL_GPL(intel_scu_notifier); /* Called by IPC driver */ -void intel_scu_devices_create(void) +void __devinit intel_scu_devices_create(void) { int i; diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 3ae0e61..59880af 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1295,7 +1295,6 @@ static void __init enable_timeouts(void) */ mmr_image |= (1L << SOFTACK_MSHIFT); if (is_uv2_hub()) { - mmr_image &= ~(1L << UV2_LEG_SHFT); mmr_image |= (1L << UV2_EXT_SHFT); } write_mmr_misc_control(pnode, mmr_image); diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk index 5f6a5b6..ddcf39b 100644 --- a/arch/x86/tools/gen-insn-attr-x86.awk +++ b/arch/x86/tools/gen-insn-attr-x86.awk @@ -66,9 +66,10 @@ BEGIN { rex_expr = "^REX(\\.[XRWB]+)*" fpu_expr = "^ESC" # TODO - lprefix1_expr = "\\(66\\)" + lprefix1_expr = "\\((66|!F3)\\)" lprefix2_expr = "\\(F3\\)" - lprefix3_expr = "\\(F2\\)" + lprefix3_expr = "\\((F2|!F3)\\)" + lprefix_expr = "\\((66|F2|F3)\\)" max_lprefix = 4 # All opcodes starting with lower-case 'v' or with (v1) superscript @@ -333,13 +334,16 @@ function convert_operands(count,opnd, i,j,imm,mod) if (match(ext, lprefix1_expr)) { lptable1[idx] = add_flags(lptable1[idx],flags) variant = "INAT_VARIANT" - } else if (match(ext, lprefix2_expr)) { + } + if (match(ext, lprefix2_expr)) { lptable2[idx] = add_flags(lptable2[idx],flags) variant = "INAT_VARIANT" - } else if (match(ext, lprefix3_expr)) { + } + if (match(ext, lprefix3_expr)) { lptable3[idx] = add_flags(lptable3[idx],flags) variant = "INAT_VARIANT" - } else { + } + if (!match(ext, lprefix_expr)){ table[idx] = add_flags(table[idx],flags) } } diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h index 0b9f2e1..c1dacca 100644 --- a/arch/xtensa/include/asm/syscall.h +++ b/arch/xtensa/include/asm/syscall.h @@ -31,5 +31,5 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize); - - +asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, + size_t sigsetsize); diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index b9f8e58..efe4e85 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -493,7 +493,7 @@ static void do_signal(struct pt_regs *regs) if (ret) return; - signal_delivered(signr, info, ka, regs, 0); + signal_delivered(signr, &info, &ka, regs, 0); if (current->ptrace & PT_SINGLESTEP) task_pt_regs(current)->icountlevel = 1; diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 47768ff..8099895 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -208,7 +208,7 @@ config ACPI_IPMI config ACPI_HOTPLUG_CPU bool - depends on ACPI_PROCESSOR && HOTPLUG_CPU + depends on EXPERIMENTAL && ACPI_PROCESSOR && HOTPLUG_CPU select ACPI_CONTAINER default y diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 86933ca..7dd3f9f 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -643,11 +643,19 @@ static int acpi_battery_update(struct acpi_battery *battery) static void acpi_battery_refresh(struct acpi_battery *battery) { + int power_unit; + if (!battery->bat.dev) return; + power_unit = battery->power_unit; + acpi_battery_get_info(battery); - /* The battery may have changed its reporting units. */ + + if (power_unit == battery->power_unit) + return; + + /* The battery has changed its reporting units. */ sysfs_remove_battery(battery); sysfs_add_battery(battery); } diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 3188da3..adceafd 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -182,41 +182,66 @@ EXPORT_SYMBOL(acpi_bus_get_private_data); Power Management -------------------------------------------------------------------------- */ +static const char *state_string(int state) +{ + switch (state) { + case ACPI_STATE_D0: + return "D0"; + case ACPI_STATE_D1: + return "D1"; + case ACPI_STATE_D2: + return "D2"; + case ACPI_STATE_D3_HOT: + return "D3hot"; + case ACPI_STATE_D3_COLD: + return "D3"; + default: + return "(unknown)"; + } +} + static int __acpi_bus_get_power(struct acpi_device *device, int *state) { - int result = 0; - acpi_status status = 0; - unsigned long long psc = 0; + int result = ACPI_STATE_UNKNOWN; if (!device || !state) return -EINVAL; - *state = ACPI_STATE_UNKNOWN; - - if (device->flags.power_manageable) { - /* - * Get the device's power state either directly (via _PSC) or - * indirectly (via power resources). - */ - if (device->power.flags.power_resources) { - result = acpi_power_get_inferred_state(device, state); - if (result) - return result; - } else if (device->power.flags.explicit_get) { - status = acpi_evaluate_integer(device->handle, "_PSC", - NULL, &psc); - if (ACPI_FAILURE(status)) - return -ENODEV; - *state = (int)psc; - } - } else { + if (!device->flags.power_manageable) { /* TBD: Non-recursive algorithm for walking up hierarchy. */ *state = device->parent ? device->parent->power.state : ACPI_STATE_D0; + goto out; + } + + /* + * Get the device's power state either directly (via _PSC) or + * indirectly (via power resources). + */ + if (device->power.flags.explicit_get) { + unsigned long long psc; + acpi_status status = acpi_evaluate_integer(device->handle, + "_PSC", NULL, &psc); + if (ACPI_FAILURE(status)) + return -ENODEV; + + result = psc; + } + /* The test below covers ACPI_STATE_UNKNOWN too. */ + if (result <= ACPI_STATE_D2) { + ; /* Do nothing. */ + } else if (device->power.flags.power_resources) { + int error = acpi_power_get_inferred_state(device, &result); + if (error) + return error; + } else if (result == ACPI_STATE_D3_HOT) { + result = ACPI_STATE_D3; } + *state = result; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is D%d\n", - device->pnp.bus_id, *state)); + out: + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device [%s] power state is %s\n", + device->pnp.bus_id, state_string(*state))); return 0; } @@ -234,13 +259,14 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state) /* Make sure this is a valid target state */ if (state == device->power.state) { - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at D%d\n", - state)); + ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device is already at %s\n", + state_string(state))); return 0; } if (!device->power.states[state].flags.valid) { - printk(KERN_WARNING PREFIX "Device does not support D%d\n", state); + printk(KERN_WARNING PREFIX "Device does not support %s\n", + state_string(state)); return -ENODEV; } if (device->parent && (state < device->parent->power.state)) { @@ -294,13 +320,13 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state) end: if (result) printk(KERN_WARNING PREFIX - "Device [%s] failed to transition to D%d\n", - device->pnp.bus_id, state); + "Device [%s] failed to transition to %s\n", + device->pnp.bus_id, state_string(state)); else { device->power.state = state; ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Device [%s] transitioned to D%d\n", - device->pnp.bus_id, state)); + "Device [%s] transitioned to %s\n", + device->pnp.bus_id, state_string(state))); } return result; diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 0500f71..dd6d6a3 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -631,7 +631,7 @@ int acpi_power_get_inferred_state(struct acpi_device *device, int *state) * We know a device's inferred power state when all the resources * required for a given D-state are 'on'. */ - for (i = ACPI_STATE_D0; i < ACPI_STATE_D3_HOT; i++) { + for (i = ACPI_STATE_D0; i <= ACPI_STATE_D3_HOT; i++) { list = &device->power.states[i].resources; if (list->count < 1) continue; diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index 0af48a8..a093dc1 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -333,6 +333,7 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) struct acpi_buffer state = { 0, NULL }; union acpi_object *pss = NULL; int i; + int last_invalid = -1; status = acpi_evaluate_object(pr->handle, "_PSS", NULL, &buffer); @@ -394,14 +395,33 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr) ((u32)(px->core_frequency * 1000) != (px->core_frequency * 1000))) { printk(KERN_ERR FW_BUG PREFIX - "Invalid BIOS _PSS frequency: 0x%llx MHz\n", - px->core_frequency); - result = -EFAULT; - kfree(pr->performance->states); - goto end; + "Invalid BIOS _PSS frequency found for processor %d: 0x%llx MHz\n", + pr->id, px->core_frequency); + if (last_invalid == -1) + last_invalid = i; + } else { + if (last_invalid != -1) { + /* + * Copy this valid entry over last_invalid entry + */ + memcpy(&(pr->performance->states[last_invalid]), + px, sizeof(struct acpi_processor_px)); + ++last_invalid; + } } } + if (last_invalid == 0) { + printk(KERN_ERR FW_BUG PREFIX + "No valid BIOS _PSS frequency found for processor %d\n", pr->id); + result = -EFAULT; + kfree(pr->performance->states); + pr->performance->states = NULL; + } + + if (last_invalid > 0) + pr->performance->state_count = last_invalid; + end: kfree(buffer.pointer); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 85cbfdc..c8a1f3b 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1567,6 +1567,7 @@ static int acpi_bus_scan_fixed(void) ACPI_BUS_TYPE_POWER_BUTTON, ACPI_STA_DEFAULT, &ops); + device_init_wakeup(&device->dev, true); } if ((acpi_gbl_FADT.flags & ACPI_FADT_SLEEP_BUTTON) == 0) { diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 74ee4ab..88561029 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -57,6 +57,7 @@ MODULE_PARM_DESC(gts, "Enable evaluation of _GTS on suspend."); MODULE_PARM_DESC(bfs, "Enable evaluation of _BFS on resume".); static u8 sleep_states[ACPI_S_STATE_COUNT]; +static bool pwr_btn_event_pending; static void acpi_sleep_tts_switch(u32 acpi_state) { @@ -184,6 +185,14 @@ static int acpi_pm_prepare(void) return error; } +static int find_powerf_dev(struct device *dev, void *data) +{ + struct acpi_device *device = to_acpi_device(dev); + const char *hid = acpi_device_hid(device); + + return !strcmp(hid, ACPI_BUTTON_HID_POWERF); +} + /** * acpi_pm_finish - Instruct the platform to leave a sleep state. * @@ -192,6 +201,7 @@ static int acpi_pm_prepare(void) */ static void acpi_pm_finish(void) { + struct device *pwr_btn_dev; u32 acpi_state = acpi_target_sleep_state; acpi_ec_unblock_transactions(); @@ -209,6 +219,23 @@ static void acpi_pm_finish(void) acpi_set_firmware_waking_vector((acpi_physical_address) 0); acpi_target_sleep_state = ACPI_STATE_S0; + + /* If we were woken with the fixed power button, provide a small + * hint to userspace in the form of a wakeup event on the fixed power + * button device (if it can be found). + * + * We delay the event generation til now, as the PM layer requires + * timekeeping to be running before we generate events. */ + if (!pwr_btn_event_pending) + return; + + pwr_btn_event_pending = false; + pwr_btn_dev = bus_find_device(&acpi_bus_type, NULL, NULL, + find_powerf_dev); + if (pwr_btn_dev) { + pm_wakeup_event(pwr_btn_dev, 0); + put_device(pwr_btn_dev); + } } /** @@ -298,9 +325,23 @@ static int acpi_suspend_enter(suspend_state_t pm_state) /* ACPI 3.0 specs (P62) says that it's the responsibility * of the OSPM to clear the status bit [ implying that the * POWER_BUTTON event should not reach userspace ] + * + * However, we do generate a small hint for userspace in the form of + * a wakeup event. We flag this condition for now and generate the + * event later, as we're currently too early in resume to be able to + * generate wakeup events. */ - if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) - acpi_clear_event(ACPI_EVENT_POWER_BUTTON); + if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) { + acpi_event_status pwr_btn_status; + + acpi_get_event_status(ACPI_EVENT_POWER_BUTTON, &pwr_btn_status); + + if (pwr_btn_status & ACPI_EVENT_FLAG_SET) { + acpi_clear_event(ACPI_EVENT_POWER_BUTTON); + /* Flag for later */ + pwr_btn_event_pending = true; + } + } /* * Disable and clear GPE status before interrupt is enabled. Some GPEs @@ -730,8 +771,8 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) * can wake the system. _S0W may be valid, too. */ if (acpi_target_sleep_state == ACPI_STATE_S0 || - (device_may_wakeup(dev) && - adev->wakeup.sleep_state <= acpi_target_sleep_state)) { + (device_may_wakeup(dev) && adev->wakeup.flags.valid && + adev->wakeup.sleep_state >= acpi_target_sleep_state)) { acpi_status status; acpi_method[3] = 'W'; diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c index 9577b6f..a576575 100644 --- a/drivers/acpi/video.c +++ b/drivers/acpi/video.c @@ -1687,10 +1687,6 @@ static int acpi_video_bus_add(struct acpi_device *device) set_bit(KEY_BRIGHTNESS_ZERO, input->keybit); set_bit(KEY_DISPLAY_OFF, input->keybit); - error = input_register_device(input); - if (error) - goto err_stop_video; - printk(KERN_INFO PREFIX "%s [%s] (multi-head: %s rom: %s post: %s)\n", ACPI_VIDEO_DEVICE_NAME, acpi_device_bid(device), video->flags.multihead ? "yes" : "no", @@ -1701,12 +1697,16 @@ static int acpi_video_bus_add(struct acpi_device *device) video->pm_nb.priority = 0; error = register_pm_notifier(&video->pm_nb); if (error) - goto err_unregister_input_dev; + goto err_stop_video; + + error = input_register_device(input); + if (error) + goto err_unregister_pm_notifier; return 0; - err_unregister_input_dev: - input_unregister_device(input); + err_unregister_pm_notifier: + unregister_pm_notifier(&video->pm_nb); err_stop_video: acpi_video_bus_stop_devices(video); err_free_input_dev: @@ -1743,9 +1743,18 @@ static int acpi_video_bus_remove(struct acpi_device *device, int type) return 0; } +static int __init is_i740(struct pci_dev *dev) +{ + if (dev->device == 0x00D1) + return 1; + if (dev->device == 0x7000) + return 1; + return 0; +} + static int __init intel_opregion_present(void) { -#if defined(CONFIG_DRM_I915) || defined(CONFIG_DRM_I915_MODULE) + int opregion = 0; struct pci_dev *dev = NULL; u32 address; @@ -1754,13 +1763,15 @@ static int __init intel_opregion_present(void) continue; if (dev->vendor != PCI_VENDOR_ID_INTEL) continue; + /* We don't want to poke around undefined i740 registers */ + if (is_i740(dev)) + continue; pci_read_config_dword(dev, 0xfc, &address); if (!address) continue; - return 1; + opregion = 1; } -#endif - return 0; + return opregion; } int acpi_video_register(void) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 0bcda48..c89aa01 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -246,11 +246,11 @@ struct regmap *regmap_init(struct device *dev, map->lock = regmap_lock_mutex; map->unlock = regmap_unlock_mutex; } - map->format.buf_size = (config->reg_bits + config->val_bits) / 8; map->format.reg_bytes = DIV_ROUND_UP(config->reg_bits, 8); map->format.pad_bytes = config->pad_bits / 8; map->format.val_bytes = DIV_ROUND_UP(config->val_bits, 8); - map->format.buf_size += map->format.pad_bytes; + map->format.buf_size = DIV_ROUND_UP(config->reg_bits + + config->val_bits + config->pad_bits, 8); map->reg_shift = config->pad_bits % 8; if (config->reg_stride) map->reg_stride = config->reg_stride; @@ -368,7 +368,7 @@ struct regmap *regmap_init(struct device *dev, ret = regcache_init(map, config); if (ret < 0) - goto err_free_workbuf; + goto err_debugfs; /* Add a devres resource for dev_get_regmap() */ m = devres_alloc(dev_get_regmap_release, sizeof(*m), GFP_KERNEL); @@ -383,7 +383,8 @@ struct regmap *regmap_init(struct device *dev, err_cache: regcache_exit(map); -err_free_workbuf: +err_debugfs: + regmap_debugfs_exit(map); kfree(map->work_buf); err_map: kfree(map); @@ -471,6 +472,7 @@ int regmap_reinit_cache(struct regmap *map, const struct regmap_config *config) return ret; } +EXPORT_SYMBOL_GPL(regmap_reinit_cache); /** * regmap_exit(): Free a previously allocated register map diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 764f70c..0a41852 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -898,6 +898,7 @@ static struct pci_device_id agp_intel_pci_table[] = { ID(PCI_DEVICE_ID_INTEL_B43_HB), ID(PCI_DEVICE_ID_INTEL_B43_1_HB), ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB), + ID(PCI_DEVICE_ID_INTEL_IRONLAKE_D2_HB), ID(PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB), ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB), ID(PCI_DEVICE_ID_INTEL_IRONLAKE_MC2_HB), diff --git a/drivers/char/agp/intel-agp.h b/drivers/char/agp/intel-agp.h index c009175..8e2d914 100644 --- a/drivers/char/agp/intel-agp.h +++ b/drivers/char/agp/intel-agp.h @@ -212,6 +212,7 @@ #define PCI_DEVICE_ID_INTEL_G41_HB 0x2E30 #define PCI_DEVICE_ID_INTEL_G41_IG 0x2E32 #define PCI_DEVICE_ID_INTEL_IRONLAKE_D_HB 0x0040 +#define PCI_DEVICE_ID_INTEL_IRONLAKE_D2_HB 0x0069 #define PCI_DEVICE_ID_INTEL_IRONLAKE_D_IG 0x0042 #define PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB 0x0044 #define PCI_DEVICE_ID_INTEL_IRONLAKE_MA_HB 0x0062 diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index f518b99..6289f0e 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -36,6 +36,13 @@ static int atmel_trng_read(struct hwrng *rng, void *buf, size_t max, /* data ready? */ if (readl(trng->base + TRNG_ODATA) & 1) { *data = readl(trng->base + TRNG_ODATA); + /* + ensure data ready is only set again AFTER the next data + word is ready in case it got set between checking ISR + and reading ODATA, so we don't risk re-reading the + same word + */ + readl(trng->base + TRNG_ISR); return 4; } else return 0; diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index 8d81a1d..dd3e661 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o +obj-$(CONFIG_EM_TIMER_STI) += em_sti.o obj-$(CONFIG_CLKBLD_I8253) += i8253.o obj-$(CONFIG_CLKSRC_MMIO) += mmio.o obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o diff --git a/drivers/clocksource/em_sti.c b/drivers/clocksource/em_sti.c new file mode 100644 index 0000000..372051d --- /dev/null +++ b/drivers/clocksource/em_sti.c @@ -0,0 +1,406 @@ +/* + * Emma Mobile Timer Support - STI + * + * Copyright (C) 2012 Magnus Damm + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/init.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/ioport.h> +#include <linux/io.h> +#include <linux/clk.h> +#include <linux/irq.h> +#include <linux/err.h> +#include <linux/delay.h> +#include <linux/clocksource.h> +#include <linux/clockchips.h> +#include <linux/slab.h> +#include <linux/module.h> + +enum { USER_CLOCKSOURCE, USER_CLOCKEVENT, USER_NR }; + +struct em_sti_priv { + void __iomem *base; + struct clk *clk; + struct platform_device *pdev; + unsigned int active[USER_NR]; + unsigned long rate; + raw_spinlock_t lock; + struct clock_event_device ced; + struct clocksource cs; +}; + +#define STI_CONTROL 0x00 +#define STI_COMPA_H 0x10 +#define STI_COMPA_L 0x14 +#define STI_COMPB_H 0x18 +#define STI_COMPB_L 0x1c +#define STI_COUNT_H 0x20 +#define STI_COUNT_L 0x24 +#define STI_COUNT_RAW_H 0x28 +#define STI_COUNT_RAW_L 0x2c +#define STI_SET_H 0x30 +#define STI_SET_L 0x34 +#define STI_INTSTATUS 0x40 +#define STI_INTRAWSTATUS 0x44 +#define STI_INTENSET 0x48 +#define STI_INTENCLR 0x4c +#define STI_INTFFCLR 0x50 + +static inline unsigned long em_sti_read(struct em_sti_priv *p, int offs) +{ + return ioread32(p->base + offs); +} + +static inline void em_sti_write(struct em_sti_priv *p, int offs, + unsigned long value) +{ + iowrite32(value, p->base + offs); +} + +static int em_sti_enable(struct em_sti_priv *p) +{ + int ret; + + /* enable clock */ + ret = clk_enable(p->clk); + if (ret) { + dev_err(&p->pdev->dev, "cannot enable clock\n"); + return ret; + } + + /* configure channel, periodic mode and maximum timeout */ + p->rate = clk_get_rate(p->clk); + + /* reset the counter */ + em_sti_write(p, STI_SET_H, 0x40000000); + em_sti_write(p, STI_SET_L, 0x00000000); + + /* mask and clear pending interrupts */ + em_sti_write(p, STI_INTENCLR, 3); + em_sti_write(p, STI_INTFFCLR, 3); + + /* enable updates of counter registers */ + em_sti_write(p, STI_CONTROL, 1); + + return 0; +} + +static void em_sti_disable(struct em_sti_priv *p) +{ + /* mask interrupts */ + em_sti_write(p, STI_INTENCLR, 3); + + /* stop clock */ + clk_disable(p->clk); +} + +static cycle_t em_sti_count(struct em_sti_priv *p) +{ + cycle_t ticks; + unsigned long flags; + + /* the STI hardware buffers the 48-bit count, but to + * break it out into two 32-bit access the registers + * must be accessed in a certain order. + * Always read STI_COUNT_H before STI_COUNT_L. + */ + raw_spin_lock_irqsave(&p->lock, flags); + ticks = (cycle_t)(em_sti_read(p, STI_COUNT_H) & 0xffff) << 32; + ticks |= em_sti_read(p, STI_COUNT_L); + raw_spin_unlock_irqrestore(&p->lock, flags); + + return ticks; +} + +static cycle_t em_sti_set_next(struct em_sti_priv *p, cycle_t next) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&p->lock, flags); + + /* mask compare A interrupt */ + em_sti_write(p, STI_INTENCLR, 1); + + /* update compare A value */ + em_sti_write(p, STI_COMPA_H, next >> 32); + em_sti_write(p, STI_COMPA_L, next & 0xffffffff); + + /* clear compare A interrupt source */ + em_sti_write(p, STI_INTFFCLR, 1); + + /* unmask compare A interrupt */ + em_sti_write(p, STI_INTENSET, 1); + + raw_spin_unlock_irqrestore(&p->lock, flags); + + return next; +} + +static irqreturn_t em_sti_interrupt(int irq, void *dev_id) +{ + struct em_sti_priv *p = dev_id; + + p->ced.event_handler(&p->ced); + return IRQ_HANDLED; +} + +static int em_sti_start(struct em_sti_priv *p, unsigned int user) +{ + unsigned long flags; + int used_before; + int ret = 0; + + raw_spin_lock_irqsave(&p->lock, flags); + used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT]; + if (!used_before) + ret = em_sti_enable(p); + + if (!ret) + p->active[user] = 1; + raw_spin_unlock_irqrestore(&p->lock, flags); + + return ret; +} + +static void em_sti_stop(struct em_sti_priv *p, unsigned int user) +{ + unsigned long flags; + int used_before, used_after; + + raw_spin_lock_irqsave(&p->lock, flags); + used_before = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT]; + p->active[user] = 0; + used_after = p->active[USER_CLOCKSOURCE] | p->active[USER_CLOCKEVENT]; + + if (used_before && !used_after) + em_sti_disable(p); + raw_spin_unlock_irqrestore(&p->lock, flags); +} + +static struct em_sti_priv *cs_to_em_sti(struct clocksource *cs) +{ + return container_of(cs, struct em_sti_priv, cs); +} + +static cycle_t em_sti_clocksource_read(struct clocksource *cs) +{ + return em_sti_count(cs_to_em_sti(cs)); +} + +static int em_sti_clocksource_enable(struct clocksource *cs) +{ + int ret; + struct em_sti_priv *p = cs_to_em_sti(cs); + + ret = em_sti_start(p, USER_CLOCKSOURCE); + if (!ret) + __clocksource_updatefreq_hz(cs, p->rate); + return ret; +} + +static void em_sti_clocksource_disable(struct clocksource *cs) +{ + em_sti_stop(cs_to_em_sti(cs), USER_CLOCKSOURCE); +} + +static void em_sti_clocksource_resume(struct clocksource *cs) +{ + em_sti_clocksource_enable(cs); +} + +static int em_sti_register_clocksource(struct em_sti_priv *p) +{ + struct clocksource *cs = &p->cs; + + memset(cs, 0, sizeof(*cs)); + cs->name = dev_name(&p->pdev->dev); + cs->rating = 200; + cs->read = em_sti_clocksource_read; + cs->enable = em_sti_clocksource_enable; + cs->disable = em_sti_clocksource_disable; + cs->suspend = em_sti_clocksource_disable; + cs->resume = em_sti_clocksource_resume; + cs->mask = CLOCKSOURCE_MASK(48); + cs->flags = CLOCK_SOURCE_IS_CONTINUOUS; + + dev_info(&p->pdev->dev, "used as clock source\n"); + + /* Register with dummy 1 Hz value, gets updated in ->enable() */ + clocksource_register_hz(cs, 1); + return 0; +} + +static struct em_sti_priv *ced_to_em_sti(struct clock_event_device *ced) +{ + return container_of(ced, struct em_sti_priv, ced); +} + +static void em_sti_clock_event_mode(enum clock_event_mode mode, + struct clock_event_device *ced) +{ + struct em_sti_priv *p = ced_to_em_sti(ced); + + /* deal with old setting first */ + switch (ced->mode) { + case CLOCK_EVT_MODE_ONESHOT: + em_sti_stop(p, USER_CLOCKEVENT); + break; + default: + break; + } + + switch (mode) { + case CLOCK_EVT_MODE_ONESHOT: + dev_info(&p->pdev->dev, "used for oneshot clock events\n"); + em_sti_start(p, USER_CLOCKEVENT); + clockevents_config(&p->ced, p->rate); + break; + case CLOCK_EVT_MODE_SHUTDOWN: + case CLOCK_EVT_MODE_UNUSED: + em_sti_stop(p, USER_CLOCKEVENT); + break; + default: + break; + } +} + +static int em_sti_clock_event_next(unsigned long delta, + struct clock_event_device *ced) +{ + struct em_sti_priv *p = ced_to_em_sti(ced); + cycle_t next; + int safe; + + next = em_sti_set_next(p, em_sti_count(p) + delta); + safe = em_sti_count(p) < (next - 1); + + return !safe; +} + +static void em_sti_register_clockevent(struct em_sti_priv *p) +{ + struct clock_event_device *ced = &p->ced; + + memset(ced, 0, sizeof(*ced)); + ced->name = dev_name(&p->pdev->dev); + ced->features = CLOCK_EVT_FEAT_ONESHOT; + ced->rating = 200; + ced->cpumask = cpumask_of(0); + ced->set_next_event = em_sti_clock_event_next; + ced->set_mode = em_sti_clock_event_mode; + + dev_info(&p->pdev->dev, "used for clock events\n"); + + /* Register with dummy 1 Hz value, gets updated in ->set_mode() */ + clockevents_config_and_register(ced, 1, 2, 0xffffffff); +} + +static int __devinit em_sti_probe(struct platform_device *pdev) +{ + struct em_sti_priv *p; + struct resource *res; + int irq, ret; + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) { + dev_err(&pdev->dev, "failed to allocate driver data\n"); + ret = -ENOMEM; + goto err0; + } + + p->pdev = pdev; + platform_set_drvdata(pdev, p); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "failed to get I/O memory\n"); + ret = -EINVAL; + goto err0; + } + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "failed to get irq\n"); + ret = -EINVAL; + goto err0; + } + + /* map memory, let base point to the STI instance */ + p->base = ioremap_nocache(res->start, resource_size(res)); + if (p->base == NULL) { + dev_err(&pdev->dev, "failed to remap I/O memory\n"); + ret = -ENXIO; + goto err0; + } + + /* get hold of clock */ + p->clk = clk_get(&pdev->dev, "sclk"); + if (IS_ERR(p->clk)) { + dev_err(&pdev->dev, "cannot get clock\n"); + ret = PTR_ERR(p->clk); + goto err1; + } + + if (request_irq(irq, em_sti_interrupt, + IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING, + dev_name(&pdev->dev), p)) { + dev_err(&pdev->dev, "failed to request low IRQ\n"); + ret = -ENOENT; + goto err2; + } + + raw_spin_lock_init(&p->lock); + em_sti_register_clockevent(p); + em_sti_register_clocksource(p); + return 0; + +err2: + clk_put(p->clk); +err1: + iounmap(p->base); +err0: + kfree(p); + return ret; +} + +static int __devexit em_sti_remove(struct platform_device *pdev) +{ + return -EBUSY; /* cannot unregister clockevent and clocksource */ +} + +static const struct of_device_id em_sti_dt_ids[] __devinitconst = { + { .compatible = "renesas,em-sti", }, + {}, +}; +MODULE_DEVICE_TABLE(of, em_sti_dt_ids); + +static struct platform_driver em_sti_device_driver = { + .probe = em_sti_probe, + .remove = __devexit_p(em_sti_remove), + .driver = { + .name = "em_sti", + .of_match_table = em_sti_dt_ids, + } +}; + +module_platform_driver(em_sti_device_driver); + +MODULE_AUTHOR("Magnus Damm"); +MODULE_DESCRIPTION("Renesas Emma Mobile STI Timer Driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 32fe9ef..98b06ba 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -48,13 +48,13 @@ struct sh_cmt_priv { unsigned long next_match_value; unsigned long max_match_value; unsigned long rate; - spinlock_t lock; + raw_spinlock_t lock; struct clock_event_device ced; struct clocksource cs; unsigned long total_cycles; }; -static DEFINE_SPINLOCK(sh_cmt_lock); +static DEFINE_RAW_SPINLOCK(sh_cmt_lock); #define CMSTR -1 /* shared register */ #define CMCSR 0 /* channel register */ @@ -139,7 +139,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start) unsigned long flags, value; /* start stop register shared by multiple timer channels */ - spin_lock_irqsave(&sh_cmt_lock, flags); + raw_spin_lock_irqsave(&sh_cmt_lock, flags); value = sh_cmt_read(p, CMSTR); if (start) @@ -148,7 +148,7 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start) value &= ~(1 << cfg->timer_bit); sh_cmt_write(p, CMSTR, value); - spin_unlock_irqrestore(&sh_cmt_lock, flags); + raw_spin_unlock_irqrestore(&sh_cmt_lock, flags); } static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) @@ -328,9 +328,9 @@ static void sh_cmt_set_next(struct sh_cmt_priv *p, unsigned long delta) { unsigned long flags; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); __sh_cmt_set_next(p, delta); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static irqreturn_t sh_cmt_interrupt(int irq, void *dev_id) @@ -385,7 +385,7 @@ static int sh_cmt_start(struct sh_cmt_priv *p, unsigned long flag) int ret = 0; unsigned long flags; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); if (!(p->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE))) ret = sh_cmt_enable(p, &p->rate); @@ -398,7 +398,7 @@ static int sh_cmt_start(struct sh_cmt_priv *p, unsigned long flag) if ((flag == FLAG_CLOCKSOURCE) && (!(p->flags & FLAG_CLOCKEVENT))) __sh_cmt_set_next(p, p->max_match_value); out: - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); return ret; } @@ -408,7 +408,7 @@ static void sh_cmt_stop(struct sh_cmt_priv *p, unsigned long flag) unsigned long flags; unsigned long f; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); f = p->flags & (FLAG_CLOCKEVENT | FLAG_CLOCKSOURCE); p->flags &= ~flag; @@ -420,7 +420,7 @@ static void sh_cmt_stop(struct sh_cmt_priv *p, unsigned long flag) if ((flag == FLAG_CLOCKEVENT) && (p->flags & FLAG_CLOCKSOURCE)) __sh_cmt_set_next(p, p->max_match_value); - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); } static struct sh_cmt_priv *cs_to_sh_cmt(struct clocksource *cs) @@ -435,13 +435,13 @@ static cycle_t sh_cmt_clocksource_read(struct clocksource *cs) unsigned long value; int has_wrapped; - spin_lock_irqsave(&p->lock, flags); + raw_spin_lock_irqsave(&p->lock, flags); value = p->total_cycles; raw = sh_cmt_get_counter(p, &has_wrapped); if (unlikely(has_wrapped)) raw += p->match_value + 1; - spin_unlock_irqrestore(&p->lock, flags); + raw_spin_unlock_irqrestore(&p->lock, flags); return value + raw; } @@ -591,7 +591,7 @@ static int sh_cmt_register(struct sh_cmt_priv *p, char *name, p->max_match_value = (1 << p->width) - 1; p->match_value = p->max_match_value; - spin_lock_init(&p->lock); + raw_spin_lock_init(&p->lock); if (clockevent_rating) sh_cmt_register_clockevent(p, name, clockevent_rating); diff --git a/drivers/clocksource/sh_mtu2.c b/drivers/clocksource/sh_mtu2.c index a2172f6..d9b76ca 100644 --- a/drivers/clocksource/sh_mtu2.c +++ b/drivers/clocksource/sh_mtu2.c @@ -43,7 +43,7 @@ struct sh_mtu2_priv { struct clock_event_device ced; }; -static DEFINE_SPINLOCK(sh_mtu2_lock); +static DEFINE_RAW_SPINLOCK(sh_mtu2_lock); #define TSTR -1 /* shared register */ #define TCR 0 /* channel register */ @@ -107,7 +107,7 @@ static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start) unsigned long flags, value; /* start stop register shared by multiple timer channels */ - spin_lock_irqsave(&sh_mtu2_lock, flags); + raw_spin_lock_irqsave(&sh_mtu2_lock, flags); value = sh_mtu2_read(p, TSTR); if (start) @@ -116,7 +116,7 @@ static void sh_mtu2_start_stop_ch(struct sh_mtu2_priv *p, int start) value &= ~(1 << cfg->timer_bit); sh_mtu2_write(p, TSTR, value); - spin_unlock_irqrestore(&sh_mtu2_lock, flags); + raw_spin_unlock_irqrestore(&sh_mtu2_lock, flags); } static int sh_mtu2_enable(struct sh_mtu2_priv *p) diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 97f54b6..c1b51d4 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -45,7 +45,7 @@ struct sh_tmu_priv { struct clocksource cs; }; -static DEFINE_SPINLOCK(sh_tmu_lock); +static DEFINE_RAW_SPINLOCK(sh_tmu_lock); #define TSTR -1 /* shared register */ #define TCOR 0 /* channel register */ @@ -95,7 +95,7 @@ static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start) unsigned long flags, value; /* start stop register shared by multiple timer channels */ - spin_lock_irqsave(&sh_tmu_lock, flags); + raw_spin_lock_irqsave(&sh_tmu_lock, flags); value = sh_tmu_read(p, TSTR); if (start) @@ -104,7 +104,7 @@ static void sh_tmu_start_stop_ch(struct sh_tmu_priv *p, int start) value &= ~(1 << cfg->timer_bit); sh_tmu_write(p, TSTR, value); - spin_unlock_irqrestore(&sh_tmu_lock, flags); + raw_spin_unlock_irqrestore(&sh_tmu_lock, flags); } static int sh_tmu_enable(struct sh_tmu_priv *p) @@ -245,12 +245,7 @@ static void sh_tmu_clock_event_start(struct sh_tmu_priv *p, int periodic) sh_tmu_enable(p); - /* TODO: calculate good shift from rate and counter bit width */ - - ced->shift = 32; - ced->mult = div_sc(p->rate, NSEC_PER_SEC, ced->shift); - ced->max_delta_ns = clockevent_delta2ns(0xffffffff, ced); - ced->min_delta_ns = 5000; + clockevents_config(ced, p->rate); if (periodic) { p->periodic = (p->rate + HZ/2) / HZ; @@ -323,7 +318,8 @@ static void sh_tmu_register_clockevent(struct sh_tmu_priv *p, ced->set_mode = sh_tmu_clock_event_mode; dev_info(&p->pdev->dev, "used for clock events\n"); - clockevents_register_device(ced); + + clockevents_config_and_register(ced, 1, 0x300, 0xffffffff); ret = setup_irq(p->irqaction.irq, &p->irqaction); if (ret) { diff --git a/drivers/gpio/gpio-samsung.c b/drivers/gpio/gpio-samsung.c index 7bb0044..b6453d0 100644 --- a/drivers/gpio/gpio-samsung.c +++ b/drivers/gpio/gpio-samsung.c @@ -2833,7 +2833,7 @@ static __init void exynos5_gpiolib_init(void) } /* need to set base address for gpc4 */ - exonys5_gpios_1[11].base = gpio_base1 + 0x2E0; + exynos5_gpios_1[11].base = gpio_base1 + 0x2E0; /* need to set base address for gpx */ chip = &exynos5_gpios_1[21]; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 4209531..d6de2e07f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -244,8 +244,8 @@ static const struct file_operations exynos_drm_driver_fops = { }; static struct drm_driver exynos_drm_driver = { - .driver_features = DRIVER_HAVE_IRQ | DRIVER_BUS_PLATFORM | - DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME, + .driver_features = DRIVER_HAVE_IRQ | DRIVER_MODESET | + DRIVER_GEM | DRIVER_PRIME, .load = exynos_drm_load, .unload = exynos_drm_unload, .open = exynos_drm_open, diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c index 6e9ac7b..23d5ad3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_encoder.c +++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c @@ -172,19 +172,12 @@ static void exynos_drm_encoder_commit(struct drm_encoder *encoder) manager_ops->commit(manager->dev); } -static struct drm_crtc * -exynos_drm_encoder_get_crtc(struct drm_encoder *encoder) -{ - return encoder->crtc; -} - static struct drm_encoder_helper_funcs exynos_encoder_helper_funcs = { .dpms = exynos_drm_encoder_dpms, .mode_fixup = exynos_drm_encoder_mode_fixup, .mode_set = exynos_drm_encoder_mode_set, .prepare = exynos_drm_encoder_prepare, .commit = exynos_drm_encoder_commit, - .get_crtc = exynos_drm_encoder_get_crtc, }; static void exynos_drm_encoder_destroy(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index f82a299..4ccfe43 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -51,11 +51,22 @@ struct exynos_drm_fb { static void exynos_drm_fb_destroy(struct drm_framebuffer *fb) { struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); + unsigned int i; DRM_DEBUG_KMS("%s\n", __FILE__); drm_framebuffer_cleanup(fb); + for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem_obj); i++) { + struct drm_gem_object *obj; + + if (exynos_fb->exynos_gem_obj[i] == NULL) + continue; + + obj = &exynos_fb->exynos_gem_obj[i]->base; + drm_gem_object_unreference_unlocked(obj); + } + kfree(exynos_fb); exynos_fb = NULL; } @@ -134,11 +145,11 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, return ERR_PTR(-ENOENT); } - drm_gem_object_unreference_unlocked(obj); - fb = exynos_drm_framebuffer_init(dev, mode_cmd, obj); - if (IS_ERR(fb)) + if (IS_ERR(fb)) { + drm_gem_object_unreference_unlocked(obj); return fb; + } exynos_fb = to_exynos_fb(fb); nr = exynos_drm_format_num_buffers(fb->pixel_format); @@ -152,8 +163,6 @@ exynos_user_fb_create(struct drm_device *dev, struct drm_file *file_priv, return ERR_PTR(-ENOENT); } - drm_gem_object_unreference_unlocked(obj); - exynos_fb->exynos_gem_obj[i] = to_exynos_gem_obj(obj); } diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h index 3ecb30d..5082375 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.h +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h @@ -31,10 +31,10 @@ static inline int exynos_drm_format_num_buffers(uint32_t format) { switch (format) { - case DRM_FORMAT_NV12M: + case DRM_FORMAT_NV12: case DRM_FORMAT_NV12MT: return 2; - case DRM_FORMAT_YUV420M: + case DRM_FORMAT_YUV420: return 3; default: return 1; diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index fc91293..5c8b683 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -689,7 +689,6 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv, struct drm_device *dev, uint32_t handle, uint64_t *offset) { - struct exynos_drm_gem_obj *exynos_gem_obj; struct drm_gem_object *obj; int ret = 0; @@ -710,15 +709,13 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv, goto unlock; } - exynos_gem_obj = to_exynos_gem_obj(obj); - - if (!exynos_gem_obj->base.map_list.map) { - ret = drm_gem_create_mmap_offset(&exynos_gem_obj->base); + if (!obj->map_list.map) { + ret = drm_gem_create_mmap_offset(obj); if (ret) goto out; } - *offset = (u64)exynos_gem_obj->base.map_list.hash.key << PAGE_SHIFT; + *offset = (u64)obj->map_list.hash.key << PAGE_SHIFT; DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset); out: diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 68ef010..e2147a2 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -365,7 +365,7 @@ static void vp_video_buffer(struct mixer_context *ctx, int win) switch (win_data->pixel_format) { case DRM_FORMAT_NV12MT: tiled_mode = true; - case DRM_FORMAT_NV12M: + case DRM_FORMAT_NV12: crcb_mode = false; buf_num = 2; break; @@ -601,18 +601,20 @@ static void mixer_win_reset(struct mixer_context *ctx) mixer_reg_write(res, MXR_BG_COLOR2, 0x008080); /* setting graphical layers */ - val = MXR_GRP_CFG_COLOR_KEY_DISABLE; /* no blank key */ val |= MXR_GRP_CFG_WIN_BLEND_EN; + val |= MXR_GRP_CFG_BLEND_PRE_MUL; + val |= MXR_GRP_CFG_PIXEL_BLEND_EN; val |= MXR_GRP_CFG_ALPHA_VAL(0xff); /* non-transparent alpha */ /* the same configuration for both layers */ mixer_reg_write(res, MXR_GRAPHIC_CFG(0), val); - - val |= MXR_GRP_CFG_BLEND_PRE_MUL; - val |= MXR_GRP_CFG_PIXEL_BLEND_EN; mixer_reg_write(res, MXR_GRAPHIC_CFG(1), val); + /* setting video layers */ + val = MXR_GRP_CFG_ALPHA_VAL(0); + mixer_reg_write(res, MXR_VIDEO_CFG, val); + /* configuration of Video Processor Registers */ vp_win_reset(ctx); vp_default_filter(res); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 238a521..9fe9ebe 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -233,6 +233,7 @@ static const struct intel_device_info intel_sandybridge_d_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_sandybridge_m_info = { @@ -243,6 +244,7 @@ static const struct intel_device_info intel_sandybridge_m_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_ivybridge_d_info = { @@ -252,6 +254,7 @@ static const struct intel_device_info intel_ivybridge_d_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_ivybridge_m_info = { @@ -262,6 +265,7 @@ static const struct intel_device_info intel_ivybridge_m_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_valleyview_m_info = { @@ -289,6 +293,7 @@ static const struct intel_device_info intel_haswell_d_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct intel_device_info intel_haswell_m_info = { @@ -298,6 +303,7 @@ static const struct intel_device_info intel_haswell_m_info = { .has_blt_ring = 1, .has_llc = 1, .has_pch_split = 1, + .has_force_wake = 1, }; static const struct pci_device_id pciidlist[] = { /* aka */ @@ -1139,10 +1145,9 @@ MODULE_LICENSE("GPL and additional rights"); /* We give fast paths for the really cool registers */ #define NEEDS_FORCE_WAKE(dev_priv, reg) \ - (((dev_priv)->info->gen >= 6) && \ - ((reg) < 0x40000) && \ - ((reg) != FORCEWAKE)) && \ - (!IS_VALLEYVIEW((dev_priv)->dev)) + ((HAS_FORCE_WAKE((dev_priv)->dev)) && \ + ((reg) < 0x40000) && \ + ((reg) != FORCEWAKE)) #define __i915_read(x, y) \ u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c9cfc67..b0b676a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -285,6 +285,7 @@ struct intel_device_info { u8 is_ivybridge:1; u8 is_valleyview:1; u8 has_pch_split:1; + u8 has_force_wake:1; u8 is_haswell:1; u8 has_fbc:1; u8 has_pipe_cxsr:1; @@ -1101,6 +1102,8 @@ struct drm_i915_file_private { #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT) #define HAS_PCH_IBX(dev) (INTEL_PCH_TYPE(dev) == PCH_IBX) +#define HAS_FORCE_WAKE(dev) (INTEL_INFO(dev)->has_force_wake) + #include "i915_trace.h" /** diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 1417660..b1fe0ed 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -510,7 +510,7 @@ out: return ret; } -static void pch_irq_handler(struct drm_device *dev, u32 pch_iir) +static void ibx_irq_handler(struct drm_device *dev, u32 pch_iir) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; int pipe; @@ -550,6 +550,35 @@ static void pch_irq_handler(struct drm_device *dev, u32 pch_iir) DRM_DEBUG_DRIVER("PCH transcoder A underrun interrupt\n"); } +static void cpt_irq_handler(struct drm_device *dev, u32 pch_iir) +{ + drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; + int pipe; + + if (pch_iir & SDE_AUDIO_POWER_MASK_CPT) + DRM_DEBUG_DRIVER("PCH audio power change on port %d\n", + (pch_iir & SDE_AUDIO_POWER_MASK_CPT) >> + SDE_AUDIO_POWER_SHIFT_CPT); + + if (pch_iir & SDE_AUX_MASK_CPT) + DRM_DEBUG_DRIVER("AUX channel interrupt\n"); + + if (pch_iir & SDE_GMBUS_CPT) + DRM_DEBUG_DRIVER("PCH GMBUS interrupt\n"); + + if (pch_iir & SDE_AUDIO_CP_REQ_CPT) + DRM_DEBUG_DRIVER("Audio CP request interrupt\n"); + + if (pch_iir & SDE_AUDIO_CP_CHG_CPT) + DRM_DEBUG_DRIVER("Audio CP change interrupt\n"); + + if (pch_iir & SDE_FDI_MASK_CPT) + for_each_pipe(pipe) + DRM_DEBUG_DRIVER(" pipe %c FDI IIR: 0x%08x\n", + pipe_name(pipe), + I915_READ(FDI_RX_IIR(pipe))); +} + static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -591,7 +620,7 @@ static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS) if (pch_iir & SDE_HOTPLUG_MASK_CPT) queue_work(dev_priv->wq, &dev_priv->hotplug_work); - pch_irq_handler(dev, pch_iir); + cpt_irq_handler(dev, pch_iir); /* clear PCH hotplug event before clear CPU irq */ I915_WRITE(SDEIIR, pch_iir); @@ -684,7 +713,10 @@ static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS) if (de_iir & DE_PCH_EVENT) { if (pch_iir & hotplug_mask) queue_work(dev_priv->wq, &dev_priv->hotplug_work); - pch_irq_handler(dev, pch_iir); + if (HAS_PCH_CPT(dev)) + cpt_irq_handler(dev, pch_iir); + else + ibx_irq_handler(dev, pch_iir); } if (de_iir & DE_PCU_EVENT) { diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2d49b95..48d5e8e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -210,6 +210,14 @@ #define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) #define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) #define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) +/* IVB has funny definitions for which plane to flip. */ +#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) + #define MI_SET_CONTEXT MI_INSTR(0x18, 0) #define MI_MM_SPACE_GTT (1<<8) #define MI_MM_SPACE_PHYSICAL (0<<8) @@ -3313,7 +3321,7 @@ /* PCH */ -/* south display engine interrupt */ +/* south display engine interrupt: IBX */ #define SDE_AUDIO_POWER_D (1 << 27) #define SDE_AUDIO_POWER_C (1 << 26) #define SDE_AUDIO_POWER_B (1 << 25) @@ -3349,15 +3357,44 @@ #define SDE_TRANSA_CRC_ERR (1 << 1) #define SDE_TRANSA_FIFO_UNDER (1 << 0) #define SDE_TRANS_MASK (0x3f) -/* CPT */ -#define SDE_CRT_HOTPLUG_CPT (1 << 19) + +/* south display engine interrupt: CPT/PPT */ +#define SDE_AUDIO_POWER_D_CPT (1 << 31) +#define SDE_AUDIO_POWER_C_CPT (1 << 30) +#define SDE_AUDIO_POWER_B_CPT (1 << 29) +#define SDE_AUDIO_POWER_SHIFT_CPT 29 +#define SDE_AUDIO_POWER_MASK_CPT (7 << 29) +#define SDE_AUXD_CPT (1 << 27) +#define SDE_AUXC_CPT (1 << 26) +#define SDE_AUXB_CPT (1 << 25) +#define SDE_AUX_MASK_CPT (7 << 25) #define SDE_PORTD_HOTPLUG_CPT (1 << 23) #define SDE_PORTC_HOTPLUG_CPT (1 << 22) #define SDE_PORTB_HOTPLUG_CPT (1 << 21) +#define SDE_CRT_HOTPLUG_CPT (1 << 19) #define SDE_HOTPLUG_MASK_CPT (SDE_CRT_HOTPLUG_CPT | \ SDE_PORTD_HOTPLUG_CPT | \ SDE_PORTC_HOTPLUG_CPT | \ SDE_PORTB_HOTPLUG_CPT) +#define SDE_GMBUS_CPT (1 << 17) +#define SDE_AUDIO_CP_REQ_C_CPT (1 << 10) +#define SDE_AUDIO_CP_CHG_C_CPT (1 << 9) +#define SDE_FDI_RXC_CPT (1 << 8) +#define SDE_AUDIO_CP_REQ_B_CPT (1 << 6) +#define SDE_AUDIO_CP_CHG_B_CPT (1 << 5) +#define SDE_FDI_RXB_CPT (1 << 4) +#define SDE_AUDIO_CP_REQ_A_CPT (1 << 2) +#define SDE_AUDIO_CP_CHG_A_CPT (1 << 1) +#define SDE_FDI_RXA_CPT (1 << 0) +#define SDE_AUDIO_CP_REQ_CPT (SDE_AUDIO_CP_REQ_C_CPT | \ + SDE_AUDIO_CP_REQ_B_CPT | \ + SDE_AUDIO_CP_REQ_A_CPT) +#define SDE_AUDIO_CP_CHG_CPT (SDE_AUDIO_CP_CHG_C_CPT | \ + SDE_AUDIO_CP_CHG_B_CPT | \ + SDE_AUDIO_CP_CHG_A_CPT) +#define SDE_FDI_MASK_CPT (SDE_FDI_RXC_CPT | \ + SDE_FDI_RXB_CPT | \ + SDE_FDI_RXA_CPT) #define SDEISR 0xc4000 #define SDEIMR 0xc4004 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 91478942..e0aa064 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -6158,17 +6158,34 @@ static int intel_gen7_queue_flip(struct drm_device *dev, struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_ring_buffer *ring = &dev_priv->ring[BCS]; + uint32_t plane_bit = 0; int ret; ret = intel_pin_and_fence_fb_obj(dev, obj, ring); if (ret) goto err; + switch(intel_crtc->plane) { + case PLANE_A: + plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_A; + break; + case PLANE_B: + plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_B; + break; + case PLANE_C: + plane_bit = MI_DISPLAY_FLIP_IVB_PLANE_C; + break; + default: + WARN_ONCE(1, "unknown plane in flip command\n"); + ret = -ENODEV; + goto err; + } + ret = intel_ring_begin(ring, 4); if (ret) goto err_unpin; - intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | (intel_crtc->plane << 19)); + intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); intel_ring_emit(ring, (fb->pitches[0] | obj->tiling_mode)); intel_ring_emit(ring, (obj->gtt_offset)); intel_ring_emit(ring, (MI_NOOP)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b59b6d5..e5b84ff 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -266,10 +266,15 @@ u32 intel_ring_get_active_head(struct intel_ring_buffer *ring) static int init_ring_common(struct intel_ring_buffer *ring) { - drm_i915_private_t *dev_priv = ring->dev->dev_private; + struct drm_device *dev = ring->dev; + drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj = ring->obj; + int ret = 0; u32 head; + if (HAS_FORCE_WAKE(dev)) + gen6_gt_force_wake_get(dev_priv); + /* Stop the ring if it's running. */ I915_WRITE_CTL(ring, 0); I915_WRITE_HEAD(ring, 0); @@ -317,7 +322,8 @@ static int init_ring_common(struct intel_ring_buffer *ring) I915_READ_HEAD(ring), I915_READ_TAIL(ring), I915_READ_START(ring)); - return -EIO; + ret = -EIO; + goto out; } if (!drm_core_check_feature(ring->dev, DRIVER_MODESET)) @@ -326,9 +332,14 @@ static int init_ring_common(struct intel_ring_buffer *ring) ring->head = I915_READ_HEAD(ring); ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR; ring->space = ring_space(ring); + ring->last_retired_head = -1; } - return 0; +out: + if (HAS_FORCE_WAKE(dev)) + gen6_gt_force_wake_put(dev_priv); + + return ret; } static int @@ -987,6 +998,10 @@ static int intel_init_ring_buffer(struct drm_device *dev, if (ret) goto err_unref; + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + goto err_unpin; + ring->virtual_start = ioremap_wc(dev->agp->base + obj->gtt_offset, ring->size); if (ring->virtual_start == NULL) { diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 4e7dd2b..c1655412 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -52,6 +52,7 @@ struct evergreen_cs_track { u32 cb_color_view[12]; u32 cb_color_pitch[12]; u32 cb_color_slice[12]; + u32 cb_color_slice_idx[12]; u32 cb_color_attrib[12]; u32 cb_color_cmask_slice[8];/* unused */ u32 cb_color_fmask_slice[8];/* unused */ @@ -127,12 +128,14 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track) track->cb_color_info[i] = 0; track->cb_color_view[i] = 0xFFFFFFFF; track->cb_color_pitch[i] = 0; - track->cb_color_slice[i] = 0; + track->cb_color_slice[i] = 0xfffffff; + track->cb_color_slice_idx[i] = 0; } track->cb_target_mask = 0xFFFFFFFF; track->cb_shader_mask = 0xFFFFFFFF; track->cb_dirty = true; + track->db_depth_slice = 0xffffffff; track->db_depth_view = 0xFFFFC000; track->db_depth_size = 0xFFFFFFFF; track->db_depth_control = 0xFFFFFFFF; @@ -250,10 +253,9 @@ static int evergreen_surface_check_2d(struct radeon_cs_parser *p, { struct evergreen_cs_track *track = p->track; unsigned palign, halign, tileb, slice_pt; + unsigned mtile_pr, mtile_ps, mtileb; tileb = 64 * surf->bpe * surf->nsamples; - palign = track->group_size / (8 * surf->bpe * surf->nsamples); - palign = MAX(8, palign); slice_pt = 1; if (tileb > surf->tsplit) { slice_pt = tileb / surf->tsplit; @@ -262,7 +264,10 @@ static int evergreen_surface_check_2d(struct radeon_cs_parser *p, /* macro tile width & height */ palign = (8 * surf->bankw * track->npipes) * surf->mtilea; halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea; - surf->layer_size = surf->nbx * surf->nby * surf->bpe * slice_pt; + mtileb = (palign / 8) * (halign / 8) * tileb;; + mtile_pr = surf->nbx / palign; + mtile_ps = (mtile_pr * surf->nby) / halign; + surf->layer_size = mtile_ps * mtileb * slice_pt; surf->base_align = (palign / 8) * (halign / 8) * tileb; surf->palign = palign; surf->halign = halign; @@ -434,6 +439,39 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i offset += surf.layer_size * mslice; if (offset > radeon_bo_size(track->cb_color_bo[id])) { + /* old ddx are broken they allocate bo with w*h*bpp but + * program slice with ALIGN(h, 8), catch this and patch + * command stream. + */ + if (!surf.mode) { + volatile u32 *ib = p->ib.ptr; + unsigned long tmp, nby, bsize, size, min = 0; + + /* find the height the ddx wants */ + if (surf.nby > 8) { + min = surf.nby - 8; + } + bsize = radeon_bo_size(track->cb_color_bo[id]); + tmp = track->cb_color_bo_offset[id] << 8; + for (nby = surf.nby; nby > min; nby--) { + size = nby * surf.nbx * surf.bpe * surf.nsamples; + if ((tmp + size * mslice) <= bsize) { + break; + } + } + if (nby > min) { + surf.nby = nby; + slice = ((nby * surf.nbx) / 64) - 1; + if (!evergreen_surface_check(p, &surf, "cb")) { + /* check if this one works */ + tmp += surf.layer_size * mslice; + if (tmp <= bsize) { + ib[track->cb_color_slice_idx[id]] = slice; + goto old_ddx_ok; + } + } + } + } dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, " "offset %d, max layer %d, bo size %ld, slice %d)\n", __func__, __LINE__, id, surf.layer_size, @@ -446,6 +484,7 @@ static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned i surf.tsplit, surf.mtilea); return -EINVAL; } +old_ddx_ok: return 0; } @@ -1532,6 +1571,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR7_SLICE: tmp = (reg - CB_COLOR0_SLICE) / 0x3c; track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); + track->cb_color_slice_idx[tmp] = idx; track->cb_dirty = true; break; case CB_COLOR8_SLICE: @@ -1540,6 +1580,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) case CB_COLOR11_SLICE: tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8; track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx); + track->cb_color_slice_idx[tmp] = idx; track->cb_dirty = true; break; case CB_COLOR0_ATTRIB: diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 3df4efa..3186522 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -460,15 +460,28 @@ static void cayman_gpu_init(struct radeon_device *rdev) rdev->config.cayman.max_pipes_per_simd = 4; rdev->config.cayman.max_tile_pipes = 2; if ((rdev->pdev->device == 0x9900) || - (rdev->pdev->device == 0x9901)) { + (rdev->pdev->device == 0x9901) || + (rdev->pdev->device == 0x9905) || + (rdev->pdev->device == 0x9906) || + (rdev->pdev->device == 0x9907) || + (rdev->pdev->device == 0x9908) || + (rdev->pdev->device == 0x9909) || + (rdev->pdev->device == 0x9910) || + (rdev->pdev->device == 0x9917)) { rdev->config.cayman.max_simds_per_se = 6; rdev->config.cayman.max_backends_per_se = 2; } else if ((rdev->pdev->device == 0x9903) || - (rdev->pdev->device == 0x9904)) { + (rdev->pdev->device == 0x9904) || + (rdev->pdev->device == 0x990A) || + (rdev->pdev->device == 0x9913) || + (rdev->pdev->device == 0x9918)) { rdev->config.cayman.max_simds_per_se = 4; rdev->config.cayman.max_backends_per_se = 2; - } else if ((rdev->pdev->device == 0x9990) || - (rdev->pdev->device == 0x9991)) { + } else if ((rdev->pdev->device == 0x9919) || + (rdev->pdev->device == 0x9990) || + (rdev->pdev->device == 0x9991) || + (rdev->pdev->device == 0x9994) || + (rdev->pdev->device == 0x99A0)) { rdev->config.cayman.max_simds_per_se = 3; rdev->config.cayman.max_backends_per_se = 1; } else { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 45cfcea..f30dc95 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -2426,6 +2426,12 @@ int r600_startup(struct radeon_device *rdev) if (r) return r; + r = r600_audio_init(rdev); + if (r) { + DRM_ERROR("radeon: audio init failed\n"); + return r; + } + return 0; } @@ -2462,12 +2468,6 @@ int r600_resume(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) { - DRM_ERROR("radeon: audio resume failed\n"); - return r; - } - return r; } @@ -2577,9 +2577,6 @@ int r600_init(struct radeon_device *rdev) rdev->accel_working = false; } - r = r600_audio_init(rdev); - if (r) - return r; /* TODO error handling */ return 0; } diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c index 7c4fa77..7479a5c 100644 --- a/drivers/gpu/drm/radeon/r600_audio.c +++ b/drivers/gpu/drm/radeon/r600_audio.c @@ -192,6 +192,7 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock) struct radeon_device *rdev = dev->dev_private; struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv; + struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc); int base_rate = 48000; switch (radeon_encoder->encoder_id) { @@ -217,8 +218,8 @@ void r600_audio_set_clock(struct drm_encoder *encoder, int clock) WREG32(EVERGREEN_AUDIO_PLL1_DIV, clock * 10); WREG32(EVERGREEN_AUDIO_PLL1_UNK, 0x00000071); - /* Some magic trigger or src sel? */ - WREG32_P(0x5ac, 0x01, ~0x77); + /* Select DTO source */ + WREG32(0x5ac, radeon_crtc->crtc_id); } else { switch (dig->dig_encoder) { case 0: diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 226379e..969c275 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -348,7 +348,6 @@ void r600_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mod WREG32(HDMI0_AUDIO_PACKET_CONTROL + offset, HDMI0_AUDIO_SAMPLE_SEND | /* send audio packets */ HDMI0_AUDIO_DELAY_EN(1) | /* default audio delay */ - HDMI0_AUDIO_SEND_MAX_PACKETS | /* send NULL packets if no audio is available */ HDMI0_AUDIO_PACKETS_PER_LINE(3) | /* should be suffient for all audio modes and small enough for all hblanks */ HDMI0_60958_CS_UPDATE); /* allow 60958 channel status fields to be updated */ } diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 85dac33..fefcca5 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -1374,9 +1374,9 @@ struct cayman_asic { struct si_asic { unsigned max_shader_engines; - unsigned max_pipes_per_simd; unsigned max_tile_pipes; - unsigned max_simds_per_se; + unsigned max_cu_per_sh; + unsigned max_sh_per_se; unsigned max_backends_per_se; unsigned max_texture_channel_caches; unsigned max_gprs; @@ -1387,7 +1387,6 @@ struct si_asic { unsigned sc_hiz_tile_fifo_size; unsigned sc_earlyz_tile_fifo_size; - unsigned num_shader_engines; unsigned num_tile_pipes; unsigned num_backends_per_se; unsigned backend_disable_mask_per_asic; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index f0bb2b5..03e5f5d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -57,9 +57,10 @@ * 2.13.0 - virtual memory support, streamout * 2.14.0 - add evergreen tiling informations * 2.15.0 - add max_pipes query + * 2.16.0 - fix evergreen 2D tiled surface calculation */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 15 +#define KMS_DRIVER_MINOR 16 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 79db56e..59d4493 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -476,12 +476,18 @@ int radeon_vm_bo_add(struct radeon_device *rdev, mutex_lock(&vm->mutex); if (last_pfn > vm->last_pfn) { - /* grow va space 32M by 32M */ - unsigned align = ((32 << 20) >> 12) - 1; + /* release mutex and lock in right order */ + mutex_unlock(&vm->mutex); radeon_mutex_lock(&rdev->cs_mutex); - radeon_vm_unbind_locked(rdev, vm); + mutex_lock(&vm->mutex); + /* and check again */ + if (last_pfn > vm->last_pfn) { + /* grow va space 32M by 32M */ + unsigned align = ((32 << 20) >> 12) - 1; + radeon_vm_unbind_locked(rdev, vm); + vm->last_pfn = (last_pfn + align) & ~align; + } radeon_mutex_unlock(&rdev->cs_mutex); - vm->last_pfn = (last_pfn + align) & ~align; } head = &vm->va; last_offset = 0; @@ -595,8 +601,8 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, if (bo_va == NULL) return 0; - mutex_lock(&vm->mutex); radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&vm->mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); radeon_mutex_unlock(&rdev->cs_mutex); list_del(&bo_va->vm_list); @@ -641,9 +647,8 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) struct radeon_bo_va *bo_va, *tmp; int r; - mutex_lock(&vm->mutex); - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&vm->mutex); radeon_vm_unbind_locked(rdev, vm); radeon_mutex_unlock(&rdev->cs_mutex); diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index f1016a5..5c58d7d 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -273,7 +273,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) break; case RADEON_INFO_MAX_PIPES: if (rdev->family >= CHIP_TAHITI) - value = rdev->config.si.max_pipes_per_simd; + value = rdev->config.si.max_cu_per_sh; else if (rdev->family >= CHIP_CAYMAN) value = rdev->config.cayman.max_pipes_per_simd; else if (rdev->family >= CHIP_CEDAR) diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index 25f9eef..e95c5e6 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -908,12 +908,6 @@ static int rs600_startup(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) { - dev_err(rdev->dev, "failed initializing audio\n"); - return r; - } - r = radeon_ib_pool_start(rdev); if (r) return r; @@ -922,6 +916,12 @@ static int rs600_startup(struct radeon_device *rdev) if (r) return r; + r = r600_audio_init(rdev); + if (r) { + dev_err(rdev->dev, "failed initializing audio\n"); + return r; + } + return 0; } diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index 3277dde..159b6a4 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -637,12 +637,6 @@ static int rs690_startup(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) { - dev_err(rdev->dev, "failed initializing audio\n"); - return r; - } - r = radeon_ib_pool_start(rdev); if (r) return r; @@ -651,6 +645,12 @@ static int rs690_startup(struct radeon_device *rdev) if (r) return r; + r = r600_audio_init(rdev); + if (r) { + dev_err(rdev->dev, "failed initializing audio\n"); + return r; + } + return 0; } diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c index 04ddc36..4ad0281 100644 --- a/drivers/gpu/drm/radeon/rv770.c +++ b/drivers/gpu/drm/radeon/rv770.c @@ -956,6 +956,12 @@ static int rv770_startup(struct radeon_device *rdev) if (r) return r; + r = r600_audio_init(rdev); + if (r) { + DRM_ERROR("radeon: audio init failed\n"); + return r; + } + return 0; } @@ -978,12 +984,6 @@ int rv770_resume(struct radeon_device *rdev) return r; } - r = r600_audio_init(rdev); - if (r) { - dev_err(rdev->dev, "radeon: audio init failed\n"); - return r; - } - return r; } @@ -1092,12 +1092,6 @@ int rv770_init(struct radeon_device *rdev) rdev->accel_working = false; } - r = r600_audio_init(rdev); - if (r) { - dev_err(rdev->dev, "radeon: audio init failed\n"); - return r; - } - return 0; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 549732e..c7b61f1 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -867,200 +867,6 @@ void dce6_bandwidth_update(struct radeon_device *rdev) /* * Core functions */ -static u32 si_get_tile_pipe_to_backend_map(struct radeon_device *rdev, - u32 num_tile_pipes, - u32 num_backends_per_asic, - u32 *backend_disable_mask_per_asic, - u32 num_shader_engines) -{ - u32 backend_map = 0; - u32 enabled_backends_mask = 0; - u32 enabled_backends_count = 0; - u32 num_backends_per_se; - u32 cur_pipe; - u32 swizzle_pipe[SI_MAX_PIPES]; - u32 cur_backend = 0; - u32 i; - bool force_no_swizzle; - - /* force legal values */ - if (num_tile_pipes < 1) - num_tile_pipes = 1; - if (num_tile_pipes > rdev->config.si.max_tile_pipes) - num_tile_pipes = rdev->config.si.max_tile_pipes; - if (num_shader_engines < 1) - num_shader_engines = 1; - if (num_shader_engines > rdev->config.si.max_shader_engines) - num_shader_engines = rdev->config.si.max_shader_engines; - if (num_backends_per_asic < num_shader_engines) - num_backends_per_asic = num_shader_engines; - if (num_backends_per_asic > (rdev->config.si.max_backends_per_se * num_shader_engines)) - num_backends_per_asic = rdev->config.si.max_backends_per_se * num_shader_engines; - - /* make sure we have the same number of backends per se */ - num_backends_per_asic = ALIGN(num_backends_per_asic, num_shader_engines); - /* set up the number of backends per se */ - num_backends_per_se = num_backends_per_asic / num_shader_engines; - if (num_backends_per_se > rdev->config.si.max_backends_per_se) { - num_backends_per_se = rdev->config.si.max_backends_per_se; - num_backends_per_asic = num_backends_per_se * num_shader_engines; - } - - /* create enable mask and count for enabled backends */ - for (i = 0; i < SI_MAX_BACKENDS; ++i) { - if (((*backend_disable_mask_per_asic >> i) & 1) == 0) { - enabled_backends_mask |= (1 << i); - ++enabled_backends_count; - } - if (enabled_backends_count == num_backends_per_asic) - break; - } - - /* force the backends mask to match the current number of backends */ - if (enabled_backends_count != num_backends_per_asic) { - u32 this_backend_enabled; - u32 shader_engine; - u32 backend_per_se; - - enabled_backends_mask = 0; - enabled_backends_count = 0; - *backend_disable_mask_per_asic = SI_MAX_BACKENDS_MASK; - for (i = 0; i < SI_MAX_BACKENDS; ++i) { - /* calc the current se */ - shader_engine = i / rdev->config.si.max_backends_per_se; - /* calc the backend per se */ - backend_per_se = i % rdev->config.si.max_backends_per_se; - /* default to not enabled */ - this_backend_enabled = 0; - if ((shader_engine < num_shader_engines) && - (backend_per_se < num_backends_per_se)) - this_backend_enabled = 1; - if (this_backend_enabled) { - enabled_backends_mask |= (1 << i); - *backend_disable_mask_per_asic &= ~(1 << i); - ++enabled_backends_count; - } - } - } - - - memset((uint8_t *)&swizzle_pipe[0], 0, sizeof(u32) * SI_MAX_PIPES); - switch (rdev->family) { - case CHIP_TAHITI: - case CHIP_PITCAIRN: - case CHIP_VERDE: - force_no_swizzle = true; - break; - default: - force_no_swizzle = false; - break; - } - if (force_no_swizzle) { - bool last_backend_enabled = false; - - force_no_swizzle = false; - for (i = 0; i < SI_MAX_BACKENDS; ++i) { - if (((enabled_backends_mask >> i) & 1) == 1) { - if (last_backend_enabled) - force_no_swizzle = true; - last_backend_enabled = true; - } else - last_backend_enabled = false; - } - } - - switch (num_tile_pipes) { - case 1: - case 3: - case 5: - case 7: - DRM_ERROR("odd number of pipes!\n"); - break; - case 2: - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 1; - break; - case 4: - if (force_no_swizzle) { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 1; - swizzle_pipe[2] = 2; - swizzle_pipe[3] = 3; - } else { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 2; - swizzle_pipe[2] = 1; - swizzle_pipe[3] = 3; - } - break; - case 6: - if (force_no_swizzle) { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 1; - swizzle_pipe[2] = 2; - swizzle_pipe[3] = 3; - swizzle_pipe[4] = 4; - swizzle_pipe[5] = 5; - } else { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 2; - swizzle_pipe[2] = 4; - swizzle_pipe[3] = 1; - swizzle_pipe[4] = 3; - swizzle_pipe[5] = 5; - } - break; - case 8: - if (force_no_swizzle) { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 1; - swizzle_pipe[2] = 2; - swizzle_pipe[3] = 3; - swizzle_pipe[4] = 4; - swizzle_pipe[5] = 5; - swizzle_pipe[6] = 6; - swizzle_pipe[7] = 7; - } else { - swizzle_pipe[0] = 0; - swizzle_pipe[1] = 2; - swizzle_pipe[2] = 4; - swizzle_pipe[3] = 6; - swizzle_pipe[4] = 1; - swizzle_pipe[5] = 3; - swizzle_pipe[6] = 5; - swizzle_pipe[7] = 7; - } - break; - } - - for (cur_pipe = 0; cur_pipe < num_tile_pipes; ++cur_pipe) { - while (((1 << cur_backend) & enabled_backends_mask) == 0) - cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS; - - backend_map |= (((cur_backend & 0xf) << (swizzle_pipe[cur_pipe] * 4))); - - cur_backend = (cur_backend + 1) % SI_MAX_BACKENDS; - } - - return backend_map; -} - -static u32 si_get_disable_mask_per_asic(struct radeon_device *rdev, - u32 disable_mask_per_se, - u32 max_disable_mask_per_se, - u32 num_shader_engines) -{ - u32 disable_field_width_per_se = r600_count_pipe_bits(disable_mask_per_se); - u32 disable_mask_per_asic = disable_mask_per_se & max_disable_mask_per_se; - - if (num_shader_engines == 1) - return disable_mask_per_asic; - else if (num_shader_engines == 2) - return disable_mask_per_asic | (disable_mask_per_asic << disable_field_width_per_se); - else - return 0xffffffff; -} - static void si_tiling_mode_table_init(struct radeon_device *rdev) { const u32 num_tile_mode_states = 32; @@ -1562,18 +1368,151 @@ static void si_tiling_mode_table_init(struct radeon_device *rdev) DRM_ERROR("unknown asic: 0x%x\n", rdev->family); } +static void si_select_se_sh(struct radeon_device *rdev, + u32 se_num, u32 sh_num) +{ + u32 data = INSTANCE_BROADCAST_WRITES; + + if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) + data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES; + else if (se_num == 0xffffffff) + data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num); + else if (sh_num == 0xffffffff) + data |= SH_BROADCAST_WRITES | SE_INDEX(se_num); + else + data |= SH_INDEX(sh_num) | SE_INDEX(se_num); + WREG32(GRBM_GFX_INDEX, data); +} + +static u32 si_create_bitmask(u32 bit_width) +{ + u32 i, mask = 0; + + for (i = 0; i < bit_width; i++) { + mask <<= 1; + mask |= 1; + } + return mask; +} + +static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh) +{ + u32 data, mask; + + data = RREG32(CC_GC_SHADER_ARRAY_CONFIG); + if (data & 1) + data &= INACTIVE_CUS_MASK; + else + data = 0; + data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG); + + data >>= INACTIVE_CUS_SHIFT; + + mask = si_create_bitmask(cu_per_sh); + + return ~data & mask; +} + +static void si_setup_spi(struct radeon_device *rdev, + u32 se_num, u32 sh_per_se, + u32 cu_per_sh) +{ + int i, j, k; + u32 data, mask, active_cu; + + for (i = 0; i < se_num; i++) { + for (j = 0; j < sh_per_se; j++) { + si_select_se_sh(rdev, i, j); + data = RREG32(SPI_STATIC_THREAD_MGMT_3); + active_cu = si_get_cu_enabled(rdev, cu_per_sh); + + mask = 1; + for (k = 0; k < 16; k++) { + mask <<= k; + if (active_cu & mask) { + data &= ~mask; + WREG32(SPI_STATIC_THREAD_MGMT_3, data); + break; + } + } + } + } + si_select_se_sh(rdev, 0xffffffff, 0xffffffff); +} + +static u32 si_get_rb_disabled(struct radeon_device *rdev, + u32 max_rb_num, u32 se_num, + u32 sh_per_se) +{ + u32 data, mask; + + data = RREG32(CC_RB_BACKEND_DISABLE); + if (data & 1) + data &= BACKEND_DISABLE_MASK; + else + data = 0; + data |= RREG32(GC_USER_RB_BACKEND_DISABLE); + + data >>= BACKEND_DISABLE_SHIFT; + + mask = si_create_bitmask(max_rb_num / se_num / sh_per_se); + + return data & mask; +} + +static void si_setup_rb(struct radeon_device *rdev, + u32 se_num, u32 sh_per_se, + u32 max_rb_num) +{ + int i, j; + u32 data, mask; + u32 disabled_rbs = 0; + u32 enabled_rbs = 0; + + for (i = 0; i < se_num; i++) { + for (j = 0; j < sh_per_se; j++) { + si_select_se_sh(rdev, i, j); + data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se); + disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH); + } + } + si_select_se_sh(rdev, 0xffffffff, 0xffffffff); + + mask = 1; + for (i = 0; i < max_rb_num; i++) { + if (!(disabled_rbs & mask)) + enabled_rbs |= mask; + mask <<= 1; + } + + for (i = 0; i < se_num; i++) { + si_select_se_sh(rdev, i, 0xffffffff); + data = 0; + for (j = 0; j < sh_per_se; j++) { + switch (enabled_rbs & 3) { + case 1: + data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); + break; + case 2: + data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); + break; + case 3: + default: + data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); + break; + } + enabled_rbs >>= 2; + } + WREG32(PA_SC_RASTER_CONFIG, data); + } + si_select_se_sh(rdev, 0xffffffff, 0xffffffff); +} + static void si_gpu_init(struct radeon_device *rdev) { - u32 cc_rb_backend_disable = 0; - u32 cc_gc_shader_array_config; u32 gb_addr_config = 0; u32 mc_shared_chmap, mc_arb_ramcfg; - u32 gb_backend_map; - u32 cgts_tcc_disable; u32 sx_debug_1; - u32 gc_user_shader_array_config; - u32 gc_user_rb_backend_disable; - u32 cgts_user_tcc_disable; u32 hdp_host_path_cntl; u32 tmp; int i, j; @@ -1581,9 +1520,9 @@ static void si_gpu_init(struct radeon_device *rdev) switch (rdev->family) { case CHIP_TAHITI: rdev->config.si.max_shader_engines = 2; - rdev->config.si.max_pipes_per_simd = 4; rdev->config.si.max_tile_pipes = 12; - rdev->config.si.max_simds_per_se = 8; + rdev->config.si.max_cu_per_sh = 8; + rdev->config.si.max_sh_per_se = 2; rdev->config.si.max_backends_per_se = 4; rdev->config.si.max_texture_channel_caches = 12; rdev->config.si.max_gprs = 256; @@ -1594,12 +1533,13 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.sc_prim_fifo_size_backend = 0x100; rdev->config.si.sc_hiz_tile_fifo_size = 0x30; rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_PITCAIRN: rdev->config.si.max_shader_engines = 2; - rdev->config.si.max_pipes_per_simd = 4; rdev->config.si.max_tile_pipes = 8; - rdev->config.si.max_simds_per_se = 5; + rdev->config.si.max_cu_per_sh = 5; + rdev->config.si.max_sh_per_se = 2; rdev->config.si.max_backends_per_se = 4; rdev->config.si.max_texture_channel_caches = 8; rdev->config.si.max_gprs = 256; @@ -1610,13 +1550,14 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.sc_prim_fifo_size_backend = 0x100; rdev->config.si.sc_hiz_tile_fifo_size = 0x30; rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN; break; case CHIP_VERDE: default: rdev->config.si.max_shader_engines = 1; - rdev->config.si.max_pipes_per_simd = 4; rdev->config.si.max_tile_pipes = 4; - rdev->config.si.max_simds_per_se = 2; + rdev->config.si.max_cu_per_sh = 2; + rdev->config.si.max_sh_per_se = 2; rdev->config.si.max_backends_per_se = 4; rdev->config.si.max_texture_channel_caches = 4; rdev->config.si.max_gprs = 256; @@ -1627,6 +1568,7 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.sc_prim_fifo_size_backend = 0x40; rdev->config.si.sc_hiz_tile_fifo_size = 0x30; rdev->config.si.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN; break; } @@ -1648,31 +1590,7 @@ static void si_gpu_init(struct radeon_device *rdev) mc_shared_chmap = RREG32(MC_SHARED_CHMAP); mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG); - cc_rb_backend_disable = RREG32(CC_RB_BACKEND_DISABLE); - cc_gc_shader_array_config = RREG32(CC_GC_SHADER_ARRAY_CONFIG); - cgts_tcc_disable = 0xffff0000; - for (i = 0; i < rdev->config.si.max_texture_channel_caches; i++) - cgts_tcc_disable &= ~(1 << (16 + i)); - gc_user_rb_backend_disable = RREG32(GC_USER_RB_BACKEND_DISABLE); - gc_user_shader_array_config = RREG32(GC_USER_SHADER_ARRAY_CONFIG); - cgts_user_tcc_disable = RREG32(CGTS_USER_TCC_DISABLE); - - rdev->config.si.num_shader_engines = rdev->config.si.max_shader_engines; rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes; - tmp = ((~gc_user_rb_backend_disable) & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; - rdev->config.si.num_backends_per_se = r600_count_pipe_bits(tmp); - tmp = (gc_user_rb_backend_disable & BACKEND_DISABLE_MASK) >> BACKEND_DISABLE_SHIFT; - rdev->config.si.backend_disable_mask_per_asic = - si_get_disable_mask_per_asic(rdev, tmp, SI_MAX_BACKENDS_PER_SE_MASK, - rdev->config.si.num_shader_engines); - rdev->config.si.backend_map = - si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes, - rdev->config.si.num_backends_per_se * - rdev->config.si.num_shader_engines, - &rdev->config.si.backend_disable_mask_per_asic, - rdev->config.si.num_shader_engines); - tmp = ((~cgts_user_tcc_disable) & TCC_DISABLE_MASK) >> TCC_DISABLE_SHIFT; - rdev->config.si.num_texture_channel_caches = r600_count_pipe_bits(tmp); rdev->config.si.mem_max_burst_length_bytes = 256; tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT; rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; @@ -1683,55 +1601,8 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.num_gpus = 1; rdev->config.si.multi_gpu_tile_size = 64; - gb_addr_config = 0; - switch (rdev->config.si.num_tile_pipes) { - case 1: - gb_addr_config |= NUM_PIPES(0); - break; - case 2: - gb_addr_config |= NUM_PIPES(1); - break; - case 4: - gb_addr_config |= NUM_PIPES(2); - break; - case 8: - default: - gb_addr_config |= NUM_PIPES(3); - break; - } - - tmp = (rdev->config.si.mem_max_burst_length_bytes / 256) - 1; - gb_addr_config |= PIPE_INTERLEAVE_SIZE(tmp); - gb_addr_config |= NUM_SHADER_ENGINES(rdev->config.si.num_shader_engines - 1); - tmp = (rdev->config.si.shader_engine_tile_size / 16) - 1; - gb_addr_config |= SHADER_ENGINE_TILE_SIZE(tmp); - switch (rdev->config.si.num_gpus) { - case 1: - default: - gb_addr_config |= NUM_GPUS(0); - break; - case 2: - gb_addr_config |= NUM_GPUS(1); - break; - case 4: - gb_addr_config |= NUM_GPUS(2); - break; - } - switch (rdev->config.si.multi_gpu_tile_size) { - case 16: - gb_addr_config |= MULTI_GPU_TILE_SIZE(0); - break; - case 32: - default: - gb_addr_config |= MULTI_GPU_TILE_SIZE(1); - break; - case 64: - gb_addr_config |= MULTI_GPU_TILE_SIZE(2); - break; - case 128: - gb_addr_config |= MULTI_GPU_TILE_SIZE(3); - break; - } + /* fix up row size */ + gb_addr_config &= ~ROW_SIZE_MASK; switch (rdev->config.si.mem_row_size_in_kb) { case 1: default: @@ -1745,26 +1616,6 @@ static void si_gpu_init(struct radeon_device *rdev) break; } - tmp = (gb_addr_config & NUM_PIPES_MASK) >> NUM_PIPES_SHIFT; - rdev->config.si.num_tile_pipes = (1 << tmp); - tmp = (gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT; - rdev->config.si.mem_max_burst_length_bytes = (tmp + 1) * 256; - tmp = (gb_addr_config & NUM_SHADER_ENGINES_MASK) >> NUM_SHADER_ENGINES_SHIFT; - rdev->config.si.num_shader_engines = tmp + 1; - tmp = (gb_addr_config & NUM_GPUS_MASK) >> NUM_GPUS_SHIFT; - rdev->config.si.num_gpus = tmp + 1; - tmp = (gb_addr_config & MULTI_GPU_TILE_SIZE_MASK) >> MULTI_GPU_TILE_SIZE_SHIFT; - rdev->config.si.multi_gpu_tile_size = 1 << tmp; - tmp = (gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT; - rdev->config.si.mem_row_size_in_kb = 1 << tmp; - - gb_backend_map = - si_get_tile_pipe_to_backend_map(rdev, rdev->config.si.num_tile_pipes, - rdev->config.si.num_backends_per_se * - rdev->config.si.num_shader_engines, - &rdev->config.si.backend_disable_mask_per_asic, - rdev->config.si.num_shader_engines); - /* setup tiling info dword. gb_addr_config is not adequate since it does * not have bank info, so create a custom tiling dword. * bits 3:0 num_pipes @@ -1789,33 +1640,29 @@ static void si_gpu_init(struct radeon_device *rdev) rdev->config.si.tile_config |= (3 << 0); break; } - rdev->config.si.tile_config |= - ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4; + if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) + rdev->config.si.tile_config |= 1 << 4; + else + rdev->config.si.tile_config |= 0 << 4; rdev->config.si.tile_config |= ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; rdev->config.si.tile_config |= ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12; - rdev->config.si.backend_map = gb_backend_map; WREG32(GB_ADDR_CONFIG, gb_addr_config); WREG32(DMIF_ADDR_CONFIG, gb_addr_config); WREG32(HDP_ADDR_CONFIG, gb_addr_config); - /* primary versions */ - WREG32(CC_RB_BACKEND_DISABLE, cc_rb_backend_disable); - WREG32(CC_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); - WREG32(CC_GC_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config); - - WREG32(CGTS_TCC_DISABLE, cgts_tcc_disable); + si_tiling_mode_table_init(rdev); - /* user versions */ - WREG32(GC_USER_RB_BACKEND_DISABLE, cc_rb_backend_disable); - WREG32(GC_USER_SYS_RB_BACKEND_DISABLE, cc_rb_backend_disable); - WREG32(GC_USER_SHADER_ARRAY_CONFIG, cc_gc_shader_array_config); + si_setup_rb(rdev, rdev->config.si.max_shader_engines, + rdev->config.si.max_sh_per_se, + rdev->config.si.max_backends_per_se); - WREG32(CGTS_USER_TCC_DISABLE, cgts_tcc_disable); + si_setup_spi(rdev, rdev->config.si.max_shader_engines, + rdev->config.si.max_sh_per_se, + rdev->config.si.max_cu_per_sh); - si_tiling_mode_table_init(rdev); /* set HW defaults for 3D engine */ WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) | diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index 53ea2c4..db40679 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -24,6 +24,11 @@ #ifndef SI_H #define SI_H +#define TAHITI_RB_BITMAP_WIDTH_PER_SH 2 + +#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003 +#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 + #define CG_MULT_THERMAL_STATUS 0x714 #define ASIC_MAX_TEMP(x) ((x) << 0) #define ASIC_MAX_TEMP_MASK 0x000001ff @@ -408,6 +413,12 @@ #define SOFT_RESET_IA (1 << 15) #define GRBM_GFX_INDEX 0x802C +#define INSTANCE_INDEX(x) ((x) << 0) +#define SH_INDEX(x) ((x) << 8) +#define SE_INDEX(x) ((x) << 16) +#define SH_BROADCAST_WRITES (1 << 29) +#define INSTANCE_BROADCAST_WRITES (1 << 30) +#define SE_BROADCAST_WRITES (1 << 31) #define GRBM_INT_CNTL 0x8060 # define RDERR_INT_ENABLE (1 << 0) @@ -480,6 +491,8 @@ #define VGT_TF_MEMORY_BASE 0x89B8 #define CC_GC_SHADER_ARRAY_CONFIG 0x89bc +#define INACTIVE_CUS_MASK 0xFFFF0000 +#define INACTIVE_CUS_SHIFT 16 #define GC_USER_SHADER_ARRAY_CONFIG 0x89c0 #define PA_CL_ENHANCE 0x8A14 @@ -688,6 +701,12 @@ #define RLC_MC_CNTL 0xC344 #define RLC_UCODE_CNTL 0xC348 +#define PA_SC_RASTER_CONFIG 0x28350 +# define RASTER_CONFIG_RB_MAP_0 0 +# define RASTER_CONFIG_RB_MAP_1 1 +# define RASTER_CONFIG_RB_MAP_2 2 +# define RASTER_CONFIG_RB_MAP_3 3 + #define VGT_EVENT_INITIATOR 0x28a90 # define SAMPLE_STREAMOUTSTATS1 (1 << 0) # define SAMPLE_STREAMOUTSTATS2 (2 << 0) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index b67cfca..36f4b28 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1204,6 +1204,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev, (*destroy)(bo); else kfree(bo); + ttm_mem_global_free(mem_glob, acc_size); return -EINVAL; } bo->destroy = destroy; @@ -1307,22 +1308,14 @@ int ttm_bo_create(struct ttm_bo_device *bdev, struct ttm_buffer_object **p_bo) { struct ttm_buffer_object *bo; - struct ttm_mem_global *mem_glob = bdev->glob->mem_glob; size_t acc_size; int ret; - acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object)); - ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false); - if (unlikely(ret != 0)) - return ret; - bo = kzalloc(sizeof(*bo), GFP_KERNEL); - - if (unlikely(bo == NULL)) { - ttm_mem_global_free(mem_glob, acc_size); + if (unlikely(bo == NULL)) return -ENOMEM; - } + acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object)); ret = ttm_bo_init(bdev, bo, size, type, placement, page_alignment, buffer_start, interruptible, persistent_swap_storage, acc_size, NULL, NULL); diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 38f9534..5b3c7d1 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -190,6 +190,19 @@ find_active_client(struct list_head *head) return NULL; } +int vga_switcheroo_get_client_state(struct pci_dev *pdev) +{ + struct vga_switcheroo_client *client; + + client = find_client_from_pci(&vgasr_priv.clients, pdev); + if (!client) + return VGA_SWITCHEROO_NOT_FOUND; + if (!vgasr_priv.active) + return VGA_SWITCHEROO_INIT; + return client->pwr_state; +} +EXPORT_SYMBOL(vga_switcheroo_get_client_state); + void vga_switcheroo_unregister_client(struct pci_dev *pdev) { struct vga_switcheroo_client *client; @@ -291,8 +304,6 @@ static int vga_switchto_stage1(struct vga_switcheroo_client *new_client) vga_switchon(new_client); vga_set_default_device(new_client->pdev); - set_audio_state(new_client->id, VGA_SWITCHEROO_ON); - return 0; } @@ -308,6 +319,8 @@ static int vga_switchto_stage2(struct vga_switcheroo_client *new_client) active->active = false; + set_audio_state(active->id, VGA_SWITCHEROO_OFF); + if (new_client->fb_info) { struct fb_event event; event.info = new_client->fb_info; @@ -321,11 +334,11 @@ static int vga_switchto_stage2(struct vga_switcheroo_client *new_client) if (new_client->ops->reprobe) new_client->ops->reprobe(new_client->pdev); - set_audio_state(active->id, VGA_SWITCHEROO_OFF); - if (active->pwr_state == VGA_SWITCHEROO_ON) vga_switchoff(active); + set_audio_state(new_client->id, VGA_SWITCHEROO_ON); + new_client->active = true; return 0; } @@ -371,8 +384,9 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, /* pwr off the device not in use */ if (strncmp(usercmd, "OFF", 3) == 0) { list_for_each_entry(client, &vgasr_priv.clients, list) { - if (client->active) + if (client->active || client_is_audio(client)) continue; + set_audio_state(client->id, VGA_SWITCHEROO_OFF); if (client->pwr_state == VGA_SWITCHEROO_ON) vga_switchoff(client); } @@ -381,10 +395,11 @@ vga_switcheroo_debugfs_write(struct file *filp, const char __user *ubuf, /* pwr on the device not in use */ if (strncmp(usercmd, "ON", 2) == 0) { list_for_each_entry(client, &vgasr_priv.clients, list) { - if (client->active) + if (client->active || client_is_audio(client)) continue; if (client->pwr_state == VGA_SWITCHEROO_OFF) vga_switchon(client); + set_audio_state(client->id, VGA_SWITCHEROO_ON); } goto out; } diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig index beb2491..a0edd98 100644 --- a/drivers/i2c/muxes/Kconfig +++ b/drivers/i2c/muxes/Kconfig @@ -37,4 +37,16 @@ config I2C_MUX_PCA954x This driver can also be built as a module. If so, the module will be called i2c-mux-pca954x. +config I2C_MUX_PINCTRL + tristate "pinctrl-based I2C multiplexer" + depends on PINCTRL + help + If you say yes to this option, support will be included for an I2C + multiplexer that uses the pinctrl subsystem, i.e. pin multiplexing. + This is useful for SoCs whose I2C module's signals can be routed to + different sets of pins at run-time. + + This driver can also be built as a module. If so, the module will be + called pinctrl-i2cmux. + endmenu diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile index 5826249..76da869 100644 --- a/drivers/i2c/muxes/Makefile +++ b/drivers/i2c/muxes/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_I2C_MUX_GPIO) += i2c-mux-gpio.o obj-$(CONFIG_I2C_MUX_PCA9541) += i2c-mux-pca9541.o obj-$(CONFIG_I2C_MUX_PCA954x) += i2c-mux-pca954x.o +obj-$(CONFIG_I2C_MUX_PINCTRL) += i2c-mux-pinctrl.o ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG diff --git a/drivers/i2c/muxes/i2c-mux-pinctrl.c b/drivers/i2c/muxes/i2c-mux-pinctrl.c new file mode 100644 index 0000000..46a6697 --- /dev/null +++ b/drivers/i2c/muxes/i2c-mux-pinctrl.c @@ -0,0 +1,279 @@ +/* + * I2C multiplexer using pinctrl API + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/i2c.h> +#include <linux/i2c-mux.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/of_i2c.h> +#include <linux/pinctrl/consumer.h> +#include <linux/i2c-mux-pinctrl.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +struct i2c_mux_pinctrl { + struct device *dev; + struct i2c_mux_pinctrl_platform_data *pdata; + struct pinctrl *pinctrl; + struct pinctrl_state **states; + struct pinctrl_state *state_idle; + struct i2c_adapter *parent; + struct i2c_adapter **busses; +}; + +static int i2c_mux_pinctrl_select(struct i2c_adapter *adap, void *data, + u32 chan) +{ + struct i2c_mux_pinctrl *mux = data; + + return pinctrl_select_state(mux->pinctrl, mux->states[chan]); +} + +static int i2c_mux_pinctrl_deselect(struct i2c_adapter *adap, void *data, + u32 chan) +{ + struct i2c_mux_pinctrl *mux = data; + + return pinctrl_select_state(mux->pinctrl, mux->state_idle); +} + +#ifdef CONFIG_OF +static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux, + struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + int num_names, i, ret; + struct device_node *adapter_np; + struct i2c_adapter *adapter; + + if (!np) + return 0; + + mux->pdata = devm_kzalloc(&pdev->dev, sizeof(*mux->pdata), GFP_KERNEL); + if (!mux->pdata) { + dev_err(mux->dev, + "Cannot allocate i2c_mux_pinctrl_platform_data\n"); + return -ENOMEM; + } + + num_names = of_property_count_strings(np, "pinctrl-names"); + if (num_names < 0) { + dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n", + num_names); + return num_names; + } + + mux->pdata->pinctrl_states = devm_kzalloc(&pdev->dev, + sizeof(*mux->pdata->pinctrl_states) * num_names, + GFP_KERNEL); + if (!mux->pdata->pinctrl_states) { + dev_err(mux->dev, "Cannot allocate pinctrl_states\n"); + return -ENOMEM; + } + + for (i = 0; i < num_names; i++) { + ret = of_property_read_string_index(np, "pinctrl-names", i, + &mux->pdata->pinctrl_states[mux->pdata->bus_count]); + if (ret < 0) { + dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n", + ret); + return ret; + } + if (!strcmp(mux->pdata->pinctrl_states[mux->pdata->bus_count], + "idle")) { + if (i != num_names - 1) { + dev_err(mux->dev, "idle state must be last\n"); + return -EINVAL; + } + mux->pdata->pinctrl_state_idle = "idle"; + } else { + mux->pdata->bus_count++; + } + } + + adapter_np = of_parse_phandle(np, "i2c-parent", 0); + if (!adapter_np) { + dev_err(mux->dev, "Cannot parse i2c-parent\n"); + return -ENODEV; + } + adapter = of_find_i2c_adapter_by_node(adapter_np); + if (!adapter) { + dev_err(mux->dev, "Cannot find parent bus\n"); + return -ENODEV; + } + mux->pdata->parent_bus_num = i2c_adapter_id(adapter); + put_device(&adapter->dev); + + return 0; +} +#else +static inline int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux, + struct platform_device *pdev) +{ + return 0; +} +#endif + +static int __devinit i2c_mux_pinctrl_probe(struct platform_device *pdev) +{ + struct i2c_mux_pinctrl *mux; + int (*deselect)(struct i2c_adapter *, void *, u32); + int i, ret; + + mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL); + if (!mux) { + dev_err(&pdev->dev, "Cannot allocate i2c_mux_pinctrl\n"); + ret = -ENOMEM; + goto err; + } + platform_set_drvdata(pdev, mux); + + mux->dev = &pdev->dev; + + mux->pdata = pdev->dev.platform_data; + if (!mux->pdata) { + ret = i2c_mux_pinctrl_parse_dt(mux, pdev); + if (ret < 0) + goto err; + } + if (!mux->pdata) { + dev_err(&pdev->dev, "Missing platform data\n"); + ret = -ENODEV; + goto err; + } + + mux->states = devm_kzalloc(&pdev->dev, + sizeof(*mux->states) * mux->pdata->bus_count, + GFP_KERNEL); + if (!mux->states) { + dev_err(&pdev->dev, "Cannot allocate states\n"); + ret = -ENOMEM; + goto err; + } + + mux->busses = devm_kzalloc(&pdev->dev, + sizeof(mux->busses) * mux->pdata->bus_count, + GFP_KERNEL); + if (!mux->states) { + dev_err(&pdev->dev, "Cannot allocate busses\n"); + ret = -ENOMEM; + goto err; + } + + mux->pinctrl = devm_pinctrl_get(&pdev->dev); + if (IS_ERR(mux->pinctrl)) { + ret = PTR_ERR(mux->pinctrl); + dev_err(&pdev->dev, "Cannot get pinctrl: %d\n", ret); + goto err; + } + for (i = 0; i < mux->pdata->bus_count; i++) { + mux->states[i] = pinctrl_lookup_state(mux->pinctrl, + mux->pdata->pinctrl_states[i]); + if (IS_ERR(mux->states[i])) { + ret = PTR_ERR(mux->states[i]); + dev_err(&pdev->dev, + "Cannot look up pinctrl state %s: %d\n", + mux->pdata->pinctrl_states[i], ret); + goto err; + } + } + if (mux->pdata->pinctrl_state_idle) { + mux->state_idle = pinctrl_lookup_state(mux->pinctrl, + mux->pdata->pinctrl_state_idle); + if (IS_ERR(mux->state_idle)) { + ret = PTR_ERR(mux->state_idle); + dev_err(&pdev->dev, + "Cannot look up pinctrl state %s: %d\n", + mux->pdata->pinctrl_state_idle, ret); + goto err; + } + + deselect = i2c_mux_pinctrl_deselect; + } else { + deselect = NULL; + } + + mux->parent = i2c_get_adapter(mux->pdata->parent_bus_num); + if (!mux->parent) { + dev_err(&pdev->dev, "Parent adapter (%d) not found\n", + mux->pdata->parent_bus_num); + ret = -ENODEV; + goto err; + } + + for (i = 0; i < mux->pdata->bus_count; i++) { + u32 bus = mux->pdata->base_bus_num ? + (mux->pdata->base_bus_num + i) : 0; + + mux->busses[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev, + mux, bus, i, + i2c_mux_pinctrl_select, + deselect); + if (!mux->busses[i]) { + ret = -ENODEV; + dev_err(&pdev->dev, "Failed to add adapter %d\n", i); + goto err_del_adapter; + } + } + + return 0; + +err_del_adapter: + for (; i > 0; i--) + i2c_del_mux_adapter(mux->busses[i - 1]); + i2c_put_adapter(mux->parent); +err: + return ret; +} + +static int __devexit i2c_mux_pinctrl_remove(struct platform_device *pdev) +{ + struct i2c_mux_pinctrl *mux = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < mux->pdata->bus_count; i++) + i2c_del_mux_adapter(mux->busses[i]); + + i2c_put_adapter(mux->parent); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id i2c_mux_pinctrl_of_match[] __devinitconst = { + { .compatible = "i2c-mux-pinctrl", }, + {}, +}; +MODULE_DEVICE_TABLE(of, i2c_mux_pinctrl_of_match); +#endif + +static struct platform_driver i2c_mux_pinctrl_driver = { + .driver = { + .name = "i2c-mux-pinctrl", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(i2c_mux_pinctrl_of_match), + }, + .probe = i2c_mux_pinctrl_probe, + .remove = __devexit_p(i2c_mux_pinctrl_remove), +}; +module_platform_driver(i2c_mux_pinctrl_driver); + +MODULE_DESCRIPTION("pinctrl-based I2C multiplexer driver"); +MODULE_AUTHOR("Stephen Warren <swarren@nvidia.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:i2c-mux-pinctrl"); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 55ab284e..b18870c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1593,6 +1593,10 @@ static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, struct net_device *pdev; pdev = ip_dev_find(&init_net, peer_ip); + if (!pdev) { + err = -ENODEV; + goto out; + } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, 0); if (!ep->l2t) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ee1c577..3530c41 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -140,7 +140,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_mr_size = ~0ull; props->page_size_cap = dev->dev->caps.page_size_cap; props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; - props->max_qp_wr = dev->dev->caps.max_wqes; + props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; props->max_sge = min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg); props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; @@ -1084,12 +1084,9 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) int total_eqs = 0; int i, j, eq; - /* Init eq table */ - ibdev->eq_table = NULL; - ibdev->eq_added = 0; - - /* Legacy mode? */ - if (dev->caps.comp_pool == 0) + /* Legacy mode or comp_pool is not large enough */ + if (dev->caps.comp_pool == 0 || + dev->caps.num_ports > dev->caps.comp_pool) return; eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/ @@ -1135,7 +1132,10 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { int i; - int total_eqs; + + /* no additional eqs were added */ + if (!ibdev->eq_table) + return; /* Reset the advertised EQ number */ ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; @@ -1148,12 +1148,7 @@ static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) mlx4_release_eq(dev, ibdev->eq_table[i]); } - total_eqs = dev->caps.num_comp_vectors + ibdev->eq_added; - memset(ibdev->eq_table, 0, total_eqs * sizeof(int)); kfree(ibdev->eq_table); - - ibdev->eq_table = NULL; - ibdev->eq_added = 0; } static void *mlx4_ib_add(struct mlx4_dev *dev) diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e62297c..ff36655 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -44,6 +44,14 @@ #include <linux/mlx4/device.h> #include <linux/mlx4/doorbell.h> +enum { + MLX4_IB_SQ_MIN_WQE_SHIFT = 6, + MLX4_IB_MAX_HEADROOM = 2048 +}; + +#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1) +#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT)) + struct mlx4_ib_ucontext { struct ib_ucontext ibucontext; struct mlx4_uar uar; diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index ceb3332..8d4ed24 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -310,8 +310,8 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, int is_user, int has_rq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ - if (cap->max_recv_wr > dev->dev->caps.max_wqes || - cap->max_recv_sge > dev->dev->caps.max_rq_sg) + if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || + cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) return -EINVAL; if (!has_rq) { @@ -329,8 +329,17 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); } - cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; - cap->max_recv_sge = qp->rq.max_gs; + /* leave userspace return values as they were, so as not to break ABI */ + if (is_user) { + cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt; + cap->max_recv_sge = qp->rq.max_gs; + } else { + cap->max_recv_wr = qp->rq.max_post = + min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt); + cap->max_recv_sge = min(qp->rq.max_gs, + min(dev->dev->caps.max_sq_sg, + dev->dev->caps.max_rq_sg)); + } return 0; } @@ -341,8 +350,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, int s; /* Sanity check SQ size before proceeding */ - if (cap->max_send_wr > dev->dev->caps.max_wqes || - cap->max_send_sge > dev->dev->caps.max_sq_sg || + if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) || + cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) || cap->max_inline_data + send_wqe_overhead(type, qp->flags) + sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) return -EINVAL; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 85a69c9..037f5ce 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -231,7 +231,6 @@ struct ocrdma_qp_hwq_info { u32 entry_size; u32 max_cnt; u32 max_wqe_idx; - u32 free_delta; u16 dbid; /* qid, where to ring the doorbell. */ u32 len; dma_addr_t pa; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h index a411a4e..517ab20 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h @@ -101,8 +101,6 @@ struct ocrdma_create_qp_uresp { u32 rsvd1; u32 num_wqe_allocated; u32 num_rqe_allocated; - u32 free_wqe_delta; - u32 free_rqe_delta; u32 db_sq_offset; u32 db_rq_offset; u32 db_shift; @@ -126,8 +124,7 @@ struct ocrdma_create_srq_uresp { u32 db_rq_offset; u32 db_shift; - u32 free_rqe_delta; - u32 rsvd2; + u64 rsvd2; u64 rsvd3; } __packed; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 9b204b1..9343a15 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -732,7 +732,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, break; case OCRDMA_SRQ_LIMIT_EVENT: ib_evt.element.srq = &qp->srq->ibsrq; - ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED; + ib_evt.event = IB_EVENT_SRQ_LIMIT_REACHED; srq_event = 1; qp_event = 0; break; @@ -1990,19 +1990,12 @@ static void ocrdma_get_create_qp_rsp(struct ocrdma_create_qp_rsp *rsp, max_wqe_allocated = 1 << max_wqe_allocated; max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe); - if (qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { - qp->sq.free_delta = 0; - qp->rq.free_delta = 1; - } else - qp->sq.free_delta = 1; - qp->sq.max_cnt = max_wqe_allocated; qp->sq.max_wqe_idx = max_wqe_allocated - 1; if (!attrs->srq) { qp->rq.max_cnt = max_rqe_allocated; qp->rq.max_wqe_idx = max_rqe_allocated - 1; - qp->rq.free_delta = 1; } } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index a20d16e..04fef3d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -26,7 +26,6 @@ *******************************************************************/ #include <linux/module.h> -#include <linux/version.h> #include <linux/idr.h> #include <rdma/ib_verbs.h> #include <rdma/ib_user_verbs.h> diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index e9f74d1..d16d172 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -940,8 +940,6 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp, uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET; uresp.db_shift = 16; } - uresp.free_wqe_delta = qp->sq.free_delta; - uresp.free_rqe_delta = qp->rq.free_delta; if (qp->dpp_enabled) { uresp.dpp_credit = dpp_credit_lmt; @@ -1307,8 +1305,6 @@ static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q) free_cnt = (q->max_cnt - q->head) + q->tail; else free_cnt = q->tail - q->head; - if (q->free_delta) - free_cnt -= q->free_delta; return free_cnt; } @@ -1501,7 +1497,6 @@ static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata) (srq->pd->id * srq->dev->nic_info.db_page_size); uresp.db_page_size = srq->dev->nic_info.db_page_size; uresp.num_rqe_allocated = srq->rq.max_cnt; - uresp.free_rqe_delta = 1; if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) { uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET; uresp.db_shift = 24; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h index e648343..633f03d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h @@ -28,7 +28,6 @@ #ifndef __OCRDMA_VERBS_H__ #define __OCRDMA_VERBS_H__ -#include <linux/version.h> int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *, struct ib_send_wr **bad_wr); int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *, diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d90a421..a2e418c 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -547,26 +547,12 @@ static void iommu_poll_events(struct amd_iommu *iommu) spin_unlock_irqrestore(&iommu->lock, flags); } -static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) +static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) { struct amd_iommu_fault fault; - volatile u64 *raw; - int i; INC_STATS_COUNTER(pri_requests); - raw = (u64 *)(iommu->ppr_log + head); - - /* - * Hardware bug: Interrupt may arrive before the entry is written to - * memory. If this happens we need to wait for the entry to arrive. - */ - for (i = 0; i < LOOP_TIMEOUT; ++i) { - if (PPR_REQ_TYPE(raw[0]) != 0) - break; - udelay(1); - } - if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { pr_err_ratelimited("AMD-Vi: Unknown PPR request received\n"); return; @@ -578,12 +564,6 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u32 head) fault.tag = PPR_TAG(raw[0]); fault.flags = PPR_FLAGS(raw[0]); - /* - * To detect the hardware bug we need to clear the entry - * to back to zero. - */ - raw[0] = raw[1] = 0; - atomic_notifier_call_chain(&ppr_notifier, 0, &fault); } @@ -595,25 +575,62 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) if (iommu->ppr_log == NULL) return; + /* enable ppr interrupts again */ + writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); + spin_lock_irqsave(&iommu->lock, flags); head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); while (head != tail) { + volatile u64 *raw; + u64 entry[2]; + int i; - /* Handle PPR entry */ - iommu_handle_ppr_entry(iommu, head); + raw = (u64 *)(iommu->ppr_log + head); + + /* + * Hardware bug: Interrupt may arrive before the entry is + * written to memory. If this happens we need to wait for the + * entry to arrive. + */ + for (i = 0; i < LOOP_TIMEOUT; ++i) { + if (PPR_REQ_TYPE(raw[0]) != 0) + break; + udelay(1); + } + + /* Avoid memcpy function-call overhead */ + entry[0] = raw[0]; + entry[1] = raw[1]; - /* Update and refresh ring-buffer state*/ + /* + * To detect the hardware bug we need to clear the entry + * back to zero. + */ + raw[0] = raw[1] = 0UL; + + /* Update head pointer of hardware ring-buffer */ head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); + + /* + * Release iommu->lock because ppr-handling might need to + * re-aquire it + */ + spin_unlock_irqrestore(&iommu->lock, flags); + + /* Handle PPR entry */ + iommu_handle_ppr_entry(iommu, entry); + + spin_lock_irqsave(&iommu->lock, flags); + + /* Refresh ring-buffer information */ + head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); } - /* enable ppr interrupts again */ - writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); - spin_unlock_irqrestore(&iommu->lock, flags); } diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index c567903..542024b 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1029,6 +1029,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (!iommu->dev) return 1; + iommu->root_pdev = pci_get_bus_and_slot(iommu->dev->bus->number, + PCI_DEVFN(0, 0)); + iommu->cap_ptr = h->cap_ptr; iommu->pci_seg = h->pci_seg; iommu->mmio_phys = h->mmio_phys; @@ -1323,20 +1326,16 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu) { int i, j; u32 ioc_feature_control; - struct pci_dev *pdev = NULL; + struct pci_dev *pdev = iommu->root_pdev; /* RD890 BIOSes may not have completely reconfigured the iommu */ - if (!is_rd890_iommu(iommu->dev)) + if (!is_rd890_iommu(iommu->dev) || !pdev) return; /* * First, we need to ensure that the iommu is enabled. This is * controlled by a register in the northbridge */ - pdev = pci_get_bus_and_slot(iommu->dev->bus->number, PCI_DEVFN(0, 0)); - - if (!pdev) - return; /* Select Northbridge indirect register 0x75 and enable writing */ pci_write_config_dword(pdev, 0x60, 0x75 | (1 << 7)); @@ -1346,8 +1345,6 @@ static void iommu_apply_resume_quirks(struct amd_iommu *iommu) if (!(ioc_feature_control & 0x1)) pci_write_config_dword(pdev, 0x64, ioc_feature_control | 1); - pci_dev_put(pdev); - /* Restore the iommu BAR */ pci_write_config_dword(iommu->dev, iommu->cap_ptr + 4, iommu->stored_addr_lo); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 2452f3b..2435555 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -481,6 +481,9 @@ struct amd_iommu { /* Pointer to PCI device of this IOMMU */ struct pci_dev *dev; + /* Cache pdev to root device for resume quirks */ + struct pci_dev *root_pdev; + /* physical address of MMIO space */ u64 mmio_phys; /* virtual address of MMIO space */ diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index 04cb8c8..12b2b55 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -379,7 +379,7 @@ config LEDS_NETXBIG config LEDS_ASIC3 bool "LED support for the HTC ASIC3" - depends on LEDS_CLASS + depends on LEDS_CLASS=y depends on MFD_ASIC3 default y help @@ -390,7 +390,7 @@ config LEDS_ASIC3 config LEDS_RENESAS_TPU bool "LED support for Renesas TPU" - depends on LEDS_CLASS && HAVE_CLK && GENERIC_GPIO + depends on LEDS_CLASS=y && HAVE_CLK && GENERIC_GPIO help This option enables build of the LED TPU platform driver, suitable to drive any TPU channel on newer Renesas SoCs. diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 8ee92c8..e663e6f 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -29,7 +29,7 @@ static void led_update_brightness(struct led_classdev *led_cdev) led_cdev->brightness = led_cdev->brightness_get(led_cdev); } -static ssize_t led_brightness_show(struct device *dev, +static ssize_t led_brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { struct led_classdev *led_cdev = dev_get_drvdata(dev); diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index d686004..d65353d 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -44,13 +44,6 @@ static void led_set_software_blink(struct led_classdev *led_cdev, if (!led_cdev->blink_brightness) led_cdev->blink_brightness = led_cdev->max_brightness; - if (led_get_trigger_data(led_cdev) && - delay_on == led_cdev->blink_delay_on && - delay_off == led_cdev->blink_delay_off) - return; - - led_stop_software_blink(led_cdev); - led_cdev->blink_delay_on = delay_on; led_cdev->blink_delay_off = delay_off; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 835de71..a9c7981 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2550,6 +2550,7 @@ static struct r1conf *setup_conf(struct mddev *mddev) err = -EINVAL; spin_lock_init(&conf->device_lock); rdev_for_each(rdev, mddev) { + struct request_queue *q; int disk_idx = rdev->raid_disk; if (disk_idx >= mddev->raid_disks || disk_idx < 0) @@ -2562,6 +2563,9 @@ static struct r1conf *setup_conf(struct mddev *mddev) if (disk->rdev) goto abort; disk->rdev = rdev; + q = bdev_get_queue(rdev->bdev); + if (q->merge_bvec_fn) + mddev->merge_check_needed = 1; disk->head_position = 0; } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 987db37..99ae606 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3475,6 +3475,7 @@ static int run(struct mddev *mddev) rdev_for_each(rdev, mddev) { long long diff; + struct request_queue *q; disk_idx = rdev->raid_disk; if (disk_idx < 0) @@ -3493,6 +3494,9 @@ static int run(struct mddev *mddev) goto out_free_conf; disk->rdev = rdev; } + q = bdev_get_queue(rdev->bdev); + if (q->merge_bvec_fn) + mddev->merge_check_needed = 1; diff = (rdev->new_data_offset - rdev->data_offset); if (!mddev->reshape_backwards) diff = -diff; diff --git a/drivers/mtd/ubi/debug.c b/drivers/mtd/ubi/debug.c index 9f957c2..09d4f8d 100644 --- a/drivers/mtd/ubi/debug.c +++ b/drivers/mtd/ubi/debug.c @@ -264,6 +264,9 @@ static struct dentry *dfs_rootdir; */ int ubi_debugfs_init(void) { + if (!IS_ENABLED(DEBUG_FS)) + return 0; + dfs_rootdir = debugfs_create_dir("ubi", NULL); if (IS_ERR_OR_NULL(dfs_rootdir)) { int err = dfs_rootdir ? -ENODEV : PTR_ERR(dfs_rootdir); @@ -281,7 +284,8 @@ int ubi_debugfs_init(void) */ void ubi_debugfs_exit(void) { - debugfs_remove(dfs_rootdir); + if (IS_ENABLED(DEBUG_FS)) + debugfs_remove(dfs_rootdir); } /* Read an UBI debugfs file */ @@ -403,6 +407,9 @@ int ubi_debugfs_init_dev(struct ubi_device *ubi) struct dentry *dent; struct ubi_debug_info *d = ubi->dbg; + if (!IS_ENABLED(DEBUG_FS)) + return 0; + n = snprintf(d->dfs_dir_name, UBI_DFS_DIR_LEN + 1, UBI_DFS_DIR_NAME, ubi->ubi_num); if (n == UBI_DFS_DIR_LEN) { @@ -470,5 +477,6 @@ out: */ void ubi_debugfs_exit_dev(struct ubi_device *ubi) { - debugfs_remove_recursive(ubi->dbg->dfs_dir); + if (IS_ENABLED(DEBUG_FS)) + debugfs_remove_recursive(ubi->dbg->dfs_dir); } diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 9df100a..b6be644 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1262,11 +1262,11 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) dbg_wl("flush pending work for LEB %d:%d (%d pending works)", vol_id, lnum, ubi->works_count); - down_write(&ubi->work_sem); while (found) { struct ubi_work *wrk; found = 0; + down_read(&ubi->work_sem); spin_lock(&ubi->wl_lock); list_for_each_entry(wrk, &ubi->works, list) { if ((vol_id == UBI_ALL || wrk->vol_id == vol_id) && @@ -1277,18 +1277,27 @@ int ubi_wl_flush(struct ubi_device *ubi, int vol_id, int lnum) spin_unlock(&ubi->wl_lock); err = wrk->func(ubi, wrk, 0); - if (err) - goto out; + if (err) { + up_read(&ubi->work_sem); + return err; + } + spin_lock(&ubi->wl_lock); found = 1; break; } } spin_unlock(&ubi->wl_lock); + up_read(&ubi->work_sem); } -out: + /* + * Make sure all the works which have been done in parallel are + * finished. + */ + down_write(&ubi->work_sem); up_write(&ubi->work_sem); + return err; } diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 3031e04..a030e63 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2454,7 +2454,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev) out: if (res) { /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); + kfree_skb(skb); } return NETDEV_TX_OK; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index ef3791a..e15cc11 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1346,12 +1346,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev) } } + read_unlock(&bond->curr_slave_lock); + if (res) { /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); + kfree_skb(skb); } - read_unlock(&bond->curr_slave_lock); - return NETDEV_TX_OK; } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index af50632..f5a40b9 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3990,7 +3990,7 @@ static int bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev out: if (res) { /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); + kfree_skb(skb); } return NETDEV_TX_OK; @@ -4012,11 +4012,11 @@ static int bond_xmit_activebackup(struct sk_buff *skb, struct net_device *bond_d res = bond_dev_queue_xmit(bond, skb, bond->curr_active_slave->dev); + read_unlock(&bond->curr_slave_lock); + if (res) /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); - - read_unlock(&bond->curr_slave_lock); + kfree_skb(skb); return NETDEV_TX_OK; } @@ -4055,7 +4055,7 @@ static int bond_xmit_xor(struct sk_buff *skb, struct net_device *bond_dev) if (res) { /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); + kfree_skb(skb); } return NETDEV_TX_OK; @@ -4093,7 +4093,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) res = bond_dev_queue_xmit(bond, skb2, tx_dev); if (res) { - dev_kfree_skb(skb2); + kfree_skb(skb2); continue; } } @@ -4107,7 +4107,7 @@ static int bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev) out: if (res) /* no suitable interface, frame not sent */ - dev_kfree_skb(skb); + kfree_skb(skb); /* frame sent to all suitable interfaces */ return NETDEV_TX_OK; @@ -4213,7 +4213,7 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev pr_err("%s: Error: Unknown bonding mode %d\n", dev->name, bond->params.mode); WARN_ON_ONCE(1); - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } } @@ -4235,7 +4235,7 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) if (bond->slave_cnt) ret = __bond_start_xmit(skb, dev); else - dev_kfree_skb(skb); + kfree_skb(skb); read_unlock(&bond->lock); diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index e2ce508..eea6608 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -594,8 +594,8 @@ static void c_can_chip_config(struct net_device *dev) priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_ENABLE_AR); - if (priv->can.ctrlmode & (CAN_CTRLMODE_LISTENONLY & - CAN_CTRLMODE_LOOPBACK)) { + if ((priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) && + (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK)) { /* loopback + silent mode : useful for hot self-test */ priv->write_reg(priv, C_CAN_CTRL_REG, CONTROL_EIE | CONTROL_SIE | CONTROL_IE | CONTROL_TEST); diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index ff5d3c1..9b69a62 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/stringify.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/errno.h> @@ -57,8 +58,8 @@ #include "bnx2_fw.h" #define DRV_MODULE_NAME "bnx2" -#define DRV_MODULE_VERSION "2.2.1" -#define DRV_MODULE_RELDATE "Dec 18, 2011" +#define DRV_MODULE_VERSION "2.2.2" +#define DRV_MODULE_RELDATE "June 16, 2012" #define FW_MIPS_FILE_06 "bnx2/bnx2-mips-06-6.2.3.fw" #define FW_RV2P_FILE_06 "bnx2/bnx2-rv2p-06-6.0.15.fw" #define FW_MIPS_FILE_09 "bnx2/bnx2-mips-09-6.2.1b.fw" @@ -2472,6 +2473,7 @@ bnx2_dump_mcp_state(struct bnx2 *bp) bnx2_shmem_rd(bp, BNX2_BC_STATE_RESET_TYPE)); pr_cont(" condition[%08x]\n", bnx2_shmem_rd(bp, BNX2_BC_STATE_CONDITION)); + DP_SHMEM_LINE(bp, BNX2_BC_RESET_TYPE); DP_SHMEM_LINE(bp, 0x3cc); DP_SHMEM_LINE(bp, 0x3dc); DP_SHMEM_LINE(bp, 0x3ec); @@ -6405,6 +6407,75 @@ bnx2_reset_task(struct work_struct *work) rtnl_unlock(); } +#define BNX2_FTQ_ENTRY(ftq) { __stringify(ftq##FTQ_CTL), BNX2_##ftq##FTQ_CTL } + +static void +bnx2_dump_ftq(struct bnx2 *bp) +{ + int i; + u32 reg, bdidx, cid, valid; + struct net_device *dev = bp->dev; + static const struct ftq_reg { + char *name; + u32 off; + } ftq_arr[] = { + BNX2_FTQ_ENTRY(RV2P_P), + BNX2_FTQ_ENTRY(RV2P_T), + BNX2_FTQ_ENTRY(RV2P_M), + BNX2_FTQ_ENTRY(TBDR_), + BNX2_FTQ_ENTRY(TDMA_), + BNX2_FTQ_ENTRY(TXP_), + BNX2_FTQ_ENTRY(TXP_), + BNX2_FTQ_ENTRY(TPAT_), + BNX2_FTQ_ENTRY(RXP_C), + BNX2_FTQ_ENTRY(RXP_), + BNX2_FTQ_ENTRY(COM_COMXQ_), + BNX2_FTQ_ENTRY(COM_COMTQ_), + BNX2_FTQ_ENTRY(COM_COMQ_), + BNX2_FTQ_ENTRY(CP_CPQ_), + }; + + netdev_err(dev, "<--- start FTQ dump --->\n"); + for (i = 0; i < ARRAY_SIZE(ftq_arr); i++) + netdev_err(dev, "%s %08x\n", ftq_arr[i].name, + bnx2_reg_rd_ind(bp, ftq_arr[i].off)); + + netdev_err(dev, "CPU states:\n"); + for (reg = BNX2_TXP_CPU_MODE; reg <= BNX2_CP_CPU_MODE; reg += 0x40000) + netdev_err(dev, "%06x mode %x state %x evt_mask %x pc %x pc %x instr %x\n", + reg, bnx2_reg_rd_ind(bp, reg), + bnx2_reg_rd_ind(bp, reg + 4), + bnx2_reg_rd_ind(bp, reg + 8), + bnx2_reg_rd_ind(bp, reg + 0x1c), + bnx2_reg_rd_ind(bp, reg + 0x1c), + bnx2_reg_rd_ind(bp, reg + 0x20)); + + netdev_err(dev, "<--- end FTQ dump --->\n"); + netdev_err(dev, "<--- start TBDC dump --->\n"); + netdev_err(dev, "TBDC free cnt: %ld\n", + REG_RD(bp, BNX2_TBDC_STATUS) & BNX2_TBDC_STATUS_FREE_CNT); + netdev_err(dev, "LINE CID BIDX CMD VALIDS\n"); + for (i = 0; i < 0x20; i++) { + int j = 0; + + REG_WR(bp, BNX2_TBDC_BD_ADDR, i); + REG_WR(bp, BNX2_TBDC_CAM_OPCODE, + BNX2_TBDC_CAM_OPCODE_OPCODE_CAM_READ); + REG_WR(bp, BNX2_TBDC_COMMAND, BNX2_TBDC_COMMAND_CMD_REG_ARB); + while ((REG_RD(bp, BNX2_TBDC_COMMAND) & + BNX2_TBDC_COMMAND_CMD_REG_ARB) && j < 100) + j++; + + cid = REG_RD(bp, BNX2_TBDC_CID); + bdidx = REG_RD(bp, BNX2_TBDC_BIDX); + valid = REG_RD(bp, BNX2_TBDC_CAM_OPCODE); + netdev_err(dev, "%02x %06x %04lx %02x [%x]\n", + i, cid, bdidx & BNX2_TBDC_BDIDX_BDIDX, + bdidx >> 24, (valid >> 8) & 0x0ff); + } + netdev_err(dev, "<--- end TBDC dump --->\n"); +} + static void bnx2_dump_state(struct bnx2 *bp) { @@ -6434,6 +6505,7 @@ bnx2_tx_timeout(struct net_device *dev) { struct bnx2 *bp = netdev_priv(dev); + bnx2_dump_ftq(bp); bnx2_dump_state(bp); bnx2_dump_mcp_state(bp); @@ -7831,7 +7903,7 @@ bnx2_get_5709_media(struct bnx2 *bp) else strap = (val & BNX2_MISC_DUAL_MEDIA_CTRL_PHY_CTRL_STRAP) >> 8; - if (PCI_FUNC(bp->pdev->devfn) == 0) { + if (bp->func == 0) { switch (strap) { case 0x4: case 0x5: @@ -8130,9 +8202,12 @@ bnx2_init_board(struct pci_dev *pdev, struct net_device *dev) reg = bnx2_reg_rd_ind(bp, BNX2_SHM_HDR_SIGNATURE); + if (bnx2_reg_rd_ind(bp, BNX2_MCP_TOE_ID) & BNX2_MCP_TOE_ID_FUNCTION_ID) + bp->func = 1; + if ((reg & BNX2_SHM_HDR_SIGNATURE_SIG_MASK) == BNX2_SHM_HDR_SIGNATURE_SIG) { - u32 off = PCI_FUNC(pdev->devfn) << 2; + u32 off = bp->func << 2; bp->shmem_base = bnx2_reg_rd_ind(bp, BNX2_SHM_HDR_ADDR_0 + off); } else diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h index dc06bda..af6451d 100644 --- a/drivers/net/ethernet/broadcom/bnx2.h +++ b/drivers/net/ethernet/broadcom/bnx2.h @@ -4642,6 +4642,47 @@ struct l2_fhdr { #define BNX2_TBDR_FTQ_CTL_CUR_DEPTH (0x3ffL<<22) +/* + * tbdc definition + * offset: 0x5400 + */ +#define BNX2_TBDC_COMMAND 0x5400 +#define BNX2_TBDC_COMMAND_CMD_ENABLED (1UL<<0) +#define BNX2_TBDC_COMMAND_CMD_FLUSH (1UL<<1) +#define BNX2_TBDC_COMMAND_CMD_SOFT_RST (1UL<<2) +#define BNX2_TBDC_COMMAND_CMD_REG_ARB (1UL<<3) +#define BNX2_TBDC_COMMAND_WRCHK_RANGE_ERROR (1UL<<4) +#define BNX2_TBDC_COMMAND_WRCHK_ALL_ONES_ERROR (1UL<<5) +#define BNX2_TBDC_COMMAND_WRCHK_ALL_ZEROS_ERROR (1UL<<6) +#define BNX2_TBDC_COMMAND_WRCHK_ANY_ONES_ERROR (1UL<<7) +#define BNX2_TBDC_COMMAND_WRCHK_ANY_ZEROS_ERROR (1UL<<8) + +#define BNX2_TBDC_STATUS 0x5404 +#define BNX2_TBDC_STATUS_FREE_CNT (0x3fUL<<0) + +#define BNX2_TBDC_BD_ADDR 0x5424 + +#define BNX2_TBDC_BIDX 0x542c +#define BNX2_TBDC_BDIDX_BDIDX (0xffffUL<<0) +#define BNX2_TBDC_BDIDX_CMD (0xffUL<<24) + +#define BNX2_TBDC_CID 0x5430 + +#define BNX2_TBDC_CAM_OPCODE 0x5434 +#define BNX2_TBDC_CAM_OPCODE_OPCODE (0x7UL<<0) +#define BNX2_TBDC_CAM_OPCODE_OPCODE_SEARCH (0UL<<0) +#define BNX2_TBDC_CAM_OPCODE_OPCODE_CACHE_WRITE (1UL<<0) +#define BNX2_TBDC_CAM_OPCODE_OPCODE_INVALIDATE (2UL<<0) +#define BNX2_TBDC_CAM_OPCODE_OPCODE_CAM_WRITE (4UL<<0) +#define BNX2_TBDC_CAM_OPCODE_OPCODE_CAM_READ (5UL<<0) +#define BNX2_TBDC_CAM_OPCODE_OPCODE_RAM_WRITE (6UL<<0) +#define BNX2_TBDC_CAM_OPCODE_OPCODE_RAM_READ (7UL<<0) +#define BNX2_TBDC_CAM_OPCODE_SMASK_BDIDX (1UL<<4) +#define BNX2_TBDC_CAM_OPCODE_SMASK_CID (1UL<<5) +#define BNX2_TBDC_CAM_OPCODE_SMASK_CMD (1UL<<6) +#define BNX2_TBDC_CAM_OPCODE_WMT_FAILED (1UL<<7) +#define BNX2_TBDC_CAM_OPCODE_CAM_VALIDS (0xffUL<<8) + /* * tdma_reg definition @@ -6930,6 +6971,8 @@ struct bnx2 { struct bnx2_irq irq_tbl[BNX2_MAX_MSIX_VEC]; int irq_nvecs; + u8 func; + u8 num_tx_rings; u8 num_rx_rings; @@ -7314,6 +7357,8 @@ struct bnx2_rv2p_fw_file { #define BNX2_BC_STATE_RESET_TYPE_VALUE(msg) (BNX2_BC_STATE_RESET_TYPE_SIG | \ (msg)) +#define BNX2_BC_RESET_TYPE 0x000001c0 + #define BNX2_BC_STATE 0x000001c4 #define BNX2_BC_STATE_ERR_MASK 0x0000ff00 #define BNX2_BC_STATE_SIGN 0x42530000 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index e30e2a2..7211cb0 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -23,8 +23,8 @@ * (you will need to reboot afterwards) */ /* #define BNX2X_STOP_ON_ERROR */ -#define DRV_MODULE_VERSION "1.72.50-0" -#define DRV_MODULE_RELDATE "2012/04/23" +#define DRV_MODULE_VERSION "1.72.51-0" +#define DRV_MODULE_RELDATE "2012/06/18" #define BNX2X_BC_VER 0x040200 #if defined(CONFIG_DCB) @@ -248,13 +248,12 @@ enum { BNX2X_MAX_CNIC_ETH_CL_ID_IDX, }; -#define BNX2X_CNIC_START_ETH_CID 48 -enum { +#define BNX2X_CNIC_START_ETH_CID(bp) (BNX2X_NUM_NON_CNIC_QUEUES(bp) *\ + (bp)->max_cos) /* iSCSI L2 */ - BNX2X_ISCSI_ETH_CID = BNX2X_CNIC_START_ETH_CID, +#define BNX2X_ISCSI_ETH_CID(bp) (BNX2X_CNIC_START_ETH_CID(bp)) /* FCoE L2 */ - BNX2X_FCOE_ETH_CID, -}; +#define BNX2X_FCOE_ETH_CID(bp) (BNX2X_CNIC_START_ETH_CID(bp) + 1) /** Additional rings budgeting */ #ifdef BCM_CNIC @@ -276,29 +275,30 @@ enum { #define FIRST_TX_ONLY_COS_INDEX 1 #define FIRST_TX_COS_INDEX 0 -/* defines for decodeing the fastpath index and the cos index out of the - * transmission queue index - */ -#define MAX_TXQS_PER_COS FP_SB_MAX_E1x - -#define TXQ_TO_FP(txq_index) ((txq_index) % MAX_TXQS_PER_COS) -#define TXQ_TO_COS(txq_index) ((txq_index) / MAX_TXQS_PER_COS) - /* rules for calculating the cids of tx-only connections */ -#define CID_TO_FP(cid) ((cid) % MAX_TXQS_PER_COS) -#define CID_COS_TO_TX_ONLY_CID(cid, cos) (cid + cos * MAX_TXQS_PER_COS) +#define CID_TO_FP(cid, bp) ((cid) % BNX2X_NUM_NON_CNIC_QUEUES(bp)) +#define CID_COS_TO_TX_ONLY_CID(cid, cos, bp) \ + (cid + cos * BNX2X_NUM_NON_CNIC_QUEUES(bp)) /* fp index inside class of service range */ -#define FP_COS_TO_TXQ(fp, cos) ((fp)->index + cos * MAX_TXQS_PER_COS) - -/* - * 0..15 eth cos0 - * 16..31 eth cos1 if applicable - * 32..47 eth cos2 If applicable - * fcoe queue follows eth queues (16, 32, 48 depending on cos) +#define FP_COS_TO_TXQ(fp, cos, bp) \ + ((fp)->index + cos * BNX2X_NUM_NON_CNIC_QUEUES(bp)) + +/* Indexes for transmission queues array: + * txdata for RSS i CoS j is at location i + (j * num of RSS) + * txdata for FCoE (if exist) is at location max cos * num of RSS + * txdata for FWD (if exist) is one location after FCoE + * txdata for OOO (if exist) is one location after FWD */ -#define MAX_ETH_TXQ_IDX(bp) (MAX_TXQS_PER_COS * (bp)->max_cos) -#define FCOE_TXQ_IDX(bp) (MAX_ETH_TXQ_IDX(bp)) +enum { + FCOE_TXQ_IDX_OFFSET, + FWD_TXQ_IDX_OFFSET, + OOO_TXQ_IDX_OFFSET, +}; +#define MAX_ETH_TXQ_IDX(bp) (BNX2X_NUM_NON_CNIC_QUEUES(bp) * (bp)->max_cos) +#ifdef BCM_CNIC +#define FCOE_TXQ_IDX(bp) (MAX_ETH_TXQ_IDX(bp) + FCOE_TXQ_IDX_OFFSET) +#endif /* fast path */ /* @@ -481,6 +481,8 @@ struct bnx2x_fp_txdata { __le16 *tx_cons_sb; int txq_index; + struct bnx2x_fastpath *parent_fp; + int tx_ring_size; }; enum bnx2x_tpa_mode_t { @@ -507,7 +509,7 @@ struct bnx2x_fastpath { enum bnx2x_tpa_mode_t mode; u8 max_cos; /* actual number of active tx coses */ - struct bnx2x_fp_txdata txdata[BNX2X_MULTI_TX_COS]; + struct bnx2x_fp_txdata *txdata_ptr[BNX2X_MULTI_TX_COS]; struct sw_rx_bd *rx_buf_ring; /* BDs mappings ring */ struct sw_rx_page *rx_page_ring; /* SGE pages mappings ring */ @@ -547,51 +549,45 @@ struct bnx2x_fastpath { rx_calls; /* TPA related */ - struct bnx2x_agg_info tpa_info[ETH_MAX_AGGREGATION_QUEUES_E1H_E2]; + struct bnx2x_agg_info *tpa_info; u8 disable_tpa; #ifdef BNX2X_STOP_ON_ERROR u64 tpa_queue_used; #endif - - struct tstorm_per_queue_stats old_tclient; - struct ustorm_per_queue_stats old_uclient; - struct xstorm_per_queue_stats old_xclient; - struct bnx2x_eth_q_stats eth_q_stats; - struct bnx2x_eth_q_stats_old eth_q_stats_old; - /* The size is calculated using the following: sizeof name field from netdev structure + 4 ('-Xx-' string) + 4 (for the digits and to make it DWORD aligned) */ #define FP_NAME_SIZE (sizeof(((struct net_device *)0)->name) + 8) char name[FP_NAME_SIZE]; - - /* MACs object */ - struct bnx2x_vlan_mac_obj mac_obj; - - /* Queue State object */ - struct bnx2x_queue_sp_obj q_obj; - }; -#define bnx2x_fp(bp, nr, var) (bp->fp[nr].var) +#define bnx2x_fp(bp, nr, var) ((bp)->fp[(nr)].var) +#define bnx2x_sp_obj(bp, fp) ((bp)->sp_objs[(fp)->index]) +#define bnx2x_fp_stats(bp, fp) (&((bp)->fp_stats[(fp)->index])) +#define bnx2x_fp_qstats(bp, fp) (&((bp)->fp_stats[(fp)->index].eth_q_stats)) /* Use 2500 as a mini-jumbo MTU for FCoE */ #define BNX2X_FCOE_MINI_JUMBO_MTU 2500 -/* FCoE L2 `fastpath' entry is right after the eth entries */ -#define FCOE_IDX BNX2X_NUM_ETH_QUEUES(bp) -#define bnx2x_fcoe_fp(bp) (&bp->fp[FCOE_IDX]) -#define bnx2x_fcoe(bp, var) (bnx2x_fcoe_fp(bp)->var) -#define bnx2x_fcoe_tx(bp, var) (bnx2x_fcoe_fp(bp)-> \ - txdata[FIRST_TX_COS_INDEX].var) +#define FCOE_IDX_OFFSET 0 + +#define FCOE_IDX(bp) (BNX2X_NUM_NON_CNIC_QUEUES(bp) + \ + FCOE_IDX_OFFSET) +#define bnx2x_fcoe_fp(bp) (&bp->fp[FCOE_IDX(bp)]) +#define bnx2x_fcoe(bp, var) (bnx2x_fcoe_fp(bp)->var) +#define bnx2x_fcoe_inner_sp_obj(bp) (&bp->sp_objs[FCOE_IDX(bp)]) +#define bnx2x_fcoe_sp_obj(bp, var) (bnx2x_fcoe_inner_sp_obj(bp)->var) +#define bnx2x_fcoe_tx(bp, var) (bnx2x_fcoe_fp(bp)-> \ + txdata_ptr[FIRST_TX_COS_INDEX] \ + ->var) #define IS_ETH_FP(fp) (fp->index < \ BNX2X_NUM_ETH_QUEUES(fp->bp)) #ifdef BCM_CNIC -#define IS_FCOE_FP(fp) (fp->index == FCOE_IDX) -#define IS_FCOE_IDX(idx) ((idx) == FCOE_IDX) +#define IS_FCOE_FP(fp) (fp->index == FCOE_IDX(fp->bp)) +#define IS_FCOE_IDX(idx) ((idx) == FCOE_IDX(bp)) #else #define IS_FCOE_FP(fp) false #define IS_FCOE_IDX(idx) false @@ -747,21 +743,6 @@ struct bnx2x_fastpath { #define ETH_RX_ERROR_FALGS ETH_FAST_PATH_RX_CQE_PHY_DECODE_ERR_FLG -#define BNX2X_IP_CSUM_ERR(cqe) \ - (!((cqe)->fast_path_cqe.status_flags & \ - ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG) && \ - ((cqe)->fast_path_cqe.type_error_flags & \ - ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG)) - -#define BNX2X_L4_CSUM_ERR(cqe) \ - (!((cqe)->fast_path_cqe.status_flags & \ - ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) && \ - ((cqe)->fast_path_cqe.type_error_flags & \ - ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) - -#define BNX2X_RX_CSUM_OK(cqe) \ - (!(BNX2X_L4_CSUM_ERR(cqe) || BNX2X_IP_CSUM_ERR(cqe))) - #define BNX2X_PRS_FLAG_OVERETH_IPV4(flags) \ (((le16_to_cpu(flags) & \ PARSING_FLAGS_OVER_ETHERNET_PROTOCOL) >> \ @@ -993,8 +974,8 @@ union cdu_context { }; /* CDU host DB constants */ -#define CDU_ILT_PAGE_SZ_HW 3 -#define CDU_ILT_PAGE_SZ (8192 << CDU_ILT_PAGE_SZ_HW) /* 64K */ +#define CDU_ILT_PAGE_SZ_HW 2 +#define CDU_ILT_PAGE_SZ (8192 << CDU_ILT_PAGE_SZ_HW) /* 32K */ #define ILT_PAGE_CIDS (CDU_ILT_PAGE_SZ / sizeof(union cdu_context)) #ifdef BCM_CNIC @@ -1197,11 +1178,31 @@ struct bnx2x_prev_path_list { struct list_head list; }; +struct bnx2x_sp_objs { + /* MACs object */ + struct bnx2x_vlan_mac_obj mac_obj; + + /* Queue State object */ + struct bnx2x_queue_sp_obj q_obj; +}; + +struct bnx2x_fp_stats { + struct tstorm_per_queue_stats old_tclient; + struct ustorm_per_queue_stats old_uclient; + struct xstorm_per_queue_stats old_xclient; + struct bnx2x_eth_q_stats eth_q_stats; + struct bnx2x_eth_q_stats_old eth_q_stats_old; +}; + struct bnx2x { /* Fields used in the tx and intr/napi performance paths * are grouped together in the beginning of the structure */ struct bnx2x_fastpath *fp; + struct bnx2x_sp_objs *sp_objs; + struct bnx2x_fp_stats *fp_stats; + struct bnx2x_fp_txdata *bnx2x_txq; + int bnx2x_txq_size; void __iomem *regview; void __iomem *doorbells; u16 db_size; @@ -1317,6 +1318,7 @@ struct bnx2x { #define NO_FCOE_FLAG (1 << 15) #define BC_SUPPORTS_PFC_STATS (1 << 17) #define USING_SINGLE_MSIX_FLAG (1 << 20) +#define BC_SUPPORTS_DCBX_MSG_NON_PMF (1 << 21) #define NO_ISCSI(bp) ((bp)->flags & NO_ISCSI_FLAG) #define NO_ISCSI_OOO(bp) ((bp)->flags & NO_ISCSI_OOO_FLAG) @@ -1392,6 +1394,7 @@ struct bnx2x { #define BNX2X_MAX_COS 3 #define BNX2X_MAX_TX_COS 2 int num_queues; + int num_napi_queues; int disable_tpa; u32 rx_mode; @@ -1404,6 +1407,7 @@ struct bnx2x { u8 igu_dsb_id; u8 igu_base_sb; u8 igu_sb_cnt; + dma_addr_t def_status_blk_mapping; struct bnx2x_slowpath *slowpath; @@ -1435,7 +1439,11 @@ struct bnx2x { dma_addr_t fw_stats_data_mapping; int fw_stats_data_sz; - struct hw_context context; + /* For max 196 cids (64*3 + non-eth), 32KB ILT page size and 1KB + * context size we need 8 ILT entries. + */ +#define ILT_MAX_L2_LINES 8 + struct hw_context context[ILT_MAX_L2_LINES]; struct bnx2x_ilt *ilt; #define BP_ILT(bp) ((bp)->ilt) @@ -1448,13 +1456,14 @@ struct bnx2x { /* * Maximum CID count that might be required by the bnx2x: - * Max Tss * Max_Tx_Multi_Cos + CNIC L2 Clients (FCoE and iSCSI related) + * Max RSS * Max_Tx_Multi_Cos + FCoE + iSCSI */ -#define BNX2X_L2_CID_COUNT(bp) (MAX_TXQS_PER_COS * BNX2X_MULTI_TX_COS +\ - NON_ETH_CONTEXT_USE + CNIC_PRESENT) +#define BNX2X_L2_CID_COUNT(bp) (BNX2X_NUM_ETH_QUEUES(bp) * BNX2X_MULTI_TX_COS \ + + NON_ETH_CONTEXT_USE + CNIC_PRESENT) +#define BNX2X_L2_MAX_CID(bp) (BNX2X_MAX_RSS_COUNT(bp) * BNX2X_MULTI_TX_COS \ + + NON_ETH_CONTEXT_USE + CNIC_PRESENT) #define L2_ILT_LINES(bp) (DIV_ROUND_UP(BNX2X_L2_CID_COUNT(bp),\ ILT_PAGE_CIDS)) -#define BNX2X_DB_SIZE(bp) (BNX2X_L2_CID_COUNT(bp) * (1 << BNX2X_DB_SHIFT)) int qm_cid_count; @@ -1613,6 +1622,8 @@ struct bnx2x { extern int num_queues; #define BNX2X_NUM_QUEUES(bp) (bp->num_queues) #define BNX2X_NUM_ETH_QUEUES(bp) (BNX2X_NUM_QUEUES(bp) - NON_ETH_CONTEXT_USE) +#define BNX2X_NUM_NON_CNIC_QUEUES(bp) (BNX2X_NUM_QUEUES(bp) - \ + NON_ETH_CONTEXT_USE) #define BNX2X_NUM_RX_QUEUES(bp) BNX2X_NUM_QUEUES(bp) #define is_multi(bp) (BNX2X_NUM_QUEUES(bp) > 1) @@ -1671,6 +1682,9 @@ struct bnx2x_func_init_params { continue; \ else +#define for_each_napi_rx_queue(bp, var) \ + for ((var) = 0; (var) < bp->num_napi_queues; (var)++) + /* Skip OOO FP */ #define for_each_tx_queue(bp, var) \ for ((var) = 0; (var) < BNX2X_NUM_QUEUES(bp); (var)++) \ @@ -1832,6 +1846,7 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, #define LOAD_NORMAL 0 #define LOAD_OPEN 1 #define LOAD_DIAG 2 +#define LOAD_LOOPBACK_EXT 3 #define UNLOAD_NORMAL 0 #define UNLOAD_CLOSE 1 #define UNLOAD_RECOVERY 2 @@ -1914,13 +1929,17 @@ static inline u32 reg_poll(struct bnx2x *bp, u32 reg, u32 expected, int ms, #define PCICFG_LINK_SPEED 0xf0000 #define PCICFG_LINK_SPEED_SHIFT 16 - -#define BNX2X_NUM_TESTS 7 +#define BNX2X_NUM_TESTS_SF 7 +#define BNX2X_NUM_TESTS_MF 3 +#define BNX2X_NUM_TESTS(bp) (IS_MF(bp) ? BNX2X_NUM_TESTS_MF : \ + BNX2X_NUM_TESTS_SF) #define BNX2X_PHY_LOOPBACK 0 #define BNX2X_MAC_LOOPBACK 1 +#define BNX2X_EXT_LOOPBACK 2 #define BNX2X_PHY_LOOPBACK_FAILED 1 #define BNX2X_MAC_LOOPBACK_FAILED 2 +#define BNX2X_EXT_LOOPBACK_FAILED 3 #define BNX2X_LOOPBACK_FAILED (BNX2X_MAC_LOOPBACK_FAILED | \ BNX2X_PHY_LOOPBACK_FAILED) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index ad0743b..00951b3 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -40,12 +40,19 @@ * Makes sure the contents of the bp->fp[to].napi is kept * intact. This is done by first copying the napi struct from * the target to the source, and then mem copying the entire - * source onto the target + * source onto the target. Update txdata pointers and related + * content. */ static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to) { struct bnx2x_fastpath *from_fp = &bp->fp[from]; struct bnx2x_fastpath *to_fp = &bp->fp[to]; + struct bnx2x_sp_objs *from_sp_objs = &bp->sp_objs[from]; + struct bnx2x_sp_objs *to_sp_objs = &bp->sp_objs[to]; + struct bnx2x_fp_stats *from_fp_stats = &bp->fp_stats[from]; + struct bnx2x_fp_stats *to_fp_stats = &bp->fp_stats[to]; + int old_max_eth_txqs, new_max_eth_txqs; + int old_txdata_index = 0, new_txdata_index = 0; /* Copy the NAPI object as it has been already initialized */ from_fp->napi = to_fp->napi; @@ -53,6 +60,30 @@ static inline void bnx2x_move_fp(struct bnx2x *bp, int from, int to) /* Move bnx2x_fastpath contents */ memcpy(to_fp, from_fp, sizeof(*to_fp)); to_fp->index = to; + + /* move sp_objs contents as well, as their indices match fp ones */ + memcpy(to_sp_objs, from_sp_objs, sizeof(*to_sp_objs)); + + /* move fp_stats contents as well, as their indices match fp ones */ + memcpy(to_fp_stats, from_fp_stats, sizeof(*to_fp_stats)); + + /* Update txdata pointers in fp and move txdata content accordingly: + * Each fp consumes 'max_cos' txdata structures, so the index should be + * decremented by max_cos x delta. + */ + + old_max_eth_txqs = BNX2X_NUM_ETH_QUEUES(bp) * (bp)->max_cos; + new_max_eth_txqs = (BNX2X_NUM_ETH_QUEUES(bp) - from + to) * + (bp)->max_cos; + if (from == FCOE_IDX(bp)) { + old_txdata_index = old_max_eth_txqs + FCOE_TXQ_IDX_OFFSET; + new_txdata_index = new_max_eth_txqs + FCOE_TXQ_IDX_OFFSET; + } + + memcpy(&bp->bnx2x_txq[old_txdata_index], + &bp->bnx2x_txq[new_txdata_index], + sizeof(struct bnx2x_fp_txdata)); + to_fp->txdata_ptr[0] = &bp->bnx2x_txq[new_txdata_index]; } int load_count[2][3] = { {0} }; /* per-path: 0-common, 1-port0, 2-port1 */ @@ -190,7 +221,7 @@ int bnx2x_tx_int(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata) if ((netif_tx_queue_stopped(txq)) && (bp->state == BNX2X_STATE_OPEN) && - (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3)) + (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4)) netif_tx_wake_queue(txq); __netif_tx_unlock(txq); @@ -479,7 +510,7 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, where we are and drop the whole packet */ err = bnx2x_alloc_rx_sge(bp, fp, sge_idx); if (unlikely(err)) { - fp->eth_q_stats.rx_skb_alloc_failed++; + bnx2x_fp_qstats(bp, fp)->rx_skb_alloc_failed++; return err; } @@ -584,7 +615,7 @@ drop: /* drop the packet and keep the buffer in the bin */ DP(NETIF_MSG_RX_STATUS, "Failed to allocate or map a new skb - dropping packet!\n"); - fp->eth_q_stats.rx_skb_alloc_failed++; + bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed++; } static int bnx2x_alloc_rx_data(struct bnx2x *bp, @@ -617,6 +648,27 @@ static int bnx2x_alloc_rx_data(struct bnx2x *bp, return 0; } +static +void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe, + struct bnx2x_fastpath *fp, + struct bnx2x_eth_q_stats *qstats) +{ + /* Do nothing if no IP/L4 csum validation was done */ + + if (cqe->fast_path_cqe.status_flags & + (ETH_FAST_PATH_RX_CQE_IP_XSUM_NO_VALIDATION_FLG | + ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG)) + return; + + /* If both IP/L4 validation were done, check if an error was found. */ + + if (cqe->fast_path_cqe.type_error_flags & + (ETH_FAST_PATH_RX_CQE_IP_BAD_XSUM_FLG | + ETH_FAST_PATH_RX_CQE_L4_BAD_XSUM_FLG)) + qstats->hw_csum_err++; + else + skb->ip_summed = CHECKSUM_UNNECESSARY; +} int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) { @@ -757,7 +809,7 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) DP(NETIF_MSG_RX_ERR | NETIF_MSG_RX_STATUS, "ERROR flags %x rx packet %u\n", cqe_fp_flags, sw_comp_cons); - fp->eth_q_stats.rx_err_discard_pkt++; + bnx2x_fp_qstats(bp, fp)->rx_err_discard_pkt++; goto reuse_rx; } @@ -770,7 +822,7 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) if (skb == NULL) { DP(NETIF_MSG_RX_ERR | NETIF_MSG_RX_STATUS, "ERROR packet dropped because of alloc failure\n"); - fp->eth_q_stats.rx_skb_alloc_failed++; + bnx2x_fp_qstats(bp, fp)->rx_skb_alloc_failed++; goto reuse_rx; } memcpy(skb->data, data + pad, len); @@ -784,14 +836,15 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) skb = build_skb(data, 0); if (unlikely(!skb)) { kfree(data); - fp->eth_q_stats.rx_skb_alloc_failed++; + bnx2x_fp_qstats(bp, fp)-> + rx_skb_alloc_failed++; goto next_rx; } skb_reserve(skb, pad); } else { DP(NETIF_MSG_RX_ERR | NETIF_MSG_RX_STATUS, "ERROR packet dropped because of alloc failure\n"); - fp->eth_q_stats.rx_skb_alloc_failed++; + bnx2x_fp_qstats(bp, fp)->rx_skb_alloc_failed++; reuse_rx: bnx2x_reuse_rx_data(fp, bd_cons, bd_prod); goto next_rx; @@ -806,13 +859,9 @@ reuse_rx: skb_checksum_none_assert(skb); - if (bp->dev->features & NETIF_F_RXCSUM) { - - if (likely(BNX2X_RX_CSUM_OK(cqe))) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - fp->eth_q_stats.hw_csum_err++; - } + if (bp->dev->features & NETIF_F_RXCSUM) + bnx2x_csum_validate(skb, cqe, fp, + bnx2x_fp_qstats(bp, fp)); skb_record_rx_queue(skb, fp->rx_queue); @@ -873,7 +922,7 @@ static irqreturn_t bnx2x_msix_fp_int(int irq, void *fp_cookie) prefetch(fp->rx_cons_sb); for_each_cos_in_tx_queue(fp, cos) - prefetch(fp->txdata[cos].tx_cons_sb); + prefetch(fp->txdata_ptr[cos]->tx_cons_sb); prefetch(&fp->sb_running_index[SM_RX_ID]); napi_schedule(&bnx2x_fp(bp, fp->index, napi)); @@ -1190,7 +1239,7 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp) for_each_tx_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; for_each_cos_in_tx_queue(fp, cos) { - struct bnx2x_fp_txdata *txdata = &fp->txdata[cos]; + struct bnx2x_fp_txdata *txdata = fp->txdata_ptr[cos]; unsigned pkts_compl = 0, bytes_compl = 0; u16 sw_prod = txdata->tx_pkt_prod; @@ -1202,7 +1251,8 @@ static void bnx2x_free_tx_skbs(struct bnx2x *bp) sw_cons++; } netdev_tx_reset_queue( - netdev_get_tx_queue(bp->dev, txdata->txq_index)); + netdev_get_tx_queue(bp->dev, + txdata->txq_index)); } } } @@ -1310,7 +1360,7 @@ void bnx2x_free_irq(struct bnx2x *bp) free_irq(bp->dev->irq, bp->dev); } -int __devinit bnx2x_enable_msix(struct bnx2x *bp) +int bnx2x_enable_msix(struct bnx2x *bp) { int msix_vec = 0, i, rc, req_cnt; @@ -1564,6 +1614,8 @@ void bnx2x_set_num_queues(struct bnx2x *bp) #endif /* Add special queues */ bp->num_queues += NON_ETH_CONTEXT_USE; + + BNX2X_DEV_INFO("set number of queues to %d\n", bp->num_queues); } /** @@ -1592,8 +1644,8 @@ static int bnx2x_set_real_num_queues(struct bnx2x *bp) { int rc, tx, rx; - tx = MAX_TXQS_PER_COS * bp->max_cos; - rx = BNX2X_NUM_ETH_QUEUES(bp); + tx = BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos; + rx = BNX2X_NUM_QUEUES(bp) - NON_ETH_CONTEXT_USE; /* account for fcoe queue */ #ifdef BCM_CNIC @@ -1651,14 +1703,13 @@ static void bnx2x_set_rx_buf_size(struct bnx2x *bp) static int bnx2x_init_rss_pf(struct bnx2x *bp) { int i; - u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0}; u8 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp); /* Prepare the initial contents fo the indirection table if RSS is * enabled */ - for (i = 0; i < sizeof(ind_table); i++) - ind_table[i] = + for (i = 0; i < sizeof(bp->rss_conf_obj.ind_table); i++) + bp->rss_conf_obj.ind_table[i] = bp->fp->cl_id + ethtool_rxfh_indir_default(i, num_eth_queues); @@ -1670,12 +1721,11 @@ static int bnx2x_init_rss_pf(struct bnx2x *bp) * For 57712 and newer on the other hand it's a per-function * configuration. */ - return bnx2x_config_rss_eth(bp, ind_table, - bp->port.pmf || !CHIP_IS_E1x(bp)); + return bnx2x_config_rss_eth(bp, bp->port.pmf || !CHIP_IS_E1x(bp)); } int bnx2x_config_rss_pf(struct bnx2x *bp, struct bnx2x_rss_config_obj *rss_obj, - u8 *ind_table, bool config_hash) + bool config_hash) { struct bnx2x_config_rss_params params = {NULL}; int i; @@ -1698,11 +1748,15 @@ int bnx2x_config_rss_pf(struct bnx2x *bp, struct bnx2x_rss_config_obj *rss_obj, __set_bit(BNX2X_RSS_IPV4_TCP, ¶ms.rss_flags); __set_bit(BNX2X_RSS_IPV6, ¶ms.rss_flags); __set_bit(BNX2X_RSS_IPV6_TCP, ¶ms.rss_flags); + if (rss_obj->udp_rss_v4) + __set_bit(BNX2X_RSS_IPV4_UDP, ¶ms.rss_flags); + if (rss_obj->udp_rss_v6) + __set_bit(BNX2X_RSS_IPV6_UDP, ¶ms.rss_flags); /* Hash bits */ params.rss_result_mask = MULTI_MASK; - memcpy(params.ind_table, ind_table, sizeof(params.ind_table)); + memcpy(params.ind_table, rss_obj->ind_table, sizeof(params.ind_table)); if (config_hash) { /* RSS keys */ @@ -1739,7 +1793,7 @@ static void bnx2x_squeeze_objects(struct bnx2x *bp) int rc; unsigned long ramrod_flags = 0, vlan_mac_flags = 0; struct bnx2x_mcast_ramrod_params rparam = {NULL}; - struct bnx2x_vlan_mac_obj *mac_obj = &bp->fp->mac_obj; + struct bnx2x_vlan_mac_obj *mac_obj = &bp->sp_objs->mac_obj; /***************** Cleanup MACs' object first *************************/ @@ -1750,7 +1804,7 @@ static void bnx2x_squeeze_objects(struct bnx2x *bp) /* Clean ETH primary MAC */ __set_bit(BNX2X_ETH_MAC, &vlan_mac_flags); - rc = mac_obj->delete_all(bp, &bp->fp->mac_obj, &vlan_mac_flags, + rc = mac_obj->delete_all(bp, &bp->sp_objs->mac_obj, &vlan_mac_flags, &ramrod_flags); if (rc != 0) BNX2X_ERR("Failed to clean ETH MACs: %d\n", rc); @@ -1836,11 +1890,16 @@ bool bnx2x_test_firmware_version(struct bnx2x *bp, bool is_err) static void bnx2x_bz_fp(struct bnx2x *bp, int index) { struct bnx2x_fastpath *fp = &bp->fp[index]; + struct bnx2x_fp_stats *fp_stats = &bp->fp_stats[index]; + + int cos; struct napi_struct orig_napi = fp->napi; + struct bnx2x_agg_info *orig_tpa_info = fp->tpa_info; /* bzero bnx2x_fastpath contents */ - if (bp->stats_init) + if (bp->stats_init) { + memset(fp->tpa_info, 0, sizeof(*fp->tpa_info)); memset(fp, 0, sizeof(*fp)); - else { + } else { /* Keep Queue statistics */ struct bnx2x_eth_q_stats *tmp_eth_q_stats; struct bnx2x_eth_q_stats_old *tmp_eth_q_stats_old; @@ -1848,26 +1907,27 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) tmp_eth_q_stats = kzalloc(sizeof(struct bnx2x_eth_q_stats), GFP_KERNEL); if (tmp_eth_q_stats) - memcpy(tmp_eth_q_stats, &fp->eth_q_stats, + memcpy(tmp_eth_q_stats, &fp_stats->eth_q_stats, sizeof(struct bnx2x_eth_q_stats)); tmp_eth_q_stats_old = kzalloc(sizeof(struct bnx2x_eth_q_stats_old), GFP_KERNEL); if (tmp_eth_q_stats_old) - memcpy(tmp_eth_q_stats_old, &fp->eth_q_stats_old, + memcpy(tmp_eth_q_stats_old, &fp_stats->eth_q_stats_old, sizeof(struct bnx2x_eth_q_stats_old)); + memset(fp->tpa_info, 0, sizeof(*fp->tpa_info)); memset(fp, 0, sizeof(*fp)); if (tmp_eth_q_stats) { - memcpy(&fp->eth_q_stats, tmp_eth_q_stats, - sizeof(struct bnx2x_eth_q_stats)); + memcpy(&fp_stats->eth_q_stats, tmp_eth_q_stats, + sizeof(struct bnx2x_eth_q_stats)); kfree(tmp_eth_q_stats); } if (tmp_eth_q_stats_old) { - memcpy(&fp->eth_q_stats_old, tmp_eth_q_stats_old, + memcpy(&fp_stats->eth_q_stats_old, tmp_eth_q_stats_old, sizeof(struct bnx2x_eth_q_stats_old)); kfree(tmp_eth_q_stats_old); } @@ -1876,7 +1936,7 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) /* Restore the NAPI object as it has been already initialized */ fp->napi = orig_napi; - + fp->tpa_info = orig_tpa_info; fp->bp = bp; fp->index = index; if (IS_ETH_FP(fp)) @@ -1885,6 +1945,16 @@ static void bnx2x_bz_fp(struct bnx2x *bp, int index) /* Special queues support only one CoS */ fp->max_cos = 1; + /* Init txdata pointers */ +#ifdef BCM_CNIC + if (IS_FCOE_FP(fp)) + fp->txdata_ptr[0] = &bp->bnx2x_txq[FCOE_TXQ_IDX(bp)]; +#endif + if (IS_ETH_FP(fp)) + for_each_cos_in_tx_queue(fp, cos) + fp->txdata_ptr[cos] = &bp->bnx2x_txq[cos * + BNX2X_NUM_ETH_QUEUES(bp) + index]; + /* * set the tpa flag for each queue. The tpa flag determines the queue * minimal size so it must be set prior to queue memory allocation @@ -1934,11 +2004,13 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) /* * Zero fastpath structures preserving invariants like napi, which are * allocated only once, fp index, max_cos, bp pointer. - * Also set fp->disable_tpa. + * Also set fp->disable_tpa and txdata_ptr. */ DP(NETIF_MSG_IFUP, "num queues: %d", bp->num_queues); for_each_queue(bp, i) bnx2x_bz_fp(bp, i); + memset(bp->bnx2x_txq, 0, bp->bnx2x_txq_size * + sizeof(struct bnx2x_fp_txdata)); /* Set the receive queues buffer size */ @@ -2161,6 +2233,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) break; case LOAD_DIAG: + case LOAD_LOOPBACK_EXT: bp->state = BNX2X_STATE_DIAG; break; @@ -2180,6 +2253,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) /* re-read iscsi info */ bnx2x_get_iscsi_info(bp); bnx2x_setup_cnic_irq_info(bp); + bnx2x_setup_cnic_info(bp); if (bp->state == BNX2X_STATE_OPEN) bnx2x_cnic_notify(bp, CNIC_CTL_START_CMD); #endif @@ -2200,7 +2274,10 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) return -EBUSY; } - bnx2x_dcbx_init(bp); + /* If PMF - send ADMIN DCBX msg to MFW to initiate DCBX FSM */ + if (bp->port.pmf && (bp->state != BNX2X_STATE_DIAG)) + bnx2x_dcbx_init(bp, false); + return 0; #ifndef BNX2X_STOP_ON_ERROR @@ -2283,6 +2360,7 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode) /* Stop Tx */ bnx2x_tx_disable(bp); + netdev_reset_tc(bp->dev); #ifdef BCM_CNIC bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD); @@ -2441,8 +2519,8 @@ int bnx2x_poll(struct napi_struct *napi, int budget) #endif for_each_cos_in_tx_queue(fp, cos) - if (bnx2x_tx_queue_has_work(&fp->txdata[cos])) - bnx2x_tx_int(bp, &fp->txdata[cos]); + if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) + bnx2x_tx_int(bp, fp->txdata_ptr[cos]); if (bnx2x_has_rx_work(fp)) { @@ -2501,8 +2579,6 @@ int bnx2x_poll(struct napi_struct *napi, int budget) /* we split the first BD into headers and data BDs * to ease the pain of our fellow microcode engineers * we use one mapping for both BDs - * So far this has only been observed to happen - * in Other Operating Systems(TM) */ static noinline u16 bnx2x_tx_split(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata, @@ -2821,7 +2897,6 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct bnx2x *bp = netdev_priv(dev); - struct bnx2x_fastpath *fp; struct netdev_queue *txq; struct bnx2x_fp_txdata *txdata; struct sw_tx_bd *tx_buf; @@ -2831,7 +2906,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) struct eth_tx_parse_bd_e2 *pbd_e2 = NULL; u32 pbd_e2_parsing_data = 0; u16 pkt_prod, bd_prod; - int nbd, txq_index, fp_index, txdata_index; + int nbd, txq_index; dma_addr_t mapping; u32 xmit_type = bnx2x_xmit_type(bp, skb); int i; @@ -2850,31 +2925,12 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) BUG_ON(txq_index >= MAX_ETH_TXQ_IDX(bp) + FCOE_PRESENT); - /* decode the fastpath index and the cos index from the txq */ - fp_index = TXQ_TO_FP(txq_index); - txdata_index = TXQ_TO_COS(txq_index); - -#ifdef BCM_CNIC - /* - * Override the above for the FCoE queue: - * - FCoE fp entry is right after the ETH entries. - * - FCoE L2 queue uses bp->txdata[0] only. - */ - if (unlikely(!NO_FCOE(bp) && (txq_index == - bnx2x_fcoe_tx(bp, txq_index)))) { - fp_index = FCOE_IDX; - txdata_index = 0; - } -#endif + txdata = &bp->bnx2x_txq[txq_index]; /* enable this debug print to view the transmission queue being used DP(NETIF_MSG_TX_QUEUED, "indices: txq %d, fp %d, txdata %d\n", txq_index, fp_index, txdata_index); */ - /* locate the fastpath and the txdata */ - fp = &bp->fp[fp_index]; - txdata = &fp->txdata[txdata_index]; - /* enable this debug print to view the tranmission details DP(NETIF_MSG_TX_QUEUED, "transmitting packet cid %d fp index %d txdata_index %d tx_data ptr %p fp pointer %p\n", @@ -2882,7 +2938,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(bnx2x_tx_avail(bp, txdata) < (skb_shinfo(skb)->nr_frags + 3))) { - fp->eth_q_stats.driver_xoff++; + bnx2x_fp_qstats(bp, txdata->parent_fp)->driver_xoff++; netif_tx_stop_queue(txq); BNX2X_ERR("BUG! Tx ring full when queue awake!\n"); return NETDEV_TX_BUSY; @@ -3156,7 +3212,7 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) txdata->tx_bd_prod += nbd; - if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 3)) { + if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_SKB_FRAGS + 4)) { netif_tx_stop_queue(txq); /* paired memory barrier is in bnx2x_tx_int(), we have to keep @@ -3164,8 +3220,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) * fp->bd_tx_cons */ smp_mb(); - fp->eth_q_stats.driver_xoff++; - if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 3) + bnx2x_fp_qstats(bp, txdata->parent_fp)->driver_xoff++; + if (bnx2x_tx_avail(bp, txdata) >= MAX_SKB_FRAGS + 4) netif_tx_wake_queue(txq); } txdata->tx_pkt++; @@ -3230,7 +3286,7 @@ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc) /* configure traffic class to transmission queue mapping */ for (cos = 0; cos < bp->max_cos; cos++) { count = BNX2X_NUM_ETH_QUEUES(bp); - offset = cos * MAX_TXQS_PER_COS; + offset = cos * BNX2X_NUM_NON_CNIC_QUEUES(bp); netdev_set_tc_queue(dev, cos, count, offset); DP(BNX2X_MSG_SP | NETIF_MSG_IFUP, "mapping tc %d to offset %d count %d\n", @@ -3329,7 +3385,7 @@ static void bnx2x_free_fp_mem_at(struct bnx2x *bp, int fp_index) if (!skip_tx_queue(bp, fp_index)) { /* fastpath tx rings: tx_buf tx_desc */ for_each_cos_in_tx_queue(fp, cos) { - struct bnx2x_fp_txdata *txdata = &fp->txdata[cos]; + struct bnx2x_fp_txdata *txdata = fp->txdata_ptr[cos]; DP(NETIF_MSG_IFDOWN, "freeing tx memory of fp %d cos %d cid %d\n", @@ -3401,7 +3457,7 @@ static int bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp, cqe_ring_prod); fp->rx_pkt = fp->rx_calls = 0; - fp->eth_q_stats.rx_skb_alloc_failed += failure_cnt; + bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed += failure_cnt; return i - failure_cnt; } @@ -3486,7 +3542,7 @@ static int bnx2x_alloc_fp_mem_at(struct bnx2x *bp, int index) if (!skip_tx_queue(bp, index)) { /* fastpath tx rings: tx_buf tx_desc */ for_each_cos_in_tx_queue(fp, cos) { - struct bnx2x_fp_txdata *txdata = &fp->txdata[cos]; + struct bnx2x_fp_txdata *txdata = fp->txdata_ptr[cos]; DP(NETIF_MSG_IFUP, "allocating tx memory of fp %d cos %d\n", @@ -3569,7 +3625,7 @@ int bnx2x_alloc_fp_mem(struct bnx2x *bp) #ifdef BCM_CNIC if (!NO_FCOE(bp)) /* FCoE */ - if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX)) + if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX(bp))) /* we will fail load process instead of mark * NO_FCOE_FLAG */ @@ -3594,7 +3650,7 @@ int bnx2x_alloc_fp_mem(struct bnx2x *bp) */ /* move FCoE fp even NO_FCOE_FLAG is on */ - bnx2x_move_fp(bp, FCOE_IDX, FCOE_IDX - delta); + bnx2x_move_fp(bp, FCOE_IDX(bp), FCOE_IDX(bp) - delta); #endif bp->num_queues -= delta; BNX2X_ERR("Adjusted num of queues from %d to %d\n", @@ -3606,7 +3662,11 @@ int bnx2x_alloc_fp_mem(struct bnx2x *bp) void bnx2x_free_mem_bp(struct bnx2x *bp) { + kfree(bp->fp->tpa_info); kfree(bp->fp); + kfree(bp->sp_objs); + kfree(bp->fp_stats); + kfree(bp->bnx2x_txq); kfree(bp->msix_table); kfree(bp->ilt); } @@ -3617,6 +3677,8 @@ int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp) struct msix_entry *tbl; struct bnx2x_ilt *ilt; int msix_table_size = 0; + int fp_array_size; + int i; /* * The biggest MSI-X table we might need is as a maximum number of fast @@ -3625,12 +3687,44 @@ int __devinit bnx2x_alloc_mem_bp(struct bnx2x *bp) msix_table_size = bp->igu_sb_cnt + 1; /* fp array: RSS plus CNIC related L2 queues */ - fp = kcalloc(BNX2X_MAX_RSS_COUNT(bp) + NON_ETH_CONTEXT_USE, - sizeof(*fp), GFP_KERNEL); + fp_array_size = BNX2X_MAX_RSS_COUNT(bp) + NON_ETH_CONTEXT_USE; + BNX2X_DEV_INFO("fp_array_size %d", fp_array_size); + + fp = kcalloc(fp_array_size, sizeof(*fp), GFP_KERNEL); if (!fp) goto alloc_err; + for (i = 0; i < fp_array_size; i++) { + fp[i].tpa_info = + kcalloc(ETH_MAX_AGGREGATION_QUEUES_E1H_E2, + sizeof(struct bnx2x_agg_info), GFP_KERNEL); + if (!(fp[i].tpa_info)) + goto alloc_err; + } + bp->fp = fp; + /* allocate sp objs */ + bp->sp_objs = kcalloc(fp_array_size, sizeof(struct bnx2x_sp_objs), + GFP_KERNEL); + if (!bp->sp_objs) + goto alloc_err; + + /* allocate fp_stats */ + bp->fp_stats = kcalloc(fp_array_size, sizeof(struct bnx2x_fp_stats), + GFP_KERNEL); + if (!bp->fp_stats) + goto alloc_err; + + /* Allocate memory for the transmission queues array */ + bp->bnx2x_txq_size = BNX2X_MAX_RSS_COUNT(bp) * BNX2X_MULTI_TX_COS; +#ifdef BCM_CNIC + bp->bnx2x_txq_size++; +#endif + bp->bnx2x_txq = kcalloc(bp->bnx2x_txq_size, + sizeof(struct bnx2x_fp_txdata), GFP_KERNEL); + if (!bp->bnx2x_txq) + goto alloc_err; + /* msix table */ tbl = kcalloc(msix_table_size, sizeof(*tbl), GFP_KERNEL); if (!tbl) diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 7cd99b7..daa894b 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -29,6 +29,7 @@ extern int load_count[2][3]; /* per-path: 0-common, 1-port0, 2-port1 */ extern int num_queues; +extern int int_mode; /************************ Macros ********************************/ #define BNX2X_PCI_FREE(x, y, size) \ @@ -94,7 +95,7 @@ void bnx2x_send_unload_done(struct bnx2x *bp); * @config_hash: re-configure RSS hash keys configuration */ int bnx2x_config_rss_pf(struct bnx2x *bp, struct bnx2x_rss_config_obj *rss_obj, - u8 *ind_table, bool config_hash); + bool config_hash); /** * bnx2x__init_func_obj - init function object @@ -244,6 +245,14 @@ int bnx2x_cnic_notify(struct bnx2x *bp, int cmd); * @bp: driver handle */ void bnx2x_setup_cnic_irq_info(struct bnx2x *bp); + +/** + * bnx2x_setup_cnic_info - provides cnic with updated info + * + * @bp: driver handle + */ +void bnx2x_setup_cnic_info(struct bnx2x *bp); + #endif /** @@ -409,7 +418,7 @@ void bnx2x_ilt_set_info(struct bnx2x *bp); * * @bp: driver handle */ -void bnx2x_dcbx_init(struct bnx2x *bp); +void bnx2x_dcbx_init(struct bnx2x *bp, bool update_shmem); /** * bnx2x_set_power_state - set power state to the requested value. @@ -487,7 +496,7 @@ void bnx2x_netif_start(struct bnx2x *bp); * fills msix_table, requests vectors, updates num_queues * according to number of available vectors. */ -int __devinit bnx2x_enable_msix(struct bnx2x *bp); +int bnx2x_enable_msix(struct bnx2x *bp); /** * bnx2x_enable_msi - request msi mode from OS, updated internals accordingly @@ -728,7 +737,7 @@ static inline bool bnx2x_has_tx_work(struct bnx2x_fastpath *fp) { u8 cos; for_each_cos_in_tx_queue(fp, cos) - if (bnx2x_tx_queue_has_work(&fp->txdata[cos])) + if (bnx2x_tx_queue_has_work(fp->txdata_ptr[cos])) return true; return false; } @@ -780,8 +789,10 @@ static inline void bnx2x_add_all_napi(struct bnx2x *bp) { int i; + bp->num_napi_queues = bp->num_queues; + /* Add NAPI objects */ - for_each_rx_queue(bp, i) + for_each_napi_rx_queue(bp, i) netif_napi_add(bp->dev, &bnx2x_fp(bp, i, napi), bnx2x_poll, BNX2X_NAPI_WEIGHT); } @@ -790,10 +801,12 @@ static inline void bnx2x_del_all_napi(struct bnx2x *bp) { int i; - for_each_rx_queue(bp, i) + for_each_napi_rx_queue(bp, i) netif_napi_del(&bnx2x_fp(bp, i, napi)); } +void bnx2x_set_int_mode(struct bnx2x *bp); + static inline void bnx2x_disable_msi(struct bnx2x *bp) { if (bp->flags & USING_MSIX_FLAG) { @@ -865,11 +878,9 @@ static inline int func_by_vn(struct bnx2x *bp, int vn) return 2 * vn + BP_PORT(bp); } -static inline int bnx2x_config_rss_eth(struct bnx2x *bp, u8 *ind_table, - bool config_hash) +static inline int bnx2x_config_rss_eth(struct bnx2x *bp, bool config_hash) { - return bnx2x_config_rss_pf(bp, &bp->rss_conf_obj, ind_table, - config_hash); + return bnx2x_config_rss_pf(bp, &bp->rss_conf_obj, config_hash); } /** @@ -975,8 +986,8 @@ static inline void bnx2x_init_vlan_mac_fp_objs(struct bnx2x_fastpath *fp, struct bnx2x *bp = fp->bp; /* Configure classification DBs */ - bnx2x_init_mac_obj(bp, &fp->mac_obj, fp->cl_id, fp->cid, - BP_FUNC(bp), bnx2x_sp(bp, mac_rdata), + bnx2x_init_mac_obj(bp, &bnx2x_sp_obj(bp, fp).mac_obj, fp->cl_id, + fp->cid, BP_FUNC(bp), bnx2x_sp(bp, mac_rdata), bnx2x_sp_mapping(bp, mac_rdata), BNX2X_FILTER_MAC_PENDING, &bp->sp_state, obj_type, @@ -1068,12 +1079,14 @@ static inline u32 bnx2x_rx_ustorm_prods_offset(struct bnx2x_fastpath *fp) } static inline void bnx2x_init_txdata(struct bnx2x *bp, - struct bnx2x_fp_txdata *txdata, u32 cid, int txq_index, - __le16 *tx_cons_sb) + struct bnx2x_fp_txdata *txdata, u32 cid, + int txq_index, __le16 *tx_cons_sb, + struct bnx2x_fastpath *fp) { txdata->cid = cid; txdata->txq_index = txq_index; txdata->tx_cons_sb = tx_cons_sb; + txdata->parent_fp = fp; DP(NETIF_MSG_IFUP, "created tx data cid %d, txq %d\n", txdata->cid, txdata->txq_index); @@ -1107,18 +1120,13 @@ static inline void bnx2x_init_fcoe_fp(struct bnx2x *bp) bnx2x_fcoe(bp, rx_queue) = BNX2X_NUM_ETH_QUEUES(bp); bnx2x_fcoe(bp, cl_id) = bnx2x_cnic_eth_cl_id(bp, BNX2X_FCOE_ETH_CL_ID_IDX); - /** Current BNX2X_FCOE_ETH_CID deffinition implies not more than - * 16 ETH clients per function when CNIC is enabled! - * - * Fix it ASAP!!! - */ - bnx2x_fcoe(bp, cid) = BNX2X_FCOE_ETH_CID; + bnx2x_fcoe(bp, cid) = BNX2X_FCOE_ETH_CID(bp); bnx2x_fcoe(bp, fw_sb_id) = DEF_SB_ID; bnx2x_fcoe(bp, igu_sb_id) = bp->igu_dsb_id; bnx2x_fcoe(bp, rx_cons_sb) = BNX2X_FCOE_L2_RX_INDEX; - - bnx2x_init_txdata(bp, &bnx2x_fcoe(bp, txdata[0]), - fp->cid, FCOE_TXQ_IDX(bp), BNX2X_FCOE_L2_TX_INDEX); + bnx2x_init_txdata(bp, bnx2x_fcoe(bp, txdata_ptr[0]), + fp->cid, FCOE_TXQ_IDX(bp), BNX2X_FCOE_L2_TX_INDEX, + fp); DP(NETIF_MSG_IFUP, "created fcoe tx data (fp index %d)\n", fp->index); @@ -1135,8 +1143,8 @@ static inline void bnx2x_init_fcoe_fp(struct bnx2x *bp) /* No multi-CoS for FCoE L2 client */ BUG_ON(fp->max_cos != 1); - bnx2x_init_queue_obj(bp, &fp->q_obj, fp->cl_id, &fp->cid, 1, - BP_FUNC(bp), bnx2x_sp(bp, q_rdata), + bnx2x_init_queue_obj(bp, &bnx2x_sp_obj(bp, fp).q_obj, fp->cl_id, + &fp->cid, 1, BP_FUNC(bp), bnx2x_sp(bp, q_rdata), bnx2x_sp_mapping(bp, q_rdata), q_type); DP(NETIF_MSG_IFUP, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c index 4f9244b..8a73374 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c @@ -972,23 +972,26 @@ void bnx2x_dcbx_init_params(struct bnx2x *bp) bp->dcbx_config_params.admin_default_priority = 0; } -void bnx2x_dcbx_init(struct bnx2x *bp) +void bnx2x_dcbx_init(struct bnx2x *bp, bool update_shmem) { u32 dcbx_lldp_params_offset = SHMEM_LLDP_DCBX_PARAMS_NONE; + /* only PMF can send ADMIN msg to MFW in old MFW versions */ + if ((!bp->port.pmf) && (!(bp->flags & BC_SUPPORTS_DCBX_MSG_NON_PMF))) + return; + if (bp->dcbx_enabled <= 0) return; /* validate: * chip of good for dcbx version, * dcb is wanted - * the function is pmf * shmem2 contains DCBX support fields */ DP(BNX2X_MSG_DCB, "dcb_state %d bp->port.pmf %d\n", bp->dcb_state, bp->port.pmf); - if (bp->dcb_state == BNX2X_DCB_STATE_ON && bp->port.pmf && + if (bp->dcb_state == BNX2X_DCB_STATE_ON && SHMEM2_HAS(bp, dcbx_lldp_params_offset)) { dcbx_lldp_params_offset = SHMEM2_RD(bp, dcbx_lldp_params_offset); @@ -999,12 +1002,23 @@ void bnx2x_dcbx_init(struct bnx2x *bp) bnx2x_update_drv_flags(bp, 1 << DRV_FLAGS_DCB_CONFIGURED, 0); if (SHMEM_LLDP_DCBX_PARAMS_NONE != dcbx_lldp_params_offset) { - bnx2x_dcbx_admin_mib_updated_params(bp, - dcbx_lldp_params_offset); + /* need HW lock to avoid scenario of two drivers + * writing in parallel to shmem + */ + bnx2x_acquire_hw_lock(bp, + HW_LOCK_RESOURCE_DCBX_ADMIN_MIB); + if (update_shmem) + bnx2x_dcbx_admin_mib_updated_params(bp, + dcbx_lldp_params_offset); /* Let HW start negotiation */ bnx2x_fw_command(bp, DRV_MSG_CODE_DCBX_ADMIN_PMF_MSG, 0); + /* release HW lock only after MFW acks that it finished + * reading values from shmem + */ + bnx2x_release_hw_lock(bp, + HW_LOCK_RESOURCE_DCBX_ADMIN_MIB); } } } @@ -2063,10 +2077,8 @@ static u8 bnx2x_dcbnl_set_all(struct net_device *netdev) "Handling parity error recovery. Try again later\n"); return 1; } - if (netif_running(bp->dev)) { - bnx2x_nic_unload(bp, UNLOAD_NORMAL); - rc = bnx2x_nic_load(bp, LOAD_NORMAL); - } + if (netif_running(bp->dev)) + bnx2x_dcbx_init(bp, true); DP(BNX2X_MSG_DCB, "set_dcbx_params done (%d)\n", rc); if (rc) return 1; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c index bf30e28..70c0881 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c @@ -826,7 +826,7 @@ static void bnx2x_get_drvinfo(struct net_device *dev, ((phy_fw_ver[0] != '\0') ? " phy " : ""), phy_fw_ver); strlcpy(info->bus_info, pci_name(bp->pdev), sizeof(info->bus_info)); info->n_stats = BNX2X_NUM_STATS; - info->testinfo_len = BNX2X_NUM_TESTS; + info->testinfo_len = BNX2X_NUM_TESTS(bp); info->eedump_len = bp->common.flash_size; info->regdump_len = bnx2x_get_regs_len(dev); } @@ -1533,16 +1533,14 @@ static int bnx2x_set_pauseparam(struct net_device *dev, return 0; } -static const struct { - char string[ETH_GSTRING_LEN]; -} bnx2x_tests_str_arr[BNX2X_NUM_TESTS] = { - { "register_test (offline)" }, - { "memory_test (offline)" }, - { "loopback_test (offline)" }, - { "nvram_test (online)" }, - { "interrupt_test (online)" }, - { "link_test (online)" }, - { "idle check (online)" } +char *bnx2x_tests_str_arr[BNX2X_NUM_TESTS_SF] = { + "register_test (offline) ", + "memory_test (offline) ", + "int_loopback_test (offline)", + "ext_loopback_test (offline)", + "nvram_test (online) ", + "interrupt_test (online) ", + "link_test (online) " }; static u32 bnx2x_eee_to_adv(u32 eee_adv) @@ -1943,6 +1941,14 @@ static void bnx2x_wait_for_link(struct bnx2x *bp, u8 link_up, u8 is_serdes) if (cnt <= 0 && bnx2x_link_test(bp, is_serdes)) DP(BNX2X_MSG_ETHTOOL, "Timeout waiting for link up\n"); + + cnt = 1400; + while (!bp->link_vars.link_up && cnt--) + msleep(20); + + if (cnt <= 0 && !bp->link_vars.link_up) + DP(BNX2X_MSG_ETHTOOL, + "Timeout waiting for link init\n"); } } @@ -1953,7 +1959,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) unsigned char *packet; struct bnx2x_fastpath *fp_rx = &bp->fp[0]; struct bnx2x_fastpath *fp_tx = &bp->fp[0]; - struct bnx2x_fp_txdata *txdata = &fp_tx->txdata[0]; + struct bnx2x_fp_txdata *txdata = fp_tx->txdata_ptr[0]; u16 tx_start_idx, tx_idx; u16 rx_start_idx, rx_idx; u16 pkt_prod, bd_prod; @@ -1968,13 +1974,16 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) u16 len; int rc = -ENODEV; u8 *data; - struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txdata->txq_index); + struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, + txdata->txq_index); /* check the loopback mode */ switch (loopback_mode) { case BNX2X_PHY_LOOPBACK: - if (bp->link_params.loopback_mode != LOOPBACK_XGXS) + if (bp->link_params.loopback_mode != LOOPBACK_XGXS) { + DP(BNX2X_MSG_ETHTOOL, "PHY loopback not supported\n"); return -EINVAL; + } break; case BNX2X_MAC_LOOPBACK: if (CHIP_IS_E3(bp)) { @@ -1991,6 +2000,13 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode) bnx2x_phy_init(&bp->link_params, &bp->link_vars); break; + case BNX2X_EXT_LOOPBACK: + if (bp->link_params.loopback_mode != LOOPBACK_EXT) { + DP(BNX2X_MSG_ETHTOOL, + "Can't configure external loopback\n"); + return -EINVAL; + } + break; default: DP(BNX2X_MSG_ETHTOOL, "Command parameters not supported\n"); return -EINVAL; @@ -2162,6 +2178,38 @@ static int bnx2x_test_loopback(struct bnx2x *bp) return rc; } +static int bnx2x_test_ext_loopback(struct bnx2x *bp) +{ + int rc; + u8 is_serdes = + (bp->link_vars.link_status & LINK_STATUS_SERDES_LINK) > 0; + + if (BP_NOMCP(bp)) + return -ENODEV; + + if (!netif_running(bp->dev)) + return BNX2X_EXT_LOOPBACK_FAILED; + + bnx2x_nic_unload(bp, UNLOAD_NORMAL); + rc = bnx2x_nic_load(bp, LOAD_LOOPBACK_EXT); + if (rc) { + DP(BNX2X_MSG_ETHTOOL, + "Can't perform self-test, nic_load (for external lb) failed\n"); + return -ENODEV; + } + bnx2x_wait_for_link(bp, 1, is_serdes); + + bnx2x_netif_stop(bp, 1); + + rc = bnx2x_run_loopback(bp, BNX2X_EXT_LOOPBACK); + if (rc) + DP(BNX2X_MSG_ETHTOOL, "EXT loopback failed (res %d)\n", rc); + + bnx2x_netif_start(bp); + + return rc; +} + #define CRC32_RESIDUAL 0xdebb20e3 static int bnx2x_test_nvram(struct bnx2x *bp) @@ -2244,7 +2292,7 @@ static int bnx2x_test_intr(struct bnx2x *bp) return -ENODEV; } - params.q_obj = &bp->fp->q_obj; + params.q_obj = &bp->sp_objs->q_obj; params.cmd = BNX2X_Q_CMD_EMPTY; __set_bit(RAMROD_COMP_WAIT, ¶ms.ramrod_flags); @@ -2257,24 +2305,31 @@ static void bnx2x_self_test(struct net_device *dev, { struct bnx2x *bp = netdev_priv(dev); u8 is_serdes; + int rc; + if (bp->recovery_state != BNX2X_RECOVERY_DONE) { netdev_err(bp->dev, "Handling parity error recovery. Try again later\n"); etest->flags |= ETH_TEST_FL_FAILED; return; } + DP(BNX2X_MSG_ETHTOOL, + "Self-test command parameters: offline = %d, external_lb = %d\n", + (etest->flags & ETH_TEST_FL_OFFLINE), + (etest->flags & ETH_TEST_FL_EXTERNAL_LB)>>2); - memset(buf, 0, sizeof(u64) * BNX2X_NUM_TESTS); + memset(buf, 0, sizeof(u64) * BNX2X_NUM_TESTS(bp)); - if (!netif_running(dev)) + if (!netif_running(dev)) { + DP(BNX2X_MSG_ETHTOOL, + "Can't perform self-test when interface is down\n"); return; + } - /* offline tests are not supported in MF mode */ - if (IS_MF(bp)) - etest->flags &= ~ETH_TEST_FL_OFFLINE; is_serdes = (bp->link_vars.link_status & LINK_STATUS_SERDES_LINK) > 0; - if (etest->flags & ETH_TEST_FL_OFFLINE) { + /* offline tests are not supported in MF mode */ + if ((etest->flags & ETH_TEST_FL_OFFLINE) && !IS_MF(bp)) { int port = BP_PORT(bp); u32 val; u8 link_up; @@ -2287,7 +2342,14 @@ static void bnx2x_self_test(struct net_device *dev, link_up = bp->link_vars.link_up; bnx2x_nic_unload(bp, UNLOAD_NORMAL); - bnx2x_nic_load(bp, LOAD_DIAG); + rc = bnx2x_nic_load(bp, LOAD_DIAG); + if (rc) { + etest->flags |= ETH_TEST_FL_FAILED; + DP(BNX2X_MSG_ETHTOOL, + "Can't perform self-test, nic_load (for offline) failed\n"); + return; + } + /* wait until link state is restored */ bnx2x_wait_for_link(bp, 1, is_serdes); @@ -2300,30 +2362,51 @@ static void bnx2x_self_test(struct net_device *dev, etest->flags |= ETH_TEST_FL_FAILED; } - buf[2] = bnx2x_test_loopback(bp); + buf[2] = bnx2x_test_loopback(bp); /* internal LB */ if (buf[2] != 0) etest->flags |= ETH_TEST_FL_FAILED; + if (etest->flags & ETH_TEST_FL_EXTERNAL_LB) { + buf[3] = bnx2x_test_ext_loopback(bp); /* external LB */ + if (buf[3] != 0) + etest->flags |= ETH_TEST_FL_FAILED; + etest->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE; + } + bnx2x_nic_unload(bp, UNLOAD_NORMAL); /* restore input for TX port IF */ REG_WR(bp, NIG_REG_EGRESS_UMP0_IN_EN + port*4, val); - - bnx2x_nic_load(bp, LOAD_NORMAL); + rc = bnx2x_nic_load(bp, LOAD_NORMAL); + if (rc) { + etest->flags |= ETH_TEST_FL_FAILED; + DP(BNX2X_MSG_ETHTOOL, + "Can't perform self-test, nic_load (for online) failed\n"); + return; + } /* wait until link state is restored */ bnx2x_wait_for_link(bp, link_up, is_serdes); } if (bnx2x_test_nvram(bp) != 0) { - buf[3] = 1; + if (!IS_MF(bp)) + buf[4] = 1; + else + buf[0] = 1; etest->flags |= ETH_TEST_FL_FAILED; } if (bnx2x_test_intr(bp) != 0) { - buf[4] = 1; + if (!IS_MF(bp)) + buf[5] = 1; + else + buf[1] = 1; etest->flags |= ETH_TEST_FL_FAILED; } if (bnx2x_link_test(bp, is_serdes) != 0) { - buf[5] = 1; + if (!IS_MF(bp)) + buf[6] = 1; + else + buf[2] = 1; etest->flags |= ETH_TEST_FL_FAILED; } @@ -2368,7 +2451,7 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset) return num_stats; case ETH_SS_TEST: - return BNX2X_NUM_TESTS; + return BNX2X_NUM_TESTS(bp); default: return -EINVAL; @@ -2378,7 +2461,7 @@ static int bnx2x_get_sset_count(struct net_device *dev, int stringset) static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf) { struct bnx2x *bp = netdev_priv(dev); - int i, j, k; + int i, j, k, offset, start; char queue_name[MAX_QUEUE_NAME_LEN+1]; switch (stringset) { @@ -2409,7 +2492,17 @@ static void bnx2x_get_strings(struct net_device *dev, u32 stringset, u8 *buf) break; case ETH_SS_TEST: - memcpy(buf, bnx2x_tests_str_arr, sizeof(bnx2x_tests_str_arr)); + /* First 4 tests cannot be done in MF mode */ + if (!IS_MF(bp)) + start = 0; + else + start = 4; + for (i = 0, j = start; j < (start + BNX2X_NUM_TESTS(bp)); + i++, j++) { + offset = sprintf(buf+32*i, "%s", + bnx2x_tests_str_arr[j]); + *(buf+offset) = '\0'; + } break; } } @@ -2423,7 +2516,7 @@ static void bnx2x_get_ethtool_stats(struct net_device *dev, if (is_multi(bp)) { for_each_eth_queue(bp, i) { - hw_stats = (u32 *)&bp->fp[i].eth_q_stats; + hw_stats = (u32 *)&bp->fp_stats[i].eth_q_stats; for (j = 0; j < BNX2X_NUM_Q_STATS; j++) { if (bnx2x_q_stats_arr[j].size == 0) { /* skip this counter */ @@ -2507,6 +2600,41 @@ static int bnx2x_set_phys_id(struct net_device *dev, return 0; } +static int bnx2x_get_rss_flags(struct bnx2x *bp, struct ethtool_rxnfc *info) +{ + + switch (info->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + info->data = RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3; + break; + case UDP_V4_FLOW: + if (bp->rss_conf_obj.udp_rss_v4) + info->data = RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3; + else + info->data = RXH_IP_SRC | RXH_IP_DST; + break; + case UDP_V6_FLOW: + if (bp->rss_conf_obj.udp_rss_v6) + info->data = RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3; + else + info->data = RXH_IP_SRC | RXH_IP_DST; + break; + case IPV4_FLOW: + case IPV6_FLOW: + info->data = RXH_IP_SRC | RXH_IP_DST; + break; + default: + info->data = 0; + break; + } + + return 0; +} + static int bnx2x_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rules __always_unused) { @@ -2516,7 +2644,102 @@ static int bnx2x_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, case ETHTOOL_GRXRINGS: info->data = BNX2X_NUM_ETH_QUEUES(bp); return 0; + case ETHTOOL_GRXFH: + return bnx2x_get_rss_flags(bp, info); + default: + DP(BNX2X_MSG_ETHTOOL, "Command parameters not supported\n"); + return -EOPNOTSUPP; + } +} + +static int bnx2x_set_rss_flags(struct bnx2x *bp, struct ethtool_rxnfc *info) +{ + int udp_rss_requested; + + DP(BNX2X_MSG_ETHTOOL, + "Set rss flags command parameters: flow type = %d, data = %llu\n", + info->flow_type, info->data); + + switch (info->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + /* For TCP only 4-tupple hash is supported */ + if (info->data ^ (RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + DP(BNX2X_MSG_ETHTOOL, + "Command parameters not supported\n"); + return -EINVAL; + } else { + return 0; + } + + case UDP_V4_FLOW: + case UDP_V6_FLOW: + /* For UDP either 2-tupple hash or 4-tupple hash is supported */ + if (info->data == (RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + udp_rss_requested = 1; + else if (info->data == (RXH_IP_SRC | RXH_IP_DST)) + udp_rss_requested = 0; + else + return -EINVAL; + if ((info->flow_type == UDP_V4_FLOW) && + (bp->rss_conf_obj.udp_rss_v4 != udp_rss_requested)) { + bp->rss_conf_obj.udp_rss_v4 = udp_rss_requested; + DP(BNX2X_MSG_ETHTOOL, + "rss re-configured, UDP 4-tupple %s\n", + udp_rss_requested ? "enabled" : "disabled"); + return bnx2x_config_rss_pf(bp, &bp->rss_conf_obj, 0); + } else if ((info->flow_type == UDP_V6_FLOW) && + (bp->rss_conf_obj.udp_rss_v6 != udp_rss_requested)) { + bp->rss_conf_obj.udp_rss_v6 = udp_rss_requested; + return bnx2x_config_rss_pf(bp, &bp->rss_conf_obj, 0); + DP(BNX2X_MSG_ETHTOOL, + "rss re-configured, UDP 4-tupple %s\n", + udp_rss_requested ? "enabled" : "disabled"); + } else { + return 0; + } + case IPV4_FLOW: + case IPV6_FLOW: + /* For IP only 2-tupple hash is supported */ + if (info->data ^ (RXH_IP_SRC | RXH_IP_DST)) { + DP(BNX2X_MSG_ETHTOOL, + "Command parameters not supported\n"); + return -EINVAL; + } else { + return 0; + } + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case SCTP_V6_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case IP_USER_FLOW: + case ETHER_FLOW: + /* RSS is not supported for these protocols */ + if (info->data) { + DP(BNX2X_MSG_ETHTOOL, + "Command parameters not supported\n"); + return -EINVAL; + } else { + return 0; + } + default: + return -EINVAL; + } +} +static int bnx2x_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) +{ + struct bnx2x *bp = netdev_priv(dev); + + switch (info->cmd) { + case ETHTOOL_SRXFH: + return bnx2x_set_rss_flags(bp, info); default: DP(BNX2X_MSG_ETHTOOL, "Command parameters not supported\n"); return -EOPNOTSUPP; @@ -2556,7 +2779,6 @@ static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir) { struct bnx2x *bp = netdev_priv(dev); size_t i; - u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0}; for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) { /* @@ -2568,10 +2790,88 @@ static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir) * align the received table to the Client ID of the leading RSS * queue */ - ind_table[i] = indir[i] + bp->fp->cl_id; + bp->rss_conf_obj.ind_table[i] = indir[i] + bp->fp->cl_id; } - return bnx2x_config_rss_eth(bp, ind_table, false); + return bnx2x_config_rss_eth(bp, false); +} + +/** + * bnx2x_get_channels - gets the number of RSS queues. + * + * @dev: net device + * @channels: returns the number of max / current queues + */ +static void bnx2x_get_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct bnx2x *bp = netdev_priv(dev); + + channels->max_combined = BNX2X_MAX_RSS_COUNT(bp); + channels->combined_count = BNX2X_NUM_ETH_QUEUES(bp); +} + +/** + * bnx2x_change_num_queues - change the number of RSS queues. + * + * @bp: bnx2x private structure + * + * Re-configure interrupt mode to get the new number of MSI-X + * vectors and re-add NAPI objects. + */ +static void bnx2x_change_num_queues(struct bnx2x *bp, int num_rss) +{ + bnx2x_del_all_napi(bp); + bnx2x_disable_msi(bp); + BNX2X_NUM_QUEUES(bp) = num_rss + NON_ETH_CONTEXT_USE; + bnx2x_set_int_mode(bp); + bnx2x_add_all_napi(bp); +} + +/** + * bnx2x_set_channels - sets the number of RSS queues. + * + * @dev: net device + * @channels: includes the number of queues requested + */ +static int bnx2x_set_channels(struct net_device *dev, + struct ethtool_channels *channels) +{ + struct bnx2x *bp = netdev_priv(dev); + + + DP(BNX2X_MSG_ETHTOOL, + "set-channels command parameters: rx = %d, tx = %d, other = %d, combined = %d\n", + channels->rx_count, channels->tx_count, channels->other_count, + channels->combined_count); + + /* We don't support separate rx / tx channels. + * We don't allow setting 'other' channels. + */ + if (channels->rx_count || channels->tx_count || channels->other_count + || (channels->combined_count == 0) || + (channels->combined_count > BNX2X_MAX_RSS_COUNT(bp))) { + DP(BNX2X_MSG_ETHTOOL, "command parameters not supported\n"); + return -EINVAL; + } + + /* Check if there was a change in the active parameters */ + if (channels->combined_count == BNX2X_NUM_ETH_QUEUES(bp)) { + DP(BNX2X_MSG_ETHTOOL, "No change in active parameters\n"); + return 0; + } + + /* Set the requested number of queues in bp context. + * Note that the actual number of queues created during load may be + * less than requested if memory is low. + */ + if (unlikely(!netif_running(dev))) { + bnx2x_change_num_queues(bp, channels->combined_count); + return 0; + } + bnx2x_nic_unload(bp, UNLOAD_NORMAL); + bnx2x_change_num_queues(bp, channels->combined_count); + return bnx2x_nic_load(bp, LOAD_NORMAL); } static const struct ethtool_ops bnx2x_ethtool_ops = { @@ -2601,9 +2901,12 @@ static const struct ethtool_ops bnx2x_ethtool_ops = { .set_phys_id = bnx2x_set_phys_id, .get_ethtool_stats = bnx2x_get_ethtool_stats, .get_rxnfc = bnx2x_get_rxnfc, + .set_rxnfc = bnx2x_set_rxnfc, .get_rxfh_indir_size = bnx2x_get_rxfh_indir_size, .get_rxfh_indir = bnx2x_get_rxfh_indir, .set_rxfh_indir = bnx2x_set_rxfh_indir, + .get_channels = bnx2x_get_channels, + .set_channels = bnx2x_set_channels, .get_eee = bnx2x_get_eee, .set_eee = bnx2x_set_eee, }; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index c61aa37..6b77630 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -1253,6 +1253,7 @@ struct drv_func_mb { #define DRV_MSG_CODE_DCBX_ADMIN_PMF_MSG 0xb0000000 #define DRV_MSG_CODE_DCBX_PMF_DRV_OK 0xb2000000 + #define REQ_BC_VER_4_DCBX_ADMIN_MSG_NON_PMF 0x00070401 #define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c index c7c814d..91aa565 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c @@ -4057,18 +4057,12 @@ static void bnx2x_warpcore_set_10G_XFI(struct bnx2x_phy *phy, MDIO_WC_REG_DIGITAL4_MISC3, val | 0x8080); /* Enable LPI pass through */ - if ((params->eee_mode & EEE_MODE_ADV_LPI) && - (phy->flags & FLAGS_EEE_10GBT) && - (!(params->eee_mode & EEE_MODE_ENABLE_LPI) || - bnx2x_eee_calc_timer(params)) && - (params->req_duplex[bnx2x_phy_selection(params)] == DUPLEX_FULL)) { - DP(NETIF_MSG_LINK, "Configure WC for LPI pass through\n"); - bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, - MDIO_WC_REG_EEE_COMBO_CONTROL0, - 0x7c); - bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD, - MDIO_WC_REG_DIGITAL4_MISC5, 0xc000); - } + DP(NETIF_MSG_LINK, "Configure WC for LPI pass through\n"); + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_EEE_COMBO_CONTROL0, + 0x7c); + bnx2x_cl45_read_or_write(bp, phy, MDIO_WC_DEVAD, + MDIO_WC_REG_DIGITAL4_MISC5, 0xc000); /* 10G XFI Full Duplex */ bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index a622bb7..8ddc78e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -104,7 +104,7 @@ MODULE_PARM_DESC(disable_tpa, " Disable the TPA (LRO) feature"); #define INT_MODE_INTx 1 #define INT_MODE_MSI 2 -static int int_mode; +int int_mode; module_param(int_mode, int, 0); MODULE_PARM_DESC(int_mode, " Force interrupt mode other than MSI-X " "(1 INT#x; 2 MSI)"); @@ -758,7 +758,7 @@ void bnx2x_panic_dump(struct bnx2x *bp) /* Tx */ for_each_cos_in_tx_queue(fp, cos) { - txdata = fp->txdata[cos]; + txdata = *fp->txdata_ptr[cos]; BNX2X_ERR("fp%d: tx_pkt_prod(0x%x) tx_pkt_cons(0x%x) tx_bd_prod(0x%x) tx_bd_cons(0x%x) *tx_cons_sb(0x%x)\n", i, txdata.tx_pkt_prod, txdata.tx_pkt_cons, txdata.tx_bd_prod, @@ -876,7 +876,7 @@ void bnx2x_panic_dump(struct bnx2x *bp) for_each_tx_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; for_each_cos_in_tx_queue(fp, cos) { - struct bnx2x_fp_txdata *txdata = &fp->txdata[cos]; + struct bnx2x_fp_txdata *txdata = fp->txdata_ptr[cos]; start = TX_BD(le16_to_cpu(*txdata->tx_cons_sb) - 10); end = TX_BD(le16_to_cpu(*txdata->tx_cons_sb) + 245); @@ -1583,7 +1583,7 @@ void bnx2x_sp_event(struct bnx2x_fastpath *fp, union eth_rx_cqe *rr_cqe) int cid = SW_CID(rr_cqe->ramrod_cqe.conn_and_cmd_data); int command = CQE_CMD(rr_cqe->ramrod_cqe.conn_and_cmd_data); enum bnx2x_queue_cmd drv_cmd = BNX2X_Q_CMD_MAX; - struct bnx2x_queue_sp_obj *q_obj = &fp->q_obj; + struct bnx2x_queue_sp_obj *q_obj = &bnx2x_sp_obj(bp, fp).q_obj; DP(BNX2X_MSG_SP, "fp %d cid %d got ramrod #%d state is %x type is %d\n", @@ -1710,7 +1710,7 @@ irqreturn_t bnx2x_interrupt(int irq, void *dev_instance) /* Handle Rx or Tx according to SB id */ prefetch(fp->rx_cons_sb); for_each_cos_in_tx_queue(fp, cos) - prefetch(fp->txdata[cos].tx_cons_sb); + prefetch(fp->txdata_ptr[cos]->tx_cons_sb); prefetch(&fp->sb_running_index[SM_RX_ID]); napi_schedule(&bnx2x_fp(bp, fp->index, napi)); status &= ~mask; @@ -2124,6 +2124,11 @@ u8 bnx2x_initial_phy_init(struct bnx2x *bp, int load_mode) } } + if (load_mode == LOAD_LOOPBACK_EXT) { + struct link_params *lp = &bp->link_params; + lp->loopback_mode = LOOPBACK_EXT; + } + rc = bnx2x_phy_init(&bp->link_params, &bp->link_vars); bnx2x_release_phy_lock(bp); @@ -2916,7 +2921,7 @@ static void bnx2x_pf_tx_q_prep(struct bnx2x *bp, struct bnx2x_fastpath *fp, struct bnx2x_txq_setup_params *txq_init, u8 cos) { - txq_init->dscr_map = fp->txdata[cos].tx_desc_mapping; + txq_init->dscr_map = fp->txdata_ptr[cos]->tx_desc_mapping; txq_init->sb_cq_index = HC_INDEX_ETH_FIRST_TX_CQ_CONS + cos; txq_init->traffic_type = LLFC_TRAFFIC_TYPE_NW; txq_init->fw_sb_id = fp->fw_sb_id; @@ -3030,9 +3035,9 @@ static void bnx2x_drv_info_ether_stat(struct bnx2x *bp) memcpy(ether_stat->version, DRV_MODULE_VERSION, ETH_STAT_INFO_VERSION_LEN - 1); - bp->fp[0].mac_obj.get_n_elements(bp, &bp->fp[0].mac_obj, - DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED, - ether_stat->mac_local); + bp->sp_objs[0].mac_obj.get_n_elements(bp, &bp->sp_objs[0].mac_obj, + DRV_INFO_ETH_STAT_NUM_MACS_REQUIRED, + ether_stat->mac_local); ether_stat->mtu_size = bp->dev->mtu; @@ -3063,11 +3068,11 @@ static void bnx2x_drv_info_fcoe_stat(struct bnx2x *bp) /* insert FCoE stats from ramrod response */ if (!NO_FCOE(bp)) { struct tstorm_per_queue_stats *fcoe_q_tstorm_stats = - &bp->fw_stats_data->queue_stats[FCOE_IDX]. + &bp->fw_stats_data->queue_stats[FCOE_IDX(bp)]. tstorm_queue_statistics; struct xstorm_per_queue_stats *fcoe_q_xstorm_stats = - &bp->fw_stats_data->queue_stats[FCOE_IDX]. + &bp->fw_stats_data->queue_stats[FCOE_IDX(bp)]. xstorm_queue_statistics; struct fcoe_statistics_params *fw_fcoe_stat = @@ -4623,11 +4628,11 @@ static void bnx2x_handle_classification_eqe(struct bnx2x *bp, case BNX2X_FILTER_MAC_PENDING: DP(BNX2X_MSG_SP, "Got SETUP_MAC completions\n"); #ifdef BCM_CNIC - if (cid == BNX2X_ISCSI_ETH_CID) + if (cid == BNX2X_ISCSI_ETH_CID(bp)) vlan_mac_obj = &bp->iscsi_l2_mac_obj; else #endif - vlan_mac_obj = &bp->fp[cid].mac_obj; + vlan_mac_obj = &bp->sp_objs[cid].mac_obj; break; case BNX2X_FILTER_MCAST_PENDING: @@ -4725,7 +4730,7 @@ static void bnx2x_after_function_update(struct bnx2x *bp) for_each_eth_queue(bp, q) { /* Set the appropriate Queue object */ fp = &bp->fp[q]; - queue_params.q_obj = &fp->q_obj; + queue_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; /* send the ramrod */ rc = bnx2x_queue_state_change(bp, &queue_params); @@ -4736,8 +4741,8 @@ static void bnx2x_after_function_update(struct bnx2x *bp) #ifdef BCM_CNIC if (!NO_FCOE(bp)) { - fp = &bp->fp[FCOE_IDX]; - queue_params.q_obj = &fp->q_obj; + fp = &bp->fp[FCOE_IDX(bp)]; + queue_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; /* clear pending completion bit */ __clear_bit(RAMROD_COMP_WAIT, &queue_params.ramrod_flags); @@ -4769,11 +4774,11 @@ static struct bnx2x_queue_sp_obj *bnx2x_cid_to_q_obj( { DP(BNX2X_MSG_SP, "retrieving fp from cid %d\n", cid); #ifdef BCM_CNIC - if (cid == BNX2X_FCOE_ETH_CID) - return &bnx2x_fcoe(bp, q_obj); + if (cid == BNX2X_FCOE_ETH_CID(bp)) + return &bnx2x_fcoe_sp_obj(bp, q_obj); else #endif - return &bnx2x_fp(bp, CID_TO_FP(cid), q_obj); + return &bp->sp_objs[CID_TO_FP(cid, bp)].q_obj; } static void bnx2x_eq_int(struct bnx2x *bp) @@ -5655,15 +5660,15 @@ static void bnx2x_init_eth_fp(struct bnx2x *bp, int fp_idx) /* init tx data */ for_each_cos_in_tx_queue(fp, cos) { - bnx2x_init_txdata(bp, &fp->txdata[cos], - CID_COS_TO_TX_ONLY_CID(fp->cid, cos), - FP_COS_TO_TXQ(fp, cos), - BNX2X_TX_SB_INDEX_BASE + cos); - cids[cos] = fp->txdata[cos].cid; + bnx2x_init_txdata(bp, fp->txdata_ptr[cos], + CID_COS_TO_TX_ONLY_CID(fp->cid, cos, bp), + FP_COS_TO_TXQ(fp, cos, bp), + BNX2X_TX_SB_INDEX_BASE + cos, fp); + cids[cos] = fp->txdata_ptr[cos]->cid; } - bnx2x_init_queue_obj(bp, &fp->q_obj, fp->cl_id, cids, fp->max_cos, - BP_FUNC(bp), bnx2x_sp(bp, q_rdata), + bnx2x_init_queue_obj(bp, &bnx2x_sp_obj(bp, fp).q_obj, fp->cl_id, cids, + fp->max_cos, BP_FUNC(bp), bnx2x_sp(bp, q_rdata), bnx2x_sp_mapping(bp, q_rdata), q_type); /** @@ -5714,7 +5719,7 @@ static void bnx2x_init_tx_rings(struct bnx2x *bp) for_each_tx_queue(bp, i) for_each_cos_in_tx_queue(&bp->fp[i], cos) - bnx2x_init_tx_ring_one(&bp->fp[i].txdata[cos]); + bnx2x_init_tx_ring_one(bp->fp[i].txdata_ptr[cos]); } void bnx2x_nic_init(struct bnx2x *bp, u32 load_code) @@ -7063,12 +7068,10 @@ static int bnx2x_init_hw_func(struct bnx2x *bp) cdu_ilt_start = ilt->clients[ILT_CLIENT_CDU].start; for (i = 0; i < L2_ILT_LINES(bp); i++) { - ilt->lines[cdu_ilt_start + i].page = - bp->context.vcxt + (ILT_PAGE_CIDS * i); + ilt->lines[cdu_ilt_start + i].page = bp->context[i].vcxt; ilt->lines[cdu_ilt_start + i].page_mapping = - bp->context.cxt_mapping + (CDU_ILT_PAGE_SZ * i); - /* cdu ilt pages are allocated manually so there's no need to - set the size */ + bp->context[i].cxt_mapping; + ilt->lines[cdu_ilt_start + i].size = bp->context[i].size; } bnx2x_ilt_init_op(bp, INITOP_SET); @@ -7335,6 +7338,8 @@ static int bnx2x_init_hw_func(struct bnx2x *bp) void bnx2x_free_mem(struct bnx2x *bp) { + int i; + /* fastpath */ bnx2x_free_fp_mem(bp); /* end of fastpath */ @@ -7348,9 +7353,9 @@ void bnx2x_free_mem(struct bnx2x *bp) BNX2X_PCI_FREE(bp->slowpath, bp->slowpath_mapping, sizeof(struct bnx2x_slowpath)); - BNX2X_PCI_FREE(bp->context.vcxt, bp->context.cxt_mapping, - bp->context.size); - + for (i = 0; i < L2_ILT_LINES(bp); i++) + BNX2X_PCI_FREE(bp->context[i].vcxt, bp->context[i].cxt_mapping, + bp->context[i].size); bnx2x_ilt_mem_op(bp, ILT_MEMOP_FREE); BNX2X_FREE(bp->ilt->lines); @@ -7436,6 +7441,8 @@ alloc_mem_err: int bnx2x_alloc_mem(struct bnx2x *bp) { + int i, allocated, context_size; + #ifdef BCM_CNIC if (!CHIP_IS_E1x(bp)) /* size = the status block + ramrod buffers */ @@ -7465,11 +7472,29 @@ int bnx2x_alloc_mem(struct bnx2x *bp) if (bnx2x_alloc_fw_stats_mem(bp)) goto alloc_mem_err; - bp->context.size = sizeof(union cdu_context) * BNX2X_L2_CID_COUNT(bp); - - BNX2X_PCI_ALLOC(bp->context.vcxt, &bp->context.cxt_mapping, - bp->context.size); + /* Allocate memory for CDU context: + * This memory is allocated separately and not in the generic ILT + * functions because CDU differs in few aspects: + * 1. There are multiple entities allocating memory for context - + * 'regular' driver, CNIC and SRIOV driver. Each separately controls + * its own ILT lines. + * 2. Since CDU page-size is not a single 4KB page (which is the case + * for the other ILT clients), to be efficient we want to support + * allocation of sub-page-size in the last entry. + * 3. Context pointers are used by the driver to pass to FW / update + * the context (for the other ILT clients the pointers are used just to + * free the memory during unload). + */ + context_size = sizeof(union cdu_context) * BNX2X_L2_CID_COUNT(bp); + for (i = 0, allocated = 0; allocated < context_size; i++) { + bp->context[i].size = min(CDU_ILT_PAGE_SZ, + (context_size - allocated)); + BNX2X_PCI_ALLOC(bp->context[i].vcxt, + &bp->context[i].cxt_mapping, + bp->context[i].size); + allocated += bp->context[i].size; + } BNX2X_ALLOC(bp->ilt->lines, sizeof(struct ilt_line) * ILT_MAX_LINES); if (bnx2x_ilt_mem_op(bp, ILT_MEMOP_ALLOC)) @@ -7571,8 +7596,8 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set) __set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Eth MAC is set on RSS leading client (fp[0]) */ - return bnx2x_set_mac_one(bp, bp->dev->dev_addr, &bp->fp->mac_obj, set, - BNX2X_ETH_MAC, &ramrod_flags); + return bnx2x_set_mac_one(bp, bp->dev->dev_addr, &bp->sp_objs->mac_obj, + set, BNX2X_ETH_MAC, &ramrod_flags); } int bnx2x_setup_leading(struct bnx2x *bp) @@ -7587,7 +7612,7 @@ int bnx2x_setup_leading(struct bnx2x *bp) * * In case of MSI-X it will also try to enable MSI-X. */ -static void __devinit bnx2x_set_int_mode(struct bnx2x *bp) +void bnx2x_set_int_mode(struct bnx2x *bp) { switch (int_mode) { case INT_MODE_MSI: @@ -7598,11 +7623,6 @@ static void __devinit bnx2x_set_int_mode(struct bnx2x *bp) BNX2X_DEV_INFO("set number of queues to 1\n"); break; default: - /* Set number of queues for MSI-X mode */ - bnx2x_set_num_queues(bp); - - BNX2X_DEV_INFO("set number of queues to %d\n", bp->num_queues); - /* if we can't use MSI-X we only need one fp, * so try to enable MSI-X with the requested number of fp's * and fallback to MSI or legacy INTx with one fp @@ -7743,6 +7763,8 @@ static void bnx2x_pf_q_prep_init(struct bnx2x *bp, { u8 cos; + int cxt_index, cxt_offset; + /* FCoE Queue uses Default SB, thus has no HC capabilities */ if (!IS_FCOE_FP(fp)) { __set_bit(BNX2X_Q_FLG_HC, &init_params->rx.flags); @@ -7779,9 +7801,13 @@ static void bnx2x_pf_q_prep_init(struct bnx2x *bp, fp->index, init_params->max_cos); /* set the context pointers queue object */ - for (cos = FIRST_TX_COS_INDEX; cos < init_params->max_cos; cos++) + for (cos = FIRST_TX_COS_INDEX; cos < init_params->max_cos; cos++) { + cxt_index = fp->txdata_ptr[cos]->cid / ILT_PAGE_CIDS; + cxt_offset = fp->txdata_ptr[cos]->cid - (cxt_index * + ILT_PAGE_CIDS); init_params->cxts[cos] = - &bp->context.vcxt[fp->txdata[cos].cid].eth; + &bp->context[cxt_index].vcxt[cxt_offset].eth; + } } int bnx2x_setup_tx_only(struct bnx2x *bp, struct bnx2x_fastpath *fp, @@ -7846,7 +7872,7 @@ int bnx2x_setup_queue(struct bnx2x *bp, struct bnx2x_fastpath *fp, bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID, 0, IGU_INT_ENABLE, 0); - q_params.q_obj = &fp->q_obj; + q_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; /* We want to wait for completion in this context */ __set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); @@ -7919,7 +7945,7 @@ static int bnx2x_stop_queue(struct bnx2x *bp, int index) DP(NETIF_MSG_IFDOWN, "stopping queue %d cid %d\n", index, fp->cid); - q_params.q_obj = &fp->q_obj; + q_params.q_obj = &bnx2x_sp_obj(bp, fp).q_obj; /* We want to wait for completion in this context */ __set_bit(RAMROD_COMP_WAIT, &q_params.ramrod_flags); @@ -7930,7 +7956,7 @@ static int bnx2x_stop_queue(struct bnx2x *bp, int index) tx_index++){ /* ascertain this is a normal queue*/ - txdata = &fp->txdata[tx_index]; + txdata = fp->txdata_ptr[tx_index]; DP(NETIF_MSG_IFDOWN, "stopping tx-only queue %d\n", txdata->txq_index); @@ -8297,7 +8323,7 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode) struct bnx2x_fastpath *fp = &bp->fp[i]; for_each_cos_in_tx_queue(fp, cos) - rc = bnx2x_clean_tx_queue(bp, &fp->txdata[cos]); + rc = bnx2x_clean_tx_queue(bp, fp->txdata_ptr[cos]); #ifdef BNX2X_STOP_ON_ERROR if (rc) return; @@ -8308,12 +8334,13 @@ void bnx2x_chip_cleanup(struct bnx2x *bp, int unload_mode) usleep_range(1000, 1000); /* Clean all ETH MACs */ - rc = bnx2x_del_all_macs(bp, &bp->fp[0].mac_obj, BNX2X_ETH_MAC, false); + rc = bnx2x_del_all_macs(bp, &bp->sp_objs[0].mac_obj, BNX2X_ETH_MAC, + false); if (rc < 0) BNX2X_ERR("Failed to delete all ETH macs: %d\n", rc); /* Clean up UC list */ - rc = bnx2x_del_all_macs(bp, &bp->fp[0].mac_obj, BNX2X_UC_LIST_MAC, + rc = bnx2x_del_all_macs(bp, &bp->sp_objs[0].mac_obj, BNX2X_UC_LIST_MAC, true); if (rc < 0) BNX2X_ERR("Failed to schedule DEL commands for UC MACs list: %d\n", @@ -9705,6 +9732,8 @@ static void __devinit bnx2x_get_common_hwinfo(struct bnx2x *bp) bp->flags |= (val >= REQ_BC_VER_4_PFC_STATS_SUPPORTED) ? BC_SUPPORTS_PFC_STATS : 0; + bp->flags |= (val >= REQ_BC_VER_4_DCBX_ADMIN_MSG_NON_PMF) ? + BC_SUPPORTS_DCBX_MSG_NON_PMF : 0; boot_mode = SHMEM_RD(bp, dev_info.port_feature_config[BP_PORT(bp)].mba_config) & PORT_FEATURE_MBA_BOOT_AGENT_TYPE_MASK; @@ -11018,7 +11047,7 @@ static int bnx2x_set_uc_list(struct bnx2x *bp) int rc; struct net_device *dev = bp->dev; struct netdev_hw_addr *ha; - struct bnx2x_vlan_mac_obj *mac_obj = &bp->fp->mac_obj; + struct bnx2x_vlan_mac_obj *mac_obj = &bp->sp_objs->mac_obj; unsigned long ramrod_flags = 0; /* First schedule a cleanup up of old configuration */ @@ -11693,7 +11722,7 @@ void bnx2x__init_func_obj(struct bnx2x *bp) /* must be called after sriov-enable */ static int bnx2x_set_qm_cid_count(struct bnx2x *bp) { - int cid_count = BNX2X_L2_CID_COUNT(bp); + int cid_count = BNX2X_L2_MAX_CID(bp); #ifdef BCM_CNIC cid_count += CNIC_CID_MAX; @@ -11738,7 +11767,7 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, struct bnx2x *bp; int pcie_width, pcie_speed; int rc, max_non_def_sbs; - int rx_count, tx_count, rss_count; + int rx_count, tx_count, rss_count, doorbell_size; /* * An estimated maximum supported CoS number according to the chip * version. @@ -11781,13 +11810,6 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, max_non_def_sbs = bnx2x_get_num_non_def_sbs(pdev); - /* !!! FIXME !!! - * Do not allow the maximum SB count to grow above 16 - * since Special CIDs starts from 16*BNX2X_MULTI_TX_COS=48. - * We will use the FP_SB_MAX_E1x macro for this matter. - */ - max_non_def_sbs = min_t(int, FP_SB_MAX_E1x, max_non_def_sbs); - WARN_ON(!max_non_def_sbs); /* Maximum number of RSS queues: one IGU SB goes to CNIC */ @@ -11798,9 +11820,9 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, /* * Maximum number of netdev Tx queues: - * Maximum TSS queues * Maximum supported number of CoS + FCoE L2 + * Maximum TSS queues * Maximum supported number of CoS + FCoE L2 */ - tx_count = MAX_TXQS_PER_COS * max_cos_est + FCOE_PRESENT; + tx_count = rss_count * max_cos_est + FCOE_PRESENT; /* dev zeroed in init_etherdev */ dev = alloc_etherdev_mqs(sizeof(*bp), tx_count, rx_count); @@ -11809,9 +11831,6 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, bp = netdev_priv(dev); - BNX2X_DEV_INFO("Allocated netdev with %d tx and %d rx queues\n", - tx_count, rx_count); - bp->igu_sb_cnt = max_non_def_sbs; bp->msg_enable = debug; pci_set_drvdata(pdev, dev); @@ -11824,6 +11843,9 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, BNX2X_DEV_INFO("max_non_def_sbs %d\n", max_non_def_sbs); + BNX2X_DEV_INFO("Allocated netdev with %d tx and %d rx queues\n", + tx_count, rx_count); + rc = bnx2x_init_bp(bp); if (rc) goto init_one_exit; @@ -11832,9 +11854,15 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, * Map doorbels here as we need the real value of bp->max_cos which * is initialized in bnx2x_init_bp(). */ + doorbell_size = BNX2X_L2_MAX_CID(bp) * (1 << BNX2X_DB_SHIFT); + if (doorbell_size > pci_resource_len(pdev, 2)) { + dev_err(&bp->pdev->dev, + "Cannot map doorbells, bar size too small, aborting\n"); + rc = -ENOMEM; + goto init_one_exit; + } bp->doorbells = ioremap_nocache(pci_resource_start(pdev, 2), - min_t(u64, BNX2X_DB_SIZE(bp), - pci_resource_len(pdev, 2))); + doorbell_size); if (!bp->doorbells) { dev_err(&bp->pdev->dev, "Cannot map doorbell space, aborting\n"); @@ -11852,8 +11880,12 @@ static int __devinit bnx2x_init_one(struct pci_dev *pdev, #endif + + /* Set bp->num_queues for MSI-X mode*/ + bnx2x_set_num_queues(bp); + /* Configure interrupt mode: try to enable MSI-X/MSI if - * needed, set bp->num_queues appropriately. + * needed. */ bnx2x_set_int_mode(bp); @@ -12197,6 +12229,7 @@ static int bnx2x_set_iscsi_eth_mac_addr(struct bnx2x *bp) static void bnx2x_cnic_sp_post(struct bnx2x *bp, int count) { struct eth_spe *spe; + int cxt_index, cxt_offset; #ifdef BNX2X_STOP_ON_ERROR if (unlikely(bp->panic)) @@ -12219,10 +12252,16 @@ static void bnx2x_cnic_sp_post(struct bnx2x *bp, int count) * ramrod */ if (type == ETH_CONNECTION_TYPE) { - if (cmd == RAMROD_CMD_ID_ETH_CLIENT_SETUP) - bnx2x_set_ctx_validation(bp, &bp->context. - vcxt[BNX2X_ISCSI_ETH_CID].eth, - BNX2X_ISCSI_ETH_CID); + if (cmd == RAMROD_CMD_ID_ETH_CLIENT_SETUP) { + cxt_index = BNX2X_ISCSI_ETH_CID(bp) / + ILT_PAGE_CIDS; + cxt_offset = BNX2X_ISCSI_ETH_CID(bp) - + (cxt_index * ILT_PAGE_CIDS); + bnx2x_set_ctx_validation(bp, + &bp->context[cxt_index]. + vcxt[cxt_offset].eth, + BNX2X_ISCSI_ETH_CID(bp)); + } } /* @@ -12575,6 +12614,21 @@ void bnx2x_setup_cnic_irq_info(struct bnx2x *bp) cp->num_irq = 2; } +void bnx2x_setup_cnic_info(struct bnx2x *bp) +{ + struct cnic_eth_dev *cp = &bp->cnic_eth_dev; + + + cp->ctx_tbl_offset = FUNC_ILT_BASE(BP_FUNC(bp)) + + bnx2x_cid_ilt_lines(bp); + cp->starting_cid = bnx2x_cid_ilt_lines(bp) * ILT_PAGE_CIDS; + cp->fcoe_init_cid = BNX2X_FCOE_ETH_CID(bp); + cp->iscsi_l2_cid = BNX2X_ISCSI_ETH_CID(bp); + + if (NO_ISCSI_OOO(bp)) + cp->drv_state |= CNIC_DRV_STATE_NO_ISCSI_OOO; +} + static int bnx2x_register_cnic(struct net_device *dev, struct cnic_ops *ops, void *data) { @@ -12653,10 +12707,10 @@ struct cnic_eth_dev *bnx2x_cnic_probe(struct net_device *dev) cp->drv_ctl = bnx2x_drv_ctl; cp->drv_register_cnic = bnx2x_register_cnic; cp->drv_unregister_cnic = bnx2x_unregister_cnic; - cp->fcoe_init_cid = BNX2X_FCOE_ETH_CID; + cp->fcoe_init_cid = BNX2X_FCOE_ETH_CID(bp); cp->iscsi_l2_client_id = bnx2x_cnic_eth_cl_id(bp, BNX2X_ISCSI_ETH_CL_ID_IDX); - cp->iscsi_l2_cid = BNX2X_ISCSI_ETH_CID; + cp->iscsi_l2_cid = BNX2X_ISCSI_ETH_CID(bp); if (NO_ISCSI_OOO(bp)) cp->drv_state |= CNIC_DRV_STATE_NO_ISCSI_OOO; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h index bfef98f..a78e356 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h @@ -5913,6 +5913,7 @@ #define MISC_REGISTERS_SPIO_OUTPUT_LOW 0 #define MISC_REGISTERS_SPIO_SET_POS 8 #define HW_LOCK_MAX_RESOURCE_VALUE 31 +#define HW_LOCK_RESOURCE_DCBX_ADMIN_MIB 13 #define HW_LOCK_RESOURCE_DRV_FLAGS 10 #define HW_LOCK_RESOURCE_GPIO 1 #define HW_LOCK_RESOURCE_MDIO 0 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index 6c14b4a..734fd87 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -4107,6 +4107,10 @@ static int bnx2x_setup_rss(struct bnx2x *bp, data->capabilities |= ETH_RSS_UPDATE_RAMROD_DATA_IPV4_TCP_CAPABILITY; + if (test_bit(BNX2X_RSS_IPV4_UDP, &p->rss_flags)) + data->capabilities |= + ETH_RSS_UPDATE_RAMROD_DATA_IPV4_UDP_CAPABILITY; + if (test_bit(BNX2X_RSS_IPV6, &p->rss_flags)) data->capabilities |= ETH_RSS_UPDATE_RAMROD_DATA_IPV6_CAPABILITY; @@ -4115,6 +4119,10 @@ static int bnx2x_setup_rss(struct bnx2x *bp, data->capabilities |= ETH_RSS_UPDATE_RAMROD_DATA_IPV6_TCP_CAPABILITY; + if (test_bit(BNX2X_RSS_IPV6_UDP, &p->rss_flags)) + data->capabilities |= + ETH_RSS_UPDATE_RAMROD_DATA_IPV6_UDP_CAPABILITY; + /* Hashing mask */ data->rss_result_mask = p->rss_result_mask; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index efd80bd..76818ef 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -694,8 +694,10 @@ enum { BNX2X_RSS_IPV4, BNX2X_RSS_IPV4_TCP, + BNX2X_RSS_IPV4_UDP, BNX2X_RSS_IPV6, BNX2X_RSS_IPV6_TCP, + BNX2X_RSS_IPV6_UDP, }; struct bnx2x_config_rss_params { @@ -729,6 +731,10 @@ struct bnx2x_rss_config_obj { /* Last configured indirection table */ u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE]; + /* flags for enabling 4-tupple hash on UDP */ + u8 udp_rss_v4; + u8 udp_rss_v6; + int (*config_rss)(struct bnx2x *bp, struct bnx2x_config_rss_params *p); }; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c index 0e8bdcb..514a528 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c @@ -859,17 +859,22 @@ static int bnx2x_storm_stats_update(struct bnx2x *bp) struct tstorm_per_queue_stats *tclient = &bp->fw_stats_data->queue_stats[i]. tstorm_queue_statistics; - struct tstorm_per_queue_stats *old_tclient = &fp->old_tclient; + struct tstorm_per_queue_stats *old_tclient = + &bnx2x_fp_stats(bp, fp)->old_tclient; struct ustorm_per_queue_stats *uclient = &bp->fw_stats_data->queue_stats[i]. ustorm_queue_statistics; - struct ustorm_per_queue_stats *old_uclient = &fp->old_uclient; + struct ustorm_per_queue_stats *old_uclient = + &bnx2x_fp_stats(bp, fp)->old_uclient; struct xstorm_per_queue_stats *xclient = &bp->fw_stats_data->queue_stats[i]. xstorm_queue_statistics; - struct xstorm_per_queue_stats *old_xclient = &fp->old_xclient; - struct bnx2x_eth_q_stats *qstats = &fp->eth_q_stats; - struct bnx2x_eth_q_stats_old *qstats_old = &fp->eth_q_stats_old; + struct xstorm_per_queue_stats *old_xclient = + &bnx2x_fp_stats(bp, fp)->old_xclient; + struct bnx2x_eth_q_stats *qstats = + &bnx2x_fp_stats(bp, fp)->eth_q_stats; + struct bnx2x_eth_q_stats_old *qstats_old = + &bnx2x_fp_stats(bp, fp)->eth_q_stats_old; u32 diff; @@ -1052,8 +1057,11 @@ static void bnx2x_net_stats_update(struct bnx2x *bp) nstats->tx_bytes = bnx2x_hilo(&estats->total_bytes_transmitted_hi); tmp = estats->mac_discard; - for_each_rx_queue(bp, i) - tmp += le32_to_cpu(bp->fp[i].old_tclient.checksum_discard); + for_each_rx_queue(bp, i) { + struct tstorm_per_queue_stats *old_tclient = + &bp->fp_stats[i].old_tclient; + tmp += le32_to_cpu(old_tclient->checksum_discard); + } nstats->rx_dropped = tmp + bp->net_stats_old.rx_dropped; nstats->tx_dropped = 0; @@ -1103,9 +1111,9 @@ static void bnx2x_drv_stats_update(struct bnx2x *bp) int i; for_each_queue(bp, i) { - struct bnx2x_eth_q_stats *qstats = &bp->fp[i].eth_q_stats; + struct bnx2x_eth_q_stats *qstats = &bp->fp_stats[i].eth_q_stats; struct bnx2x_eth_q_stats_old *qstats_old = - &bp->fp[i].eth_q_stats_old; + &bp->fp_stats[i].eth_q_stats_old; UPDATE_ESTAT_QSTAT(driver_xoff); UPDATE_ESTAT_QSTAT(rx_err_discard_pkt); @@ -1432,7 +1440,7 @@ static void bnx2x_prep_fw_stats_req(struct bnx2x *bp) query[first_queue_query_index + i]; cur_query_entry->kind = STATS_TYPE_QUEUE; - cur_query_entry->index = bnx2x_stats_id(&bp->fp[FCOE_IDX]); + cur_query_entry->index = bnx2x_stats_id(&bp->fp[FCOE_IDX(bp)]); cur_query_entry->funcID = cpu_to_le16(BP_FUNC(bp)); cur_query_entry->address.hi = cpu_to_le32(U64_HI(cur_data_offset)); @@ -1483,15 +1491,19 @@ void bnx2x_stats_init(struct bnx2x *bp) /* function stats */ for_each_queue(bp, i) { - struct bnx2x_fastpath *fp = &bp->fp[i]; - - memset(&fp->old_tclient, 0, sizeof(fp->old_tclient)); - memset(&fp->old_uclient, 0, sizeof(fp->old_uclient)); - memset(&fp->old_xclient, 0, sizeof(fp->old_xclient)); + struct bnx2x_fp_stats *fp_stats = &bp->fp_stats[i]; + + memset(&fp_stats->old_tclient, 0, + sizeof(fp_stats->old_tclient)); + memset(&fp_stats->old_uclient, 0, + sizeof(fp_stats->old_uclient)); + memset(&fp_stats->old_xclient, 0, + sizeof(fp_stats->old_xclient)); if (bp->stats_init) { - memset(&fp->eth_q_stats, 0, sizeof(fp->eth_q_stats)); - memset(&fp->eth_q_stats_old, 0, - sizeof(fp->eth_q_stats_old)); + memset(&fp_stats->eth_q_stats, 0, + sizeof(fp_stats->eth_q_stats)); + memset(&fp_stats->eth_q_stats_old, 0, + sizeof(fp_stats->eth_q_stats_old)); } } @@ -1533,8 +1545,10 @@ void bnx2x_save_statistics(struct bnx2x *bp) /* save queue statistics */ for_each_eth_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; - struct bnx2x_eth_q_stats *qstats = &fp->eth_q_stats; - struct bnx2x_eth_q_stats_old *qstats_old = &fp->eth_q_stats_old; + struct bnx2x_eth_q_stats *qstats = + &bnx2x_fp_stats(bp, fp)->eth_q_stats; + struct bnx2x_eth_q_stats_old *qstats_old = + &bnx2x_fp_stats(bp, fp)->eth_q_stats_old; UPDATE_QSTAT_OLD(total_unicast_bytes_received_hi); UPDATE_QSTAT_OLD(total_unicast_bytes_received_lo); @@ -1573,7 +1587,7 @@ void bnx2x_afex_collect_stats(struct bnx2x *bp, void *void_afex_stats, struct afex_stats *afex_stats = (struct afex_stats *)void_afex_stats; struct bnx2x_eth_stats *estats = &bp->eth_stats; struct per_queue_stats *fcoe_q_stats = - &bp->fw_stats_data->queue_stats[FCOE_IDX]; + &bp->fw_stats_data->queue_stats[FCOE_IDX(bp)]; struct tstorm_per_queue_stats *fcoe_q_tstorm_stats = &fcoe_q_stats->tstorm_queue_statistics; @@ -1590,8 +1604,7 @@ void bnx2x_afex_collect_stats(struct bnx2x *bp, void *void_afex_stats, memset(afex_stats, 0, sizeof(struct afex_stats)); for_each_eth_queue(bp, i) { - struct bnx2x_fastpath *fp = &bp->fp[i]; - struct bnx2x_eth_q_stats *qstats = &fp->eth_q_stats; + struct bnx2x_eth_q_stats *qstats = &bp->fp_stats[i].eth_q_stats; ADD_64(afex_stats->rx_unicast_bytes_hi, qstats->total_unicast_bytes_received_hi, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 31d37a2..ba86b3f 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6238,7 +6238,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev, } if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) - e_info("PHY reset is blocked due to SOL/IDER session.\n"); + dev_info(&pdev->dev, + "PHY reset is blocked due to SOL/IDER session.\n"); /* Set initial default active device features */ netdev->features = (NETIF_F_SG | @@ -6288,7 +6289,7 @@ static int __devinit e1000_probe(struct pci_dev *pdev, if (e1000_validate_nvm_checksum(&adapter->hw) >= 0) break; if (i == 2) { - e_err("The NVM Checksum Is Not Valid\n"); + dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n"); err = -EIO; goto err_eeprom; } @@ -6298,13 +6299,15 @@ static int __devinit e1000_probe(struct pci_dev *pdev, /* copy the MAC address */ if (e1000e_read_mac_addr(&adapter->hw)) - e_err("NVM Read Error while reading MAC address\n"); + dev_err(&pdev->dev, + "NVM Read Error while reading MAC address\n"); memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len); if (!is_valid_ether_addr(netdev->perm_addr)) { - e_err("Invalid MAC Address: %pM\n", netdev->perm_addr); + dev_err(&pdev->dev, "Invalid MAC Address: %pM\n", + netdev->perm_addr); err = -EIO; goto err_eeprom; } diff --git a/drivers/net/ethernet/intel/e1000e/param.c b/drivers/net/ethernet/intel/e1000e/param.c index 55cc1565b..dfbfa7f 100644 --- a/drivers/net/ethernet/intel/e1000e/param.c +++ b/drivers/net/ethernet/intel/e1000e/param.c @@ -199,16 +199,19 @@ static int __devinit e1000_validate_option(unsigned int *value, case enable_option: switch (*value) { case OPTION_ENABLED: - e_info("%s Enabled\n", opt->name); + dev_info(&adapter->pdev->dev, "%s Enabled\n", + opt->name); return 0; case OPTION_DISABLED: - e_info("%s Disabled\n", opt->name); + dev_info(&adapter->pdev->dev, "%s Disabled\n", + opt->name); return 0; } break; case range_option: if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { - e_info("%s set to %i\n", opt->name, *value); + dev_info(&adapter->pdev->dev, "%s set to %i\n", + opt->name, *value); return 0; } break; @@ -220,7 +223,8 @@ static int __devinit e1000_validate_option(unsigned int *value, ent = &opt->arg.l.p[i]; if (*value == ent->i) { if (ent->str[0] != '\0') - e_info("%s\n", ent->str); + dev_info(&adapter->pdev->dev, "%s\n", + ent->str); return 0; } } @@ -230,8 +234,8 @@ static int __devinit e1000_validate_option(unsigned int *value, BUG(); } - e_info("Invalid %s value specified (%i) %s\n", opt->name, *value, - opt->err); + dev_info(&adapter->pdev->dev, "Invalid %s value specified (%i) %s\n", + opt->name, *value, opt->err); *value = opt->def; return -1; } @@ -251,8 +255,10 @@ void __devinit e1000e_check_options(struct e1000_adapter *adapter) int bd = adapter->bd_number; if (bd >= E1000_MAX_NIC) { - e_notice("Warning: no configuration for board #%i\n", bd); - e_notice("Using defaults for all values\n"); + dev_notice(&adapter->pdev->dev, + "Warning: no configuration for board #%i\n", bd); + dev_notice(&adapter->pdev->dev, + "Using defaults for all values\n"); } { /* Transmit Interrupt Delay */ @@ -366,27 +372,32 @@ void __devinit e1000e_check_options(struct e1000_adapter *adapter) * default values */ if (adapter->itr > 4) - e_info("%s set to default %d\n", opt.name, - adapter->itr); + dev_info(&adapter->pdev->dev, + "%s set to default %d\n", opt.name, + adapter->itr); } adapter->itr_setting = adapter->itr; switch (adapter->itr) { case 0: - e_info("%s turned off\n", opt.name); + dev_info(&adapter->pdev->dev, "%s turned off\n", + opt.name); break; case 1: - e_info("%s set to dynamic mode\n", opt.name); + dev_info(&adapter->pdev->dev, + "%s set to dynamic mode\n", opt.name); adapter->itr = 20000; break; case 3: - e_info("%s set to dynamic conservative mode\n", - opt.name); + dev_info(&adapter->pdev->dev, + "%s set to dynamic conservative mode\n", + opt.name); adapter->itr = 20000; break; case 4: - e_info("%s set to simplified (2000-8000 ints) mode\n", - opt.name); + dev_info(&adapter->pdev->dev, + "%s set to simplified (2000-8000 ints) mode\n", + opt.name); break; default: /* diff --git a/drivers/net/ethernet/intel/ixgbe/Makefile b/drivers/net/ethernet/intel/ixgbe/Makefile index 0bdf06b..5fd5d04 100644 --- a/drivers/net/ethernet/intel/ixgbe/Makefile +++ b/drivers/net/ethernet/intel/ixgbe/Makefile @@ -34,11 +34,11 @@ obj-$(CONFIG_IXGBE) += ixgbe.o ixgbe-objs := ixgbe_main.o ixgbe_common.o ixgbe_ethtool.o \ ixgbe_82599.o ixgbe_82598.o ixgbe_phy.o ixgbe_sriov.o \ - ixgbe_mbx.o ixgbe_x540.o ixgbe_sysfs.o ixgbe_lib.o + ixgbe_mbx.o ixgbe_x540.o ixgbe_lib.o ixgbe-$(CONFIG_IXGBE_DCB) += ixgbe_dcb.o ixgbe_dcb_82598.o \ ixgbe_dcb_82599.o ixgbe_dcb_nl.o ixgbe-$(CONFIG_IXGBE_PTP) += ixgbe_ptp.o - +ixgbe-$(CONFIG_IXGBE_HWMON) += ixgbe_sysfs.o ixgbe-$(CONFIG_FCOE:m=y) += ixgbe_fcoe.o diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 3ef3c52..41f9f6e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -561,6 +561,7 @@ struct ixgbe_adapter { spinlock_t tmreg_lock; struct cyclecounter cc; struct timecounter tc; + int rx_hwtstamp_filter; u32 base_incval; u32 cycle_speed; #endif /* CONFIG_IXGBE_PTP */ @@ -718,6 +719,7 @@ extern void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter); extern void ixgbe_ptp_tx_hwtstamp(struct ixgbe_q_vector *q_vector, struct sk_buff *skb); extern void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, + union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb); extern int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, struct ifreq *ifr, int cmd); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 17ad6a3..b0ddfd4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -790,12 +790,10 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector, total_packets += tx_buffer->gso_segs; #ifdef CONFIG_IXGBE_PTP - if (unlikely(tx_buffer->tx_flags & - IXGBE_TX_FLAGS_TSTAMP)) - ixgbe_ptp_tx_hwtstamp(q_vector, - tx_buffer->skb); - + if (unlikely(tx_buffer->tx_flags & IXGBE_TX_FLAGS_TSTAMP)) + ixgbe_ptp_tx_hwtstamp(q_vector, tx_buffer->skb); #endif + /* free the skb */ dev_kfree_skb_any(tx_buffer->skb); @@ -1399,8 +1397,7 @@ static void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, ixgbe_rx_checksum(rx_ring, rx_desc, skb); #ifdef CONFIG_IXGBE_PTP - if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS)) - ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector, skb); + ixgbe_ptp_rx_hwtstamp(rx_ring->q_vector, rx_desc, skb); #endif if ((dev->features & NETIF_F_HW_VLAN_RX) && diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index ddc6a4d..cb7d1b2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -26,6 +26,7 @@ *******************************************************************************/ #include "ixgbe.h" #include <linux/export.h> +#include <linux/ptp_classify.h> /* * The 82599 and the X540 do not have true 64bit nanosecond scale @@ -100,6 +101,10 @@ #define NSECS_PER_SEC 1000000000ULL #endif +static struct sock_filter ptp_filter[] = { + PTP_FILTER +}; + /** * ixgbe_ptp_read - read raw cycle counter (to be used by time counter) * @cc - the cyclecounter structure @@ -307,13 +312,14 @@ void ixgbe_ptp_check_pps_event(struct ixgbe_adapter *adapter, u32 eicr) !(adapter->flags2 & IXGBE_FLAG2_PTP_PPS_ENABLED)) return; - switch (hw->mac.type) { - case ixgbe_mac_X540: - if (eicr & IXGBE_EICR_TIMESYNC) + if (unlikely(eicr & IXGBE_EICR_TIMESYNC)) { + switch (hw->mac.type) { + case ixgbe_mac_X540: ptp_clock_event(adapter->ptp_clock, &event); - break; - default: - break; + break; + default: + break; + } } } @@ -425,6 +431,68 @@ void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter) } /** + * ixgbe_ptp_match - determine if this skb matches a ptp packet + * @skb: pointer to the skb + * @hwtstamp: pointer to the hwtstamp_config to check + * + * Determine whether the skb should have been timestamped, assuming the + * hwtstamp was set via the hwtstamp ioctl. Returns non-zero when the packet + * should have a timestamp waiting in the registers, and 0 otherwise. + * + * V1 packets have to check the version type to determine whether they are + * correct. However, we can't directly access the data because it might be + * fragmented in the SKB, in paged memory. In order to work around this, we + * use skb_copy_bits which will properly copy the data whether it is in the + * paged memory fragments or not. We have to copy the IP header as well as the + * message type. + */ +static int ixgbe_ptp_match(struct sk_buff *skb, int rx_filter) +{ + struct iphdr iph; + u8 msgtype; + unsigned int type, offset; + + if (rx_filter == HWTSTAMP_FILTER_NONE) + return 0; + + type = sk_run_filter(skb, ptp_filter); + + if (likely(rx_filter == HWTSTAMP_FILTER_PTP_V2_EVENT)) + return type & PTP_CLASS_V2; + + /* For the remaining cases actually check message type */ + switch (type) { + case PTP_CLASS_V1_IPV4: + skb_copy_bits(skb, OFF_IHL, &iph, sizeof(iph)); + offset = ETH_HLEN + (iph.ihl << 2) + UDP_HLEN + OFF_PTP_CONTROL; + break; + case PTP_CLASS_V1_IPV6: + offset = OFF_PTP6 + OFF_PTP_CONTROL; + break; + default: + /* other cases invalid or handled above */ + return 0; + } + + /* Make sure our buffer is long enough */ + if (skb->len < offset) + return 0; + + skb_copy_bits(skb, offset, &msgtype, sizeof(msgtype)); + + switch (rx_filter) { + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + return (msgtype == IXGBE_RXMTRL_V1_SYNC_MSG); + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + return (msgtype == IXGBE_RXMTRL_V1_DELAY_REQ_MSG); + break; + default: + return 0; + } +} + +/** * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp * @q_vector: structure containing interrupt and ring information * @skb: particular skb to send timestamp with @@ -473,6 +541,7 @@ void ixgbe_ptp_tx_hwtstamp(struct ixgbe_q_vector *q_vector, /** * ixgbe_ptp_rx_hwtstamp - utility function which checks for RX time stamp * @q_vector: structure containing interrupt and ring information + * @rx_desc: the rx descriptor * @skb: particular skb to send timestamp with * * if the timestamp is valid, we convert it into the timecounter ns @@ -480,6 +549,7 @@ void ixgbe_ptp_tx_hwtstamp(struct ixgbe_q_vector *q_vector, * is passed up the network stack */ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, + union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb) { struct ixgbe_adapter *adapter; @@ -497,21 +567,33 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, hw = &adapter->hw; tsyncrxctl = IXGBE_READ_REG(hw, IXGBE_TSYNCRXCTL); + + /* Check if we have a valid timestamp and make sure the skb should + * have been timestamped */ + if (likely(!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID) || + !ixgbe_ptp_match(skb, adapter->rx_hwtstamp_filter))) + return; + + /* + * Always read the registers, in order to clear a possible fault + * because of stagnant RX timestamp values for a packet that never + * reached the queue. + */ regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_RXSTMPH) << 32; /* - * If this bit is set, then the RX registers contain the time stamp. No - * other packet will be time stamped until we read these registers, so - * read the registers to make them available again. Because only one - * packet can be time stamped at a time, we know that the register - * values must belong to this one here and therefore we don't need to - * compare any of the additional attributes stored for it. + * If the timestamp bit is set in the packet's descriptor, we know the + * timestamp belongs to this packet. No other packet can be + * timestamped until the registers for timestamping have been read. + * Therefor only one packet with this bit can be in the queue at a + * time, and the rx timestamp values that were in the registers belong + * to this packet. * * If nothing went wrong, then it should have a skb_shared_tx that we * can turn into a skb_shared_hwtstamps. */ - if (!(tsyncrxctl & IXGBE_TSYNCRXCTL_VALID)) + if (unlikely(!ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_TS))) return; spin_lock_irqsave(&adapter->tmreg_lock, flags); @@ -539,6 +621,11 @@ void ixgbe_ptp_rx_hwtstamp(struct ixgbe_q_vector *q_vector, * type has to be specified. Matching the kind of event packet is * not supported, with the exception of "all V2 events regardless of * level 2 or 4". + * + * Since hardware always timestamps Path delay packets when timestamping V2 + * packets, regardless of the type specified in the register, only use V2 + * Event mode. This more accurately tells the user what the hardware is going + * to do anyways. */ int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, struct ifreq *ifr, int cmd) @@ -582,41 +669,30 @@ int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, tsync_rx_mtrl = IXGBE_RXMTRL_V1_DELAY_REQ_MSG; is_l4 = true; break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: - tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_mtrl = IXGBE_RXMTRL_V2_SYNC_MSG; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: - tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2; - tsync_rx_mtrl = IXGBE_RXMTRL_V2_DELAY_REQ_MSG; - is_l2 = true; - is_l4 = true; - config.rx_filter = HWTSTAMP_FILTER_SOME; - break; - case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: - case HWTSTAMP_FILTER_PTP_V2_EVENT: tsync_rx_ctl |= IXGBE_TSYNCRXCTL_TYPE_EVENT_V2; - config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; is_l2 = true; is_l4 = true; + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: case HWTSTAMP_FILTER_ALL: default: /* - * register RXMTRL must be set, therefore it is not - * possible to time stamp both V1 Sync and Delay_Req messages - * and hardware does not support timestamping all packets - * => return error + * register RXMTRL must be set in order to do V1 packets, + * therefore it is not possible to time stamp both V1 Sync and + * Delay_Req messages and hardware does not support + * timestamping all packets => return error */ + config.rx_filter = HWTSTAMP_FILTER_NONE; return -ERANGE; } @@ -626,6 +702,9 @@ int ixgbe_ptp_hwtstamp_ioctl(struct ixgbe_adapter *adapter, return 0; } + /* Store filter value for later use */ + adapter->rx_hwtstamp_filter = config.rx_filter; + /* define ethertype filter for timestamped packets */ if (is_l2) IXGBE_WRITE_REG(hw, IXGBE_ETQF(3), @@ -861,6 +940,10 @@ void ixgbe_ptp_init(struct ixgbe_adapter *adapter) return; } + /* initialize the ptp filter */ + if (ptp_filter_init(ptp_filter, ARRAY_SIZE(ptp_filter))) + e_dev_warn("ptp_filter_init failed\n"); + spin_lock_init(&adapter->tmreg_lock); ixgbe_ptp_start_cyclecounter(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c index 1d80b1c..2334fce 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sysfs.c @@ -37,7 +37,6 @@ #include <linux/netdevice.h> #include <linux/hwmon.h> -#ifdef CONFIG_IXGBE_HWMON /* hwmon callback functions */ static ssize_t ixgbe_hwmon_show_location(struct device *dev, struct device_attribute *attr, @@ -241,5 +240,4 @@ err: exit: return rc; } -#endif /* CONFIG_IXGBE_HWMON */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 204848d..1085c07 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -2419,7 +2419,7 @@ typedef u32 ixgbe_physical_layer; */ /* BitTimes (BT) conversion */ -#define IXGBE_BT2KB(BT) ((BT + 1023) / (8 * 1024)) +#define IXGBE_BT2KB(BT) ((BT + (8 * 1024 - 1)) / (8 * 1024)) #define IXGBE_B2BT(BT) (BT * 8) /* Calculate Delay to respond to PFC */ @@ -2450,24 +2450,31 @@ typedef u32 ixgbe_physical_layer; #define IXGBE_PCI_DELAY 10000 /* Calculate X540 delay value in bit times */ -#define IXGBE_FILL_RATE (36 / 25) - -#define IXGBE_DV_X540(LINK, TC) (IXGBE_FILL_RATE * \ - (IXGBE_B2BT(LINK) + IXGBE_PFC_D + \ - (2 * IXGBE_CABLE_DC) + \ - (2 * IXGBE_ID_X540) + \ - IXGBE_HD + IXGBE_B2BT(TC))) +#define IXGBE_DV_X540(_max_frame_link, _max_frame_tc) \ + ((36 * \ + (IXGBE_B2BT(_max_frame_link) + \ + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + \ + (2 * IXGBE_ID_X540) + \ + IXGBE_HD) / 25 + 1) + \ + 2 * IXGBE_B2BT(_max_frame_tc)) /* Calculate 82599, 82598 delay value in bit times */ -#define IXGBE_DV(LINK, TC) (IXGBE_FILL_RATE * \ - (IXGBE_B2BT(LINK) + IXGBE_PFC_D + \ - (2 * IXGBE_CABLE_DC) + (2 * IXGBE_ID) + \ - IXGBE_HD + IXGBE_B2BT(TC))) +#define IXGBE_DV(_max_frame_link, _max_frame_tc) \ + ((36 * \ + (IXGBE_B2BT(_max_frame_link) + \ + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + \ + (2 * IXGBE_ID) + \ + IXGBE_HD) / 25 + 1) + \ + 2 * IXGBE_B2BT(_max_frame_tc)) /* Calculate low threshold delay values */ -#define IXGBE_LOW_DV_X540(TC) (2 * IXGBE_B2BT(TC) + \ - (IXGBE_FILL_RATE * IXGBE_PCI_DELAY)) -#define IXGBE_LOW_DV(TC) (2 * IXGBE_LOW_DV_X540(TC)) +#define IXGBE_LOW_DV_X540(_max_frame_tc) \ + (2 * IXGBE_B2BT(_max_frame_tc) + \ + (36 * IXGBE_PCI_DELAY / 25) + 1) +#define IXGBE_LOW_DV(_max_frame_tc) \ + (2 * IXGBE_LOW_DV_X540(_max_frame_tc)) /* Software ATR hash keys */ #define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2 diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 1fe2c7a..a8fb529 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -697,10 +697,10 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, if (slave != dev->caps.function) memset(inbox->buf, 0, 256); if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) { - *(u8 *) inbox->buf = !!reset_qkey_viols << 6; + *(u8 *) inbox->buf |= !!reset_qkey_viols << 6; ((__be32 *) inbox->buf)[2] = agg_cap_mask; } else { - ((u8 *) inbox->buf)[3] = !!reset_qkey_viols; + ((u8 *) inbox->buf)[3] |= !!reset_qkey_viols; ((__be32 *) inbox->buf)[1] = agg_cap_mask; } diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index 083d671..e7d2496 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -52,7 +52,6 @@ #define MODNAME "lpc-eth" #define DRV_VERSION "1.00" -#define PHYDEF_ADDR 0x00 #define ENET_MAXF_SIZE 1536 #define ENET_RX_DESC 48 @@ -416,9 +415,6 @@ static bool use_iram_for_net(struct device *dev) #define TXDESC_CONTROL_LAST (1 << 30) #define TXDESC_CONTROL_INT (1 << 31) -static int lpc_eth_hard_start_xmit(struct sk_buff *skb, - struct net_device *ndev); - /* * Structure of a TX/RX descriptors and RX status */ @@ -440,7 +436,7 @@ struct netdata_local { spinlock_t lock; void __iomem *net_base; u32 msg_enable; - struct sk_buff *skb[ENET_TX_DESC]; + unsigned int skblen[ENET_TX_DESC]; unsigned int last_tx_idx; unsigned int num_used_tx_buffs; struct mii_bus *mii_bus; @@ -903,12 +899,11 @@ err_out: static void __lpc_handle_xmit(struct net_device *ndev) { struct netdata_local *pldat = netdev_priv(ndev); - struct sk_buff *skb; u32 txcidx, *ptxstat, txstat; txcidx = readl(LPC_ENET_TXCONSUMEINDEX(pldat->net_base)); while (pldat->last_tx_idx != txcidx) { - skb = pldat->skb[pldat->last_tx_idx]; + unsigned int skblen = pldat->skblen[pldat->last_tx_idx]; /* A buffer is available, get buffer status */ ptxstat = &pldat->tx_stat_v[pldat->last_tx_idx]; @@ -945,9 +940,8 @@ static void __lpc_handle_xmit(struct net_device *ndev) } else { /* Update stats */ ndev->stats.tx_packets++; - ndev->stats.tx_bytes += skb->len; + ndev->stats.tx_bytes += skblen; } - dev_kfree_skb_irq(skb); txcidx = readl(LPC_ENET_TXCONSUMEINDEX(pldat->net_base)); } @@ -1132,7 +1126,7 @@ static int lpc_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) memcpy(pldat->tx_buff_v + txidx * ENET_MAXF_SIZE, skb->data, len); /* Save the buffer and increment the buffer counter */ - pldat->skb[txidx] = skb; + pldat->skblen[txidx] = len; pldat->num_used_tx_buffs++; /* Start transmit */ @@ -1147,6 +1141,7 @@ static int lpc_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) spin_unlock_irq(&pldat->lock); + dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -1442,7 +1437,7 @@ static int lpc_eth_drv_probe(struct platform_device *pdev) res->start); netdev_dbg(ndev, "IO address size :%d\n", res->end - res->start + 1); - netdev_err(ndev, "IO address (mapped) :0x%p\n", + netdev_dbg(ndev, "IO address (mapped) :0x%p\n", pldat->net_base); netdev_dbg(ndev, "IRQ number :%d\n", ndev->irq); netdev_dbg(ndev, "DMA buffer size :%d\n", pldat->dma_buff_size); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 33c3e46..212c121 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -479,7 +479,7 @@ qlcnic_init_pci_info(struct qlcnic_adapter *adapter) for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) { pfn = pci_info[i].id; - if (pfn > QLCNIC_MAX_PCI_FUNC) { + if (pfn >= QLCNIC_MAX_PCI_FUNC) { ret = QL_STATUS_INVALID_PARAM; goto err_eswitch; } diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index c3b331b..0cc053a 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -61,7 +61,7 @@ static LIST_HEAD(pinctrl_maps); list_for_each_entry(_maps_node_, &pinctrl_maps, node) \ for (_i_ = 0, _map_ = &_maps_node_->maps[_i_]; \ _i_ < _maps_node_->num_maps; \ - i++, _map_ = &_maps_node_->maps[_i_]) + _i_++, _map_ = &_maps_node_->maps[_i_]) /** * pinctrl_provide_dummies() - indicate if pinctrl provides dummy state support diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index f6e7c67..dd6d93a 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -27,16 +27,16 @@ #include "core.h" #include "pinctrl-imx.h" -#define IMX_PMX_DUMP(info, p, m, c, n) \ -{ \ - int i, j; \ - printk("Format: Pin Mux Config\n"); \ - for (i = 0; i < n; i++) { \ - j = p[i]; \ - printk("%s %d 0x%lx\n", \ - info->pins[j].name, \ - m[i], c[i]); \ - } \ +#define IMX_PMX_DUMP(info, p, m, c, n) \ +{ \ + int i, j; \ + printk(KERN_DEBUG "Format: Pin Mux Config\n"); \ + for (i = 0; i < n; i++) { \ + j = p[i]; \ + printk(KERN_DEBUG "%s %d 0x%lx\n", \ + info->pins[j].name, \ + m[i], c[i]); \ + } \ } /* The bits in CONFIG cell defined in binding doc*/ @@ -173,8 +173,10 @@ static int imx_dt_node_to_map(struct pinctrl_dev *pctldev, /* create mux map */ parent = of_get_parent(np); - if (!parent) + if (!parent) { + kfree(new_map); return -EINVAL; + } new_map[0].type = PIN_MAP_TYPE_MUX_GROUP; new_map[0].data.mux.function = parent->name; new_map[0].data.mux.group = np->name; @@ -193,7 +195,7 @@ static int imx_dt_node_to_map(struct pinctrl_dev *pctldev, } dev_dbg(pctldev->dev, "maps: function %s group %s num %d\n", - new_map->data.mux.function, new_map->data.mux.group, map_num); + (*map)->data.mux.function, (*map)->data.mux.group, map_num); return 0; } @@ -201,10 +203,7 @@ static int imx_dt_node_to_map(struct pinctrl_dev *pctldev, static void imx_dt_free_map(struct pinctrl_dev *pctldev, struct pinctrl_map *map, unsigned num_maps) { - int i; - - for (i = 0; i < num_maps; i++) - kfree(map); + kfree(map); } static struct pinctrl_ops imx_pctrl_ops = { @@ -475,9 +474,8 @@ static int __devinit imx_pinctrl_parse_groups(struct device_node *np, grp->configs[j] = config & ~IMX_PAD_SION; } -#ifdef DEBUG IMX_PMX_DUMP(info, grp->pins, grp->mux_mode, grp->configs, grp->npins); -#endif + return 0; } diff --git a/drivers/pinctrl/pinctrl-mxs.c b/drivers/pinctrl/pinctrl-mxs.c index 556e45a..afb50ee 100644 --- a/drivers/pinctrl/pinctrl-mxs.c +++ b/drivers/pinctrl/pinctrl-mxs.c @@ -107,8 +107,10 @@ static int mxs_dt_node_to_map(struct pinctrl_dev *pctldev, /* Compose group name */ group = kzalloc(length, GFP_KERNEL); - if (!group) - return -ENOMEM; + if (!group) { + ret = -ENOMEM; + goto free; + } snprintf(group, length, "%s.%d", np->name, reg); new_map[i].data.mux.group = group; i++; @@ -118,7 +120,7 @@ static int mxs_dt_node_to_map(struct pinctrl_dev *pctldev, pconfig = kmemdup(&config, sizeof(config), GFP_KERNEL); if (!pconfig) { ret = -ENOMEM; - goto free; + goto free_group; } new_map[i].type = PIN_MAP_TYPE_CONFIGS_GROUP; @@ -133,6 +135,9 @@ static int mxs_dt_node_to_map(struct pinctrl_dev *pctldev, return 0; +free_group: + if (!purecfg) + free(group); free: kfree(new_map); return ret; @@ -511,6 +516,7 @@ int __devinit mxs_pinctrl_probe(struct platform_device *pdev, return 0; err: + platform_set_drvdata(pdev, NULL); iounmap(d->base); return ret; } @@ -520,6 +526,7 @@ int __devexit mxs_pinctrl_remove(struct platform_device *pdev) { struct mxs_pinctrl_data *d = platform_get_drvdata(pdev); + platform_set_drvdata(pdev, NULL); pinctrl_unregister(d->pctl); iounmap(d->base); diff --git a/drivers/pinctrl/pinctrl-nomadik.c b/drivers/pinctrl/pinctrl-nomadik.c index b26395d..e8937e7 100644 --- a/drivers/pinctrl/pinctrl-nomadik.c +++ b/drivers/pinctrl/pinctrl-nomadik.c @@ -673,7 +673,7 @@ static void __nmk_gpio_set_wake(struct nmk_gpio_chip *nmk_chip, * wakeup is anyhow controlled by the RIMSC and FIMSC registers. */ if (nmk_chip->sleepmode && on) { - __nmk_gpio_set_slpm(nmk_chip, gpio % nmk_chip->chip.base, + __nmk_gpio_set_slpm(nmk_chip, gpio % NMK_GPIO_PER_CHIP, NMK_GPIO_SLPM_WAKEUP_ENABLE); } @@ -1246,6 +1246,7 @@ static int __devinit nmk_gpio_probe(struct platform_device *dev) ret = PTR_ERR(clk); goto out_unmap; } + clk_prepare(clk); nmk_chip = kzalloc(sizeof(*nmk_chip), GFP_KERNEL); if (!nmk_chip) { diff --git a/drivers/pinctrl/pinctrl-sirf.c b/drivers/pinctrl/pinctrl-sirf.c index ba15b1a..e9f8e7d 100644 --- a/drivers/pinctrl/pinctrl-sirf.c +++ b/drivers/pinctrl/pinctrl-sirf.c @@ -1184,7 +1184,7 @@ out_no_gpio_remap: return ret; } -static const struct of_device_id pinmux_ids[] = { +static const struct of_device_id pinmux_ids[] __devinitconst = { { .compatible = "sirf,prima2-gpio-pinmux" }, {} }; diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 639db4d..2fd9d36 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -5,7 +5,7 @@ * * (C) 2009 - Peter Feuerer peter (a) piie.net * http://piie.net - * 2009 Borislav Petkov <petkovbb@gmail.com> + * 2009 Borislav Petkov bp (a) alien8.de * * Inspired by and many thanks to: * o acerfand - Rachel Greenham diff --git a/drivers/regulator/anatop-regulator.c b/drivers/regulator/anatop-regulator.c index 3660bac..e82e7ea 100644 --- a/drivers/regulator/anatop-regulator.c +++ b/drivers/regulator/anatop-regulator.c @@ -224,7 +224,7 @@ static struct platform_driver anatop_regulator_driver = { .of_match_table = of_anatop_regulator_match_tbl, }, .probe = anatop_regulator_probe, - .remove = anatop_regulator_remove, + .remove = __devexit_p(anatop_regulator_remove), }; static int __init anatop_regulator_init(void) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 7584a74..09a737c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -2050,6 +2050,9 @@ int regulator_map_voltage_linear(struct regulator_dev *rdev, return -EINVAL; } + if (min_uV < rdev->desc->min_uV) + min_uV = rdev->desc->min_uV; + ret = DIV_ROUND_UP(min_uV - rdev->desc->min_uV, rdev->desc->uV_step); if (ret < 0) return ret; diff --git a/drivers/regulator/gpio-regulator.c b/drivers/regulator/gpio-regulator.c index 9997d7a..242851a 100644 --- a/drivers/regulator/gpio-regulator.c +++ b/drivers/regulator/gpio-regulator.c @@ -101,16 +101,20 @@ static int gpio_regulator_get_value(struct regulator_dev *dev) } static int gpio_regulator_set_value(struct regulator_dev *dev, - int min, int max) + int min, int max, unsigned *selector) { struct gpio_regulator_data *data = rdev_get_drvdata(dev); - int ptr, target, state, best_val = INT_MAX; + int ptr, target = 0, state, best_val = INT_MAX; for (ptr = 0; ptr < data->nr_states; ptr++) if (data->states[ptr].value < best_val && data->states[ptr].value >= min && - data->states[ptr].value <= max) + data->states[ptr].value <= max) { target = data->states[ptr].gpios; + best_val = data->states[ptr].value; + if (selector) + *selector = ptr; + } if (best_val == INT_MAX) return -EINVAL; @@ -128,7 +132,7 @@ static int gpio_regulator_set_voltage(struct regulator_dev *dev, int min_uV, int max_uV, unsigned *selector) { - return gpio_regulator_set_value(dev, min_uV, max_uV); + return gpio_regulator_set_value(dev, min_uV, max_uV, selector); } static int gpio_regulator_list_voltage(struct regulator_dev *dev, @@ -145,7 +149,7 @@ static int gpio_regulator_list_voltage(struct regulator_dev *dev, static int gpio_regulator_set_current_limit(struct regulator_dev *dev, int min_uA, int max_uA) { - return gpio_regulator_set_value(dev, min_uA, max_uA); + return gpio_regulator_set_value(dev, min_uA, max_uA, NULL); } static struct regulator_ops gpio_regulator_voltage_ops = { @@ -286,7 +290,7 @@ static int __devinit gpio_regulator_probe(struct platform_device *pdev) cfg.dev = &pdev->dev; cfg.init_data = config->init_data; - cfg.driver_data = &drvdata; + cfg.driver_data = drvdata; drvdata->dev = regulator_register(&drvdata->desc, &cfg); if (IS_ERR(drvdata->dev)) { diff --git a/drivers/regulator/max8649.c b/drivers/regulator/max8649.c index 1f4bb80..9d540cd 100644 --- a/drivers/regulator/max8649.c +++ b/drivers/regulator/max8649.c @@ -259,6 +259,7 @@ static int __devinit max8649_regulator_probe(struct i2c_client *client, config.dev = &client->dev; config.init_data = pdata->regulator; config.driver_data = info; + config.regmap = info->regmap; info->regulator = regulator_register(&dcdc_desc, &config); if (IS_ERR(info->regulator)) { diff --git a/drivers/regulator/palmas-regulator.c b/drivers/regulator/palmas-regulator.c index c4435f6..9b7ca90 100644 --- a/drivers/regulator/palmas-regulator.c +++ b/drivers/regulator/palmas-regulator.c @@ -775,9 +775,6 @@ static __devinit int palmas_probe(struct platform_device *pdev) err_unregister_regulator: while (--id >= 0) regulator_unregister(pmic->rdev[id]); - kfree(pmic->rdev); - kfree(pmic->desc); - kfree(pmic); return ret; } @@ -788,10 +785,6 @@ static int __devexit palmas_remove(struct platform_device *pdev) for (id = 0; id < PALMAS_NUM_REGS; id++) regulator_unregister(pmic->rdev[id]); - - kfree(pmic->rdev); - kfree(pmic->desc); - kfree(pmic); return 0; } diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index 7d5f56e..4267789 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -910,14 +910,17 @@ static inline int cmos_poweroff(struct device *dev) static u32 rtc_handler(void *context) { + struct device *dev = context; + + pm_wakeup_event(dev, 0); acpi_clear_event(ACPI_EVENT_RTC); acpi_disable_event(ACPI_EVENT_RTC, 0); return ACPI_INTERRUPT_HANDLED; } -static inline void rtc_wake_setup(void) +static inline void rtc_wake_setup(struct device *dev) { - acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, NULL); + acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev); /* * After the RTC handler is installed, the Fixed_RTC event should * be disabled. Only when the RTC alarm is set will it be enabled. @@ -950,7 +953,7 @@ cmos_wake_setup(struct device *dev) if (acpi_disabled) return; - rtc_wake_setup(); + rtc_wake_setup(dev); acpi_rtc_info.wake_on = rtc_wake_on; acpi_rtc_info.wake_off = rtc_wake_off; diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c index 4e7ef0e..d46764b 100644 --- a/drivers/staging/ramster/zcache-main.c +++ b/drivers/staging/ramster/zcache-main.c @@ -3002,7 +3002,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) return oid; } -static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -3025,7 +3025,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, /* returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -3080,8 +3080,8 @@ static void zcache_frontswap_init(unsigned ignored) } static struct frontswap_ops zcache_frontswap_ops = { - .put_page = zcache_frontswap_put_page, - .get_page = zcache_frontswap_get_page, + .store = zcache_frontswap_store, + .load = zcache_frontswap_load, .invalidate_page = zcache_frontswap_flush_page, .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 2734dac..784c796 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1835,7 +1835,7 @@ static int zcache_frontswap_poolid = -1; * Swizzling increases objects per swaptype, increasing tmem concurrency * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from - * frontswap_get_page(), but has side-effects. Hence using 8. + * frontswap_load(), but has side-effects. Hence using 8. */ #define SWIZ_BITS 8 #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) @@ -1849,7 +1849,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) return oid; } -static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -1870,7 +1870,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, /* returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -1919,8 +1919,8 @@ static void zcache_frontswap_init(unsigned ignored) } static struct frontswap_ops zcache_frontswap_ops = { - .put_page = zcache_frontswap_put_page, - .get_page = zcache_frontswap_get_page, + .store = zcache_frontswap_store, + .load = zcache_frontswap_load, .invalidate_page = zcache_frontswap_flush_page, .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index 37c6098..7e6136e 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -587,14 +587,14 @@ static void sbp_management_request_logout( { struct sbp_tport *tport = agent->tport; struct sbp_tpg *tpg = tport->tpg; - int login_id; + int id; struct sbp_login_descriptor *login; - login_id = LOGOUT_ORB_LOGIN_ID(be32_to_cpu(req->orb.misc)); + id = LOGOUT_ORB_LOGIN_ID(be32_to_cpu(req->orb.misc)); - login = sbp_login_find_by_id(tpg, login_id); + login = sbp_login_find_by_id(tpg, id); if (!login) { - pr_warn("cannot find login: %d\n", login_id); + pr_warn("cannot find login: %d\n", id); req->status.status = cpu_to_be32( STATUS_BLOCK_RESP(STATUS_RESP_REQUEST_COMPLETE) | diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 686dba1..9f99d04 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -133,16 +133,11 @@ static struct se_device *fd_create_virtdevice( ret = PTR_ERR(dev_p); goto fail; } - - /* O_DIRECT too? */ - flags = O_RDWR | O_CREAT | O_LARGEFILE; - /* - * If fd_buffered_io=1 has not been set explicitly (the default), - * use O_SYNC to force FILEIO writes to disk. + * Use O_DSYNC by default instead of O_SYNC to forgo syncing + * of pure timestamp updates. */ - if (!(fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO)) - flags |= O_SYNC; + flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC; file = filp_open(dev_p, flags, 0600); if (IS_ERR(file)) { @@ -380,23 +375,6 @@ static void fd_emulate_sync_cache(struct se_cmd *cmd) } } -static void fd_emulate_write_fua(struct se_cmd *cmd) -{ - struct se_device *dev = cmd->se_dev; - struct fd_dev *fd_dev = dev->dev_ptr; - loff_t start = cmd->t_task_lba * - dev->se_sub_dev->se_dev_attrib.block_size; - loff_t end = start + cmd->data_length; - int ret; - - pr_debug("FILEIO: FUA WRITE LBA: %llu, bytes: %u\n", - cmd->t_task_lba, cmd->data_length); - - ret = vfs_fsync_range(fd_dev->fd_file, start, end, 1); - if (ret != 0) - pr_err("FILEIO: vfs_fsync_range() failed: %d\n", ret); -} - static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) { @@ -411,19 +389,21 @@ static int fd_execute_cmd(struct se_cmd *cmd, struct scatterlist *sgl, ret = fd_do_readv(cmd, sgl, sgl_nents); } else { ret = fd_do_writev(cmd, sgl, sgl_nents); - + /* + * Perform implict vfs_fsync_range() for fd_do_writev() ops + * for SCSI WRITEs with Forced Unit Access (FUA) set. + * Allow this to happen independent of WCE=0 setting. + */ if (ret > 0 && - dev->se_sub_dev->se_dev_attrib.emulate_write_cache > 0 && dev->se_sub_dev->se_dev_attrib.emulate_fua_write > 0 && (cmd->se_cmd_flags & SCF_FUA)) { - /* - * We might need to be a bit smarter here - * and return some sense data to let the initiator - * know the FUA WRITE cache sync failed..? - */ - fd_emulate_write_fua(cmd); - } + struct fd_dev *fd_dev = dev->dev_ptr; + loff_t start = cmd->t_task_lba * + dev->se_sub_dev->se_dev_attrib.block_size; + loff_t end = start + cmd->data_length; + vfs_fsync_range(fd_dev->fd_file, start, end, 1); + } } if (ret < 0) { @@ -442,7 +422,6 @@ enum { static match_table_t tokens = { {Opt_fd_dev_name, "fd_dev_name=%s"}, {Opt_fd_dev_size, "fd_dev_size=%s"}, - {Opt_fd_buffered_io, "fd_buffered_io=%d"}, {Opt_err, NULL} }; @@ -454,7 +433,7 @@ static ssize_t fd_set_configfs_dev_params( struct fd_dev *fd_dev = se_dev->se_dev_su_ptr; char *orig, *ptr, *arg_p, *opts; substring_t args[MAX_OPT_ARGS]; - int ret = 0, arg, token; + int ret = 0, token; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -498,19 +477,6 @@ static ssize_t fd_set_configfs_dev_params( " bytes\n", fd_dev->fd_dev_size); fd_dev->fbd_flags |= FBDF_HAS_SIZE; break; - case Opt_fd_buffered_io: - match_int(args, &arg); - if (arg != 1) { - pr_err("bogus fd_buffered_io=%d value\n", arg); - ret = -EINVAL; - goto out; - } - - pr_debug("FILEIO: Using buffered I/O" - " operations for struct fd_dev\n"); - - fd_dev->fbd_flags |= FDBD_USE_BUFFERED_IO; - break; default: break; } @@ -542,10 +508,8 @@ static ssize_t fd_show_configfs_dev_params( ssize_t bl = 0; bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id); - bl += sprintf(b + bl, " File: %s Size: %llu Mode: %s\n", - fd_dev->fd_dev_name, fd_dev->fd_dev_size, - (fd_dev->fbd_flags & FDBD_USE_BUFFERED_IO) ? - "Buffered" : "Synchronous"); + bl += sprintf(b + bl, " File: %s Size: %llu Mode: O_DSYNC\n", + fd_dev->fd_dev_name, fd_dev->fd_dev_size); return bl; } diff --git a/drivers/target/target_core_file.h b/drivers/target/target_core_file.h index fbd59ef..70ce7fd 100644 --- a/drivers/target/target_core_file.h +++ b/drivers/target/target_core_file.h @@ -14,7 +14,6 @@ #define FBDF_HAS_PATH 0x01 #define FBDF_HAS_SIZE 0x02 -#define FDBD_USE_BUFFERED_IO 0x04 struct fd_dev { u32 fbd_flags; diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 4604153..1bd9163 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -2179,6 +2179,16 @@ static int __devinit sci_init_single(struct platform_device *dev, return 0; } +static void sci_cleanup_single(struct sci_port *port) +{ + sci_free_gpios(port); + + clk_put(port->iclk); + clk_put(port->fclk); + + pm_runtime_disable(port->port.dev); +} + #ifdef CONFIG_SERIAL_SH_SCI_CONSOLE static void serial_console_putchar(struct uart_port *port, int ch) { @@ -2360,14 +2370,10 @@ static int sci_remove(struct platform_device *dev) cpufreq_unregister_notifier(&port->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); - sci_free_gpios(port); - uart_remove_one_port(&sci_uart_driver, &port->port); - clk_put(port->iclk); - clk_put(port->fclk); + sci_cleanup_single(port); - pm_runtime_disable(&dev->dev); return 0; } @@ -2385,14 +2391,20 @@ static int __devinit sci_probe_single(struct platform_device *dev, index+1, SCI_NPORTS); dev_notice(&dev->dev, "Consider bumping " "CONFIG_SERIAL_SH_SCI_NR_UARTS!\n"); - return 0; + return -EINVAL; } ret = sci_init_single(dev, sciport, index, p); if (ret) return ret; - return uart_add_one_port(&sci_uart_driver, &sciport->port); + ret = uart_add_one_port(&sci_uart_driver, &sciport->port); + if (ret) { + sci_cleanup_single(sciport); + return ret; + } + + return 0; } static int __devinit sci_probe(struct platform_device *dev) @@ -2413,24 +2425,22 @@ static int __devinit sci_probe(struct platform_device *dev) ret = sci_probe_single(dev, dev->id, p, sp); if (ret) - goto err_unreg; + return ret; sp->freq_transition.notifier_call = sci_notifier; ret = cpufreq_register_notifier(&sp->freq_transition, CPUFREQ_TRANSITION_NOTIFIER); - if (unlikely(ret < 0)) - goto err_unreg; + if (unlikely(ret < 0)) { + sci_cleanup_single(sp); + return ret; + } #ifdef CONFIG_SH_STANDARD_BIOS sh_bios_gdb_detach(); #endif return 0; - -err_unreg: - sci_remove(dev); - return ret; } static int sci_suspend(struct device *dev) diff --git a/drivers/video/omap2/displays/panel-taal.c b/drivers/video/omap2/displays/panel-taal.c index 2ce9992..901576e 100644 --- a/drivers/video/omap2/displays/panel-taal.c +++ b/drivers/video/omap2/displays/panel-taal.c @@ -526,7 +526,7 @@ static ssize_t taal_num_errors_show(struct device *dev, { struct omap_dss_device *dssdev = to_dss_device(dev); struct taal_data *td = dev_get_drvdata(&dssdev->dev); - u8 errors; + u8 errors = 0; int r; mutex_lock(&td->lock); diff --git a/drivers/video/omap2/dss/core.c b/drivers/video/omap2/dss/core.c index 72ded9c..5066eee 100644 --- a/drivers/video/omap2/dss/core.c +++ b/drivers/video/omap2/dss/core.c @@ -194,8 +194,7 @@ static inline int dss_initialize_debugfs(void) static inline void dss_uninitialize_debugfs(void) { } -static inline int dss_debugfs_create_file(const char *name, - void (*write)(struct seq_file *)) +int dss_debugfs_create_file(const char *name, void (*write)(struct seq_file *)) { return 0; } diff --git a/drivers/video/omap2/dss/dsi.c b/drivers/video/omap2/dss/dsi.c index ec363d8..ca8382d 100644 --- a/drivers/video/omap2/dss/dsi.c +++ b/drivers/video/omap2/dss/dsi.c @@ -3724,7 +3724,7 @@ static int dsi_compute_interleave_lp(int blank, int enter_hs, int exit_hs, /* CLKIN4DDR = 16 * TXBYTECLKHS */ tlp_avail = thsbyte_clk * (blank - trans_lp); - ttxclkesc = tdsi_fclk / lp_clk_div; + ttxclkesc = tdsi_fclk * lp_clk_div; lp_inter = ((tlp_avail - 8 * thsbyte_clk - 5 * tdsi_fclk) / ttxclkesc - 26) / 16; diff --git a/drivers/video/omap2/dss/dss.c b/drivers/video/omap2/dss/dss.c index 6ea1ff1..7706323 100644 --- a/drivers/video/omap2/dss/dss.c +++ b/drivers/video/omap2/dss/dss.c @@ -731,7 +731,7 @@ static void dss_runtime_put(void) DSSDBG("dss_runtime_put\n"); r = pm_runtime_put_sync(&dss.pdev->dev); - WARN_ON(r < 0); + WARN_ON(r < 0 && r != -EBUSY); } /* DEBUGFS */ diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index dcb79521..89f264c 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -269,7 +269,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) } /* returns 0 if the page was successfully put into frontswap, -1 if not */ -static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, +static int tmem_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -295,7 +295,7 @@ static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, * returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, +static int tmem_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -362,8 +362,8 @@ static int __init no_frontswap(char *s) __setup("nofrontswap", no_frontswap); static struct frontswap_ops __initdata tmem_frontswap_ops = { - .put_page = tmem_frontswap_put_page, - .get_page = tmem_frontswap_get_page, + .store = tmem_frontswap_store, + .load = tmem_frontswap_load, .invalidate_page = tmem_frontswap_flush_page, .invalidate_area = tmem_frontswap_flush_area, .init = tmem_frontswap_init diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 20350a9..6df0cbe 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -174,6 +174,7 @@ struct smb_version_operations { void (*add_credits)(struct TCP_Server_Info *, const unsigned int); void (*set_credits)(struct TCP_Server_Info *, const int); int * (*get_credits_field)(struct TCP_Server_Info *); + __u64 (*get_next_mid)(struct TCP_Server_Info *); /* data offset from read response message */ unsigned int (*read_data_offset)(char *); /* data length from read response message */ @@ -399,6 +400,12 @@ set_credits(struct TCP_Server_Info *server, const int val) server->ops->set_credits(server, val); } +static inline __u64 +get_next_mid(struct TCP_Server_Info *server) +{ + return server->ops->get_next_mid(server); +} + /* * Macros to allow the TCP_Server_Info->net field and related code to drop out * when CONFIG_NET_NS isn't set. diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 5ec21ec..0a6cbfe 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -114,7 +114,6 @@ extern int small_smb_init_no_tc(const int smb_cmd, const int wct, void **request_buf); extern int CIFS_SessSetup(unsigned int xid, struct cifs_ses *ses, const struct nls_table *nls_cp); -extern __u64 GetNextMid(struct TCP_Server_Info *server); extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601); extern u64 cifs_UnixTimeToNT(struct timespec); extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index b5ad716..5b40073 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -268,7 +268,7 @@ small_smb_init_no_tc(const int smb_command, const int wct, return rc; buffer = (struct smb_hdr *)*request_buf; - buffer->Mid = GetNextMid(ses->server); + buffer->Mid = get_next_mid(ses->server); if (ses->capabilities & CAP_UNICODE) buffer->Flags2 |= SMBFLG2_UNICODE; if (ses->capabilities & CAP_STATUS32) @@ -402,7 +402,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifs_ses *ses) cFYI(1, "secFlags 0x%x", secFlags); - pSMB->hdr.Mid = GetNextMid(server); + pSMB->hdr.Mid = get_next_mid(server); pSMB->hdr.Flags2 |= (SMBFLG2_UNICODE | SMBFLG2_ERR_STATUS); if ((secFlags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) @@ -782,7 +782,7 @@ CIFSSMBLogoff(const int xid, struct cifs_ses *ses) return rc; } - pSMB->hdr.Mid = GetNextMid(ses->server); + pSMB->hdr.Mid = get_next_mid(ses->server); if (ses->server->sec_mode & (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) @@ -4762,7 +4762,7 @@ getDFSRetry: /* server pointer checked in called function, but should never be null here anyway */ - pSMB->hdr.Mid = GetNextMid(ses->server); + pSMB->hdr.Mid = get_next_mid(ses->server); pSMB->hdr.Tid = ses->ipc_tid; pSMB->hdr.Uid = ses->Suid; if (ses->capabilities & CAP_STATUS32) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index ccafded..78db68a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1058,13 +1058,15 @@ cifs_demultiplex_thread(void *p) if (mid_entry != NULL) { if (!mid_entry->multiRsp || mid_entry->multiEnd) mid_entry->callback(mid_entry); - } else if (!server->ops->is_oplock_break(buf, server)) { + } else if (!server->ops->is_oplock_break || + !server->ops->is_oplock_break(buf, server)) { cERROR(1, "No task to wake, unknown frame received! " "NumMids %d", atomic_read(&midCount)); cifs_dump_mem("Received Data is: ", buf, HEADER_SIZE(server)); #ifdef CONFIG_CIFS_DEBUG2 - server->ops->dump_detail(buf); + if (server->ops->dump_detail) + server->ops->dump_detail(buf); cifs_dump_mids(server); #endif /* CIFS_DEBUG2 */ @@ -3938,7 +3940,7 @@ CIFSTCon(unsigned int xid, struct cifs_ses *ses, header_assemble(smb_buffer, SMB_COM_TREE_CONNECT_ANDX, NULL /*no tid */ , 4 /*wct */ ); - smb_buffer->Mid = GetNextMid(ses->server); + smb_buffer->Mid = get_next_mid(ses->server); smb_buffer->Uid = ses->Suid; pSMB = (TCONX_REQ *) smb_buffer; pSMBr = (TCONX_RSP *) smb_buffer_response; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 253170d..513adbc 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -876,7 +876,7 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) struct cifsLockInfo *li, *tmp; struct cifs_tcon *tcon; struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); - unsigned int num, max_num; + unsigned int num, max_num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; int types[] = {LOCKING_ANDX_LARGE_FILES, LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; @@ -892,8 +892,19 @@ cifs_push_mandatory_locks(struct cifsFileInfo *cfile) return rc; } - max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / - sizeof(LOCKING_ANDX_RANGE); + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it for zero before using. + */ + max_buf = tcon->ses->server->maxBuf; + if (!max_buf) { + mutex_unlock(&cinode->lock_mutex); + FreeXid(xid); + return -EINVAL; + } + + max_num = (max_buf - sizeof(struct smb_hdr)) / + sizeof(LOCKING_ANDX_RANGE); buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) { mutex_unlock(&cinode->lock_mutex); @@ -1218,7 +1229,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) int types[] = {LOCKING_ANDX_LARGE_FILES, LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES}; unsigned int i; - unsigned int max_num, num; + unsigned int max_num, num, max_buf; LOCKING_ANDX_RANGE *buf, *cur; struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode); @@ -1228,8 +1239,16 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) INIT_LIST_HEAD(&tmp_llist); - max_num = (tcon->ses->server->maxBuf - sizeof(struct smb_hdr)) / - sizeof(LOCKING_ANDX_RANGE); + /* + * Accessing maxBuf is racy with cifs_reconnect - need to store value + * and check it for zero before using. + */ + max_buf = tcon->ses->server->maxBuf; + if (!max_buf) + return -EINVAL; + + max_num = (max_buf - sizeof(struct smb_hdr)) / + sizeof(LOCKING_ANDX_RANGE); buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL); if (!buf) return -ENOMEM; @@ -1247,46 +1266,7 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) continue; if (types[i] != li->type) continue; - if (!cinode->can_cache_brlcks) { - cur->Pid = cpu_to_le16(li->pid); - cur->LengthLow = cpu_to_le32((u32)li->length); - cur->LengthHigh = - cpu_to_le32((u32)(li->length>>32)); - cur->OffsetLow = cpu_to_le32((u32)li->offset); - cur->OffsetHigh = - cpu_to_le32((u32)(li->offset>>32)); - /* - * We need to save a lock here to let us add - * it again to the file's list if the unlock - * range request fails on the server. - */ - list_move(&li->llist, &tmp_llist); - if (++num == max_num) { - stored_rc = cifs_lockv(xid, tcon, - cfile->netfid, - li->type, num, - 0, buf); - if (stored_rc) { - /* - * We failed on the unlock range - * request - add all locks from - * the tmp list to the head of - * the file's list. - */ - cifs_move_llist(&tmp_llist, - &cfile->llist); - rc = stored_rc; - } else - /* - * The unlock range request - * succeed - free the tmp list. - */ - cifs_free_llist(&tmp_llist); - cur = buf; - num = 0; - } else - cur++; - } else { + if (cinode->can_cache_brlcks) { /* * We can cache brlock requests - simply remove * a lock from the file's list. @@ -1294,7 +1274,41 @@ cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock, int xid) list_del(&li->llist); cifs_del_lock_waiters(li); kfree(li); + continue; } + cur->Pid = cpu_to_le16(li->pid); + cur->LengthLow = cpu_to_le32((u32)li->length); + cur->LengthHigh = cpu_to_le32((u32)(li->length>>32)); + cur->OffsetLow = cpu_to_le32((u32)li->offset); + cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32)); + /* + * We need to save a lock here to let us add it again to + * the file's list if the unlock range request fails on + * the server. + */ + list_move(&li->llist, &tmp_llist); + if (++num == max_num) { + stored_rc = cifs_lockv(xid, tcon, cfile->netfid, + li->type, num, 0, buf); + if (stored_rc) { + /* + * We failed on the unlock range + * request - add all locks from the tmp + * list to the head of the file's list. + */ + cifs_move_llist(&tmp_llist, + &cfile->llist); + rc = stored_rc; + } else + /* + * The unlock range request succeed - + * free the tmp list. + */ + cifs_free_llist(&tmp_llist); + cur = buf; + num = 0; + } else + cur++; } if (num) { stored_rc = cifs_lockv(xid, tcon, cfile->netfid, diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index e2552d2..557506a 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -212,93 +212,6 @@ cifs_small_buf_release(void *buf_to_free) return; } -/* - * Find a free multiplex id (SMB mid). Otherwise there could be - * mid collisions which might cause problems, demultiplexing the - * wrong response to this request. Multiplex ids could collide if - * one of a series requests takes much longer than the others, or - * if a very large number of long lived requests (byte range - * locks or FindNotify requests) are pending. No more than - * 64K-1 requests can be outstanding at one time. If no - * mids are available, return zero. A future optimization - * could make the combination of mids and uid the key we use - * to demultiplex on (rather than mid alone). - * In addition to the above check, the cifs demultiplex - * code already used the command code as a secondary - * check of the frame and if signing is negotiated the - * response would be discarded if the mid were the same - * but the signature was wrong. Since the mid is not put in the - * pending queue until later (when it is about to be dispatched) - * we do have to limit the number of outstanding requests - * to somewhat less than 64K-1 although it is hard to imagine - * so many threads being in the vfs at one time. - */ -__u64 GetNextMid(struct TCP_Server_Info *server) -{ - __u64 mid = 0; - __u16 last_mid, cur_mid; - bool collision; - - spin_lock(&GlobalMid_Lock); - - /* mid is 16 bit only for CIFS/SMB */ - cur_mid = (__u16)((server->CurrentMid) & 0xffff); - /* we do not want to loop forever */ - last_mid = cur_mid; - cur_mid++; - - /* - * This nested loop looks more expensive than it is. - * In practice the list of pending requests is short, - * fewer than 50, and the mids are likely to be unique - * on the first pass through the loop unless some request - * takes longer than the 64 thousand requests before it - * (and it would also have to have been a request that - * did not time out). - */ - while (cur_mid != last_mid) { - struct mid_q_entry *mid_entry; - unsigned int num_mids; - - collision = false; - if (cur_mid == 0) - cur_mid++; - - num_mids = 0; - list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { - ++num_mids; - if (mid_entry->mid == cur_mid && - mid_entry->mid_state == MID_REQUEST_SUBMITTED) { - /* This mid is in use, try a different one */ - collision = true; - break; - } - } - - /* - * if we have more than 32k mids in the list, then something - * is very wrong. Possibly a local user is trying to DoS the - * box by issuing long-running calls and SIGKILL'ing them. If - * we get to 2^16 mids then we're in big trouble as this - * function could loop forever. - * - * Go ahead and assign out the mid in this situation, but force - * an eventual reconnect to clean out the pending_mid_q. - */ - if (num_mids > 32768) - server->tcpStatus = CifsNeedReconnect; - - if (!collision) { - mid = (__u64)cur_mid; - server->CurrentMid = mid; - break; - } - cur_mid++; - } - spin_unlock(&GlobalMid_Lock); - return mid; -} - /* NB: MID can not be set if treeCon not passed in, in that case it is responsbility of caller to set the mid */ void @@ -334,7 +247,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , /* Uid is not converted */ buffer->Uid = treeCon->ses->Suid; - buffer->Mid = GetNextMid(treeCon->ses->server); + buffer->Mid = get_next_mid(treeCon->ses->server); } if (treeCon->Flags & SMB_SHARE_IS_IN_DFS) buffer->Flags2 |= SMBFLG2_DFS; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index d9d615f..6dec38f 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -125,6 +125,94 @@ cifs_get_credits_field(struct TCP_Server_Info *server) return &server->credits; } +/* + * Find a free multiplex id (SMB mid). Otherwise there could be + * mid collisions which might cause problems, demultiplexing the + * wrong response to this request. Multiplex ids could collide if + * one of a series requests takes much longer than the others, or + * if a very large number of long lived requests (byte range + * locks or FindNotify requests) are pending. No more than + * 64K-1 requests can be outstanding at one time. If no + * mids are available, return zero. A future optimization + * could make the combination of mids and uid the key we use + * to demultiplex on (rather than mid alone). + * In addition to the above check, the cifs demultiplex + * code already used the command code as a secondary + * check of the frame and if signing is negotiated the + * response would be discarded if the mid were the same + * but the signature was wrong. Since the mid is not put in the + * pending queue until later (when it is about to be dispatched) + * we do have to limit the number of outstanding requests + * to somewhat less than 64K-1 although it is hard to imagine + * so many threads being in the vfs at one time. + */ +static __u64 +cifs_get_next_mid(struct TCP_Server_Info *server) +{ + __u64 mid = 0; + __u16 last_mid, cur_mid; + bool collision; + + spin_lock(&GlobalMid_Lock); + + /* mid is 16 bit only for CIFS/SMB */ + cur_mid = (__u16)((server->CurrentMid) & 0xffff); + /* we do not want to loop forever */ + last_mid = cur_mid; + cur_mid++; + + /* + * This nested loop looks more expensive than it is. + * In practice the list of pending requests is short, + * fewer than 50, and the mids are likely to be unique + * on the first pass through the loop unless some request + * takes longer than the 64 thousand requests before it + * (and it would also have to have been a request that + * did not time out). + */ + while (cur_mid != last_mid) { + struct mid_q_entry *mid_entry; + unsigned int num_mids; + + collision = false; + if (cur_mid == 0) + cur_mid++; + + num_mids = 0; + list_for_each_entry(mid_entry, &server->pending_mid_q, qhead) { + ++num_mids; + if (mid_entry->mid == cur_mid && + mid_entry->mid_state == MID_REQUEST_SUBMITTED) { + /* This mid is in use, try a different one */ + collision = true; + break; + } + } + + /* + * if we have more than 32k mids in the list, then something + * is very wrong. Possibly a local user is trying to DoS the + * box by issuing long-running calls and SIGKILL'ing them. If + * we get to 2^16 mids then we're in big trouble as this + * function could loop forever. + * + * Go ahead and assign out the mid in this situation, but force + * an eventual reconnect to clean out the pending_mid_q. + */ + if (num_mids > 32768) + server->tcpStatus = CifsNeedReconnect; + + if (!collision) { + mid = (__u64)cur_mid; + server->CurrentMid = mid; + break; + } + cur_mid++; + } + spin_unlock(&GlobalMid_Lock); + return mid; +} + struct smb_version_operations smb1_operations = { .send_cancel = send_nt_cancel, .compare_fids = cifs_compare_fids, @@ -133,6 +221,7 @@ struct smb_version_operations smb1_operations = { .add_credits = cifs_add_credits, .set_credits = cifs_set_credits, .get_credits_field = cifs_get_credits_field, + .get_next_mid = cifs_get_next_mid, .read_data_offset = cifs_read_data_offset, .read_data_length = cifs_read_data_length, .map_error = map_smb_to_linux_error, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 1b36ffe..3097ee5 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -779,7 +779,7 @@ send_lock_cancel(const unsigned int xid, struct cifs_tcon *tcon, pSMB->LockType = LOCKING_ANDX_CANCEL_LOCK|LOCKING_ANDX_LARGE_FILES; pSMB->Timeout = 0; - pSMB->hdr.Mid = GetNextMid(ses->server); + pSMB->hdr.Mid = get_next_mid(ses->server); return SendReceive(xid, ses, in_buf, out_buf, &bytes_returned, 0); diff --git a/fs/dcache.c b/fs/dcache.c index 85c9e2b..4046904 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -683,6 +683,8 @@ EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question + * @want_discon: flag, used by d_splice_alias, to request + * that only a DISCONNECTED alias be returned. * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. @@ -691,9 +693,10 @@ EXPORT_SYMBOL(dget_parent); * of a filesystem. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that. + * any other hashed alias over that one unless @want_discon is set, + * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. */ -static struct dentry *__d_find_alias(struct inode *inode) +static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { struct dentry *alias, *discon_alias; @@ -705,7 +708,7 @@ again: if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - } else { + } else if (!want_discon) { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; @@ -736,7 +739,7 @@ struct dentry *d_find_alias(struct inode *inode) if (!list_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); - de = __d_find_alias(inode); + de = __d_find_alias(inode, 0); spin_unlock(&inode->i_lock); } return de; @@ -1647,8 +1650,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); - new = __d_find_any_alias(inode); + new = __d_find_alias(inode, 1); if (new) { + BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); @@ -2478,7 +2482,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) struct dentry *alias; /* Does an aliased dentry already exist? */ - alias = __d_find_alias(inode); + alias = __d_find_alias(inode, 0); if (alias) { actual = alias; write_seqlock(&rename_lock); diff --git a/fs/exofs/sys.c b/fs/exofs/sys.c index e32bc91..5a7b691 100644 --- a/fs/exofs/sys.c +++ b/fs/exofs/sys.c @@ -109,7 +109,7 @@ static struct kobj_type odev_ktype = { static struct kobj_type uuid_ktype = { }; -void exofs_sysfs_dbg_print() +void exofs_sysfs_dbg_print(void) { #ifdef CONFIG_EXOFS_DEBUG struct kobject *k_name, *k_tmp; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 99b6324..cee7812 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -90,8 +90,8 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb, * unusual file system layouts. */ if (ext4_block_in_group(sb, ext4_block_bitmap(sb, gdp), block_group)) { - block_cluster = EXT4_B2C(sbi, (start - - ext4_block_bitmap(sb, gdp))); + block_cluster = EXT4_B2C(sbi, + ext4_block_bitmap(sb, gdp) - start); if (block_cluster < num_clusters) block_cluster = -1; else if (block_cluster == num_clusters) { @@ -102,7 +102,7 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb, if (ext4_block_in_group(sb, ext4_inode_bitmap(sb, gdp), block_group)) { inode_cluster = EXT4_B2C(sbi, - start - ext4_inode_bitmap(sb, gdp)); + ext4_inode_bitmap(sb, gdp) - start); if (inode_cluster < num_clusters) inode_cluster = -1; else if (inode_cluster == num_clusters) { @@ -114,7 +114,7 @@ unsigned ext4_num_overhead_clusters(struct super_block *sb, itbl_blk = ext4_inode_table(sb, gdp); for (i = 0; i < sbi->s_itb_per_group; i++) { if (ext4_block_in_group(sb, itbl_blk + i, block_group)) { - c = EXT4_B2C(sbi, start - itbl_blk + i); + c = EXT4_B2C(sbi, itbl_blk + i - start); if ((c < num_clusters) || (c == inode_cluster) || (c == block_cluster) || (c == itbl_cluster)) continue; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 8ad112a..e34deac 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -123,7 +123,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) else ext4_clear_inode_flag(inode, i); } - ei->i_flags = flags; ext4_set_inode_flags(inode); inode->i_ctime = ext4_current_time(inode); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 8d2fb8c..41a3ccf 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -664,6 +664,7 @@ static long writeback_sb_inodes(struct super_block *sb, /* Wait for I_SYNC. This function drops i_lock... */ inode_sleep_on_writeback(inode); /* Inode may be gone, start again */ + spin_lock(&wb->list_lock); continue; } inode->i_state |= I_SYNC; diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 42593c5..03ff5b1 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -75,19 +75,13 @@ static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf, unsigned global_limit) { unsigned long t; - char tmp[32]; unsigned limit = (1 << 16) - 1; int err; - if (*ppos || count >= sizeof(tmp) - 1) - return -EINVAL; - - if (copy_from_user(tmp, buf, count)) + if (*ppos) return -EINVAL; - tmp[count] = '\0'; - - err = strict_strtoul(tmp, 0, &t); + err = kstrtoul_from_user(buf, count, 0, &t); if (err) return err; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index df5ac04..334e0b1 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -775,6 +775,8 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, struct kstat *stat) { + unsigned int blkbits; + stat->dev = inode->i_sb->s_dev; stat->ino = attr->ino; stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); @@ -790,7 +792,13 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, stat->ctime.tv_nsec = attr->ctimensec; stat->size = attr->size; stat->blocks = attr->blocks; - stat->blksize = (1 << inode->i_blkbits); + + if (attr->blksize != 0) + blkbits = ilog2(attr->blksize); + else + blkbits = inode->i_sb->s_blocksize_bits; + + stat->blksize = 1 << blkbits; } static int fuse_do_getattr(struct inode *inode, struct kstat *stat, @@ -863,6 +871,7 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, if (stat) { generic_fillattr(inode, stat); stat->mode = fi->orig_i_mode; + stat->ino = fi->orig_ino; } } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9562109..b321a68 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -2173,6 +2173,44 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, return ret; } +long fuse_file_fallocate(struct file *file, int mode, loff_t offset, + loff_t length) +{ + struct fuse_file *ff = file->private_data; + struct fuse_conn *fc = ff->fc; + struct fuse_req *req; + struct fuse_fallocate_in inarg = { + .fh = ff->fh, + .offset = offset, + .length = length, + .mode = mode + }; + int err; + + if (fc->no_fallocate) + return -EOPNOTSUPP; + + req = fuse_get_req(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->in.h.opcode = FUSE_FALLOCATE; + req->in.h.nodeid = ff->nodeid; + req->in.numargs = 1; + req->in.args[0].size = sizeof(inarg); + req->in.args[0].value = &inarg; + fuse_request_send(fc, req); + err = req->out.h.error; + if (err == -ENOSYS) { + fc->no_fallocate = 1; + err = -EOPNOTSUPP; + } + fuse_put_request(fc, req); + + return err; +} +EXPORT_SYMBOL_GPL(fuse_file_fallocate); + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read = do_sync_read, @@ -2190,6 +2228,7 @@ static const struct file_operations fuse_file_operations = { .unlocked_ioctl = fuse_file_ioctl, .compat_ioctl = fuse_file_compat_ioctl, .poll = fuse_file_poll, + .fallocate = fuse_file_fallocate, }; static const struct file_operations fuse_direct_io_file_operations = { @@ -2206,6 +2245,7 @@ static const struct file_operations fuse_direct_io_file_operations = { .unlocked_ioctl = fuse_file_ioctl, .compat_ioctl = fuse_file_compat_ioctl, .poll = fuse_file_poll, + .fallocate = fuse_file_fallocate, /* no splice_read */ }; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 572cefc..771fb63 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -82,6 +82,9 @@ struct fuse_inode { preserve the original mode */ umode_t orig_i_mode; + /** 64 bit inode number */ + u64 orig_ino; + /** Version of last attribute change */ u64 attr_version; @@ -478,6 +481,9 @@ struct fuse_conn { /** Are BSD file locking primitives not implemented by fs? */ unsigned no_flock:1; + /** Is fallocate not implemented by fs? */ + unsigned no_fallocate:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 42678a3..1cd6165 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -91,6 +91,7 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->nlookup = 0; fi->attr_version = 0; fi->writectr = 0; + fi->orig_ino = 0; INIT_LIST_HEAD(&fi->write_files); INIT_LIST_HEAD(&fi->queued_writes); INIT_LIST_HEAD(&fi->writepages); @@ -139,6 +140,18 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) return 0; } +/* + * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down + * so that it will fit. + */ +static ino_t fuse_squash_ino(u64 ino64) +{ + ino_t ino = (ino_t) ino64; + if (sizeof(ino_t) < sizeof(u64)) + ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; + return ino; +} + void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -148,7 +161,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->attr_version = ++fc->attr_version; fi->i_time = attr_valid; - inode->i_ino = attr->ino; + inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); set_nlink(inode, attr->nlink); inode->i_uid = attr->uid; @@ -174,6 +187,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, fi->orig_i_mode = inode->i_mode; if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) inode->i_mode &= ~S_ISVTX; + + fi->orig_ino = attr->ino; } void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, diff --git a/fs/proc/base.c b/fs/proc/base.c index 616f41a..437195f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1803,7 +1803,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - unsigned i_mode, f_mode = file->f_mode; + unsigned f_mode = file->f_mode; rcu_read_unlock(); put_files_struct(files); @@ -1819,12 +1819,14 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_gid = GLOBAL_ROOT_GID; } - i_mode = S_IFLNK; - if (f_mode & FMODE_READ) - i_mode |= S_IRUSR | S_IXUSR; - if (f_mode & FMODE_WRITE) - i_mode |= S_IWUSR | S_IXUSR; - inode->i_mode = i_mode; + if (S_ISLNK(inode->i_mode)) { + unsigned i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; + } security_task_to_inode(task, inode); put_task_struct(task); @@ -1859,6 +1861,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, ei = PROC_I(inode); ei->fd = fd; + inode->i_mode = S_IFLNK; inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; ei->op.proc_get_link = proc_fd_link; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 685a837..84a7e6f 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2918,6 +2918,9 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) struct dentry *dent; struct ubifs_debug_info *d = c->dbg; + if (!IS_ENABLED(DEBUG_FS)) + return 0; + n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, c->vi.ubi_num, c->vi.vol_id); if (n == UBIFS_DFS_DIR_LEN) { @@ -3010,7 +3013,8 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - debugfs_remove_recursive(c->dbg->dfs_dir); + if (IS_ENABLED(DEBUG_FS)) + debugfs_remove_recursive(c->dbg->dfs_dir); } struct ubifs_global_debug_info ubifs_dbg; @@ -3095,6 +3099,9 @@ int dbg_debugfs_init(void) const char *fname; struct dentry *dent; + if (!IS_ENABLED(DEBUG_FS)) + return 0; + fname = "ubifs"; dent = debugfs_create_dir(fname, NULL); if (IS_ERR_OR_NULL(dent)) @@ -3159,7 +3166,8 @@ out: */ void dbg_debugfs_exit(void) { - debugfs_remove_recursive(dfs_rootdir); + if (IS_ENABLED(DEBUG_FS)) + debugfs_remove_recursive(dfs_rootdir); } /** diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index b0d6282..9e6e1c6 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -440,8 +440,8 @@ static inline int acpi_pm_device_sleep_wake(struct device *dev, bool enable) #else /* CONFIG_ACPI */ -static int register_acpi_bus_type(struct acpi_bus_type *bus) { return 0; } -static int unregister_acpi_bus_type(struct acpi_bus_type *bus) { return 0; } +static inline int register_acpi_bus_type(void *bus) { return 0; } +static inline int unregister_acpi_bus_type(void *bus) { return 0; } #endif /* CONFIG_ACPI */ diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 2520a6e..9f02005 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -2,6 +2,7 @@ #define _ASM_GENERIC_BUG_H #include <linux/compiler.h> +#include <linux/kernel.h> #ifdef CONFIG_BUG diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 73e4560..bac55c2 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -54,7 +54,7 @@ struct drm_mode_object { struct drm_object_properties *properties; }; -#define DRM_OBJECT_MAX_PROPERTY 16 +#define DRM_OBJECT_MAX_PROPERTY 24 struct drm_object_properties { int count; uint32_t ids[DRM_OBJECT_MAX_PROPERTY]; diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 58d0bda..81368ab 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -181,6 +181,7 @@ {0x1002, 0x6747, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6748, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6749, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x674A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6750, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6751, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6758, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_NEW_MEMMAP}, \ @@ -198,6 +199,7 @@ {0x1002, 0x6767, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6768, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6770, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6771, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6772, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6778, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6779, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAICOS|RADEON_NEW_MEMMAP}, \ @@ -229,10 +231,11 @@ {0x1002, 0x6827, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6829, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x682B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x682D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x682F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ - {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ - {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6830, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x6831, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6837, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6838, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6839, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VERDE|RADEON_NEW_MEMMAP}, \ @@ -531,6 +534,7 @@ {0x1002, 0x9645, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO2|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9647, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ {0x1002, 0x9648, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ + {0x1002, 0x9649, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP},\ {0x1002, 0x964a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x964b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x964c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SUMO|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ @@ -550,6 +554,7 @@ {0x1002, 0x9807, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9808, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9809, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x980A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_PALM|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9900, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9901, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9903, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ @@ -561,11 +566,19 @@ {0x1002, 0x9909, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x990A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x990F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9910, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9913, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9917, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9918, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9919, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9990, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9991, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9992, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9993, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9994, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x99A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x99A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x99A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARUBA|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0, 0, 0} #define r128_PCI_IDS \ diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h index b6d7ce9..6873358 100644 --- a/include/drm/exynos_drm.h +++ b/include/drm/exynos_drm.h @@ -64,6 +64,7 @@ struct drm_exynos_gem_map_off { * A structure for mapping buffer. * * @handle: a handle to gem object created. + * @pad: just padding to be 64-bit aligned. * @size: memory size to be mapped. * @mapped: having user virtual address mmaped. * - this variable would be filled by exynos gem module @@ -72,7 +73,8 @@ struct drm_exynos_gem_map_off { */ struct drm_exynos_gem_mmap { unsigned int handle; - unsigned int size; + unsigned int pad; + uint64_t size; uint64_t mapped; }; diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 81e803e..acba8943 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); diff --git a/include/linux/compaction.h b/include/linux/compaction.h index e988037..51a90b7 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -1,8 +1,6 @@ #ifndef _LINUX_COMPACTION_H #define _LINUX_COMPACTION_H -#include <linux/node.h> - /* Return values for compact_zone() and try_to_compact_pages() */ /* compaction didn't start as it was not possible or direct reclaim was more suitable */ #define COMPACT_SKIPPED 0 @@ -13,23 +11,6 @@ /* The full zone was compacted */ #define COMPACT_COMPLETE 3 -/* - * compaction supports three modes - * - * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans - * MIGRATE_MOVABLE pageblocks as migration sources and targets. - * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans - * MIGRATE_MOVABLE pageblocks as migration sources. - * MIGRATE_UNMOVABLE pageblocks are scanned as potential migration - * targets and convers them to MIGRATE_MOVABLE if possible - * COMPACT_SYNC uses synchronous migration and scans all pageblocks - */ -enum compact_mode { - COMPACT_ASYNC_MOVABLE, - COMPACT_ASYNC_UNMOVABLE, - COMPACT_SYNC, -}; - #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h new file mode 100644 index 0000000..0e4e2ee --- /dev/null +++ b/include/linux/frontswap.h @@ -0,0 +1,127 @@ +#ifndef _LINUX_FRONTSWAP_H +#define _LINUX_FRONTSWAP_H + +#include <linux/swap.h> +#include <linux/mm.h> +#include <linux/bitops.h> + +struct frontswap_ops { + void (*init)(unsigned); + int (*store)(unsigned, pgoff_t, struct page *); + int (*load)(unsigned, pgoff_t, struct page *); + void (*invalidate_page)(unsigned, pgoff_t); + void (*invalidate_area)(unsigned); +}; + +extern bool frontswap_enabled; +extern struct frontswap_ops + frontswap_register_ops(struct frontswap_ops *ops); +extern void frontswap_shrink(unsigned long); +extern unsigned long frontswap_curr_pages(void); +extern void frontswap_writethrough(bool); + +extern void __frontswap_init(unsigned type); +extern int __frontswap_store(struct page *page); +extern int __frontswap_load(struct page *page); +extern void __frontswap_invalidate_page(unsigned, pgoff_t); +extern void __frontswap_invalidate_area(unsigned); + +#ifdef CONFIG_FRONTSWAP + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + bool ret = false; + + if (frontswap_enabled && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + set_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + clear_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ + p->frontswap_map = map; +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return p->frontswap_map; +} +#else +/* all inline routines become no-ops and all externs are ignored */ + +#define frontswap_enabled (0) + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + return false; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return NULL; +} +#endif + +static inline int frontswap_store(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_store(page); + return ret; +} + +static inline int frontswap_load(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_load(page); + return ret; +} + +static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) +{ + if (frontswap_enabled) + __frontswap_invalidate_page(type, offset); +} + +static inline void frontswap_invalidate_area(unsigned type) +{ + if (frontswap_enabled) + __frontswap_invalidate_area(type); +} + +static inline void frontswap_init(unsigned type) +{ + if (frontswap_enabled) + __frontswap_init(type); +} + +#endif /* _LINUX_FRONTSWAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 51978ed..17fd887 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -802,13 +802,14 @@ struct inode { unsigned int __i_nlink; }; dev_t i_rdev; + loff_t i_size; struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; + unsigned int i_blkbits; blkcnt_t i_blocks; - loff_t i_size; #ifdef __NEED_I_SIZE_ORDERED seqcount_t i_size_seqcount; @@ -828,9 +829,8 @@ struct inode { struct list_head i_dentry; struct rcu_head i_rcu; }; - atomic_t i_count; - unsigned int i_blkbits; u64 i_version; + atomic_t i_count; atomic_t i_dio_count; atomic_t i_writecount; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 8f2ab8f..9303348 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -54,6 +54,9 @@ * 7.18 * - add FUSE_IOCTL_DIR flag * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE */ #ifndef _LINUX_FUSE_H @@ -85,7 +88,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 18 +#define FUSE_KERNEL_MINOR_VERSION 19 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -278,6 +281,7 @@ enum fuse_opcode { FUSE_POLL = 40, FUSE_NOTIFY_REPLY = 41, FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out { __u64 kh; }; +struct fuse_fallocate_in { + __u64 fh; + __u64 offset; + __u64 length; + __u32 mode; + __u32 padding; +}; + struct fuse_in_header { __u32 len; __u32 opcode; diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h new file mode 100644 index 0000000..a65c864 --- /dev/null +++ b/include/linux/i2c-mux-pinctrl.h @@ -0,0 +1,41 @@ +/* + * i2c-mux-pinctrl platform data + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _LINUX_I2C_MUX_PINCTRL_H +#define _LINUX_I2C_MUX_PINCTRL_H + +/** + * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl + * @parent_bus_num: Parent I2C bus number + * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic. + * @bus_count: Number of child busses. Also the number of elements in + * @pinctrl_states + * @pinctrl_states: The names of the pinctrl state to select for each child bus + * @pinctrl_state_idle: The pinctrl state to select when no child bus is being + * accessed. If NULL, the most recently used pinctrl state will be left + * selected. + */ +struct i2c_mux_pinctrl_platform_data { + int parent_bus_num; + int base_bus_num; + int bus_count; + const char **pinctrl_states; + const char *pinctrl_state_idle; +}; + +#endif diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e4baff5..9e65eff 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ + .nr_cpus_allowed= NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ .se = { \ @@ -157,7 +158,6 @@ extern struct cred init_cred; .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ .time_slice = RR_TIMESLICE, \ - .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1b14d25..d6a5806 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -128,7 +128,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define module_param_cb(name, ops, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, 0) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1) /** * <level>_param_cb - general callback for a module/cmdline parameter @@ -192,7 +192,7 @@ struct kparam_array { (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, 0) + (perm) + sizeof(__check_old_set_param(set))*0, -1) /* We don't get oldget: it's often a new-style param_get_uint, etc. */ static inline int @@ -272,7 +272,7 @@ static inline void __kernel_param_unlock(void) */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, ¶m_ops_##type, &var, perm, 0) + __module_param_call("", name, ¶m_ops_##type, &var, perm, -1) #endif /* !MODULE */ /** @@ -290,7 +290,7 @@ static inline void __kernel_param_unlock(void) = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_ops_string, \ - .str = &__param_string_##name, perm, 0); \ + .str = &__param_string_##name, perm, -1); \ __MODULE_PARM_TYPE(name, "string") /** @@ -432,7 +432,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ - perm, 0); \ + perm, -1); \ __MODULE_PARM_TYPE(name, "array of " #type) extern struct kernel_param_ops param_array_ops; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 4541f33..dca19e6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -393,6 +393,18 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) extern void (*ip_ct_attach)(struct sk_buff *, struct sk_buff *) __rcu; extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; + +struct nf_conn; +struct nlattr; + +struct nfq_ct_hook { + size_t (*build_size)(const struct nf_conn *ct); + int (*build)(struct sk_buff *skb, struct nf_conn *ct); + int (*parse)(const struct nlattr *attr, struct nf_conn *ct); + void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, + u32 ctinfo, int off); +}; +extern struct nfq_ct_hook *nfq_ct_hook; #else static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} #endif diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index 1697036..874ae8f 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -10,6 +10,7 @@ header-y += nfnetlink.h header-y += nfnetlink_acct.h header-y += nfnetlink_compat.h header-y += nfnetlink_conntrack.h +header-y += nfnetlink_cthelper.h header-y += nfnetlink_cttimeout.h header-y += nfnetlink_log.h header-y += nfnetlink_queue.h diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h index 0ce91d5..0dfc8b7 100644 --- a/include/linux/netfilter/nf_conntrack_sip.h +++ b/include/linux/netfilter/nf_conntrack_sip.h @@ -2,6 +2,8 @@ #define __NF_CONNTRACK_SIP_H__ #ifdef __KERNEL__ +#include <net/netfilter/nf_conntrack_expect.h> + #define SIP_PORT 5060 #define SIP_TIMEOUT 3600 diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index a1048c1..18341cd 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -50,7 +50,8 @@ struct nfgenmsg { #define NFNL_SUBSYS_IPSET 6 #define NFNL_SUBSYS_ACCT 7 #define NFNL_SUBSYS_CTNETLINK_TIMEOUT 8 -#define NFNL_SUBSYS_COUNT 9 +#define NFNL_SUBSYS_CTHELPER 9 +#define NFNL_SUBSYS_COUNT 10 #ifdef __KERNEL__ diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index e58e4b9..7688833 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -191,6 +191,7 @@ enum ctattr_expect_nat { enum ctattr_help { CTA_HELP_UNSPEC, CTA_HELP_NAME, + CTA_HELP_INFO, __CTA_HELP_MAX }; #define CTA_HELP_MAX (__CTA_HELP_MAX - 1) diff --git a/include/linux/netfilter/nfnetlink_cthelper.h b/include/linux/netfilter/nfnetlink_cthelper.h new file mode 100644 index 0000000..33659f6 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_cthelper.h @@ -0,0 +1,55 @@ +#ifndef _NFNL_CTHELPER_H_ +#define _NFNL_CTHELPER_H_ + +#define NFCT_HELPER_STATUS_DISABLED 0 +#define NFCT_HELPER_STATUS_ENABLED 1 + +enum nfnl_acct_msg_types { + NFNL_MSG_CTHELPER_NEW, + NFNL_MSG_CTHELPER_GET, + NFNL_MSG_CTHELPER_DEL, + NFNL_MSG_CTHELPER_MAX +}; + +enum nfnl_cthelper_type { + NFCTH_UNSPEC, + NFCTH_NAME, + NFCTH_TUPLE, + NFCTH_QUEUE_NUM, + NFCTH_POLICY, + NFCTH_PRIV_DATA_LEN, + NFCTH_STATUS, + __NFCTH_MAX +}; +#define NFCTH_MAX (__NFCTH_MAX - 1) + +enum nfnl_cthelper_policy_type { + NFCTH_POLICY_SET_UNSPEC, + NFCTH_POLICY_SET_NUM, + NFCTH_POLICY_SET, + NFCTH_POLICY_SET1 = NFCTH_POLICY_SET, + NFCTH_POLICY_SET2, + NFCTH_POLICY_SET3, + NFCTH_POLICY_SET4, + __NFCTH_POLICY_SET_MAX +}; +#define NFCTH_POLICY_SET_MAX (__NFCTH_POLICY_SET_MAX - 1) + +enum nfnl_cthelper_pol_type { + NFCTH_POLICY_UNSPEC, + NFCTH_POLICY_NAME, + NFCTH_POLICY_EXPECT_MAX, + NFCTH_POLICY_EXPECT_TIMEOUT, + __NFCTH_POLICY_MAX +}; +#define NFCTH_POLICY_MAX (__NFCTH_POLICY_MAX - 1) + +enum nfnl_cthelper_tuple_type { + NFCTH_TUPLE_UNSPEC, + NFCTH_TUPLE_L3PROTONUM, + NFCTH_TUPLE_L4PROTONUM, + __NFCTH_TUPLE_MAX, +}; +#define NFCTH_TUPLE_MAX (__NFCTH_TUPLE_MAX - 1) + +#endif /* _NFNL_CTHELPER_H */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index a6c1dda..e0d8fd8 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -42,6 +42,8 @@ enum nfqnl_attr_type { NFQA_IFINDEX_PHYSOUTDEV, /* __u32 ifindex */ NFQA_HWADDR, /* nfqnl_msg_packet_hw */ NFQA_PAYLOAD, /* opaque data payload */ + NFQA_CT, /* nf_conntrack_netlink.h */ + NFQA_CT_INFO, /* enum ip_conntrack_info */ __NFQA_MAX }; @@ -92,5 +94,6 @@ enum nfqnl_attr_config { /* Flags for NFQA_CFG_FLAGS */ #define NFQA_CFG_F_FAIL_OPEN (1 << 0) +#define NFQA_CFG_F_CONNTRACK (1 << 1) #endif /* _NFNETLINK_QUEUE_H */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index fa0946c..e2b1280 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -66,6 +66,7 @@ enum nf_ip_hook_priorities { NF_IP_PRI_SECURITY = 50, NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, + NF_IP_PRI_CONNTRACK_HELPER = 300, NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, NF_IP_PRI_LAST = INT_MAX, }; diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 57c0251..7c8a513 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,6 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_SECURITY = 50, NF_IP6_PRI_NAT_SRC = 100, NF_IP6_PRI_SELINUX_LAST = 225, + NF_IP6_PRI_CONNTRACK_HELPER = 300, NF_IP6_PRI_LAST = INT_MAX, }; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f325786..45db49f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,6 +555,8 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; +#define PERF_MAX_STACK_DEPTH 127 + enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, PERF_CONTEXT_KERNEL = (__u64)-128, @@ -609,8 +611,6 @@ struct perf_guest_info_callbacks { #include <linux/sysfs.h> #include <asm/local.h> -#define PERF_MAX_STACK_DEPTH 255 - struct perf_callchain_entry { __u64 nr; __u64 ip[PERF_MAX_STACK_DEPTH]; diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 711e0a3..3988012 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -127,8 +127,8 @@ #define PR_SET_PTRACER 0x59616d61 # define PR_SET_PTRACER_ANY ((unsigned long)-1) -#define PR_SET_CHILD_SUBREAPER 36 -#define PR_GET_CHILD_SUBREAPER 37 +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 /* * If no_new_privs is set, then operations that grant new privileges (i.e. @@ -142,7 +142,9 @@ * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. */ -#define PR_SET_NO_NEW_PRIVS 38 -#define PR_GET_NO_NEW_PRIVS 39 +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 #endif /* _LINUX_PRCTL_H */ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0d04cd6..ffc444c 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index++; if (likely(*slot)) return slot; - if (flags & RADIX_TREE_ITER_CONTIG) + if (flags & RADIX_TREE_ITER_CONTIG) { + /* forbid switching to the next chunk */ + iter->next_index = 0; break; + } } } return NULL; diff --git a/include/linux/sched.h b/include/linux/sched.h index f34437e..4059c0f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(unsigned long ticks); +extern void update_cpu_load_nohz(void); extern unsigned long get_parent_ip(unsigned long addr); @@ -438,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm); /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) @@ -875,6 +877,8 @@ struct sched_group_power { * Number of busy cpus in this group. */ atomic_t nr_busy_cpus; + + unsigned long cpumask[0]; /* iteration mask */ }; struct sched_group { @@ -899,6 +903,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +/* + * cpumask masking which cpus in the group are allowed to iterate up the domain + * tree. + */ +static inline struct cpumask *sched_group_mask(struct sched_group *sg) +{ + return to_cpumask(sg->sgp->cpumask); +} + /** * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. * @group: The group whose first cpu is to be returned. @@ -1187,7 +1200,6 @@ struct sched_rt_entity { struct list_head run_list; unsigned long timeout; unsigned int time_slice; - int nr_cpus_allowed; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED @@ -1252,6 +1264,7 @@ struct task_struct { #endif unsigned int policy; + int nr_cpus_allowed; cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b534a1b..642cb73 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -225,14 +225,11 @@ enum { /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, - /* ensure the originating sk reference is available on driver level */ - SKBTX_DRV_NEEDS_SK_REF = 1 << 3, - /* device driver supports TX zero-copy buffers */ - SKBTX_DEV_ZEROCOPY = 1 << 4, + SKBTX_DEV_ZEROCOPY = 1 << 3, /* generate wifi status information (where possible) */ - SKBTX_WIFI_STATUS = 1 << 5, + SKBTX_WIFI_STATUS = 1 << 4, }; /* diff --git a/include/linux/swap.h b/include/linux/swap.h index b666193..c84ec68 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -197,6 +197,10 @@ struct swap_info_struct { struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ +#ifdef CONFIG_FRONTSWAP + unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ + atomic_t frontswap_pages; /* frontswap pages in-use counter */ +#endif }; struct swap_list_t { diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h new file mode 100644 index 0000000..e282624 --- /dev/null +++ b/include/linux/swapfile.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_SWAPFILE_H +#define _LINUX_SWAPFILE_H + +/* + * these were static in swapfile.c but frontswap.c needs them and we don't + * want to expose them to the dozens of source files that include swap.h + */ +extern spinlock_t swap_lock; +extern struct swap_list_t swap_list; +extern struct swap_info_struct *swap_info[]; +extern int try_to_unuse(unsigned int, bool, unsigned long); + +#endif /* _LINUX_SWAPFILE_H */ diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b455c7c..60da41f 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -12,6 +12,9 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, + /* below are referred only from vga_switcheroo_get_client_state() */ + VGA_SWITCHEROO_INIT, + VGA_SWITCHEROO_NOT_FOUND, }; enum vga_switcheroo_client_id { @@ -50,6 +53,8 @@ void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); +int vga_switcheroo_get_client_state(struct pci_dev *dev); + #else static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} @@ -62,5 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } + #endif diff --git a/include/net/dst.h b/include/net/dst.h index 8197ead..f0bf3b8 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -48,8 +48,8 @@ struct dst_entry { #else void *__pad1; #endif - int (*input)(struct sk_buff*); - int (*output)(struct sk_buff*); + int (*input)(struct sk_buff *); + int (*output)(struct sk_buff *); int flags; #define DST_HOST 0x0001 @@ -241,7 +241,7 @@ dst_metric_locked(const struct dst_entry *dst, int metric) return dst_metric(dst, RTAX_LOCK) & (1<<metric); } -static inline void dst_hold(struct dst_entry * dst) +static inline void dst_hold(struct dst_entry *dst) { /* * If your kernel compilation stops here, please check @@ -264,8 +264,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time) dst->lastuse = time; } -static inline -struct dst_entry * dst_clone(struct dst_entry * dst) +static inline struct dst_entry *dst_clone(struct dst_entry *dst) { if (dst) atomic_inc(&dst->__refcnt); @@ -371,12 +370,12 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb) } extern int dst_discard(struct sk_buff *skb); -extern void *dst_alloc(struct dst_ops * ops, struct net_device *dev, +extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev, int initial_ref, int initial_obsolete, int flags); -extern void __dst_free(struct dst_entry * dst); -extern struct dst_entry *dst_destroy(struct dst_entry * dst); +extern void __dst_free(struct dst_entry *dst); +extern struct dst_entry *dst_destroy(struct dst_entry *dst); -static inline void dst_free(struct dst_entry * dst) +static inline void dst_free(struct dst_entry *dst) { if (dst->obsolete > 1) return; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a2cda24..58cb3fc 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -140,10 +140,10 @@ extern void rt6_redirect(const struct in6_addr *dest, u8 *lladdr, int on_link); -extern void rt6_pmtu_discovery(const struct in6_addr *daddr, - const struct in6_addr *saddr, - struct net_device *dev, - u32 pmtu); +extern void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, + int oif, u32 mark); +extern void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, + __be32 mtu); struct netlink_callback; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index cce7f6a..f1494fe 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -39,36 +39,6 @@ union nf_conntrack_expect_proto { /* insert expect proto private data here */ }; -/* Add protocol helper include file here */ -#include <linux/netfilter/nf_conntrack_ftp.h> -#include <linux/netfilter/nf_conntrack_pptp.h> -#include <linux/netfilter/nf_conntrack_h323.h> -#include <linux/netfilter/nf_conntrack_sane.h> -#include <linux/netfilter/nf_conntrack_sip.h> - -/* per conntrack: application helper private data */ -union nf_conntrack_help { - /* insert conntrack helper private data (master) here */ -#if defined(CONFIG_NF_CONNTRACK_FTP) || defined(CONFIG_NF_CONNTRACK_FTP_MODULE) - struct nf_ct_ftp_master ct_ftp_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_PPTP) || \ - defined(CONFIG_NF_CONNTRACK_PPTP_MODULE) - struct nf_ct_pptp_master ct_pptp_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_H323) || \ - defined(CONFIG_NF_CONNTRACK_H323_MODULE) - struct nf_ct_h323_master ct_h323_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_SANE) || \ - defined(CONFIG_NF_CONNTRACK_SANE_MODULE) - struct nf_ct_sane_master ct_sane_info; -#endif -#if defined(CONFIG_NF_CONNTRACK_SIP) || defined(CONFIG_NF_CONNTRACK_SIP_MODULE) - struct nf_ct_sip_master ct_sip_info; -#endif -}; - #include <linux/types.h> #include <linux/skbuff.h> #include <linux/timer.h> @@ -89,12 +59,13 @@ struct nf_conn_help { /* Helper. if any */ struct nf_conntrack_helper __rcu *helper; - union nf_conntrack_help help; - struct hlist_head expectations; /* Current number of expected connections */ u8 expecting[NF_CT_MAX_EXPECT_CLASSES]; + + /* private helper information. */ + char data[]; }; #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 4619caa..983f002 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -59,10 +59,12 @@ static inline struct net *nf_ct_exp_net(struct nf_conntrack_expect *exp) return nf_ct_net(exp->master); } +#define NF_CT_EXP_POLICY_NAME_LEN 16 + struct nf_conntrack_expect_policy { unsigned int max_expected; unsigned int timeout; - const char *name; + char name[NF_CT_EXP_POLICY_NAME_LEN]; }; #define NF_CT_EXPECT_CLASS_DEFAULT 0 diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index 96755c3..8b4d1fc2 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -80,10 +80,13 @@ static inline void nf_ct_ext_free(struct nf_conn *ct) } /* Add this type, returns pointer to data or NULL. */ -void * -__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp); +void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp); + #define nf_ct_ext_add(ct, id, gfp) \ - ((id##_TYPE *)__nf_ct_ext_add((ct), (id), (gfp))) + ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), 0, (gfp))) +#define nf_ct_ext_add_length(ct, id, len, gfp) \ + ((id##_TYPE *)__nf_ct_ext_add_length((ct), (id), (len), (gfp))) #define NF_CT_EXT_F_PREALLOC 0x0001 diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 1d18894..9aad956 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -11,18 +11,27 @@ #define _NF_CONNTRACK_HELPER_H #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_expect.h> struct module; +enum nf_ct_helper_flags { + NF_CT_HELPER_F_USERSPACE = (1 << 0), + NF_CT_HELPER_F_CONFIGURED = (1 << 1), +}; + #define NF_CT_HELPER_NAME_LEN 16 struct nf_conntrack_helper { struct hlist_node hnode; /* Internal use. */ - const char *name; /* name of the module */ + char name[NF_CT_HELPER_NAME_LEN]; /* name of the module */ struct module *me; /* pointer to self */ const struct nf_conntrack_expect_policy *expect_policy; + /* length of internal data, ie. sizeof(struct nf_ct_*_master) */ + size_t data_len; + /* Tuple of things we will help (compared against server response) */ struct nf_conntrack_tuple tuple; @@ -35,8 +44,12 @@ struct nf_conntrack_helper { void (*destroy)(struct nf_conn *ct); + int (*from_nlattr)(struct nlattr *attr, struct nf_conn *ct); int (*to_nlattr)(struct sk_buff *skb, const struct nf_conn *ct); unsigned int expect_class_max; + + unsigned int flags; + unsigned int queue_num; /* For user-space helpers. */ }; extern struct nf_conntrack_helper * @@ -48,7 +61,7 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); -extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); +extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp); extern int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags); @@ -60,6 +73,15 @@ static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) return nf_ct_ext_find(ct, NF_CT_EXT_HELPER); } +static inline void *nfct_help_data(const struct nf_conn *ct) +{ + struct nf_conn_help *help; + + help = nf_ct_ext_find(ct, NF_CT_EXT_HELPER); + + return (void *)help->data; +} + extern int nf_conntrack_helper_init(struct net *net); extern void nf_conntrack_helper_fini(struct net *net); @@ -82,4 +104,7 @@ nf_ct_helper_expectfn_find_by_name(const char *name); struct nf_ct_helper_expectfn * nf_ct_helper_expectfn_find_by_symbol(const void *symbol); +extern struct hlist_head *nf_ct_helper_hash; +extern unsigned int nf_ct_helper_hsize; + #endif /*_NF_CONNTRACK_HELPER_H*/ diff --git a/include/net/netfilter/nf_nat_helper.h b/include/net/netfilter/nf_nat_helper.h index 02bb6c2..7d8fb7b 100644 --- a/include/net/netfilter/nf_nat_helper.h +++ b/include/net/netfilter/nf_nat_helper.h @@ -54,4 +54,8 @@ extern void nf_nat_follow_master(struct nf_conn *ct, extern s16 nf_nat_get_offset(const struct nf_conn *ct, enum ip_conntrack_dir dir, u32 seq); + +extern void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + u32 dir, int off); + #endif diff --git a/include/net/netfilter/nfnetlink_queue.h b/include/net/netfilter/nfnetlink_queue.h new file mode 100644 index 0000000..86267a5 --- /dev/null +++ b/include/net/netfilter/nfnetlink_queue.h @@ -0,0 +1,43 @@ +#ifndef _NET_NFNL_QUEUE_H_ +#define _NET_NFNL_QUEUE_H_ + +#include <linux/netfilter/nf_conntrack_common.h> + +struct nf_conn; + +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT +struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, + enum ip_conntrack_info *ctinfo); +struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, + const struct nlattr *attr, + enum ip_conntrack_info *ctinfo); +int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo); +void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff); +#else +inline struct nf_conn * +nfqnl_ct_get(struct sk_buff *entskb, size_t *size, enum ip_conntrack_info *ctinfo) +{ + return NULL; +} + +inline struct nf_conn *nfqnl_ct_parse(const struct sk_buff *skb, + const struct nlattr *attr, + enum ip_conntrack_info *ctinfo) +{ + return NULL; +} + +inline int +nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + return 0; +} + +inline void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) +{ +} +#endif /* NF_CONNTRACK */ +#endif diff --git a/include/net/route.h b/include/net/route.h index a36ae42..47eb25a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -215,7 +215,10 @@ static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 s return ip_route_input_common(skb, dst, src, tos, devin, true); } -extern void ip_rt_send_redirect(struct sk_buff *skb); +extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, u32 mark, u8 protocol, int flow_flags); +extern void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu); +extern void ip_rt_send_redirect(struct sk_buff *skb); extern unsigned int inet_addr_type(struct net *net, __be32 addr); extern unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, __be32 addr); diff --git a/init/main.c b/init/main.c index 1ca6b32..b5cc0a7 100644 --- a/init/main.c +++ b/init/main.c @@ -508,7 +508,7 @@ asmlinkage void __init start_kernel(void) parse_early_param(); parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, - 0, 0, &unknown_bootoption); + -1, -1, &unknown_bootoption); jump_label_init(); @@ -755,13 +755,8 @@ static void __init do_initcalls(void) { int level; - for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) { - pr_info("initlevel:%d=%s, %d registered initcalls\n", - level, initcall_level_names[level], - (int) (initcall_levels[level+1] - - initcall_levels[level])); + for (level = 0; level < ARRAY_SIZE(initcall_levels) - 1; level++) do_initcall_level(level); - } } /* @@ -393,6 +393,16 @@ static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync) return sfd->file->f_op->fsync(sfd->file, start, end, datasync); } +static long shm_fallocate(struct file *file, int mode, loff_t offset, + loff_t len) +{ + struct shm_file_data *sfd = shm_file_data(file); + + if (!sfd->file->f_op->fallocate) + return -EOPNOTSUPP; + return sfd->file->f_op->fallocate(file, mode, offset, len); +} + static unsigned long shm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) @@ -410,6 +420,7 @@ static const struct file_operations shm_file_operations = { .get_unmapped_area = shm_get_unmapped_area, #endif .llseek = noop_llseek, + .fallocate = shm_fallocate, }; static const struct file_operations shm_file_operations_huge = { @@ -418,6 +429,7 @@ static const struct file_operations shm_file_operations_huge = { .release = shm_release, .get_unmapped_area = shm_get_unmapped_area, .llseek = noop_llseek, + .fallocate = shm_fallocate, }; int is_file_shm_hugepages(struct file *file) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0f3527d..72fcd30 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -896,10 +896,13 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) mutex_unlock(&cgroup_mutex); /* - * Drop the active superblock reference that we took when we - * created the cgroup + * We want to drop the active superblock reference from the + * cgroup creation after all the dentry refs are gone - + * kill_sb gets mighty unhappy otherwise. Mark + * dentry->d_fsdata with cgroup_diput() to tell + * cgroup_d_release() to call deactivate_super(). */ - deactivate_super(cgrp->root->sb); + dentry->d_fsdata = cgroup_diput; /* * if we're getting rid of the cgroup, refcount should ensure @@ -925,6 +928,13 @@ static int cgroup_delete(const struct dentry *d) return 1; } +static void cgroup_d_release(struct dentry *dentry) +{ + /* did cgroup_diput() tell me to deactivate super? */ + if (dentry->d_fsdata == cgroup_diput) + deactivate_super(dentry->d_sb); +} + static void remove_dir(struct dentry *d) { struct dentry *parent = dget(d->d_parent); @@ -1532,6 +1542,7 @@ static int cgroup_get_rootdir(struct super_block *sb) static const struct dentry_operations cgroup_dops = { .d_iput = cgroup_diput, .d_delete = cgroup_delete, + .d_release = cgroup_d_release, }; struct inode *inode = diff --git a/kernel/events/core.c b/kernel/events/core.c index 5b06cbb..f85c015 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3181,7 +3181,6 @@ static void perf_event_for_each(struct perf_event *event, event = event->group_leader; perf_event_for_each_child(event, func); - func(event); list_for_each_entry(sibling, &event->sibling_list, group_entry) perf_event_for_each_child(sibling, func); mutex_unlock(&ctx->mutex); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index fc275e4..eebd6d5 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -275,8 +275,10 @@ void handle_nested_irq(unsigned int irq) kstat_incr_irqs_this_cpu(irq, desc); action = desc->action; - if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) + if (unlikely(!action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; goto out_unlock; + } irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS); raw_spin_unlock_irq(&desc->lock); @@ -324,8 +326,10 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc) desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING); kstat_incr_irqs_this_cpu(irq, desc); - if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) + if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) { + desc->istate |= IRQS_PENDING; goto out_unlock; + } handle_irq_event(desc); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 8e5c56b..001fa5b 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -101,6 +101,9 @@ extern int irq_select_affinity_usr(unsigned int irq, struct cpumask *mask); extern void irq_set_thread_affinity(struct irq_desc *desc); +extern int irq_do_set_affinity(struct irq_data *data, + const struct cpumask *dest, bool force); + /* Inline functions for support of irq chips on slow busses */ static inline void chip_bus_lock(struct irq_desc *desc) { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ea0c6c2..8c54823 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -142,6 +142,25 @@ static inline void irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { } #endif +int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ + struct irq_desc *desc = irq_data_to_desc(data); + struct irq_chip *chip = irq_data_get_irq_chip(data); + int ret; + + ret = chip->irq_set_affinity(data, mask, false); + switch (ret) { + case IRQ_SET_MASK_OK: + cpumask_copy(data->affinity, mask); + case IRQ_SET_MASK_OK_NOCOPY: + irq_set_thread_affinity(desc); + ret = 0; + } + + return ret; +} + int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) { struct irq_chip *chip = irq_data_get_irq_chip(data); @@ -152,14 +171,7 @@ int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask) return -EINVAL; if (irq_can_move_pcntxt(data)) { - ret = chip->irq_set_affinity(data, mask, false); - switch (ret) { - case IRQ_SET_MASK_OK: - cpumask_copy(data->affinity, mask); - case IRQ_SET_MASK_OK_NOCOPY: - irq_set_thread_affinity(desc); - ret = 0; - } + ret = irq_do_set_affinity(data, mask, false); } else { irqd_set_move_pending(data); irq_copy_pending(desc, mask); @@ -283,9 +295,8 @@ EXPORT_SYMBOL_GPL(irq_set_affinity_notifier); static int setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) { - struct irq_chip *chip = irq_desc_get_chip(desc); struct cpumask *set = irq_default_affinity; - int ret, node = desc->irq_data.node; + int node = desc->irq_data.node; /* Excludes PER_CPU and NO_BALANCE interrupts */ if (!irq_can_set_affinity(irq)) @@ -311,13 +322,7 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask) if (cpumask_intersects(mask, nodemask)) cpumask_and(mask, mask, nodemask); } - ret = chip->irq_set_affinity(&desc->irq_data, mask, false); - switch (ret) { - case IRQ_SET_MASK_OK: - cpumask_copy(desc->irq_data.affinity, mask); - case IRQ_SET_MASK_OK_NOCOPY: - irq_set_thread_affinity(desc); - } + irq_do_set_affinity(&desc->irq_data, mask, false); return 0; } #else diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c index c3c8975..ca3f4aa 100644 --- a/kernel/irq/migration.c +++ b/kernel/irq/migration.c @@ -42,17 +42,8 @@ void irq_move_masked_irq(struct irq_data *idata) * For correct operation this depends on the caller * masking the irqs. */ - if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) - < nr_cpu_ids)) { - int ret = chip->irq_set_affinity(&desc->irq_data, - desc->pending_mask, false); - switch (ret) { - case IRQ_SET_MASK_OK: - cpumask_copy(desc->irq_data.affinity, desc->pending_mask); - case IRQ_SET_MASK_OK_NOCOPY: - irq_set_thread_affinity(desc); - } - } + if (cpumask_any_and(desc->pending_mask, cpu_online_mask) < nr_cpu_ids) + irq_do_set_affinity(&desc->irq_data, desc->pending_mask, false); cpumask_clear(desc->pending_mask); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 39eb601..d5594a4 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -142,9 +142,8 @@ const_debug unsigned int sysctl_sched_features = #define SCHED_FEAT(name, enabled) \ #name , -static __read_mostly char *sched_feat_names[] = { +static const char * const sched_feat_names[] = { #include "features.h" - NULL }; #undef SCHED_FEAT @@ -2517,25 +2516,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } +#ifdef CONFIG_NO_HZ +/* + * There is no sane way to deal with nohz on smp when using jiffies because the + * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading + * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. + * + * Therefore we cannot use the delta approach from the regular tick since that + * would seriously skew the load calculation. However we'll make do for those + * updates happening while idle (nohz_idle_balance) or coming out of idle + * (tick_nohz_idle_exit). + * + * This means we might still be one tick off for nohz periods. + */ + /* * Called from nohz_idle_balance() to update the load ratings before doing the * idle balance. */ void update_idle_cpu_load(struct rq *this_rq) { - unsigned long curr_jiffies = jiffies; + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); unsigned long load = this_rq->load.weight; unsigned long pending_updates; /* - * Bloody broken means of dealing with nohz, but better than nothing.. - * jiffies is updated by one cpu, another cpu can drift wrt the jiffy - * update and see 0 difference the one time and 2 the next, even though - * we ticked at roughtly the same rate. - * - * Hence we only use this from nohz_idle_balance() and skip this - * nonsense when called from the scheduler_tick() since that's - * guaranteed a stable rate. + * bail if there's load or we're actually up-to-date. */ if (load || curr_jiffies == this_rq->last_load_update_tick) return; @@ -2547,12 +2553,38 @@ void update_idle_cpu_load(struct rq *this_rq) } /* + * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. + */ +void update_cpu_load_nohz(void) +{ + struct rq *this_rq = this_rq(); + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); + unsigned long pending_updates; + + if (curr_jiffies == this_rq->last_load_update_tick) + return; + + raw_spin_lock(&this_rq->lock); + pending_updates = curr_jiffies - this_rq->last_load_update_tick; + if (pending_updates) { + this_rq->last_load_update_tick = curr_jiffies; + /* + * We were idle, this means load 0, the current load might be + * !0 due to remote wakeups and the sort. + */ + __update_cpu_load(this_rq, 0, pending_updates); + } + raw_spin_unlock(&this_rq->lock); +} +#endif /* CONFIG_NO_HZ */ + +/* * Called from scheduler_tick() */ static void update_cpu_load_active(struct rq *this_rq) { /* - * See the mess in update_idle_cpu_load(). + * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). */ this_rq->last_load_update_tick = jiffies; __update_cpu_load(this_rq, this_rq->load.weight, 1); @@ -4982,7 +5014,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) p->sched_class->set_cpus_allowed(p, new_mask); cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + p->nr_cpus_allowed = cpumask_weight(new_mask); } /* @@ -5524,15 +5556,20 @@ static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */ #ifdef CONFIG_SCHED_DEBUG -static __read_mostly int sched_domain_debug_enabled; +static __read_mostly int sched_debug_enabled; -static int __init sched_domain_debug_setup(char *str) +static int __init sched_debug_setup(char *str) { - sched_domain_debug_enabled = 1; + sched_debug_enabled = 1; return 0; } -early_param("sched_debug", sched_domain_debug_setup); +early_param("sched_debug", sched_debug_setup); + +static inline bool sched_debug(void) +{ + return sched_debug_enabled; +} static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, struct cpumask *groupmask) @@ -5572,7 +5609,12 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, break; } - if (!group->sgp->power) { + /* + * Even though we initialize ->power to something semi-sane, + * we leave power_orig unset. This allows us to detect if + * domain iteration is still funny without causing /0 traps. + */ + if (!group->sgp->power_orig) { printk(KERN_CONT "\n"); printk(KERN_ERR "ERROR: domain->cpu_power not " "set\n"); @@ -5620,7 +5662,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) { int level = 0; - if (!sched_domain_debug_enabled) + if (!sched_debug_enabled) return; if (!sd) { @@ -5641,6 +5683,10 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) } #else /* !CONFIG_SCHED_DEBUG */ # define sched_domain_debug(sd, cpu) do { } while (0) +static inline bool sched_debug(void) +{ + return false; +} #endif /* CONFIG_SCHED_DEBUG */ static int sd_degenerate(struct sched_domain *sd) @@ -5962,6 +6008,44 @@ struct sched_domain_topology_level { struct sd_data data; }; +/* + * Build an iteration mask that can exclude certain CPUs from the upwards + * domain traversal. + * + * Asymmetric node setups can result in situations where the domain tree is of + * unequal depth, make sure to skip domains that already cover the entire + * range. + * + * In that case build_sched_domains() will have terminated the iteration early + * and our sibling sd spans will be empty. Domains should always include the + * cpu they're built on, so check that. + * + */ +static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) +{ + const struct cpumask *span = sched_domain_span(sd); + struct sd_data *sdd = sd->private; + struct sched_domain *sibling; + int i; + + for_each_cpu(i, span) { + sibling = *per_cpu_ptr(sdd->sd, i); + if (!cpumask_test_cpu(i, sched_domain_span(sibling))) + continue; + + cpumask_set_cpu(i, sched_group_mask(sg)); + } +} + +/* + * Return the canonical balance cpu for this group, this is the first cpu + * of this group that's also in the iteration mask. + */ +int group_balance_cpu(struct sched_group *sg) +{ + return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg)); +} + static int build_overlap_sched_groups(struct sched_domain *sd, int cpu) { @@ -5980,6 +6064,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) if (cpumask_test_cpu(i, covered)) continue; + child = *per_cpu_ptr(sdd->sd, i); + + /* See the comment near build_group_mask(). */ + if (!cpumask_test_cpu(i, sched_domain_span(child))) + continue; + sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, cpu_to_node(cpu)); @@ -5987,8 +6077,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) goto fail; sg_span = sched_group_cpus(sg); - - child = *per_cpu_ptr(sdd->sd, i); if (child->child) { child = child->child; cpumask_copy(sg_span, sched_domain_span(child)); @@ -5997,10 +6085,24 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) cpumask_or(covered, covered, sg_span); - sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); - atomic_inc(&sg->sgp->ref); + sg->sgp = *per_cpu_ptr(sdd->sgp, i); + if (atomic_inc_return(&sg->sgp->ref) == 1) + build_group_mask(sd, sg); - if (cpumask_test_cpu(cpu, sg_span)) + /* + * Initialize sgp->power such that even if we mess up the + * domains and no possible iteration will get us here, we won't + * die on a /0 trap. + */ + sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span); + + /* + * Make sure the first group of this domain contains the + * canonical balance cpu. Otherwise the sched_domain iteration + * breaks. See update_sg_lb_stats(). + */ + if ((!groups && cpumask_test_cpu(cpu, sg_span)) || + group_balance_cpu(sg) == cpu) groups = sg; if (!first) @@ -6074,6 +6176,7 @@ build_sched_groups(struct sched_domain *sd, int cpu) cpumask_clear(sched_group_cpus(sg)); sg->sgp->power = 0; + cpumask_setall(sched_group_mask(sg)); for_each_cpu(j, span) { if (get_group(j, sdd, NULL) != group) @@ -6115,7 +6218,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) sg = sg->next; } while (sg != sd->groups); - if (cpu != group_first_cpu(sg)) + if (cpu != group_balance_cpu(sg)) return; update_group_power(sd, cpu); @@ -6165,11 +6268,8 @@ int sched_domain_level_max; static int __init setup_relax_domain_level(char *str) { - unsigned long val; - - val = simple_strtoul(str, NULL, 0); - if (val < sched_domain_level_max) - default_relax_domain_level = val; + if (kstrtoint(str, 0, &default_relax_domain_level)) + pr_warn("Unable to set relax_domain_level\n"); return 1; } @@ -6279,14 +6379,13 @@ static struct sched_domain_topology_level *sched_domain_topology = default_topol #ifdef CONFIG_NUMA static int sched_domains_numa_levels; -static int sched_domains_numa_scale; static int *sched_domains_numa_distance; static struct cpumask ***sched_domains_numa_masks; static int sched_domains_curr_level; static inline int sd_local_flags(int level) { - if (sched_domains_numa_distance[level] > REMOTE_DISTANCE) + if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE) return 0; return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE; @@ -6344,6 +6443,42 @@ static const struct cpumask *sd_numa_mask(int cpu) return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; } +static void sched_numa_warn(const char *str) +{ + static int done = false; + int i,j; + + if (done) + return; + + done = true; + + printk(KERN_WARNING "ERROR: %s\n\n", str); + + for (i = 0; i < nr_node_ids; i++) { + printk(KERN_WARNING " "); + for (j = 0; j < nr_node_ids; j++) + printk(KERN_CONT "%02d ", node_distance(i,j)); + printk(KERN_CONT "\n"); + } + printk(KERN_WARNING "\n"); +} + +static bool find_numa_distance(int distance) +{ + int i; + + if (distance == node_distance(0, 0)) + return true; + + for (i = 0; i < sched_domains_numa_levels; i++) { + if (sched_domains_numa_distance[i] == distance) + return true; + } + + return false; +} + static void sched_init_numa(void) { int next_distance, curr_distance = node_distance(0, 0); @@ -6351,7 +6486,6 @@ static void sched_init_numa(void) int level = 0; int i, j, k; - sched_domains_numa_scale = curr_distance; sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL); if (!sched_domains_numa_distance) return; @@ -6362,23 +6496,41 @@ static void sched_init_numa(void) * * Assumes node_distance(0,j) includes all distances in * node_distance(i,j) in order to avoid cubic time. - * - * XXX: could be optimized to O(n log n) by using sort() */ next_distance = curr_distance; for (i = 0; i < nr_node_ids; i++) { for (j = 0; j < nr_node_ids; j++) { - int distance = node_distance(0, j); - if (distance > curr_distance && - (distance < next_distance || - next_distance == curr_distance)) - next_distance = distance; + for (k = 0; k < nr_node_ids; k++) { + int distance = node_distance(i, k); + + if (distance > curr_distance && + (distance < next_distance || + next_distance == curr_distance)) + next_distance = distance; + + /* + * While not a strong assumption it would be nice to know + * about cases where if node A is connected to B, B is not + * equally connected to A. + */ + if (sched_debug() && node_distance(k, i) != distance) + sched_numa_warn("Node-distance not symmetric"); + + if (sched_debug() && i && !find_numa_distance(distance)) + sched_numa_warn("Node-0 not representative"); + } + if (next_distance != curr_distance) { + sched_domains_numa_distance[level++] = next_distance; + sched_domains_numa_levels = level; + curr_distance = next_distance; + } else break; } - if (next_distance != curr_distance) { - sched_domains_numa_distance[level++] = next_distance; - sched_domains_numa_levels = level; - curr_distance = next_distance; - } else break; + + /* + * In case of sched_debug() we verify the above assumption. + */ + if (!sched_debug()) + break; } /* * 'level' contains the number of unique distances, excluding the @@ -6403,7 +6555,7 @@ static void sched_init_numa(void) return; for (j = 0; j < nr_node_ids; j++) { - struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j); + struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL); if (!mask) return; @@ -6490,7 +6642,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map) *per_cpu_ptr(sdd->sg, j) = sg; - sgp = kzalloc_node(sizeof(struct sched_group_power), + sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); if (!sgp) return -ENOMEM; @@ -6543,7 +6695,6 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, if (!sd) return child; - set_domain_attribute(sd, attr); cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); if (child) { sd->level = child->level + 1; @@ -6551,6 +6702,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, child->parent = sd; } sd->child = child; + set_domain_attribute(sd, attr); return sd; } @@ -6691,7 +6843,6 @@ static int init_sched_domains(const struct cpumask *cpu_map) if (!doms_cur) doms_cur = &fallback_doms; cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); - dattr_cur = NULL; err = build_sched_domains(doms_cur[0], NULL); register_sched_domain_sysctl(); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 940e6d1..c099cc6e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) int want_sd = 1; int sync = wake_flags & WF_SYNC; - if (p->rt.nr_cpus_allowed == 1) + if (p->nr_cpus_allowed == 1) return prev_cpu; if (sd_flag & SD_BALANCE_WAKE) { @@ -3503,15 +3503,22 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) unsigned long scale_rt_power(int cpu) { struct rq *rq = cpu_rq(cpu); - u64 total, available; + u64 total, available, age_stamp, avg; - total = sched_avg_period() + (rq->clock - rq->age_stamp); + /* + * Since we're reading these variables without serialization make sure + * we read them once before doing sanity checks on them. + */ + age_stamp = ACCESS_ONCE(rq->age_stamp); + avg = ACCESS_ONCE(rq->rt_avg); + + total = sched_avg_period() + (rq->clock - age_stamp); - if (unlikely(total < rq->rt_avg)) { + if (unlikely(total < avg)) { /* Ensures that power won't end up being negative */ available = 0; } else { - available = total - rq->rt_avg; + available = total - avg; } if (unlikely((s64)total < SCHED_POWER_SCALE)) @@ -3574,13 +3581,28 @@ void update_group_power(struct sched_domain *sd, int cpu) power = 0; - group = child->groups; - do { - power += group->sgp->power; - group = group->next; - } while (group != child->groups); + if (child->flags & SD_OVERLAP) { + /* + * SD_OVERLAP domains cannot assume that child groups + * span the current group. + */ - sdg->sgp->power = power; + for_each_cpu(cpu, sched_group_cpus(sdg)) + power += power_of(cpu); + } else { + /* + * !SD_OVERLAP domains can assume that child groups + * span the current group. + */ + + group = child->groups; + do { + power += group->sgp->power; + group = group->next; + } while (group != child->groups); + } + + sdg->sgp->power_orig = sdg->sgp->power = power; } /* @@ -3610,7 +3632,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. - * @sd: The sched_domain whose statistics are to be updated. + * @env: The load balancing environment. * @group: sched_group whose statistics are to be updated. * @load_idx: Load index of sched_domain of this_cpu for load calc. * @local_group: Does group contain this_cpu. @@ -3630,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, int i; if (local_group) - balance_cpu = group_first_cpu(group); + balance_cpu = group_balance_cpu(group); /* Tally up the load of all CPUs in the group */ max_cpu_load = 0; @@ -3645,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env, /* Bias balancing toward cpus of our domain */ if (local_group) { - if (idle_cpu(i) && !first_idle_cpu) { + if (idle_cpu(i) && !first_idle_cpu && + cpumask_test_cpu(i, sched_group_mask(group))) { first_idle_cpu = 1; balance_cpu = i; } @@ -3719,11 +3742,10 @@ static inline void update_sg_lb_stats(struct lb_env *env, /** * update_sd_pick_busiest - return 1 on busiest group - * @sd: sched_domain whose statistics are to be checked + * @env: The load balancing environment. * @sds: sched_domain statistics * @sg: sched_group candidate to be checked for being the busiest * @sgs: sched_group statistics - * @this_cpu: the current cpu * * Determine if @sg is a busier group than the previously selected * busiest group. @@ -3761,9 +3783,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, /** * update_sd_lb_stats - Update sched_domain's statistics for load balancing. - * @sd: sched_domain whose statistics are to be updated. - * @this_cpu: Cpu for which load balance is currently performed. - * @idle: Idle status of this_cpu + * @env: The load balancing environment. * @cpus: Set of cpus considered for load balancing. * @balance: Should we balance. * @sds: variable to hold the statistics for this sched_domain. @@ -3852,10 +3872,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, * Returns 1 when packing is required and a task should be moved to * this CPU. The amount of the imbalance is returned in *imbalance. * - * @sd: The sched_domain whose packing is to be checked. + * @env: The load balancing environment. * @sds: Statistics of the sched_domain which is to be packed - * @this_cpu: The cpu at whose sched_domain we're performing load-balance. - * @imbalance: returns amount of imbalanced due to packing. */ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) { @@ -3881,9 +3899,8 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) * fix_small_imbalance - Calculate the minor imbalance that exists * amongst the groups of a sched_domain, during * load balancing. + * @env: The load balancing environment. * @sds: Statistics of the sched_domain whose imbalance is to be calculated. - * @this_cpu: The cpu at whose sched_domain we're performing load-balance. - * @imbalance: Variable to store the imbalance. */ static inline void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) @@ -4026,11 +4043,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s * Also calculates the amount of weighted load which should be moved * to restore balance. * - * @sd: The sched_domain whose busiest group is to be returned. - * @this_cpu: The cpu for which load balancing is currently being performed. - * @imbalance: Variable which stores amount of weighted load which should - * be moved to restore balance/put a group to idle. - * @idle: The idle status of this_cpu. + * @env: The load balancing environment. * @cpus: The set of CPUs under consideration for load-balancing. * @balance: Pointer to a variable indicating if this_cpu * is the appropriate cpu to perform load balancing at this_level. diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c5565c3..573e1ca 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -274,13 +274,16 @@ static void update_rt_migration(struct rt_rq *rt_rq) static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { + struct task_struct *p; + if (!rt_entity_is_task(rt_se)) return; + p = rt_task_of(rt_se); rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total++; - if (rt_se->nr_cpus_allowed > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory++; update_rt_migration(rt_rq); @@ -288,13 +291,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { + struct task_struct *p; + if (!rt_entity_is_task(rt_se)) return; + p = rt_task_of(rt_se); rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total--; - if (rt_se->nr_cpus_allowed > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory--; update_rt_migration(rt_rq); @@ -1161,7 +1167,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); - if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); inc_nr_running(rq); @@ -1225,7 +1231,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) cpu = task_cpu(p); - if (p->rt.nr_cpus_allowed == 1) + if (p->nr_cpus_allowed == 1) goto out; /* For anything but wake ups, just return the task_cpu */ @@ -1260,9 +1266,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) * will have to sort it out. */ if (curr && unlikely(rt_task(curr)) && - (curr->rt.nr_cpus_allowed < 2 || + (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio) && - (p->rt.nr_cpus_allowed > 1)) { + (p->nr_cpus_allowed > 1)) { int target = find_lowest_rq(p); if (target != -1) @@ -1276,10 +1282,10 @@ out: static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - if (rq->curr->rt.nr_cpus_allowed == 1) + if (rq->curr->nr_cpus_allowed == 1) return; - if (p->rt.nr_cpus_allowed != 1 + if (p->nr_cpus_allowed != 1 && cpupri_find(&rq->rd->cpupri, p, NULL)) return; @@ -1395,7 +1401,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) + if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1408,7 +1414,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && - (p->rt.nr_cpus_allowed > 1)) + (p->nr_cpus_allowed > 1)) return 1; return 0; } @@ -1464,7 +1470,7 @@ static int find_lowest_rq(struct task_struct *task) if (unlikely(!lowest_mask)) return -1; - if (task->rt.nr_cpus_allowed == 1) + if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) @@ -1556,7 +1562,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) task_running(rq, task) || !task->on_rq)) { - raw_spin_unlock(&lowest_rq->lock); + double_unlock_balance(rq, lowest_rq); lowest_rq = NULL; break; } @@ -1586,7 +1592,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(p->rt.nr_cpus_allowed <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!p->on_rq); BUG_ON(!rt_task(p)); @@ -1793,9 +1799,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && has_pushable_tasks(rq) && - p->rt.nr_cpus_allowed > 1 && + p->nr_cpus_allowed > 1 && rt_task(rq->curr) && - (rq->curr->rt.nr_cpus_allowed < 2 || + (rq->curr->nr_cpus_allowed < 2 || rq->curr->prio <= p->prio)) push_rt_tasks(rq); } @@ -1817,7 +1823,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, * Only update if the process changes its state from whether it * can migrate or not. */ - if ((p->rt.nr_cpus_allowed > 1) == (weight > 1)) + if ((p->nr_cpus_allowed > 1) == (weight > 1)) return; rq = task_rq(p); @@ -1979,6 +1985,8 @@ static void watchdog(struct rq *rq, struct task_struct *p) static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) { + struct sched_rt_entity *rt_se = &p->rt; + update_curr_rt(rq); watchdog(rq, p); @@ -1996,12 +2004,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) p->rt.time_slice = RR_TIMESLICE; /* - * Requeue to the end of queue if we are not the only element - * on the queue: + * Requeue to the end of queue if we (and all of our ancestors) are the + * only element on the queue */ - if (p->rt.run_list.prev != p->rt.run_list.next) { - requeue_task_rt(rq, p, 0); - set_tsk_need_resched(p); + for_each_sched_rt_entity(rt_se) { + if (rt_se->run_list.prev != rt_se->run_list.next) { + requeue_task_rt(rq, p, 0); + set_tsk_need_resched(p); + return; + } } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ba9dccf..6d52cea 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag) DECLARE_PER_CPU(struct sched_domain *, sd_llc); DECLARE_PER_CPU(int, sd_llc_id); +extern int group_balance_cpu(struct sched_group *sg); + #endif /* CONFIG_SMP */ #include "stats.h" diff --git a/kernel/smpboot.c b/kernel/smpboot.c index e1a797e..98f60c5 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -31,6 +31,12 @@ void __init idle_thread_set_boot_cpu(void) per_cpu(idle_threads, smp_processor_id()) = current; } +/** + * idle_init - Initialize the idle thread for a cpu + * @cpu: The cpu for which the idle thread should be initialized + * + * Creates the thread if it does not exist. + */ static inline void idle_init(unsigned int cpu) { struct task_struct *tsk = per_cpu(idle_threads, cpu); @@ -45,17 +51,16 @@ static inline void idle_init(unsigned int cpu) } /** - * idle_thread_init - Initialize the idle thread for a cpu - * @cpu: The cpu for which the idle thread should be initialized - * - * Creates the thread if it does not exist. + * idle_threads_init - Initialize idle threads for all cpus */ void __init idle_threads_init(void) { - unsigned int cpu; + unsigned int cpu, boot_cpu; + + boot_cpu = smp_processor_id(); for_each_possible_cpu(cpu) { - if (cpu != smp_processor_id()) + if (cpu != boot_cpu) idle_init(cpu); } } diff --git a/kernel/sys.c b/kernel/sys.c index 9ff89cb..f0ec44d 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1786,27 +1786,13 @@ SYSCALL_DEFINE1(umask, int, mask) } #ifdef CONFIG_CHECKPOINT_RESTORE -static bool vma_flags_mismatch(struct vm_area_struct *vma, - unsigned long required, - unsigned long banned) -{ - return (vma->vm_flags & required) != required || - (vma->vm_flags & banned); -} - static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { + struct vm_area_struct *vma; struct file *exe_file; struct dentry *dentry; int err; - /* - * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's - * remain. So perform a quick test first. - */ - if (mm->num_exe_file_vmas) - return -EBUSY; - exe_file = fget(fd); if (!exe_file) return -EBADF; @@ -1827,17 +1813,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) if (err) goto exit; + down_write(&mm->mmap_sem); + + /* + * Forbid mm->exe_file change if there are mapped other files. + */ + err = -EBUSY; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma->vm_file && !path_equal(&vma->vm_file->f_path, + &exe_file->f_path)) + goto exit_unlock; + } + /* * The symlink can be changed only once, just to disallow arbitrary * transitions malicious software might bring in. This means one * could make a snapshot over all processes running and monitor * /proc/pid/exe changes to notice unusual activity if needed. */ - down_write(&mm->mmap_sem); - if (likely(!mm->exe_file)) - set_mm_exe_file(mm, exe_file); - else - err = -EBUSY; + err = -EPERM; + if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) + goto exit_unlock; + + set_mm_exe_file(mm, exe_file); +exit_unlock: up_write(&mm->mmap_sem); exit: @@ -1862,7 +1861,7 @@ static int prctl_set_mm(int opt, unsigned long addr, if (opt == PR_SET_MM_EXE_FILE) return prctl_set_mm_exe_file(mm, (unsigned int)addr); - if (addr >= TASK_SIZE) + if (addr >= TASK_SIZE || addr < mmap_min_addr) return -EINVAL; error = -EINVAL; @@ -1924,12 +1923,6 @@ static int prctl_set_mm(int opt, unsigned long addr, error = -EFAULT; goto out; } -#ifdef CONFIG_STACK_GROWSUP - if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0)) -#else - if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0)) -#endif - goto out; if (opt == PR_SET_MM_START_STACK) mm->start_stack = addr; else if (opt == PR_SET_MM_ARG_START) @@ -1981,12 +1974,22 @@ out: up_read(&mm->mmap_sem); return error; } + +static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) +{ + return put_user(me->clear_child_tid, tid_addr); +} + #else /* CONFIG_CHECKPOINT_RESTORE */ static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) { return -EINVAL; } +static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) +{ + return -EINVAL; +} #endif SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, @@ -2124,6 +2127,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else return -EINVAL; break; + case PR_GET_TID_ADDRESS: + error = prctl_get_tid_address(me, (int __user **)arg2); + break; default: return -EINVAL; } diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 9cd928f..7e1ce01 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -297,8 +297,7 @@ void clockevents_register_device(struct clock_event_device *dev) } EXPORT_SYMBOL_GPL(clockevents_register_device); -static void clockevents_config(struct clock_event_device *dev, - u32 freq) +void clockevents_config(struct clock_event_device *dev, u32 freq) { u64 sec; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6a3a5b9..da70c6d 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void) /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); + update_cpu_load_nohz(); #ifndef CONFIG_VIRT_CPU_ACCOUNTING /* @@ -814,6 +815,16 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer) return HRTIMER_RESTART; } +static int sched_skew_tick; + +static int __init skew_tick(char *str) +{ + get_option(&str, &sched_skew_tick); + + return 0; +} +early_param("skew_tick", skew_tick); + /** * tick_setup_sched_timer - setup the tick emulation timer */ @@ -831,6 +842,14 @@ void tick_setup_sched_timer(void) /* Get the next period (per cpu) */ hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update()); + /* Offset the tick to avert xtime_lock contention. */ + if (sched_skew_tick) { + u64 offset = ktime_to_ns(tick_period) >> 1; + do_div(offset, num_possible_cpus()); + offset *= smp_processor_id(); + hrtimer_add_expires_ns(&ts->sched_timer, offset); + } + for (;;) { hrtimer_forward(&ts->sched_timer, now, tick_period); hrtimer_start_expires(&ts->sched_timer, diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 6e46cac..6f46a00 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -962,6 +962,7 @@ static cycle_t logarithmic_accumulation(cycle_t offset, int shift) timekeeper.xtime.tv_sec++; leap = second_overflow(timekeeper.xtime.tv_sec); timekeeper.xtime.tv_sec += leap; + timekeeper.wall_to_monotonic.tv_sec -= leap; } /* Accumulate raw time */ @@ -1077,6 +1078,7 @@ static void update_wall_time(void) timekeeper.xtime.tv_sec++; leap = second_overflow(timekeeper.xtime.tv_sec); timekeeper.xtime.tv_sec += leap; + timekeeper.wall_to_monotonic.tv_sec -= leap; } timekeeping_update(false); diff --git a/lib/btree.c b/lib/btree.c index e5ec1e9..f9a4846 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -319,8 +319,8 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo, if (head->height == 0) return NULL; -retry: longcpy(key, __key, geo->keylen); +retry: dec_key(geo, key); node = head->node; @@ -351,7 +351,7 @@ retry: } miss: if (retry_key) { - __key = retry_key; + longcpy(key, retry_key, geo->keylen); retry_key = NULL; goto retry; } @@ -509,6 +509,7 @@ retry: int btree_insert(struct btree_head *head, struct btree_geo *geo, unsigned long *key, void *val, gfp_t gfp) { + BUG_ON(!val); return btree_insert_level(head, geo, key, val, 1, gfp); } EXPORT_SYMBOL_GPL(btree_insert); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d7c878c..e796429 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -686,6 +686,9 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, * during iterating; it can be zero only at the beginning. * And we cannot overflow iter->next_index in a single step, * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. + * + * This condition also used by radix_tree_next_slot() to stop + * contiguous iterating, and forbid swithing to the next chunk. */ index = iter->next_index; if (!index && iter->index) diff --git a/lib/raid6/recov.c b/lib/raid6/recov.c index 1805a5c..a95bccb 100644 --- a/lib/raid6/recov.c +++ b/lib/raid6/recov.c @@ -22,8 +22,8 @@ #include <linux/raid/pq.h> /* Recover two failed data blocks. */ -void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, - void **ptrs) +static void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, + int failb, void **ptrs) { u8 *p, *q, *dp, *dq; u8 px, qx, db; @@ -66,7 +66,8 @@ void raid6_2data_recov_intx1(int disks, size_t bytes, int faila, int failb, } /* Recover failure of one data block plus the P block */ -void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, void **ptrs) +static void raid6_datap_recov_intx1(int disks, size_t bytes, int faila, + void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index 37ae619..ecb710c 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -19,8 +19,8 @@ static int raid6_has_ssse3(void) boot_cpu_has(X86_FEATURE_SSSE3); } -void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, - void **ptrs) +static void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, + int failb, void **ptrs) { u8 *p, *q, *dp, *dq; const u8 *pbmul; /* P multiplier table for B data */ @@ -194,7 +194,8 @@ void raid6_2data_recov_ssse3(int disks, size_t bytes, int faila, int failb, } -void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, void **ptrs) +static void raid6_datap_recov_ssse3(int disks, size_t bytes, int faila, + void **ptrs) { u8 *p, *q, *dq; const u8 *qmul; /* Q multiplier table */ @@ -389,3 +389,20 @@ config CLEANCACHE in a negligible performance hit. If unsure, say Y to enable cleancache + +config FRONTSWAP + bool "Enable frontswap to cache swap pages if tmem is present" + depends on SWAP + default n + help + Frontswap is so named because it can be thought of as the opposite + of a "backing" store for a swap device. The data is stored into + "transcendent memory", memory that is not directly accessible or + addressable by the kernel and is of unknown and possibly + time-varying size. When space in transcendent memory is available, + a significant swap I/O reduction may be achieved. When none is + available, all frontswap calls are reduced to a single pointer- + compare-against-NULL resulting in a negligible performance hit + and swap data is stored as normal on the matching swap device. + + If unsure, say Y to enable frontswap. diff --git a/mm/Makefile b/mm/Makefile index a156285..2e2fbbe 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o +obj-$(CONFIG_FRONTSWAP) += frontswap.o obj-$(CONFIG_HAS_DMA) += dmapool.o obj-$(CONFIG_HUGETLBFS) += hugetlb.o obj-$(CONFIG_NUMA) += mempolicy.o diff --git a/mm/compaction.c b/mm/compaction.c index 4ac338a..7ea259d 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -236,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, */ while (unlikely(too_many_isolated(zone))) { /* async migration should just abort */ - if (cc->mode != COMPACT_SYNC) + if (!cc->sync) return 0; congestion_wait(BLK_RW_ASYNC, HZ/10); @@ -304,8 +304,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, * satisfies the allocation */ pageblock_nr = low_pfn >> pageblock_order; - if (cc->mode != COMPACT_SYNC && - last_pageblock_nr != pageblock_nr && + if (!cc->sync && last_pageblock_nr != pageblock_nr && !migrate_async_suitable(get_pageblock_migratetype(page))) { low_pfn += pageblock_nr_pages; low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1; @@ -326,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, continue; } - if (cc->mode != COMPACT_SYNC) + if (!cc->sync) mode |= ISOLATE_ASYNC_MIGRATE; lruvec = mem_cgroup_page_lruvec(page, zone); @@ -361,90 +360,27 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc, #endif /* CONFIG_COMPACTION || CONFIG_CMA */ #ifdef CONFIG_COMPACTION -/* - * Returns true if MIGRATE_UNMOVABLE pageblock was successfully - * converted to MIGRATE_MOVABLE type, false otherwise. - */ -static bool rescue_unmovable_pageblock(struct page *page) -{ - unsigned long pfn, start_pfn, end_pfn; - struct page *start_page, *end_page; - - pfn = page_to_pfn(page); - start_pfn = pfn & ~(pageblock_nr_pages - 1); - end_pfn = start_pfn + pageblock_nr_pages; - - start_page = pfn_to_page(start_pfn); - end_page = pfn_to_page(end_pfn); - - /* Do not deal with pageblocks that overlap zones */ - if (page_zone(start_page) != page_zone(end_page)) - return false; - - for (page = start_page, pfn = start_pfn; page < end_page; pfn++, - page++) { - if (!pfn_valid_within(pfn)) - continue; - - if (PageBuddy(page)) { - int order = page_order(page); - - pfn += (1 << order) - 1; - page += (1 << order) - 1; - - continue; - } else if (page_count(page) == 0 || PageLRU(page)) - continue; - - return false; - } - - set_pageblock_migratetype(page, MIGRATE_MOVABLE); - move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE); - return true; -} -enum smt_result { - GOOD_AS_MIGRATION_TARGET, - FAIL_UNMOVABLE_TARGET, - FAIL_BAD_TARGET, -}; - -/* - * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block - * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page - * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise. - */ -static enum smt_result suitable_migration_target(struct page *page, - struct compact_control *cc) +/* Returns true if the page is within a block suitable for migration to */ +static bool suitable_migration_target(struct page *page) { int migratetype = get_pageblock_migratetype(page); /* Don't interfere with memory hot-remove or the min_free_kbytes blocks */ if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE) - return FAIL_BAD_TARGET; + return false; /* If the page is a large free page, then allow migration */ if (PageBuddy(page) && page_order(page) >= pageblock_order) - return GOOD_AS_MIGRATION_TARGET; + return true; /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ - if (cc->mode != COMPACT_ASYNC_UNMOVABLE && - migrate_async_suitable(migratetype)) - return GOOD_AS_MIGRATION_TARGET; - - if (cc->mode == COMPACT_ASYNC_MOVABLE && - migratetype == MIGRATE_UNMOVABLE) - return FAIL_UNMOVABLE_TARGET; - - if (cc->mode != COMPACT_ASYNC_MOVABLE && - migratetype == MIGRATE_UNMOVABLE && - rescue_unmovable_pageblock(page)) - return GOOD_AS_MIGRATION_TARGET; + if (migrate_async_suitable(migratetype)) + return true; /* Otherwise skip the block */ - return FAIL_BAD_TARGET; + return false; } /* @@ -478,13 +414,6 @@ static void isolate_freepages(struct zone *zone, zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages; /* - * isolate_freepages() may be called more than once during - * compact_zone_order() run and we want only the most recent - * count. - */ - cc->nr_pageblocks_skipped = 0; - - /* * Isolate free pages until enough are available to migrate the * pages on cc->migratepages. We stop searching if the migrate * and free page scanners meet or enough free pages are isolated. @@ -492,7 +421,6 @@ static void isolate_freepages(struct zone *zone, for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages; pfn -= pageblock_nr_pages) { unsigned long isolated; - enum smt_result ret; if (!pfn_valid(pfn)) continue; @@ -509,12 +437,9 @@ static void isolate_freepages(struct zone *zone, continue; /* Check the block is suitable for migration */ - ret = suitable_migration_target(page, cc); - if (ret != GOOD_AS_MIGRATION_TARGET) { - if (ret == FAIL_UNMOVABLE_TARGET) - cc->nr_pageblocks_skipped++; + if (!suitable_migration_target(page)) continue; - } + /* * Found a block suitable for isolating free pages from. Now * we disabled interrupts, double check things are ok and @@ -523,14 +448,12 @@ static void isolate_freepages(struct zone *zone, */ isolated = 0; spin_lock_irqsave(&zone->lock, flags); - ret = suitable_migration_target(page, cc); - if (ret == GOOD_AS_MIGRATION_TARGET) { + if (suitable_migration_target(page)) { end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn); isolated = isolate_freepages_block(pfn, end_pfn, freelist, false); nr_freepages += isolated; - } else if (ret == FAIL_UNMOVABLE_TARGET) - cc->nr_pageblocks_skipped++; + } spin_unlock_irqrestore(&zone->lock, flags); /* @@ -762,9 +685,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc) nr_migrate = cc->nr_migratepages; err = migrate_pages(&cc->migratepages, compaction_alloc, - (unsigned long)&cc->freepages, false, - (cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT - : MIGRATE_ASYNC); + (unsigned long)cc, false, + cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC); update_nr_listpages(cc); nr_remaining = cc->nr_migratepages; @@ -793,8 +715,7 @@ out: static unsigned long compact_zone_order(struct zone *zone, int order, gfp_t gfp_mask, - enum compact_mode mode, - unsigned long *nr_pageblocks_skipped) + bool sync) { struct compact_control cc = { .nr_freepages = 0, @@ -802,17 +723,12 @@ static unsigned long compact_zone_order(struct zone *zone, .order = order, .migratetype = allocflags_to_migratetype(gfp_mask), .zone = zone, - .mode = mode, + .sync = sync, }; - unsigned long rc; - INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); - rc = compact_zone(zone, &cc); - *nr_pageblocks_skipped = cc.nr_pageblocks_skipped; - - return rc; + return compact_zone(zone, &cc); } int sysctl_extfrag_threshold = 500; @@ -837,8 +753,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, struct zoneref *z; struct zone *zone; int rc = COMPACT_SKIPPED; - unsigned long nr_pageblocks_skipped; - enum compact_mode mode; /* * Check whether it is worth even starting compaction. The order check is @@ -855,22 +769,12 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist, nodemask) { int status; - mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE; -retry: - status = compact_zone_order(zone, order, gfp_mask, mode, - &nr_pageblocks_skipped); + status = compact_zone_order(zone, order, gfp_mask, sync); rc = max(status, rc); /* If a normal allocation would succeed, stop compacting */ if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0)) break; - - if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) { - if (nr_pageblocks_skipped) { - mode = COMPACT_ASYNC_UNMOVABLE; - goto retry; - } - } } return rc; @@ -904,7 +808,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) if (ok && cc->order > zone->compact_order_failed) zone->compact_order_failed = cc->order + 1; /* Currently async compaction is never deferred. */ - else if (!ok && cc->mode == COMPACT_SYNC) + else if (!ok && cc->sync) defer_compaction(zone, cc->order); } @@ -919,7 +823,7 @@ int compact_pgdat(pg_data_t *pgdat, int order) { struct compact_control cc = { .order = order, - .mode = COMPACT_ASYNC_MOVABLE, + .sync = false, }; return __compact_pgdat(pgdat, &cc); @@ -929,7 +833,7 @@ static int compact_node(int nid) { struct compact_control cc = { .order = -1, - .mode = COMPACT_SYNC, + .sync = true, }; return __compact_pgdat(NODE_DATA(nid), &cc); diff --git a/mm/frontswap.c b/mm/frontswap.c new file mode 100644 index 0000000..e250255 --- /dev/null +++ b/mm/frontswap.c @@ -0,0 +1,314 @@ +/* + * Frontswap frontend + * + * This code provides the generic "frontend" layer to call a matching + * "backend" driver implementation of frontswap. See + * Documentation/vm/frontswap.txt for more information. + * + * Copyright (C) 2009-2012 Oracle Corp. All rights reserved. + * Author: Dan Magenheimer + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/swap.h> +#include <linux/swapops.h> +#include <linux/proc_fs.h> +#include <linux/security.h> +#include <linux/capability.h> +#include <linux/module.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <linux/frontswap.h> +#include <linux/swapfile.h> + +/* + * frontswap_ops is set by frontswap_register_ops to contain the pointers + * to the frontswap "backend" implementation functions. + */ +static struct frontswap_ops frontswap_ops __read_mostly; + +/* + * This global enablement flag reduces overhead on systems where frontswap_ops + * has not been registered, so is preferred to the slower alternative: a + * function call that checks a non-global. + */ +bool frontswap_enabled __read_mostly; +EXPORT_SYMBOL(frontswap_enabled); + +/* + * If enabled, frontswap_store will return failure even on success. As + * a result, the swap subsystem will always write the page to swap, in + * effect converting frontswap into a writethrough cache. In this mode, + * there is no direct reduction in swap writes, but a frontswap backend + * can unilaterally "reclaim" any pages in use with no data loss, thus + * providing increases control over maximum memory usage due to frontswap. + */ +static bool frontswap_writethrough_enabled __read_mostly; + +#ifdef CONFIG_DEBUG_FS +/* + * Counters available via /sys/kernel/debug/frontswap (if debugfs is + * properly configured). These are for information only so are not protected + * against increment races. + */ +static u64 frontswap_loads; +static u64 frontswap_succ_stores; +static u64 frontswap_failed_stores; +static u64 frontswap_invalidates; + +static inline void inc_frontswap_loads(void) { + frontswap_loads++; +} +static inline void inc_frontswap_succ_stores(void) { + frontswap_succ_stores++; +} +static inline void inc_frontswap_failed_stores(void) { + frontswap_failed_stores++; +} +static inline void inc_frontswap_invalidates(void) { + frontswap_invalidates++; +} +#else +static inline void inc_frontswap_loads(void) { } +static inline void inc_frontswap_succ_stores(void) { } +static inline void inc_frontswap_failed_stores(void) { } +static inline void inc_frontswap_invalidates(void) { } +#endif +/* + * Register operations for frontswap, returning previous thus allowing + * detection of multiple backends and possible nesting. + */ +struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops) +{ + struct frontswap_ops old = frontswap_ops; + + frontswap_ops = *ops; + frontswap_enabled = true; + return old; +} +EXPORT_SYMBOL(frontswap_register_ops); + +/* + * Enable/disable frontswap writethrough (see above). + */ +void frontswap_writethrough(bool enable) +{ + frontswap_writethrough_enabled = enable; +} +EXPORT_SYMBOL(frontswap_writethrough); + +/* + * Called when a swap device is swapon'd. + */ +void __frontswap_init(unsigned type) +{ + struct swap_info_struct *sis = swap_info[type]; + + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + if (frontswap_enabled) + (*frontswap_ops.init)(type); +} +EXPORT_SYMBOL(__frontswap_init); + +/* + * "Store" data from a page to frontswap and associate it with the page's + * swaptype and offset. Page must be locked and in the swap cache. + * If frontswap already contains a page with matching swaptype and + * offset, the frontswap implmentation may either overwrite the data and + * return success or invalidate the page from frontswap and return failure. + */ +int __frontswap_store(struct page *page) +{ + int ret = -1, dup = 0; + swp_entry_t entry = { .val = page_private(page), }; + int type = swp_type(entry); + struct swap_info_struct *sis = swap_info[type]; + pgoff_t offset = swp_offset(entry); + + BUG_ON(!PageLocked(page)); + BUG_ON(sis == NULL); + if (frontswap_test(sis, offset)) + dup = 1; + ret = (*frontswap_ops.store)(type, offset, page); + if (ret == 0) { + frontswap_set(sis, offset); + inc_frontswap_succ_stores(); + if (!dup) + atomic_inc(&sis->frontswap_pages); + } else if (dup) { + /* + failed dup always results in automatic invalidate of + the (older) page from frontswap + */ + frontswap_clear(sis, offset); + atomic_dec(&sis->frontswap_pages); + inc_frontswap_failed_stores(); + } else + inc_frontswap_failed_stores(); + if (frontswap_writethrough_enabled) + /* report failure so swap also writes to swap device */ + ret = -1; + return ret; +} +EXPORT_SYMBOL(__frontswap_store); + +/* + * "Get" data from frontswap associated with swaptype and offset that were + * specified when the data was put to frontswap and use it to fill the + * specified page with data. Page must be locked and in the swap cache. + */ +int __frontswap_load(struct page *page) +{ + int ret = -1; + swp_entry_t entry = { .val = page_private(page), }; + int type = swp_type(entry); + struct swap_info_struct *sis = swap_info[type]; + pgoff_t offset = swp_offset(entry); + + BUG_ON(!PageLocked(page)); + BUG_ON(sis == NULL); + if (frontswap_test(sis, offset)) + ret = (*frontswap_ops.load)(type, offset, page); + if (ret == 0) + inc_frontswap_loads(); + return ret; +} +EXPORT_SYMBOL(__frontswap_load); + +/* + * Invalidate any data from frontswap associated with the specified swaptype + * and offset so that a subsequent "get" will fail. + */ +void __frontswap_invalidate_page(unsigned type, pgoff_t offset) +{ + struct swap_info_struct *sis = swap_info[type]; + + BUG_ON(sis == NULL); + if (frontswap_test(sis, offset)) { + (*frontswap_ops.invalidate_page)(type, offset); + atomic_dec(&sis->frontswap_pages); + frontswap_clear(sis, offset); + inc_frontswap_invalidates(); + } +} +EXPORT_SYMBOL(__frontswap_invalidate_page); + +/* + * Invalidate all data from frontswap associated with all offsets for the + * specified swaptype. + */ +void __frontswap_invalidate_area(unsigned type) +{ + struct swap_info_struct *sis = swap_info[type]; + + BUG_ON(sis == NULL); + if (sis->frontswap_map == NULL) + return; + (*frontswap_ops.invalidate_area)(type); + atomic_set(&sis->frontswap_pages, 0); + memset(sis->frontswap_map, 0, sis->max / sizeof(long)); +} +EXPORT_SYMBOL(__frontswap_invalidate_area); + +/* + * Frontswap, like a true swap device, may unnecessarily retain pages + * under certain circumstances; "shrink" frontswap is essentially a + * "partial swapoff" and works by calling try_to_unuse to attempt to + * unuse enough frontswap pages to attempt to -- subject to memory + * constraints -- reduce the number of pages in frontswap to the + * number given in the parameter target_pages. + */ +void frontswap_shrink(unsigned long target_pages) +{ + struct swap_info_struct *si = NULL; + int si_frontswap_pages; + unsigned long total_pages = 0, total_pages_to_unuse; + unsigned long pages = 0, pages_to_unuse = 0; + int type; + bool locked = false; + + /* + * we don't want to hold swap_lock while doing a very + * lengthy try_to_unuse, but swap_list may change + * so restart scan from swap_list.head each time + */ + spin_lock(&swap_lock); + locked = true; + total_pages = 0; + for (type = swap_list.head; type >= 0; type = si->next) { + si = swap_info[type]; + total_pages += atomic_read(&si->frontswap_pages); + } + if (total_pages <= target_pages) + goto out; + total_pages_to_unuse = total_pages - target_pages; + for (type = swap_list.head; type >= 0; type = si->next) { + si = swap_info[type]; + si_frontswap_pages = atomic_read(&si->frontswap_pages); + if (total_pages_to_unuse < si_frontswap_pages) + pages = pages_to_unuse = total_pages_to_unuse; + else { + pages = si_frontswap_pages; + pages_to_unuse = 0; /* unuse all */ + } + /* ensure there is enough RAM to fetch pages from frontswap */ + if (security_vm_enough_memory_mm(current->mm, pages)) + continue; + vm_unacct_memory(pages); + break; + } + if (type < 0) + goto out; + locked = false; + spin_unlock(&swap_lock); + try_to_unuse(type, true, pages_to_unuse); +out: + if (locked) + spin_unlock(&swap_lock); + return; +} +EXPORT_SYMBOL(frontswap_shrink); + +/* + * Count and return the number of frontswap pages across all + * swap devices. This is exported so that backend drivers can + * determine current usage without reading debugfs. + */ +unsigned long frontswap_curr_pages(void) +{ + int type; + unsigned long totalpages = 0; + struct swap_info_struct *si = NULL; + + spin_lock(&swap_lock); + for (type = swap_list.head; type >= 0; type = si->next) { + si = swap_info[type]; + totalpages += atomic_read(&si->frontswap_pages); + } + spin_unlock(&swap_lock); + return totalpages; +} +EXPORT_SYMBOL(frontswap_curr_pages); + +static int __init init_frontswap(void) +{ +#ifdef CONFIG_DEBUG_FS + struct dentry *root = debugfs_create_dir("frontswap", NULL); + if (root == NULL) + return -ENXIO; + debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads); + debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores); + debugfs_create_u64("failed_stores", S_IRUGO, root, + &frontswap_failed_stores); + debugfs_create_u64("invalidates", S_IRUGO, + root, &frontswap_invalidates); +#endif + return 0; +} + +module_init(init_frontswap); diff --git a/mm/internal.h b/mm/internal.h index 5cbb781..2ba87fb 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -94,9 +94,6 @@ extern void putback_lru_page(struct page *page); /* * in mm/page_alloc.c */ -extern void set_pageblock_migratetype(struct page *page, int migratetype); -extern int move_freepages_block(struct zone *zone, struct page *page, - int migratetype); extern void __free_pages_bootmem(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned long order); #ifdef CONFIG_MEMORY_FAILURE @@ -104,7 +101,6 @@ extern bool is_free_buddy_page(struct page *page); #endif #if defined CONFIG_COMPACTION || defined CONFIG_CMA -#include <linux/compaction.h> /* * in mm/compaction.c @@ -123,14 +119,11 @@ struct compact_control { unsigned long nr_migratepages; /* Number of pages to migrate */ unsigned long free_pfn; /* isolate_freepages search base */ unsigned long migrate_pfn; /* isolate_migratepages search base */ - enum compact_mode mode; /* Compaction mode */ + bool sync; /* Synchronous migration */ int order; /* order a direct compactor needs */ int migratetype; /* MOVABLE, RECLAIMABLE etc */ struct zone *zone; - - /* Number of UNMOVABLE destination pageblocks skipped during scan */ - unsigned long nr_pageblocks_skipped; }; unsigned long diff --git a/mm/migrate.c b/mm/migrate.c index ab81d48..be26d5c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -436,7 +436,10 @@ void migrate_page_copy(struct page *newpage, struct page *page) * is actually a signal that all of the page has become dirty. * Whereas only part of our page may be dirty. */ - __set_page_dirty_nobuffers(newpage); + if (PageSwapBacked(page)) + SetPageDirty(newpage); + else + __set_page_dirty_nobuffers(newpage); } mlock_migrate_page(newpage, page); @@ -1486,7 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); + retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); if (file) fput(file); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ed0e196..416637f 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -183,7 +183,7 @@ static bool oom_unkillable_task(struct task_struct *p, unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages) { - unsigned long points; + long points; if (oom_unkillable_task(p, memcg, nodemask)) return 0; @@ -223,7 +223,7 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, * Never return 0 for an eligible task regardless of the root bonus and * oom_score_adj (oom_score_adj can't be OOM_SCORE_ADJ_MIN here). */ - return points ? points : 1; + return points > 0 ? points : 1; } /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6092f33..4403009 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes); int page_group_by_mobility_disabled __read_mostly; -void set_pageblock_migratetype(struct page *page, int migratetype) +static void set_pageblock_migratetype(struct page *page, int migratetype) { if (unlikely(page_group_by_mobility_disabled)) @@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone, return pages_moved; } -int move_freepages_block(struct zone *zone, struct page *page, - int migratetype) +static int move_freepages_block(struct zone *zone, struct page *page, + int migratetype) { unsigned long start_pfn, end_pfn; struct page *start_page, *end_page; @@ -5651,7 +5651,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end) .nr_migratepages = 0, .order = -1, .zone = page_zone(pfn_to_page(start)), - .mode = COMPACT_SYNC, + .sync = true, }; INIT_LIST_HEAD(&cc.migratepages); diff --git a/mm/page_io.c b/mm/page_io.c index dc76b4d..34f0292 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -18,6 +18,7 @@ #include <linux/bio.h> #include <linux/swapops.h> #include <linux/writeback.h> +#include <linux/frontswap.h> #include <asm/pgtable.h> static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } + if (frontswap_store(page) == 0) { + set_page_writeback(page); + unlock_page(page); + end_page_writeback(page); + goto out; + } bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); if (bio == NULL) { set_page_dirty(page); @@ -122,6 +129,11 @@ int swap_readpage(struct page *page) VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageUptodate(page)); + if (frontswap_load(page) == 0) { + SetPageUptodate(page); + unlock_page(page); + goto out; + } bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); if (bio == NULL) { unlock_page(page); @@ -683,10 +683,21 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, mutex_lock(&shmem_swaplist_mutex); /* * We needed to drop mutex to make that restrictive page - * allocation; but the inode might already be freed by now, - * and we cannot refer to inode or mapping or info to check. - * However, we do hold page lock on the PageSwapCache page, - * so can check if that still has our reference remaining. + * allocation, but the inode might have been freed while we + * dropped it: although a racing shmem_evict_inode() cannot + * complete without emptying the radix_tree, our page lock + * on this swapcache page is not enough to prevent that - + * free_swap_and_cache() of our swap entry will only + * trylock_page(), removing swap from radix_tree whatever. + * + * We must not proceed to shmem_add_to_page_cache() if the + * inode has been freed, but of course we cannot rely on + * inode or mapping or info to check that. However, we can + * safely check if our swap entry is still in use (and here + * it can't have got reused for another page): if it's still + * in use, then the inode cannot have been freed yet, and we + * can safely proceed (if it's no longer in use, that tells + * nothing about the inode, but we don't need to unuse swap). */ if (!page_swapcount(*pagep)) error = -ENOENT; @@ -730,9 +741,9 @@ int shmem_unuse(swp_entry_t swap, struct page *page) /* * There's a faint possibility that swap page was replaced before - * caller locked it: it will come back later with the right page. + * caller locked it: caller will come back later with the right page. */ - if (unlikely(!PageSwapCache(page))) + if (unlikely(!PageSwapCache(page) || page_private(page) != swap.val)) goto out; /* @@ -995,21 +1006,15 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, newpage = shmem_alloc_page(gfp, info, index); if (!newpage) return -ENOMEM; - VM_BUG_ON(shmem_should_replace_page(newpage, gfp)); - *pagep = newpage; page_cache_get(newpage); copy_highpage(newpage, oldpage); + flush_dcache_page(newpage); - VM_BUG_ON(!PageLocked(oldpage)); __set_page_locked(newpage); - VM_BUG_ON(!PageUptodate(oldpage)); SetPageUptodate(newpage); - VM_BUG_ON(!PageSwapBacked(oldpage)); SetPageSwapBacked(newpage); - VM_BUG_ON(!swap_index); set_page_private(newpage, swap_index); - VM_BUG_ON(!PageSwapCache(oldpage)); SetPageSwapCache(newpage); /* @@ -1019,13 +1024,24 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, spin_lock_irq(&swap_mapping->tree_lock); error = shmem_radix_tree_replace(swap_mapping, swap_index, oldpage, newpage); - __inc_zone_page_state(newpage, NR_FILE_PAGES); - __dec_zone_page_state(oldpage, NR_FILE_PAGES); + if (!error) { + __inc_zone_page_state(newpage, NR_FILE_PAGES); + __dec_zone_page_state(oldpage, NR_FILE_PAGES); + } spin_unlock_irq(&swap_mapping->tree_lock); - BUG_ON(error); - mem_cgroup_replace_page_cache(oldpage, newpage); - lru_cache_add_anon(newpage); + if (unlikely(error)) { + /* + * Is this possible? I think not, now that our callers check + * both PageSwapCache and page_private after getting page lock; + * but be defensive. Reverse old to newpage for clear and free. + */ + oldpage = newpage; + } else { + mem_cgroup_replace_page_cache(oldpage, newpage); + lru_cache_add_anon(newpage); + *pagep = newpage; + } ClearPageSwapCache(oldpage); set_page_private(oldpage, 0); @@ -1033,7 +1049,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp, unlock_page(oldpage); page_cache_release(oldpage); page_cache_release(oldpage); - return 0; + return error; } /* @@ -1107,7 +1123,8 @@ repeat: /* We have to do this with page locked to prevent races */ lock_page(page); - if (!PageSwapCache(page) || page->mapping) { + if (!PageSwapCache(page) || page_private(page) != swap.val || + page->mapping) { error = -EEXIST; /* try again */ goto failed; } diff --git a/mm/swapfile.c b/mm/swapfile.c index 457b10b..de5bc51 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -31,6 +31,8 @@ #include <linux/memcontrol.h> #include <linux/poll.h> #include <linux/oom.h> +#include <linux/frontswap.h> +#include <linux/swapfile.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, static void free_swap_count_continuations(struct swap_info_struct *); static sector_t map_swap_entry(swp_entry_t, struct block_device**); -static DEFINE_SPINLOCK(swap_lock); +DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; long nr_swap_pages; long total_swap_pages; @@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; -static struct swap_list_t swap_list = {-1, -1}; +struct swap_list_t swap_list = {-1, -1}; -static struct swap_info_struct *swap_info[MAX_SWAPFILES]; +struct swap_info_struct *swap_info[MAX_SWAPFILES]; static DEFINE_MUTEX(swapon_mutex); @@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, swap_list.next = p->type; nr_swap_pages++; p->inuse_pages--; + frontswap_invalidate_page(p->type, offset); if ((p->flags & SWP_BLKDEV) && disk->fops->swap_slot_free_notify) disk->fops->swap_slot_free_notify(p->bdev, offset); @@ -985,11 +988,12 @@ static int unuse_mm(struct mm_struct *mm, } /* - * Scan swap_map from current position to next entry still in use. + * Scan swap_map (or frontswap_map if frontswap parameter is true) + * from current position to next entry still in use. * Recycle to start on reaching the end, returning 0 when empty. */ static unsigned int find_next_to_unuse(struct swap_info_struct *si, - unsigned int prev) + unsigned int prev, bool frontswap) { unsigned int max = si->max; unsigned int i = prev; @@ -1015,6 +1019,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, prev = 0; i = 1; } + if (frontswap) { + if (frontswap_test(si, i)) + break; + else + continue; + } count = si->swap_map[i]; if (count && swap_count(count) != SWAP_MAP_BAD) break; @@ -1026,8 +1036,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, * We completely avoid races by reading each swap page in advance, * and then search for the process using it. All the necessary * page table adjustments can then be made atomically. + * + * if the boolean frontswap is true, only unuse pages_to_unuse pages; + * pages_to_unuse==0 means all pages; ignored if frontswap is false */ -static int try_to_unuse(unsigned int type) +int try_to_unuse(unsigned int type, bool frontswap, + unsigned long pages_to_unuse) { struct swap_info_struct *si = swap_info[type]; struct mm_struct *start_mm; @@ -1060,7 +1074,7 @@ static int try_to_unuse(unsigned int type) * one pass through swap_map is enough, but not necessarily: * there are races when an instance of an entry might be missed. */ - while ((i = find_next_to_unuse(si, i)) != 0) { + while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { if (signal_pending(current)) { retval = -EINTR; break; @@ -1227,6 +1241,10 @@ static int try_to_unuse(unsigned int type) * interactive performance. */ cond_resched(); + if (frontswap && pages_to_unuse > 0) { + if (!--pages_to_unuse) + break; + } } mmput(start_mm); @@ -1486,7 +1504,8 @@ bad_bmap: } static void enable_swap_info(struct swap_info_struct *p, int prio, - unsigned char *swap_map) + unsigned char *swap_map, + unsigned long *frontswap_map) { int i, prev; @@ -1496,6 +1515,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, else p->prio = --least_priority; p->swap_map = swap_map; + frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; nr_swap_pages += p->pages; total_swap_pages += p->pages; @@ -1512,6 +1532,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, swap_list.head = swap_list.next = p->type; else swap_info[prev]->next = p->type; + frontswap_init(p->type); spin_unlock(&swap_lock); } @@ -1585,7 +1606,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&swap_lock); oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); - err = try_to_unuse(type); + err = try_to_unuse(type, false, 0); /* force all pages to be unused */ compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); if (err) { @@ -1596,7 +1617,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) * sys_swapoff for this swap_info_struct at this point. */ /* re-insert swap space back into swap_list */ - enable_swap_info(p, p->prio, p->swap_map); + enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); goto out_dput; } @@ -1622,9 +1643,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; + frontswap_invalidate_area(type); spin_unlock(&swap_lock); mutex_unlock(&swapon_mutex); vfree(swap_map); + vfree(frontswap_map_get(p)); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); @@ -1988,6 +2011,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) sector_t span; unsigned long maxpages; unsigned char *swap_map = NULL; + unsigned long *frontswap_map = NULL; struct page *page = NULL; struct inode *inode = NULL; @@ -2071,6 +2095,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = nr_extents; goto bad_swap; } + /* frontswap enabled? set up bit-per-page map for frontswap */ + if (frontswap_enabled) + frontswap_map = vzalloc(maxpages / sizeof(long)); if (p->bdev) { if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { @@ -2086,14 +2113,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (swap_flags & SWAP_FLAG_PREFER) prio = (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; - enable_swap_info(p, prio, swap_map); + enable_swap_info(p, prio, swap_map, frontswap_map); printk(KERN_INFO "Adding %uk swap on %s. " - "Priority:%d extents:%d across:%lluk %s%s\n", + "Priority:%d extents:%d across:%lluk %s%s%s\n", p->pages<<(PAGE_SHIFT-10), name, p->prio, nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), (p->flags & SWP_SOLIDSTATE) ? "SS" : "", - (p->flags & SWP_DISCARDABLE) ? "D" : ""); + (p->flags & SWP_DISCARDABLE) ? "D" : "", + (frontswap_map) ? "FS" : ""); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event); diff --git a/net/batman-adv/bat_debugfs.c b/net/batman-adv/bat_debugfs.c index 3b588f8..db8273c 100644 --- a/net/batman-adv/bat_debugfs.c +++ b/net/batman-adv/bat_debugfs.c @@ -195,13 +195,13 @@ static int debug_log_setup(struct bat_priv *bat_priv) d = debugfs_create_file("log", S_IFREG | S_IRUSR, bat_priv->debug_dir, bat_priv, &log_fops); - if (d) + if (!d) goto err; return 0; err: - return 1; + return -ENOMEM; } static void debug_log_cleanup(struct bat_priv *bat_priv) @@ -348,8 +348,11 @@ int debugfs_add_meshif(struct net_device *dev) if (!bat_priv->debug_dir) goto out; - bat_socket_setup(bat_priv); - debug_log_setup(bat_priv); + if (bat_socket_setup(bat_priv) < 0) + goto rem_attr; + + if (debug_log_setup(bat_priv) < 0) + goto rem_attr; for (bat_debug = mesh_debuginfos; *bat_debug; ++bat_debug) { file = debugfs_create_file(((*bat_debug)->attr).name, diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index dc53798..6e0859f 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -34,11 +34,12 @@ static struct neigh_node *bat_iv_ogm_neigh_new(struct hard_iface *hard_iface, const uint8_t *neigh_addr, struct orig_node *orig_node, struct orig_node *orig_neigh, - uint32_t seqno) + __be32 seqno) { struct neigh_node *neigh_node; - neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, seqno); + neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, + ntohl(seqno)); if (!neigh_node) goto out; @@ -59,7 +60,7 @@ static int bat_iv_ogm_iface_enable(struct hard_iface *hard_iface) { struct batman_ogm_packet *batman_ogm_packet; uint32_t random_seqno; - int res = -1; + int res = -ENOMEM; /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); @@ -196,8 +197,12 @@ static void bat_iv_ogm_send_to_if(struct forw_packet *forw_packet, /* create clone because function is called more than once */ skb = skb_clone(forw_packet->skb, GFP_ATOMIC); - if (skb) + if (skb) { + batadv_inc_counter(bat_priv, BAT_CNT_MGMT_TX); + batadv_add_counter(bat_priv, BAT_CNT_MGMT_TX_BYTES, + skb->len + ETH_HLEN); send_skb_packet(skb, hard_iface, broadcast_addr); + } } /* send a batman ogm packet */ @@ -542,9 +547,6 @@ static void bat_iv_ogm_forward(struct orig_node *orig_node, "Forwarding packet: tq: %i, ttl: %i\n", batman_ogm_packet->tq, batman_ogm_packet->header.ttl); - batman_ogm_packet->seqno = htonl(batman_ogm_packet->seqno); - batman_ogm_packet->tt_crc = htons(batman_ogm_packet->tt_crc); - /* switch of primaries first hop flag when forwarding */ batman_ogm_packet->flags &= ~PRIMARIES_FIRST_HOP; if (is_single_hop_neigh) @@ -557,26 +559,31 @@ static void bat_iv_ogm_forward(struct orig_node *orig_node, if_incoming, 0, bat_iv_ogm_fwd_send_time()); } -static void bat_iv_ogm_schedule(struct hard_iface *hard_iface, - int tt_num_changes) +static void bat_iv_ogm_schedule(struct hard_iface *hard_iface) { struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batman_ogm_packet *batman_ogm_packet; struct hard_iface *primary_if; - int vis_server; + int vis_server, tt_num_changes = 0; vis_server = atomic_read(&bat_priv->vis_mode); primary_if = primary_if_get_selected(bat_priv); + if (hard_iface == primary_if) + tt_num_changes = batadv_tt_append_diff(bat_priv, + &hard_iface->packet_buff, + &hard_iface->packet_len, + BATMAN_OGM_HLEN); + batman_ogm_packet = (struct batman_ogm_packet *)hard_iface->packet_buff; /* change sequence number to network order */ batman_ogm_packet->seqno = htonl((uint32_t)atomic_read(&hard_iface->seqno)); + atomic_inc(&hard_iface->seqno); batman_ogm_packet->ttvn = atomic_read(&bat_priv->ttvn); - batman_ogm_packet->tt_crc = htons((uint16_t) - atomic_read(&bat_priv->tt_crc)); + batman_ogm_packet->tt_crc = htons(bat_priv->tt_crc); if (tt_num_changes >= 0) batman_ogm_packet->tt_num_changes = tt_num_changes; @@ -592,8 +599,6 @@ static void bat_iv_ogm_schedule(struct hard_iface *hard_iface, else batman_ogm_packet->gw_flags = NO_FLAGS; - atomic_inc(&hard_iface->seqno); - slide_own_bcast_window(hard_iface); bat_iv_ogm_queue_add(bat_priv, hard_iface->packet_buff, hard_iface->packet_len, hard_iface, 1, @@ -721,7 +726,7 @@ update_tt: tt_update_orig(bat_priv, orig_node, tt_buff, batman_ogm_packet->tt_num_changes, batman_ogm_packet->ttvn, - batman_ogm_packet->tt_crc); + ntohs(batman_ogm_packet->tt_crc)); if (orig_node->gw_flags != batman_ogm_packet->gw_flags) gw_node_update(bat_priv, orig_node, @@ -868,13 +873,14 @@ static int bat_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, int32_t seq_diff; int need_update = 0; int set_mark, ret = -1; + uint32_t seqno = ntohl(batman_ogm_packet->seqno); orig_node = get_orig_node(bat_priv, batman_ogm_packet->orig); if (!orig_node) return 0; spin_lock_bh(&orig_node->ogm_cnt_lock); - seq_diff = batman_ogm_packet->seqno - orig_node->last_real_seqno; + seq_diff = seqno - orig_node->last_real_seqno; /* signalize caller that the packet is to be dropped. */ if (!hlist_empty(&orig_node->neigh_list) && @@ -888,7 +894,7 @@ static int bat_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, is_duplicate |= bat_test_bit(tmp_neigh_node->real_bits, orig_node->last_real_seqno, - batman_ogm_packet->seqno); + seqno); if (compare_eth(tmp_neigh_node->addr, ethhdr->h_source) && (tmp_neigh_node->if_incoming == if_incoming)) @@ -910,8 +916,8 @@ static int bat_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, if (need_update) { bat_dbg(DBG_BATMAN, bat_priv, "updating last_seqno: old %u, new %u\n", - orig_node->last_real_seqno, batman_ogm_packet->seqno); - orig_node->last_real_seqno = batman_ogm_packet->seqno; + orig_node->last_real_seqno, seqno); + orig_node->last_real_seqno = seqno; } ret = is_duplicate; @@ -967,8 +973,8 @@ static void bat_iv_ogm_process(const struct ethhdr *ethhdr, "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, ttvn %u, crc %u, changes %u, td %d, TTL %d, V %d, IDF %d)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, batman_ogm_packet->orig, - batman_ogm_packet->prev_sender, batman_ogm_packet->seqno, - batman_ogm_packet->ttvn, batman_ogm_packet->tt_crc, + batman_ogm_packet->prev_sender, ntohl(batman_ogm_packet->seqno), + batman_ogm_packet->ttvn, ntohs(batman_ogm_packet->tt_crc), batman_ogm_packet->tt_num_changes, batman_ogm_packet->tq, batman_ogm_packet->header.ttl, batman_ogm_packet->header.version, has_directlink_flag); @@ -1039,7 +1045,7 @@ static void bat_iv_ogm_process(const struct ethhdr *ethhdr, word = &(orig_neigh_node->bcast_own[offset]); bat_set_bit(word, if_incoming_seqno - - batman_ogm_packet->seqno - 2); + ntohl(batman_ogm_packet->seqno) - 2); orig_neigh_node->bcast_own_sum[if_incoming->if_num] = bitmap_weight(word, TQ_LOCAL_WINDOW_SIZE); spin_unlock_bh(&orig_neigh_node->ogm_cnt_lock); @@ -1132,7 +1138,7 @@ static void bat_iv_ogm_process(const struct ethhdr *ethhdr, * seqno and similar ttl as the non-duplicate */ if (is_bidirectional && (!is_duplicate || - ((orig_node->last_real_seqno == batman_ogm_packet->seqno) && + ((orig_node->last_real_seqno == ntohl(batman_ogm_packet->seqno)) && (orig_node->last_ttl - 3 <= batman_ogm_packet->header.ttl)))) bat_iv_ogm_orig_update(bat_priv, orig_node, ethhdr, batman_ogm_packet, if_incoming, @@ -1204,6 +1210,10 @@ static int bat_iv_ogm_receive(struct sk_buff *skb, if (bat_priv->bat_algo_ops->bat_ogm_emit != bat_iv_ogm_emit) return NET_RX_DROP; + batadv_inc_counter(bat_priv, BAT_CNT_MGMT_RX); + batadv_add_counter(bat_priv, BAT_CNT_MGMT_RX_BYTES, + skb->len + ETH_HLEN); + packet_len = skb_headlen(skb); ethhdr = (struct ethhdr *)skb_mac_header(skb); packet_buff = skb->data; @@ -1211,11 +1221,6 @@ static int bat_iv_ogm_receive(struct sk_buff *skb, /* unpack the aggregated packets and process them one by one */ do { - /* network to host order for our 32bit seqno and the - orig_interval */ - batman_ogm_packet->seqno = ntohl(batman_ogm_packet->seqno); - batman_ogm_packet->tt_crc = ntohs(batman_ogm_packet->tt_crc); - tt_buff = packet_buff + buff_pos + BATMAN_OGM_HLEN; bat_iv_ogm_process(ethhdr, batman_ogm_packet, @@ -1234,7 +1239,7 @@ static int bat_iv_ogm_receive(struct sk_buff *skb, } static struct bat_algo_ops batman_iv __read_mostly = { - .name = "BATMAN IV", + .name = "BATMAN_IV", .bat_iface_enable = bat_iv_ogm_iface_enable, .bat_iface_disable = bat_iv_ogm_iface_disable, .bat_iface_update_mac = bat_iv_ogm_iface_update_mac, diff --git a/net/batman-adv/bat_sysfs.c b/net/batman-adv/bat_sysfs.c index 5bc7b66..dc1edbe 100644 --- a/net/batman-adv/bat_sysfs.c +++ b/net/batman-adv/bat_sysfs.c @@ -445,7 +445,7 @@ BAT_ATTR_SIF_UINT(gw_sel_class, S_IRUGO | S_IWUSR, 1, TQ_MAX_VALUE, static BAT_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, show_gw_bwidth, store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_DEBUG -BAT_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, 15, NULL); +BAT_ATTR_SIF_UINT(log_level, S_IRUGO | S_IWUSR, 0, DBG_ALL, NULL); #endif static struct bat_attribute *mesh_attrs[] = { @@ -680,7 +680,7 @@ void sysfs_del_hardif(struct kobject **hardif_obj) int throw_uevent(struct bat_priv *bat_priv, enum uev_type type, enum uev_action action, const char *data) { - int ret = -1; + int ret = -ENOMEM; struct hard_iface *primary_if = NULL; struct kobject *bat_kobj; char *uevent_env[4] = { NULL, NULL, NULL, NULL }; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 8bf9751..314e37b 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -258,7 +258,7 @@ static void bla_send_claim(struct bat_priv *bat_priv, uint8_t *mac, struct net_device *soft_iface; uint8_t *hw_src; struct bla_claim_dst local_claim_dest; - uint32_t zeroip = 0; + __be32 zeroip = 0; primary_if = primary_if_get_selected(bat_priv); if (!primary_if) @@ -506,11 +506,11 @@ static void bla_send_announce(struct bat_priv *bat_priv, struct backbone_gw *backbone_gw) { uint8_t mac[ETH_ALEN]; - uint16_t crc; + __be16 crc; memcpy(mac, announce_mac, 4); crc = htons(backbone_gw->crc); - memcpy(&mac[4], (uint8_t *)&crc, 2); + memcpy(&mac[4], &crc, 2); bla_send_claim(bat_priv, mac, backbone_gw->vid, CLAIM_TYPE_ANNOUNCE); @@ -627,7 +627,7 @@ static int handle_announce(struct bat_priv *bat_priv, /* handle as ANNOUNCE frame */ backbone_gw->lasttime = jiffies; - crc = ntohs(*((uint16_t *)(&an_addr[4]))); + crc = ntohs(*((__be16 *)(&an_addr[4]))); bat_dbg(DBG_BLA, bat_priv, "handle_announce(): ANNOUNCE vid %d (sent by %pM)... CRC = %04x\n", @@ -1127,6 +1127,14 @@ out: bla_start_timer(bat_priv); } +/* The hash for claim and backbone hash receive the same key because they + * are getting initialized by hash_new with the same key. Reinitializing + * them with to different keys to allow nested locking without generating + * lockdep warnings + */ +static struct lock_class_key claim_hash_lock_class_key; +static struct lock_class_key backbone_hash_lock_class_key; + /* initialize all bla structures */ int bla_init(struct bat_priv *bat_priv) { @@ -1156,18 +1164,23 @@ int bla_init(struct bat_priv *bat_priv) bat_priv->bcast_duplist_curr = 0; if (bat_priv->claim_hash) - return 1; + return 0; bat_priv->claim_hash = hash_new(128); bat_priv->backbone_hash = hash_new(32); if (!bat_priv->claim_hash || !bat_priv->backbone_hash) - return -1; + return -ENOMEM; + + batadv_hash_set_lock_class(bat_priv->claim_hash, + &claim_hash_lock_class_key); + batadv_hash_set_lock_class(bat_priv->backbone_hash, + &backbone_hash_lock_class_key); bat_dbg(DBG_BLA, bat_priv, "bla hashes initialized\n"); bla_start_timer(bat_priv); - return 1; + return 0; } /** diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index ca57ac7..6e3b052 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -162,6 +162,9 @@ ssize_t gw_bandwidth_set(struct net_device *net_dev, char *buff, size_t count) **/ gw_bandwidth_to_kbit((uint8_t)gw_bandwidth_tmp, &down, &up); + if (atomic_read(&bat_priv->gw_bandwidth) == gw_bandwidth_tmp) + return count; + gw_deselect(bat_priv); bat_info(net_dev, "Changing gateway bandwidth from: '%i' to: '%ld' (propagating: %d%s/%d%s)\n", diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index dc334fa..ce78c6d 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -306,10 +306,8 @@ int hardif_enable_interface(struct hard_iface *hard_iface, bat_priv = netdev_priv(hard_iface->soft_iface); ret = bat_priv->bat_algo_ops->bat_iface_enable(hard_iface); - if (ret < 0) { - ret = -ENOMEM; + if (ret < 0) goto err_dev; - } hard_iface->if_num = bat_priv->num_ifaces; bat_priv->num_ifaces++; diff --git a/net/batman-adv/hash.c b/net/batman-adv/hash.c index 117687b..5b2eabe 100644 --- a/net/batman-adv/hash.c +++ b/net/batman-adv/hash.c @@ -69,3 +69,12 @@ free_hash: kfree(hash); return NULL; } + +void batadv_hash_set_lock_class(struct hashtable_t *hash, + struct lock_class_key *key) +{ + uint32_t i; + + for (i = 0; i < hash->size; i++) + lockdep_set_class(&hash->list_locks[i], key); +} diff --git a/net/batman-adv/hash.h b/net/batman-adv/hash.h index d4bd786..3d67ce4 100644 --- a/net/batman-adv/hash.h +++ b/net/batman-adv/hash.h @@ -45,6 +45,10 @@ struct hashtable_t { /* allocates and clears the hash */ struct hashtable_t *hash_new(uint32_t size); +/* set class key for all locks */ +void batadv_hash_set_lock_class(struct hashtable_t *hash, + struct lock_class_key *key); + /* free only the hashtable and the hash itself. */ void hash_destroy(struct hashtable_t *hash); @@ -106,26 +110,23 @@ static inline int hash_add(struct hashtable_t *hash, head = &hash->table[index]; list_lock = &hash->list_locks[index]; - rcu_read_lock(); - __hlist_for_each_rcu(node, head) { + spin_lock_bh(list_lock); + + hlist_for_each(node, head) { if (!compare(node, data)) continue; ret = 1; - goto err_unlock; + goto unlock; } - rcu_read_unlock(); /* no duplicate found in list, add new element */ - spin_lock_bh(list_lock); hlist_add_head_rcu(data_node, head); - spin_unlock_bh(list_lock); ret = 0; - goto out; -err_unlock: - rcu_read_unlock(); +unlock: + spin_unlock_bh(list_lock); out: return ret; } diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 2e98a57..d27db81 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -285,13 +285,13 @@ int bat_socket_setup(struct bat_priv *bat_priv) d = debugfs_create_file(ICMP_SOCKET, S_IFREG | S_IWUSR | S_IRUSR, bat_priv->debug_dir, bat_priv, &fops); - if (d) + if (!d) goto err; return 0; err: - return 1; + return -ENOMEM; } static void bat_socket_add_packet(struct socket_client *socket_client, diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 083a299..46ba302 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -40,7 +40,7 @@ * list traversals just rcu-locked */ struct list_head hardif_list; static int (*recv_packet_handler[256])(struct sk_buff *, struct hard_iface *); -char bat_routing_algo[20] = "BATMAN IV"; +char bat_routing_algo[20] = "BATMAN_IV"; static struct hlist_head bat_algo_list; unsigned char broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -92,6 +92,7 @@ static void __exit batman_exit(void) int mesh_init(struct net_device *soft_iface) { struct bat_priv *bat_priv = netdev_priv(soft_iface); + int ret; spin_lock_init(&bat_priv->forw_bat_list_lock); spin_lock_init(&bat_priv->forw_bcast_list_lock); @@ -110,30 +111,32 @@ int mesh_init(struct net_device *soft_iface) INIT_LIST_HEAD(&bat_priv->tt_req_list); INIT_LIST_HEAD(&bat_priv->tt_roam_list); - if (originator_init(bat_priv) < 1) + ret = originator_init(bat_priv); + if (ret < 0) goto err; - if (tt_init(bat_priv) < 1) + ret = tt_init(bat_priv); + if (ret < 0) goto err; tt_local_add(soft_iface, soft_iface->dev_addr, NULL_IFINDEX); - if (vis_init(bat_priv) < 1) + ret = vis_init(bat_priv); + if (ret < 0) goto err; - if (bla_init(bat_priv) < 1) + ret = bla_init(bat_priv); + if (ret < 0) goto err; atomic_set(&bat_priv->gw_reselect, 0); atomic_set(&bat_priv->mesh_state, MESH_ACTIVE); - goto end; + + return 0; err: mesh_free(soft_iface); - return -1; - -end: - return 0; + return ret; } void mesh_free(struct net_device *soft_iface) @@ -153,6 +156,8 @@ void mesh_free(struct net_device *soft_iface) bla_free(bat_priv); + free_percpu(bat_priv->bat_counters); + atomic_set(&bat_priv->mesh_state, MESH_INACTIVE); } @@ -317,12 +322,13 @@ static struct bat_algo_ops *bat_algo_get(char *name) int bat_algo_register(struct bat_algo_ops *bat_algo_ops) { struct bat_algo_ops *bat_algo_ops_tmp; - int ret = -1; + int ret; bat_algo_ops_tmp = bat_algo_get(bat_algo_ops->name); if (bat_algo_ops_tmp) { pr_info("Trying to register already registered routing algorithm: %s\n", bat_algo_ops->name); + ret = -EEXIST; goto out; } @@ -335,6 +341,7 @@ int bat_algo_register(struct bat_algo_ops *bat_algo_ops) !bat_algo_ops->bat_ogm_emit) { pr_info("Routing algo '%s' does not implement required ops\n", bat_algo_ops->name); + ret = -EINVAL; goto out; } @@ -349,7 +356,7 @@ out: int bat_algo_select(struct bat_priv *bat_priv, char *name) { struct bat_algo_ops *bat_algo_ops; - int ret = -1; + int ret = -EINVAL; bat_algo_ops = bat_algo_get(name); if (!bat_algo_ops) @@ -379,14 +386,19 @@ int bat_algo_seq_print_text(struct seq_file *seq, void *offset) static int param_set_ra(const char *val, const struct kernel_param *kp) { struct bat_algo_ops *bat_algo_ops; + char *algo_name = (char *)val; + size_t name_len = strlen(algo_name); + + if (algo_name[name_len - 1] == '\n') + algo_name[name_len - 1] = '\0'; - bat_algo_ops = bat_algo_get((char *)val); + bat_algo_ops = bat_algo_get(algo_name); if (!bat_algo_ops) { - pr_err("Routing algorithm '%s' is not supported\n", val); + pr_err("Routing algorithm '%s' is not supported\n", algo_name); return -EINVAL; } - return param_set_copystring(val, kp); + return param_set_copystring(algo_name, kp); } static const struct kernel_param_ops param_ops_ra = { diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index f4a3ec0..6e0cbdc 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -28,7 +28,7 @@ #define DRIVER_DEVICE "batman-adv" #ifndef SOURCE_VERSION -#define SOURCE_VERSION "2012.2.0" +#define SOURCE_VERSION "2012.3.0" #endif /* B.A.T.M.A.N. parameters */ @@ -138,6 +138,7 @@ enum dbg_level { #include <linux/kthread.h> /* kernel threads */ #include <linux/pkt_sched.h> /* schedule types */ #include <linux/workqueue.h> /* workqueue */ +#include <linux/percpu.h> #include <linux/slab.h> #include <net/sock.h> /* struct sock */ #include <linux/jiffies.h> @@ -242,4 +243,30 @@ static inline bool has_timed_out(unsigned long timestamp, unsigned int timeout) _dummy > smallest_signed_int(_dummy); }) #define seq_after(x, y) seq_before(y, x) +/* Stop preemption on local cpu while incrementing the counter */ +static inline void batadv_add_counter(struct bat_priv *bat_priv, size_t idx, + size_t count) +{ + int cpu = get_cpu(); + per_cpu_ptr(bat_priv->bat_counters, cpu)[idx] += count; + put_cpu(); +} + +#define batadv_inc_counter(b, i) batadv_add_counter(b, i, 1) + +/* Sum and return the cpu-local counters for index 'idx' */ +static inline uint64_t batadv_sum_counter(struct bat_priv *bat_priv, size_t idx) +{ + uint64_t *counters; + int cpu; + int sum = 0; + + for_each_possible_cpu(cpu) { + counters = per_cpu_ptr(bat_priv->bat_counters, cpu); + sum += counters[idx]; + } + + return sum; +} + #endif /* _NET_BATMAN_ADV_MAIN_H_ */ diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 41147942..cf83c54 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -50,7 +50,7 @@ static int compare_orig(const struct hlist_node *node, const void *data2) int originator_init(struct bat_priv *bat_priv) { if (bat_priv->orig_hash) - return 1; + return 0; bat_priv->orig_hash = hash_new(1024); @@ -58,10 +58,10 @@ int originator_init(struct bat_priv *bat_priv) goto err; start_purge_timer(bat_priv); - return 1; + return 0; err: - return 0; + return -ENOMEM; } void neigh_node_free_ref(struct neigh_node *neigh_node) @@ -488,7 +488,7 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) data_ptr = kmalloc(max_if_num * sizeof(unsigned long) * NUM_WORDS, GFP_ATOMIC); if (!data_ptr) - return -1; + return -ENOMEM; memcpy(data_ptr, orig_node->bcast_own, (max_if_num - 1) * sizeof(unsigned long) * NUM_WORDS); @@ -497,7 +497,7 @@ static int orig_node_add_if(struct orig_node *orig_node, int max_if_num) data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); if (!data_ptr) - return -1; + return -ENOMEM; memcpy(data_ptr, orig_node->bcast_own_sum, (max_if_num - 1) * sizeof(uint8_t)); @@ -528,7 +528,7 @@ int orig_hash_add_if(struct hard_iface *hard_iface, int max_if_num) ret = orig_node_add_if(orig_node, max_if_num); spin_unlock_bh(&orig_node->ogm_cnt_lock); - if (ret == -1) + if (ret == -ENOMEM) goto err; } rcu_read_unlock(); @@ -554,7 +554,7 @@ static int orig_node_del_if(struct orig_node *orig_node, chunk_size = sizeof(unsigned long) * NUM_WORDS; data_ptr = kmalloc(max_if_num * chunk_size, GFP_ATOMIC); if (!data_ptr) - return -1; + return -ENOMEM; /* copy first part */ memcpy(data_ptr, orig_node->bcast_own, del_if_num * chunk_size); @@ -573,7 +573,7 @@ free_bcast_own: data_ptr = kmalloc(max_if_num * sizeof(uint8_t), GFP_ATOMIC); if (!data_ptr) - return -1; + return -ENOMEM; memcpy(data_ptr, orig_node->bcast_own_sum, del_if_num * sizeof(uint8_t)); @@ -612,7 +612,7 @@ int orig_hash_del_if(struct hard_iface *hard_iface, int max_if_num) hard_iface->if_num); spin_unlock_bh(&orig_node->ogm_cnt_lock); - if (ret == -1) + if (ret == -ENOMEM) goto err; } rcu_read_unlock(); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 0ee1af7..033d994 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -105,7 +105,7 @@ enum bla_claimframe { struct bla_claim_dst { uint8_t magic[3]; /* FF:43:05 */ uint8_t type; /* bla_claimframe */ - uint16_t group; /* group id */ + __be16 group; /* group id */ } __packed; struct batman_header { @@ -117,14 +117,14 @@ struct batman_header { struct batman_ogm_packet { struct batman_header header; uint8_t flags; /* 0x40: DIRECTLINK flag, 0x20 VIS_SERVER flag... */ - uint32_t seqno; + __be32 seqno; uint8_t orig[ETH_ALEN]; uint8_t prev_sender[ETH_ALEN]; uint8_t gw_flags; /* flags related to gateway class */ uint8_t tq; uint8_t tt_num_changes; uint8_t ttvn; /* translation table version number */ - uint16_t tt_crc; + __be16 tt_crc; } __packed; #define BATMAN_OGM_HLEN sizeof(struct batman_ogm_packet) @@ -134,7 +134,7 @@ struct icmp_packet { uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[ETH_ALEN]; uint8_t orig[ETH_ALEN]; - uint16_t seqno; + __be16 seqno; uint8_t uid; uint8_t reserved; } __packed; @@ -148,7 +148,7 @@ struct icmp_packet_rr { uint8_t msg_type; /* see ICMP message types above */ uint8_t dst[ETH_ALEN]; uint8_t orig[ETH_ALEN]; - uint16_t seqno; + __be16 seqno; uint8_t uid; uint8_t rr_cur; uint8_t rr[BAT_RR_LEN][ETH_ALEN]; @@ -167,20 +167,20 @@ struct unicast_frag_packet { uint8_t flags; uint8_t align; uint8_t orig[ETH_ALEN]; - uint16_t seqno; + __be16 seqno; } __packed; struct bcast_packet { struct batman_header header; uint8_t reserved; - uint32_t seqno; + __be32 seqno; uint8_t orig[ETH_ALEN]; } __packed; struct vis_packet { struct batman_header header; uint8_t vis_type; /* which type of vis-participant sent this? */ - uint32_t seqno; /* sequence number */ + __be32 seqno; /* sequence number */ uint8_t entries; /* number of entries behind this struct */ uint8_t reserved; uint8_t vis_orig[ETH_ALEN]; /* originator reporting its neighbors */ @@ -206,7 +206,7 @@ struct tt_query_packet { * if TT_REQUEST: crc associated with the * ttvn * if TT_RESPONSE: table_size */ - uint16_t tt_data; + __be16 tt_data; } __packed; struct roam_adv_packet { diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 840e2c6..9cfd23c 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -573,7 +573,7 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) { struct bat_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct tt_query_packet *tt_query; - uint16_t tt_len; + uint16_t tt_size; struct ethhdr *ethhdr; /* drop packet if it has not necessary minimum size */ @@ -596,10 +596,10 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) tt_query = (struct tt_query_packet *)skb->data; - tt_query->tt_data = ntohs(tt_query->tt_data); - switch (tt_query->flags & TT_QUERY_TYPE_MASK) { case TT_REQUEST: + batadv_inc_counter(bat_priv, BAT_CNT_TT_REQUEST_RX); + /* If we cannot provide an answer the tt_request is * forwarded */ if (!send_tt_response(bat_priv, tt_query)) { @@ -607,22 +607,25 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) "Routing TT_REQUEST to %pM [%c]\n", tt_query->dst, (tt_query->flags & TT_FULL_TABLE ? 'F' : '.')); - tt_query->tt_data = htons(tt_query->tt_data); return route_unicast_packet(skb, recv_if); } break; case TT_RESPONSE: + batadv_inc_counter(bat_priv, BAT_CNT_TT_RESPONSE_RX); + if (is_my_mac(tt_query->dst)) { /* packet needs to be linearized to access the TT * changes */ if (skb_linearize(skb) < 0) goto out; + /* skb_linearize() possibly changed skb->data */ + tt_query = (struct tt_query_packet *)skb->data; - tt_len = tt_query->tt_data * sizeof(struct tt_change); + tt_size = tt_len(ntohs(tt_query->tt_data)); /* Ensure we have all the claimed data */ if (unlikely(skb_headlen(skb) < - sizeof(struct tt_query_packet) + tt_len)) + sizeof(struct tt_query_packet) + tt_size)) goto out; handle_tt_response(bat_priv, tt_query); @@ -631,7 +634,6 @@ int recv_tt_query(struct sk_buff *skb, struct hard_iface *recv_if) "Routing TT_RESPONSE to %pM [%c]\n", tt_query->dst, (tt_query->flags & TT_FULL_TABLE ? 'F' : '.')); - tt_query->tt_data = htons(tt_query->tt_data); return route_unicast_packet(skb, recv_if); } break; @@ -663,6 +665,8 @@ int recv_roam_adv(struct sk_buff *skb, struct hard_iface *recv_if) if (is_broadcast_ether_addr(ethhdr->h_source)) goto out; + batadv_inc_counter(bat_priv, BAT_CNT_TT_ROAM_ADV_RX); + roam_adv_packet = (struct roam_adv_packet *)skb->data; if (!is_my_mac(roam_adv_packet->dst)) @@ -870,6 +874,11 @@ static int route_unicast_packet(struct sk_buff *skb, struct hard_iface *recv_if) /* decrement ttl */ unicast_packet->header.ttl--; + /* Update stats counter */ + batadv_inc_counter(bat_priv, BAT_CNT_FORWARD); + batadv_add_counter(bat_priv, BAT_CNT_FORWARD_BYTES, + skb->len + ETH_HLEN); + /* route it */ send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = NET_RX_SUCCESS; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index f47299f..79f8973 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -77,62 +77,9 @@ send_skb_err: return NET_XMIT_DROP; } -static void realloc_packet_buffer(struct hard_iface *hard_iface, - int new_len) -{ - unsigned char *new_buff; - - new_buff = kmalloc(new_len, GFP_ATOMIC); - - /* keep old buffer if kmalloc should fail */ - if (new_buff) { - memcpy(new_buff, hard_iface->packet_buff, - BATMAN_OGM_HLEN); - - kfree(hard_iface->packet_buff); - hard_iface->packet_buff = new_buff; - hard_iface->packet_len = new_len; - } -} - -/* when calling this function (hard_iface == primary_if) has to be true */ -static int prepare_packet_buffer(struct bat_priv *bat_priv, - struct hard_iface *hard_iface) -{ - int new_len; - - new_len = BATMAN_OGM_HLEN + - tt_len((uint8_t)atomic_read(&bat_priv->tt_local_changes)); - - /* if we have too many changes for one packet don't send any - * and wait for the tt table request which will be fragmented */ - if (new_len > hard_iface->soft_iface->mtu) - new_len = BATMAN_OGM_HLEN; - - realloc_packet_buffer(hard_iface, new_len); - - atomic_set(&bat_priv->tt_crc, tt_local_crc(bat_priv)); - - /* reset the sending counter */ - atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX); - - return tt_changes_fill_buffer(bat_priv, - hard_iface->packet_buff + BATMAN_OGM_HLEN, - hard_iface->packet_len - BATMAN_OGM_HLEN); -} - -static int reset_packet_buffer(struct bat_priv *bat_priv, - struct hard_iface *hard_iface) -{ - realloc_packet_buffer(hard_iface, BATMAN_OGM_HLEN); - return 0; -} - void schedule_bat_ogm(struct hard_iface *hard_iface) { struct bat_priv *bat_priv = netdev_priv(hard_iface->soft_iface); - struct hard_iface *primary_if; - int tt_num_changes = -1; if ((hard_iface->if_status == IF_NOT_IN_USE) || (hard_iface->if_status == IF_TO_BE_REMOVED)) @@ -148,26 +95,7 @@ void schedule_bat_ogm(struct hard_iface *hard_iface) if (hard_iface->if_status == IF_TO_BE_ACTIVATED) hard_iface->if_status = IF_ACTIVE; - primary_if = primary_if_get_selected(bat_priv); - - if (hard_iface == primary_if) { - /* if at least one change happened */ - if (atomic_read(&bat_priv->tt_local_changes) > 0) { - tt_commit_changes(bat_priv); - tt_num_changes = prepare_packet_buffer(bat_priv, - hard_iface); - } - - /* if the changes have been sent often enough */ - if (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt)) - tt_num_changes = reset_packet_buffer(bat_priv, - hard_iface); - } - - if (primary_if) - hardif_free_ref(primary_if); - - bat_priv->bat_algo_ops->bat_ogm_schedule(hard_iface, tt_num_changes); + bat_priv->bat_algo_ops->bat_ogm_schedule(hard_iface); } static void forw_packet_free(struct forw_packet *forw_packet) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 6e2530b..304a7ba 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -45,6 +45,10 @@ static void bat_get_drvinfo(struct net_device *dev, static u32 bat_get_msglevel(struct net_device *dev); static void bat_set_msglevel(struct net_device *dev, u32 value); static u32 bat_get_link(struct net_device *dev); +static void batadv_get_strings(struct net_device *dev, u32 stringset, u8 *data); +static void batadv_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data); +static int batadv_get_sset_count(struct net_device *dev, int stringset); static const struct ethtool_ops bat_ethtool_ops = { .get_settings = bat_get_settings, @@ -52,6 +56,9 @@ static const struct ethtool_ops bat_ethtool_ops = { .get_msglevel = bat_get_msglevel, .set_msglevel = bat_set_msglevel, .get_link = bat_get_link, + .get_strings = batadv_get_strings, + .get_ethtool_stats = batadv_get_ethtool_stats, + .get_sset_count = batadv_get_sset_count, }; int my_skb_head_push(struct sk_buff *skb, unsigned int len) @@ -399,13 +406,18 @@ struct net_device *softif_create(const char *name) bat_priv->primary_if = NULL; bat_priv->num_ifaces = 0; + bat_priv->bat_counters = __alloc_percpu(sizeof(uint64_t) * BAT_CNT_NUM, + __alignof__(uint64_t)); + if (!bat_priv->bat_counters) + goto unreg_soft_iface; + ret = bat_algo_select(bat_priv, bat_routing_algo); if (ret < 0) - goto unreg_soft_iface; + goto free_bat_counters; ret = sysfs_add_meshif(soft_iface); if (ret < 0) - goto unreg_soft_iface; + goto free_bat_counters; ret = debugfs_add_meshif(soft_iface); if (ret < 0) @@ -421,6 +433,8 @@ unreg_debugfs: debugfs_del_meshif(soft_iface); unreg_sysfs: sysfs_del_meshif(soft_iface); +free_bat_counters: + free_percpu(bat_priv->bat_counters); unreg_soft_iface: unregister_netdevice(soft_iface); return NULL; @@ -486,3 +500,51 @@ static u32 bat_get_link(struct net_device *dev) { return 1; } + +/* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 + * Declare each description string in struct.name[] to get fixed sized buffer + * and compile time checking for strings longer than ETH_GSTRING_LEN. + */ +static const struct { + const char name[ETH_GSTRING_LEN]; +} bat_counters_strings[] = { + { "forward" }, + { "forward_bytes" }, + { "mgmt_tx" }, + { "mgmt_tx_bytes" }, + { "mgmt_rx" }, + { "mgmt_rx_bytes" }, + { "tt_request_tx" }, + { "tt_request_rx" }, + { "tt_response_tx" }, + { "tt_response_rx" }, + { "tt_roam_adv_tx" }, + { "tt_roam_adv_rx" }, +}; + +static void batadv_get_strings(struct net_device *dev, uint32_t stringset, + uint8_t *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, bat_counters_strings, + sizeof(bat_counters_strings)); +} + +static void batadv_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct bat_priv *bat_priv = netdev_priv(dev); + int i; + + for (i = 0; i < BAT_CNT_NUM; i++) + data[i] = batadv_sum_counter(bat_priv, i); +} + +static int batadv_get_sset_count(struct net_device *dev, int stringset) +{ + if (stringset == ETH_SS_STATS) + return BAT_CNT_NUM; + + return -EOPNOTSUPP; +} diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a66c2dc..a1a51cc 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -181,14 +181,14 @@ int tt_len(int changes_num) static int tt_local_init(struct bat_priv *bat_priv) { if (bat_priv->tt_local_hash) - return 1; + return 0; bat_priv->tt_local_hash = hash_new(1024); if (!bat_priv->tt_local_hash) - return 0; + return -ENOMEM; - return 1; + return 0; } void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, @@ -275,14 +275,64 @@ out: tt_global_entry_free_ref(tt_global_entry); } -int tt_changes_fill_buffer(struct bat_priv *bat_priv, - unsigned char *buff, int buff_len) +static void tt_realloc_packet_buff(unsigned char **packet_buff, + int *packet_buff_len, int min_packet_len, + int new_packet_len) +{ + unsigned char *new_buff; + + new_buff = kmalloc(new_packet_len, GFP_ATOMIC); + + /* keep old buffer if kmalloc should fail */ + if (new_buff) { + memcpy(new_buff, *packet_buff, min_packet_len); + kfree(*packet_buff); + *packet_buff = new_buff; + *packet_buff_len = new_packet_len; + } +} + +static void tt_prepare_packet_buff(struct bat_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, int min_packet_len) +{ + struct hard_iface *primary_if; + int req_len; + + primary_if = primary_if_get_selected(bat_priv); + + req_len = min_packet_len; + req_len += tt_len(atomic_read(&bat_priv->tt_local_changes)); + + /* if we have too many changes for one packet don't send any + * and wait for the tt table request which will be fragmented + */ + if ((!primary_if) || (req_len > primary_if->soft_iface->mtu)) + req_len = min_packet_len; + + tt_realloc_packet_buff(packet_buff, packet_buff_len, + min_packet_len, req_len); + + if (primary_if) + hardif_free_ref(primary_if); +} + +static int tt_changes_fill_buff(struct bat_priv *bat_priv, + unsigned char **packet_buff, + int *packet_buff_len, int min_packet_len) { - int count = 0, tot_changes = 0; struct tt_change_node *entry, *safe; + int count = 0, tot_changes = 0, new_len; + unsigned char *tt_buff; + + tt_prepare_packet_buff(bat_priv, packet_buff, + packet_buff_len, min_packet_len); - if (buff_len > 0) - tot_changes = buff_len / tt_len(1); + new_len = *packet_buff_len - min_packet_len; + tt_buff = *packet_buff + min_packet_len; + + if (new_len > 0) + tot_changes = new_len / tt_len(1); spin_lock_bh(&bat_priv->tt_changes_list_lock); atomic_set(&bat_priv->tt_local_changes, 0); @@ -290,7 +340,7 @@ int tt_changes_fill_buffer(struct bat_priv *bat_priv, list_for_each_entry_safe(entry, safe, &bat_priv->tt_changes_list, list) { if (count < tot_changes) { - memcpy(buff + tt_len(count), + memcpy(tt_buff + tt_len(count), &entry->change, sizeof(struct tt_change)); count++; } @@ -304,22 +354,20 @@ int tt_changes_fill_buffer(struct bat_priv *bat_priv, kfree(bat_priv->tt_buff); bat_priv->tt_buff_len = 0; bat_priv->tt_buff = NULL; - /* We check whether this new OGM has no changes due to size - * problems */ - if (buff_len > 0) { - /** - * if kmalloc() fails we will reply with the full table + /* check whether this new OGM has no changes due to size problems */ + if (new_len > 0) { + /* if kmalloc() fails we will reply with the full table * instead of providing the diff */ - bat_priv->tt_buff = kmalloc(buff_len, GFP_ATOMIC); + bat_priv->tt_buff = kmalloc(new_len, GFP_ATOMIC); if (bat_priv->tt_buff) { - memcpy(bat_priv->tt_buff, buff, buff_len); - bat_priv->tt_buff_len = buff_len; + memcpy(bat_priv->tt_buff, tt_buff, new_len); + bat_priv->tt_buff_len = new_len; } } spin_unlock_bh(&bat_priv->tt_buff_lock); - return tot_changes; + return count; } int tt_local_seq_print_text(struct seq_file *seq, void *offset) @@ -491,14 +539,14 @@ static void tt_local_table_free(struct bat_priv *bat_priv) static int tt_global_init(struct bat_priv *bat_priv) { if (bat_priv->tt_global_hash) - return 1; + return 0; bat_priv->tt_global_hash = hash_new(1024); if (!bat_priv->tt_global_hash) - return 0; + return -ENOMEM; - return 1; + return 0; } static void tt_changes_list_free(struct bat_priv *bat_priv) @@ -1105,7 +1153,7 @@ static uint16_t tt_global_crc(struct bat_priv *bat_priv, } /* Calculates the checksum of the local table */ -uint16_t tt_local_crc(struct bat_priv *bat_priv) +static uint16_t batadv_tt_local_crc(struct bat_priv *bat_priv) { uint16_t total = 0, total_one; struct hashtable_t *hash = bat_priv->tt_local_hash; @@ -1356,6 +1404,8 @@ static int send_tt_request(struct bat_priv *bat_priv, dst_orig_node->orig, neigh_node->addr, (full_table ? 'F' : '.')); + batadv_inc_counter(bat_priv, BAT_CNT_TT_REQUEST_TX); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = 0; @@ -1416,7 +1466,7 @@ static bool send_other_tt_response(struct bat_priv *bat_priv, /* I don't have the requested data */ if (orig_ttvn != req_ttvn || - tt_request->tt_data != req_dst_orig_node->tt_crc) + tt_request->tt_data != htons(req_dst_orig_node->tt_crc)) goto out; /* If the full table has been explicitly requested */ @@ -1480,6 +1530,8 @@ static bool send_other_tt_response(struct bat_priv *bat_priv, res_dst_orig_node->orig, neigh_node->addr, req_dst_orig_node->orig, req_ttvn); + batadv_inc_counter(bat_priv, BAT_CNT_TT_RESPONSE_TX); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = true; goto out; @@ -1596,6 +1648,8 @@ static bool send_my_tt_response(struct bat_priv *bat_priv, orig_node->orig, neigh_node->addr, (tt_response->flags & TT_FULL_TABLE ? 'F' : '.')); + batadv_inc_counter(bat_priv, BAT_CNT_TT_RESPONSE_TX); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = true; goto out; @@ -1672,7 +1726,7 @@ static void tt_fill_gtable(struct bat_priv *bat_priv, _tt_update_changes(bat_priv, orig_node, (struct tt_change *)(tt_response + 1), - tt_response->tt_data, tt_response->ttvn); + ntohs(tt_response->tt_data), tt_response->ttvn); spin_lock_bh(&orig_node->tt_buff_lock); kfree(orig_node->tt_buff); @@ -1727,7 +1781,8 @@ void handle_tt_response(struct bat_priv *bat_priv, bat_dbg(DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n", - tt_response->src, tt_response->ttvn, tt_response->tt_data, + tt_response->src, tt_response->ttvn, + ntohs(tt_response->tt_data), (tt_response->flags & TT_FULL_TABLE ? 'F' : '.')); /* we should have never asked a backbone gw */ @@ -1741,7 +1796,8 @@ void handle_tt_response(struct bat_priv *bat_priv, if (tt_response->flags & TT_FULL_TABLE) tt_fill_gtable(bat_priv, tt_response); else - tt_update_changes(bat_priv, orig_node, tt_response->tt_data, + tt_update_changes(bat_priv, orig_node, + ntohs(tt_response->tt_data), tt_response->ttvn, (struct tt_change *)(tt_response + 1)); @@ -1767,11 +1823,15 @@ out: int tt_init(struct bat_priv *bat_priv) { - if (!tt_local_init(bat_priv)) - return 0; + int ret; - if (!tt_global_init(bat_priv)) - return 0; + ret = tt_local_init(bat_priv); + if (ret < 0) + return ret; + + ret = tt_global_init(bat_priv); + if (ret < 0) + return ret; tt_start_timer(bat_priv); @@ -1895,6 +1955,8 @@ static void send_roam_adv(struct bat_priv *bat_priv, uint8_t *client, "Sending ROAMING_ADV to %pM (client %pM) via %pM\n", orig_node->orig, client, neigh_node->addr); + batadv_inc_counter(bat_priv, BAT_CNT_TT_ROAM_ADV_TX); + send_skb_packet(skb, neigh_node->if_incoming, neigh_node->addr); ret = 0; @@ -2011,20 +2073,56 @@ static void tt_local_purge_pending_clients(struct bat_priv *bat_priv) } -void tt_commit_changes(struct bat_priv *bat_priv) +static int tt_commit_changes(struct bat_priv *bat_priv, + unsigned char **packet_buff, int *packet_buff_len, + int packet_min_len) { - uint16_t changed_num = tt_set_flags(bat_priv->tt_local_hash, - TT_CLIENT_NEW, false); - /* all the reset entries have now to be effectively counted as local - * entries */ + uint16_t changed_num = 0; + + if (atomic_read(&bat_priv->tt_local_changes) < 1) + return -ENOENT; + + changed_num = tt_set_flags(bat_priv->tt_local_hash, + TT_CLIENT_NEW, false); + + /* all reset entries have to be counted as local entries */ atomic_add(changed_num, &bat_priv->num_local_tt); tt_local_purge_pending_clients(bat_priv); + bat_priv->tt_crc = batadv_tt_local_crc(bat_priv); /* Increment the TTVN only once per OGM interval */ atomic_inc(&bat_priv->ttvn); bat_dbg(DBG_TT, bat_priv, "Local changes committed, updating to ttvn %u\n", (uint8_t)atomic_read(&bat_priv->ttvn)); bat_priv->tt_poss_change = false; + + /* reset the sending counter */ + atomic_set(&bat_priv->tt_ogm_append_cnt, TT_OGM_APPEND_MAX); + + return tt_changes_fill_buff(bat_priv, packet_buff, + packet_buff_len, packet_min_len); +} + +/* when calling this function (hard_iface == primary_if) has to be true */ +int batadv_tt_append_diff(struct bat_priv *bat_priv, + unsigned char **packet_buff, int *packet_buff_len, + int packet_min_len) +{ + int tt_num_changes; + + /* if at least one change happened */ + tt_num_changes = tt_commit_changes(bat_priv, packet_buff, + packet_buff_len, packet_min_len); + + /* if the changes have been sent often enough */ + if ((tt_num_changes < 0) && + (!atomic_dec_not_zero(&bat_priv->tt_ogm_append_cnt))) { + tt_realloc_packet_buff(packet_buff, packet_buff_len, + packet_min_len, packet_min_len); + tt_num_changes = 0; + } + + return tt_num_changes; } bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst) diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index c43374d..d6ea30f 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -23,8 +23,6 @@ #define _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ int tt_len(int changes_num); -int tt_changes_fill_buffer(struct bat_priv *bat_priv, - unsigned char *buff, int buff_len); int tt_init(struct bat_priv *bat_priv); void tt_local_add(struct net_device *soft_iface, const uint8_t *addr, int ifindex); @@ -41,18 +39,19 @@ void tt_global_del_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const char *message); struct orig_node *transtable_search(struct bat_priv *bat_priv, const uint8_t *src, const uint8_t *addr); -uint16_t tt_local_crc(struct bat_priv *bat_priv); void tt_free(struct bat_priv *bat_priv); bool send_tt_response(struct bat_priv *bat_priv, struct tt_query_packet *tt_request); bool is_my_client(struct bat_priv *bat_priv, const uint8_t *addr); void handle_tt_response(struct bat_priv *bat_priv, struct tt_query_packet *tt_response); -void tt_commit_changes(struct bat_priv *bat_priv); bool is_ap_isolated(struct bat_priv *bat_priv, uint8_t *src, uint8_t *dst); void tt_update_orig(struct bat_priv *bat_priv, struct orig_node *orig_node, const unsigned char *tt_buff, uint8_t tt_num_changes, uint8_t ttvn, uint16_t tt_crc); +int batadv_tt_append_diff(struct bat_priv *bat_priv, + unsigned char **packet_buff, int *packet_buff_len, + int packet_min_len); bool tt_global_client_is_roaming(struct bat_priv *bat_priv, uint8_t *addr); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 61308e8..bf71d52 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -148,9 +148,26 @@ struct bcast_duplist_entry { }; #endif +enum bat_counters { + BAT_CNT_FORWARD, + BAT_CNT_FORWARD_BYTES, + BAT_CNT_MGMT_TX, + BAT_CNT_MGMT_TX_BYTES, + BAT_CNT_MGMT_RX, + BAT_CNT_MGMT_RX_BYTES, + BAT_CNT_TT_REQUEST_TX, + BAT_CNT_TT_REQUEST_RX, + BAT_CNT_TT_RESPONSE_TX, + BAT_CNT_TT_RESPONSE_RX, + BAT_CNT_TT_ROAM_ADV_TX, + BAT_CNT_TT_ROAM_ADV_RX, + BAT_CNT_NUM, +}; + struct bat_priv { atomic_t mesh_state; struct net_device_stats stats; + uint64_t __percpu *bat_counters; /* Per cpu counters */ atomic_t aggregated_ogms; /* boolean */ atomic_t bonding; /* boolean */ atomic_t fragmentation; /* boolean */ @@ -210,7 +227,7 @@ struct bat_priv { spinlock_t vis_list_lock; /* protects vis_info::recv_list */ atomic_t num_local_tt; /* Checksum of the local table, recomputed before sending a new OGM */ - atomic_t tt_crc; + uint16_t tt_crc; unsigned char *tt_buff; int16_t tt_buff_len; spinlock_t tt_buff_lock; /* protects tt_buff */ @@ -388,8 +405,7 @@ struct bat_algo_ops { /* called when primary interface is selected / changed */ void (*bat_primary_iface_set)(struct hard_iface *hard_iface); /* prepare a new outgoing OGM for the send queue */ - void (*bat_ogm_schedule)(struct hard_iface *hard_iface, - int tt_num_changes); + void (*bat_ogm_schedule)(struct hard_iface *hard_iface); /* send scheduled OGM */ void (*bat_ogm_emit)(struct forw_packet *forw_packet); }; diff --git a/net/batman-adv/vis.c b/net/batman-adv/vis.c index cec216f..01d5da5 100644 --- a/net/batman-adv/vis.c +++ b/net/batman-adv/vis.c @@ -207,7 +207,6 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) int vis_server = atomic_read(&bat_priv->vis_mode); size_t buff_pos, buf_size; char *buff; - int compare; primary_if = primary_if_get_selected(bat_priv); if (!primary_if) @@ -228,14 +227,18 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) entries = (struct vis_info_entry *) ((char *)packet + sizeof(*packet)); + vis_data_insert_interface(packet->vis_orig, + &vis_if_list, true); + for (j = 0; j < packet->entries; j++) { if (entries[j].quality == 0) continue; - compare = - compare_eth(entries[j].src, packet->vis_orig); + if (compare_eth(entries[j].src, + packet->vis_orig)) + continue; vis_data_insert_interface(entries[j].src, &vis_if_list, - compare); + false); } hlist_for_each_entry(entry, pos, &vis_if_list, list) { @@ -276,14 +279,18 @@ int vis_seq_print_text(struct seq_file *seq, void *offset) entries = (struct vis_info_entry *) ((char *)packet + sizeof(*packet)); + vis_data_insert_interface(packet->vis_orig, + &vis_if_list, true); + for (j = 0; j < packet->entries; j++) { if (entries[j].quality == 0) continue; - compare = - compare_eth(entries[j].src, packet->vis_orig); + if (compare_eth(entries[j].src, + packet->vis_orig)) + continue; vis_data_insert_interface(entries[j].src, &vis_if_list, - compare); + false); } hlist_for_each_entry(entry, pos, &vis_if_list, list) { @@ -626,7 +633,7 @@ static int generate_vis_packet(struct bat_priv *bat_priv) best_tq = find_best_vis_server(bat_priv, info); if (best_tq < 0) - return -1; + return best_tq; } for (i = 0; i < hash->size; i++) { @@ -878,7 +885,7 @@ int vis_init(struct bat_priv *bat_priv) int hash_added; if (bat_priv->vis_hash) - return 1; + return 0; spin_lock_bh(&bat_priv->vis_hash_lock); @@ -929,7 +936,7 @@ int vis_init(struct bat_priv *bat_priv) spin_unlock_bh(&bat_priv->vis_hash_lock); start_vis_timer(bat_priv); - return 1; + return 0; free_info: kfree(bat_priv->my_vis_info); @@ -937,7 +944,7 @@ free_info: err: spin_unlock_bh(&bat_priv->vis_hash_lock); vis_quit(bat_priv); - return 0; + return -ENOMEM; } /* Decrease the reference count on a hash item info */ diff --git a/net/can/raw.c b/net/can/raw.c index cde1b4a..46cca3a 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -681,9 +681,6 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (err < 0) goto free_skb; - /* to be able to check the received tx sock reference in raw_rcv() */ - skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; - skb->dev = dev; skb->sk = sk; diff --git a/net/core/dev.c b/net/core/dev.c index c6e29ea6..57c4f9b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2089,25 +2089,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -/* - * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested or the sk-reference - * is needed on driver level for other reasons, e.g. see net/can/raw.c - */ -static inline void skb_orphan_try(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk && !skb_shinfo(skb)->tx_flags) { - /* skb_tx_hash() wont be able to get sk. - * We copy sk_hash into skb->rxhash - */ - if (!skb->rxhash) - skb->rxhash = sk->sk_hash; - skb_orphan(skb); - } -} - static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { return ((features & NETIF_F_GEN_CSUM) || @@ -2193,8 +2174,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); - skb_orphan_try(skb); - features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && @@ -2304,7 +2283,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = (__force u16) skb->protocol ^ skb->rxhash; + hash = (__force u16) skb->protocol; hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * qcount) >> 32) + qoffset; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 3d84fb9..f9f40b9 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -362,22 +362,23 @@ EXPORT_SYMBOL(netpoll_send_skb_on_dev); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { - int total_len, eth_len, ip_len, udp_len; + int total_len, ip_len, udp_len; struct sk_buff *skb; struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; udp_len = len + sizeof(*udph); - ip_len = eth_len = udp_len + sizeof(*iph); - total_len = eth_len + ETH_HLEN + NET_IP_ALIGN; + ip_len = udp_len + sizeof(*iph); + total_len = ip_len + LL_RESERVED_SPACE(np->dev); - skb = find_skb(np, total_len, total_len - len); + skb = find_skb(np, total_len + np->dev->needed_tailroom, + total_len - len); if (!skb) return; skb_copy_to_linear_data(skb, msg, len); - skb->len += len; + skb_put(skb, len); skb_push(skb, sizeof(*udph)); skb_reset_transport_header(skb); diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 656c7c7..0a36007 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -196,92 +196,66 @@ static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = { static LIST_HEAD(dcb_app_list); static DEFINE_SPINLOCK(dcb_lock); -/* standard netlink reply call */ -static int dcbnl_reply(u8 value, u8 event, u8 cmd, u8 attr, u32 pid, - u32 seq, u16 flags) +static struct sk_buff *dcbnl_newmsg(int type, u8 cmd, u32 port, u32 seq, + u32 flags, struct nlmsghdr **nlhp) { - struct sk_buff *dcbnl_skb; + struct sk_buff *skb; struct dcbmsg *dcb; struct nlmsghdr *nlh; - int ret = -EINVAL; - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) - return ret; + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!skb) + return NULL; - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, event, sizeof(*dcb), flags); + nlh = nlmsg_put(skb, port, seq, type, sizeof(*dcb), flags); + BUG_ON(!nlh); - dcb = NLMSG_DATA(nlh); + dcb = nlmsg_data(nlh); dcb->dcb_family = AF_UNSPEC; dcb->cmd = cmd; dcb->dcb_pad = 0; - ret = nla_put_u8(dcbnl_skb, attr, value); - if (ret) - goto err; + if (nlhp) + *nlhp = nlh; - /* end the message, assign the nlmsg_len. */ - nlmsg_end(dcbnl_skb, nlh); - ret = rtnl_unicast(dcbnl_skb, &init_net, pid); - if (ret) - return -EINVAL; - - return 0; -nlmsg_failure: -err: - kfree_skb(dcbnl_skb); - return ret; + return skb; } -static int dcbnl_getstate(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getstate(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - int ret = -EINVAL; - /* if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->getstate) */ if (!netdev->dcbnl_ops->getstate) - return ret; - - ret = dcbnl_reply(netdev->dcbnl_ops->getstate(netdev), RTM_GETDCB, - DCB_CMD_GSTATE, DCB_ATTR_STATE, pid, seq, flags); + return -EOPNOTSUPP; - return ret; + return nla_put_u8(skb, DCB_ATTR_STATE, + netdev->dcbnl_ops->getstate(netdev)); } -static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct sk_buff *dcbnl_skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1], *nest; u8 value; - int ret = -EINVAL; + int ret; int i; int getall = 0; - if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->getpfccfg) - return ret; + if (!tb[DCB_ATTR_PFC_CFG]) + return -EINVAL; + + if (!netdev->dcbnl_ops->getpfccfg) + return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest); if (ret) - goto err_out; - - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) - goto err_out; - - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_PFC_GCFG; + return ret; - nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PFC_CFG); + nest = nla_nest_start(skb, DCB_ATTR_PFC_CFG); if (!nest) - goto err; + return -EMSGSIZE; if (data[DCB_PFC_UP_ATTR_ALL]) getall = 1; @@ -292,103 +266,53 @@ static int dcbnl_getpfccfg(struct net_device *netdev, struct nlattr **tb, netdev->dcbnl_ops->getpfccfg(netdev, i - DCB_PFC_UP_ATTR_0, &value); - ret = nla_put_u8(dcbnl_skb, i, value); - + ret = nla_put_u8(skb, i, value); if (ret) { - nla_nest_cancel(dcbnl_skb, nest); - goto err; + nla_nest_cancel(skb, nest); + return ret; } } - nla_nest_end(dcbnl_skb, nest); - - nlmsg_end(dcbnl_skb, nlh); - - ret = rtnl_unicast(dcbnl_skb, &init_net, pid); - if (ret) - goto err_out; + nla_nest_end(skb, nest); return 0; -nlmsg_failure: -err: - kfree_skb(dcbnl_skb); -err_out: - return -EINVAL; } -static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getperm_hwaddr(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct sk_buff *dcbnl_skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; u8 perm_addr[MAX_ADDR_LEN]; - int ret = -EINVAL; if (!netdev->dcbnl_ops->getpermhwaddr) - return ret; - - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) - goto err_out; - - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_GPERM_HWADDR; + return -EOPNOTSUPP; netdev->dcbnl_ops->getpermhwaddr(netdev, perm_addr); - ret = nla_put(dcbnl_skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr), - perm_addr); - - nlmsg_end(dcbnl_skb, nlh); - - ret = rtnl_unicast(dcbnl_skb, &init_net, pid); - if (ret) - goto err_out; - - return 0; - -nlmsg_failure: - kfree_skb(dcbnl_skb); -err_out: - return -EINVAL; + return nla_put(skb, DCB_ATTR_PERM_HWADDR, sizeof(perm_addr), perm_addr); } -static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getcap(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct sk_buff *dcbnl_skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *data[DCB_CAP_ATTR_MAX + 1], *nest; u8 value; - int ret = -EINVAL; + int ret; int i; int getall = 0; - if (!tb[DCB_ATTR_CAP] || !netdev->dcbnl_ops->getcap) - return ret; + if (!tb[DCB_ATTR_CAP]) + return -EINVAL; + + if (!netdev->dcbnl_ops->getcap) + return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_CAP_ATTR_MAX, tb[DCB_ATTR_CAP], dcbnl_cap_nest); if (ret) - goto err_out; - - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) - goto err_out; - - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_GCAP; + return ret; - nest = nla_nest_start(dcbnl_skb, DCB_ATTR_CAP); + nest = nla_nest_start(skb, DCB_ATTR_CAP); if (!nest) - goto err; + return -EMSGSIZE; if (data[DCB_CAP_ATTR_ALL]) getall = 1; @@ -398,69 +322,41 @@ static int dcbnl_getcap(struct net_device *netdev, struct nlattr **tb, continue; if (!netdev->dcbnl_ops->getcap(netdev, i, &value)) { - ret = nla_put_u8(dcbnl_skb, i, value); - + ret = nla_put_u8(skb, i, value); if (ret) { - nla_nest_cancel(dcbnl_skb, nest); - goto err; + nla_nest_cancel(skb, nest); + return ret; } } } - nla_nest_end(dcbnl_skb, nest); - - nlmsg_end(dcbnl_skb, nlh); - - ret = rtnl_unicast(dcbnl_skb, &init_net, pid); - if (ret) - goto err_out; + nla_nest_end(skb, nest); return 0; -nlmsg_failure: -err: - kfree_skb(dcbnl_skb); -err_out: - return -EINVAL; } -static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct sk_buff *dcbnl_skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1], *nest; u8 value; - int ret = -EINVAL; + int ret; int i; int getall = 0; - if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->getnumtcs) - return ret; + if (!tb[DCB_ATTR_NUMTCS]) + return -EINVAL; + + if (!netdev->dcbnl_ops->getnumtcs) + return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], dcbnl_numtcs_nest); - if (ret) { - ret = -EINVAL; - goto err_out; - } - - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) { - ret = -EINVAL; - goto err_out; - } - - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_GNUMTCS; + if (ret) + return ret; - nest = nla_nest_start(dcbnl_skb, DCB_ATTR_NUMTCS); - if (!nest) { - ret = -EINVAL; - goto err; - } + nest = nla_nest_start(skb, DCB_ATTR_NUMTCS); + if (!nest) + return -EMSGSIZE; if (data[DCB_NUMTCS_ATTR_ALL]) getall = 1; @@ -471,53 +367,37 @@ static int dcbnl_getnumtcs(struct net_device *netdev, struct nlattr **tb, ret = netdev->dcbnl_ops->getnumtcs(netdev, i, &value); if (!ret) { - ret = nla_put_u8(dcbnl_skb, i, value); - + ret = nla_put_u8(skb, i, value); if (ret) { - nla_nest_cancel(dcbnl_skb, nest); - ret = -EINVAL; - goto err; + nla_nest_cancel(skb, nest); + return ret; } - } else { - goto err; - } - } - nla_nest_end(dcbnl_skb, nest); - - nlmsg_end(dcbnl_skb, nlh); - - ret = rtnl_unicast(dcbnl_skb, &init_net, pid); - if (ret) { - ret = -EINVAL; - goto err_out; + } else + return -EINVAL; } + nla_nest_end(skb, nest); return 0; -nlmsg_failure: -err: - kfree_skb(dcbnl_skb); -err_out: - return ret; } -static int dcbnl_setnumtcs(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_setnumtcs(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { struct nlattr *data[DCB_NUMTCS_ATTR_MAX + 1]; - int ret = -EINVAL; + int ret; u8 value; int i; - if (!tb[DCB_ATTR_NUMTCS] || !netdev->dcbnl_ops->setnumtcs) - return ret; + if (!tb[DCB_ATTR_NUMTCS]) + return -EINVAL; + + if (!netdev->dcbnl_ops->setnumtcs) + return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_NUMTCS_ATTR_MAX, tb[DCB_ATTR_NUMTCS], dcbnl_numtcs_nest); - - if (ret) { - ret = -EINVAL; - goto err; - } + if (ret) + return ret; for (i = DCB_NUMTCS_ATTR_ALL+1; i <= DCB_NUMTCS_ATTR_MAX; i++) { if (data[i] == NULL) @@ -526,84 +406,68 @@ static int dcbnl_setnumtcs(struct net_device *netdev, struct nlattr **tb, value = nla_get_u8(data[i]); ret = netdev->dcbnl_ops->setnumtcs(netdev, i, value); - if (ret) - goto operr; + break; } -operr: - ret = dcbnl_reply(!!ret, RTM_SETDCB, DCB_CMD_SNUMTCS, - DCB_ATTR_NUMTCS, pid, seq, flags); - -err: - return ret; + return nla_put_u8(skb, DCB_ATTR_NUMTCS, !!ret); } -static int dcbnl_getpfcstate(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getpfcstate(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - int ret = -EINVAL; - if (!netdev->dcbnl_ops->getpfcstate) - return ret; - - ret = dcbnl_reply(netdev->dcbnl_ops->getpfcstate(netdev), RTM_GETDCB, - DCB_CMD_PFC_GSTATE, DCB_ATTR_PFC_STATE, - pid, seq, flags); + return -EOPNOTSUPP; - return ret; + return nla_put_u8(skb, DCB_ATTR_PFC_STATE, + netdev->dcbnl_ops->getpfcstate(netdev)); } -static int dcbnl_setpfcstate(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_setpfcstate(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - int ret = -EINVAL; u8 value; - if (!tb[DCB_ATTR_PFC_STATE] || !netdev->dcbnl_ops->setpfcstate) - return ret; + if (!tb[DCB_ATTR_PFC_STATE]) + return -EINVAL; + + if (!netdev->dcbnl_ops->setpfcstate) + return -EOPNOTSUPP; value = nla_get_u8(tb[DCB_ATTR_PFC_STATE]); netdev->dcbnl_ops->setpfcstate(netdev, value); - ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SSTATE, DCB_ATTR_PFC_STATE, - pid, seq, flags); - - return ret; + return nla_put_u8(skb, DCB_ATTR_PFC_STATE, 0); } -static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getapp(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct sk_buff *dcbnl_skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *app_nest; struct nlattr *app_tb[DCB_APP_ATTR_MAX + 1]; u16 id; u8 up, idtype; - int ret = -EINVAL; + int ret; if (!tb[DCB_ATTR_APP]) - goto out; + return -EINVAL; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], dcbnl_app_nest); if (ret) - goto out; + return ret; - ret = -EINVAL; /* all must be non-null */ if ((!app_tb[DCB_APP_ATTR_IDTYPE]) || (!app_tb[DCB_APP_ATTR_ID])) - goto out; + return -EINVAL; /* either by eth type or by socket number */ idtype = nla_get_u8(app_tb[DCB_APP_ATTR_IDTYPE]); if ((idtype != DCB_APP_IDTYPE_ETHTYPE) && (idtype != DCB_APP_IDTYPE_PORTNUM)) - goto out; + return -EINVAL; id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]); @@ -617,138 +481,106 @@ static int dcbnl_getapp(struct net_device *netdev, struct nlattr **tb, up = dcb_getapp(netdev, &app); } - /* send this back */ - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) - goto out; - - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_GAPP; - - app_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_APP); + app_nest = nla_nest_start(skb, DCB_ATTR_APP); if (!app_nest) - goto out_cancel; + return -EMSGSIZE; - ret = nla_put_u8(dcbnl_skb, DCB_APP_ATTR_IDTYPE, idtype); + ret = nla_put_u8(skb, DCB_APP_ATTR_IDTYPE, idtype); if (ret) goto out_cancel; - ret = nla_put_u16(dcbnl_skb, DCB_APP_ATTR_ID, id); + ret = nla_put_u16(skb, DCB_APP_ATTR_ID, id); if (ret) goto out_cancel; - ret = nla_put_u8(dcbnl_skb, DCB_APP_ATTR_PRIORITY, up); + ret = nla_put_u8(skb, DCB_APP_ATTR_PRIORITY, up); if (ret) goto out_cancel; - nla_nest_end(dcbnl_skb, app_nest); - nlmsg_end(dcbnl_skb, nlh); - - ret = rtnl_unicast(dcbnl_skb, &init_net, pid); - if (ret) - goto nlmsg_failure; + nla_nest_end(skb, app_nest); - goto out; + return 0; out_cancel: - nla_nest_cancel(dcbnl_skb, app_nest); -nlmsg_failure: - kfree_skb(dcbnl_skb); -out: + nla_nest_cancel(skb, app_nest); return ret; } -static int dcbnl_setapp(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_setapp(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - int err, ret = -EINVAL; + int ret; u16 id; u8 up, idtype; struct nlattr *app_tb[DCB_APP_ATTR_MAX + 1]; if (!tb[DCB_ATTR_APP]) - goto out; + return -EINVAL; ret = nla_parse_nested(app_tb, DCB_APP_ATTR_MAX, tb[DCB_ATTR_APP], dcbnl_app_nest); if (ret) - goto out; + return ret; - ret = -EINVAL; /* all must be non-null */ if ((!app_tb[DCB_APP_ATTR_IDTYPE]) || (!app_tb[DCB_APP_ATTR_ID]) || (!app_tb[DCB_APP_ATTR_PRIORITY])) - goto out; + return -EINVAL; /* either by eth type or by socket number */ idtype = nla_get_u8(app_tb[DCB_APP_ATTR_IDTYPE]); if ((idtype != DCB_APP_IDTYPE_ETHTYPE) && (idtype != DCB_APP_IDTYPE_PORTNUM)) - goto out; + return -EINVAL; id = nla_get_u16(app_tb[DCB_APP_ATTR_ID]); up = nla_get_u8(app_tb[DCB_APP_ATTR_PRIORITY]); if (netdev->dcbnl_ops->setapp) { - err = netdev->dcbnl_ops->setapp(netdev, idtype, id, up); + ret = netdev->dcbnl_ops->setapp(netdev, idtype, id, up); } else { struct dcb_app app; app.selector = idtype; app.protocol = id; app.priority = up; - err = dcb_setapp(netdev, &app); + ret = dcb_setapp(netdev, &app); } - ret = dcbnl_reply(err, RTM_SETDCB, DCB_CMD_SAPP, DCB_ATTR_APP, - pid, seq, flags); + ret = nla_put_u8(skb, DCB_ATTR_APP, ret); dcbnl_cee_notify(netdev, RTM_SETDCB, DCB_CMD_SAPP, seq, 0); -out: + return ret; } -static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags, int dir) +static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, + struct nlattr **tb, struct sk_buff *skb, int dir) { - struct sk_buff *dcbnl_skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *pg_nest, *param_nest, *data; struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1]; struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1]; u8 prio, pgid, tc_pct, up_map; - int ret = -EINVAL; + int ret; int getall = 0; int i; - if (!tb[DCB_ATTR_PG_CFG] || - !netdev->dcbnl_ops->getpgtccfgtx || + if (!tb[DCB_ATTR_PG_CFG]) + return -EINVAL; + + if (!netdev->dcbnl_ops->getpgtccfgtx || !netdev->dcbnl_ops->getpgtccfgrx || !netdev->dcbnl_ops->getpgbwgcfgtx || !netdev->dcbnl_ops->getpgbwgcfgrx) - return ret; + return -EOPNOTSUPP; ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); - if (ret) - goto err_out; - - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) - goto err_out; - - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = (dir) ? DCB_CMD_PGRX_GCFG : DCB_CMD_PGTX_GCFG; + return ret; - pg_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_PG_CFG); + pg_nest = nla_nest_start(skb, DCB_ATTR_PG_CFG); if (!pg_nest) - goto err; + return -EMSGSIZE; if (pg_tb[DCB_PG_ATTR_TC_ALL]) getall = 1; @@ -766,7 +598,7 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, if (ret) goto err_pg; - param_nest = nla_nest_start(dcbnl_skb, i); + param_nest = nla_nest_start(skb, i); if (!param_nest) goto err_pg; @@ -789,33 +621,33 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, if (param_tb[DCB_TC_ATTR_PARAM_PGID] || param_tb[DCB_TC_ATTR_PARAM_ALL]) { - ret = nla_put_u8(dcbnl_skb, + ret = nla_put_u8(skb, DCB_TC_ATTR_PARAM_PGID, pgid); if (ret) goto err_param; } if (param_tb[DCB_TC_ATTR_PARAM_UP_MAPPING] || param_tb[DCB_TC_ATTR_PARAM_ALL]) { - ret = nla_put_u8(dcbnl_skb, + ret = nla_put_u8(skb, DCB_TC_ATTR_PARAM_UP_MAPPING, up_map); if (ret) goto err_param; } if (param_tb[DCB_TC_ATTR_PARAM_STRICT_PRIO] || param_tb[DCB_TC_ATTR_PARAM_ALL]) { - ret = nla_put_u8(dcbnl_skb, + ret = nla_put_u8(skb, DCB_TC_ATTR_PARAM_STRICT_PRIO, prio); if (ret) goto err_param; } if (param_tb[DCB_TC_ATTR_PARAM_BW_PCT] || param_tb[DCB_TC_ATTR_PARAM_ALL]) { - ret = nla_put_u8(dcbnl_skb, DCB_TC_ATTR_PARAM_BW_PCT, + ret = nla_put_u8(skb, DCB_TC_ATTR_PARAM_BW_PCT, tc_pct); if (ret) goto err_param; } - nla_nest_end(dcbnl_skb, param_nest); + nla_nest_end(skb, param_nest); } if (pg_tb[DCB_PG_ATTR_BW_ID_ALL]) @@ -838,80 +670,71 @@ static int __dcbnl_pg_getcfg(struct net_device *netdev, struct nlattr **tb, netdev->dcbnl_ops->getpgbwgcfgtx(netdev, i - DCB_PG_ATTR_BW_ID_0, &tc_pct); } - ret = nla_put_u8(dcbnl_skb, i, tc_pct); - + ret = nla_put_u8(skb, i, tc_pct); if (ret) goto err_pg; } - nla_nest_end(dcbnl_skb, pg_nest); - - nlmsg_end(dcbnl_skb, nlh); - - ret = rtnl_unicast(dcbnl_skb, &init_net, pid); - if (ret) - goto err_out; + nla_nest_end(skb, pg_nest); return 0; err_param: - nla_nest_cancel(dcbnl_skb, param_nest); + nla_nest_cancel(skb, param_nest); err_pg: - nla_nest_cancel(dcbnl_skb, pg_nest); -nlmsg_failure: -err: - kfree_skb(dcbnl_skb); -err_out: - ret = -EINVAL; - return ret; + nla_nest_cancel(skb, pg_nest); + + return -EMSGSIZE; } -static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_pgtx_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 0); + return __dcbnl_pg_getcfg(netdev, nlh, tb, skb, 0); } -static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_pgrx_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - return __dcbnl_pg_getcfg(netdev, tb, pid, seq, flags, 1); + return __dcbnl_pg_getcfg(netdev, nlh, tb, skb, 1); } -static int dcbnl_setstate(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_setstate(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - int ret = -EINVAL; u8 value; - if (!tb[DCB_ATTR_STATE] || !netdev->dcbnl_ops->setstate) - return ret; + if (!tb[DCB_ATTR_STATE]) + return -EINVAL; - value = nla_get_u8(tb[DCB_ATTR_STATE]); + if (!netdev->dcbnl_ops->setstate) + return -EOPNOTSUPP; - ret = dcbnl_reply(netdev->dcbnl_ops->setstate(netdev, value), - RTM_SETDCB, DCB_CMD_SSTATE, DCB_ATTR_STATE, - pid, seq, flags); + value = nla_get_u8(tb[DCB_ATTR_STATE]); - return ret; + return nla_put_u8(skb, DCB_ATTR_STATE, + netdev->dcbnl_ops->setstate(netdev, value)); } -static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_setpfccfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { struct nlattr *data[DCB_PFC_UP_ATTR_MAX + 1]; int i; - int ret = -EINVAL; + int ret; u8 value; - if (!tb[DCB_ATTR_PFC_CFG] || !netdev->dcbnl_ops->setpfccfg) - return ret; + if (!tb[DCB_ATTR_PFC_CFG]) + return -EINVAL; + + if (!netdev->dcbnl_ops->setpfccfg) + return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_PFC_UP_ATTR_MAX, tb[DCB_ATTR_PFC_CFG], dcbnl_pfc_up_nest); if (ret) - goto err; + return ret; for (i = DCB_PFC_UP_ATTR_0; i <= DCB_PFC_UP_ATTR_7; i++) { if (data[i] == NULL) @@ -921,50 +744,53 @@ static int dcbnl_setpfccfg(struct net_device *netdev, struct nlattr **tb, data[i]->nla_type - DCB_PFC_UP_ATTR_0, value); } - ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_PFC_SCFG, DCB_ATTR_PFC_CFG, - pid, seq, flags); -err: - return ret; + return nla_put_u8(skb, DCB_ATTR_PFC_CFG, 0); } -static int dcbnl_setall(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_setall(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - int ret = -EINVAL; + int ret; - if (!tb[DCB_ATTR_SET_ALL] || !netdev->dcbnl_ops->setall) - return ret; + if (!tb[DCB_ATTR_SET_ALL]) + return -EINVAL; + + if (!netdev->dcbnl_ops->setall) + return -EOPNOTSUPP; - ret = dcbnl_reply(netdev->dcbnl_ops->setall(netdev), RTM_SETDCB, - DCB_CMD_SET_ALL, DCB_ATTR_SET_ALL, pid, seq, flags); + ret = nla_put_u8(skb, DCB_ATTR_SET_ALL, + netdev->dcbnl_ops->setall(netdev)); dcbnl_cee_notify(netdev, RTM_SETDCB, DCB_CMD_SET_ALL, seq, 0); return ret; } -static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags, int dir) +static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb, + int dir) { struct nlattr *pg_tb[DCB_PG_ATTR_MAX + 1]; struct nlattr *param_tb[DCB_TC_ATTR_PARAM_MAX + 1]; - int ret = -EINVAL; + int ret; int i; u8 pgid; u8 up_map; u8 prio; u8 tc_pct; - if (!tb[DCB_ATTR_PG_CFG] || - !netdev->dcbnl_ops->setpgtccfgtx || + if (!tb[DCB_ATTR_PG_CFG]) + return -EINVAL; + + if (!netdev->dcbnl_ops->setpgtccfgtx || !netdev->dcbnl_ops->setpgtccfgrx || !netdev->dcbnl_ops->setpgbwgcfgtx || !netdev->dcbnl_ops->setpgbwgcfgrx) - return ret; + return -EOPNOTSUPP; ret = nla_parse_nested(pg_tb, DCB_PG_ATTR_MAX, tb[DCB_ATTR_PG_CFG], dcbnl_pg_nest); if (ret) - goto err; + return ret; for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { if (!pg_tb[i]) @@ -973,7 +799,7 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb, ret = nla_parse_nested(param_tb, DCB_TC_ATTR_PARAM_MAX, pg_tb[i], dcbnl_tc_param_nest); if (ret) - goto err; + return ret; pgid = DCB_ATTR_VALUE_UNDEFINED; prio = DCB_ATTR_VALUE_UNDEFINED; @@ -1026,63 +852,48 @@ static int __dcbnl_pg_setcfg(struct net_device *netdev, struct nlattr **tb, } } - ret = dcbnl_reply(0, RTM_SETDCB, - (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG), - DCB_ATTR_PG_CFG, pid, seq, flags); - -err: - return ret; + return nla_put_u8(skb, + (dir ? DCB_CMD_PGRX_SCFG : DCB_CMD_PGTX_SCFG), 0); } -static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_pgtx_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 0); + return __dcbnl_pg_setcfg(netdev, nlh, seq, tb, skb, 0); } -static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_pgrx_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - return __dcbnl_pg_setcfg(netdev, tb, pid, seq, flags, 1); + return __dcbnl_pg_setcfg(netdev, nlh, seq, tb, skb, 1); } -static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct sk_buff *dcbnl_skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *bcn_nest; struct nlattr *bcn_tb[DCB_BCN_ATTR_MAX + 1]; u8 value_byte; u32 value_integer; - int ret = -EINVAL; + int ret; bool getall = false; int i; - if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->getbcnrp || + if (!tb[DCB_ATTR_BCN]) + return -EINVAL; + + if (!netdev->dcbnl_ops->getbcnrp || !netdev->dcbnl_ops->getbcncfg) - return ret; + return -EOPNOTSUPP; ret = nla_parse_nested(bcn_tb, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], dcbnl_bcn_nest); - if (ret) - goto err_out; - - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) - goto err_out; - - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_BCN_GCFG; + return ret; - bcn_nest = nla_nest_start(dcbnl_skb, DCB_ATTR_BCN); + bcn_nest = nla_nest_start(skb, DCB_ATTR_BCN); if (!bcn_nest) - goto err; + return -EMSGSIZE; if (bcn_tb[DCB_BCN_ATTR_ALL]) getall = true; @@ -1093,7 +904,7 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb, netdev->dcbnl_ops->getbcnrp(netdev, i - DCB_BCN_ATTR_RP_0, &value_byte); - ret = nla_put_u8(dcbnl_skb, i, value_byte); + ret = nla_put_u8(skb, i, value_byte); if (ret) goto err_bcn; } @@ -1104,49 +915,41 @@ static int dcbnl_bcn_getcfg(struct net_device *netdev, struct nlattr **tb, netdev->dcbnl_ops->getbcncfg(netdev, i, &value_integer); - ret = nla_put_u32(dcbnl_skb, i, value_integer); + ret = nla_put_u32(skb, i, value_integer); if (ret) goto err_bcn; } - nla_nest_end(dcbnl_skb, bcn_nest); - - nlmsg_end(dcbnl_skb, nlh); - - ret = rtnl_unicast(dcbnl_skb, &init_net, pid); - if (ret) - goto err_out; + nla_nest_end(skb, bcn_nest); return 0; err_bcn: - nla_nest_cancel(dcbnl_skb, bcn_nest); -nlmsg_failure: -err: - kfree_skb(dcbnl_skb); -err_out: - ret = -EINVAL; + nla_nest_cancel(skb, bcn_nest); return ret; } -static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { struct nlattr *data[DCB_BCN_ATTR_MAX + 1]; int i; - int ret = -EINVAL; + int ret; u8 value_byte; u32 value_int; - if (!tb[DCB_ATTR_BCN] || !netdev->dcbnl_ops->setbcncfg || + if (!tb[DCB_ATTR_BCN]) + return -EINVAL; + + if (!netdev->dcbnl_ops->setbcncfg || !netdev->dcbnl_ops->setbcnrp) - return ret; + return -EOPNOTSUPP; ret = nla_parse_nested(data, DCB_BCN_ATTR_MAX, tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest); if (ret) - goto err; + return ret; for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) { if (data[i] == NULL) @@ -1164,10 +967,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlattr **tb, i, value_int); } - ret = dcbnl_reply(0, RTM_SETDCB, DCB_CMD_BCN_SCFG, DCB_ATTR_BCN, - pid, seq, flags); -err: - return ret; + return nla_put_u8(skb, DCB_ATTR_BCN, 0); } static int dcbnl_build_peer_app(struct net_device *netdev, struct sk_buff* skb, @@ -1233,20 +1033,21 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) struct dcb_app_type *itr; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; int dcbx; - int err = -EMSGSIZE; + int err; if (nla_put_string(skb, DCB_ATTR_IFNAME, netdev->name)) - goto nla_put_failure; + return -EMSGSIZE; + ieee = nla_nest_start(skb, DCB_ATTR_IEEE); if (!ieee) - goto nla_put_failure; + return -EMSGSIZE; if (ops->ieee_getets) { struct ieee_ets ets; err = ops->ieee_getets(netdev, &ets); if (!err && nla_put(skb, DCB_ATTR_IEEE_ETS, sizeof(ets), &ets)) - goto nla_put_failure; + return -EMSGSIZE; } if (ops->ieee_getmaxrate) { @@ -1256,7 +1057,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) err = nla_put(skb, DCB_ATTR_IEEE_MAXRATE, sizeof(maxrate), &maxrate); if (err) - goto nla_put_failure; + return -EMSGSIZE; } } @@ -1265,12 +1066,12 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) err = ops->ieee_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_IEEE_PFC, sizeof(pfc), &pfc)) - goto nla_put_failure; + return -EMSGSIZE; } app = nla_nest_start(skb, DCB_ATTR_IEEE_APP_TABLE); if (!app) - goto nla_put_failure; + return -EMSGSIZE; spin_lock(&dcb_lock); list_for_each_entry(itr, &dcb_app_list, list) { @@ -1279,7 +1080,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) &itr->app); if (err) { spin_unlock(&dcb_lock); - goto nla_put_failure; + return -EMSGSIZE; } } } @@ -1298,7 +1099,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) err = ops->ieee_peer_getets(netdev, &ets); if (!err && nla_put(skb, DCB_ATTR_IEEE_PEER_ETS, sizeof(ets), &ets)) - goto nla_put_failure; + return -EMSGSIZE; } if (ops->ieee_peer_getpfc) { @@ -1306,7 +1107,7 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) err = ops->ieee_peer_getpfc(netdev, &pfc); if (!err && nla_put(skb, DCB_ATTR_IEEE_PEER_PFC, sizeof(pfc), &pfc)) - goto nla_put_failure; + return -EMSGSIZE; } if (ops->peer_getappinfo && ops->peer_getapptable) { @@ -1315,20 +1116,17 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev) DCB_ATTR_IEEE_APP_UNSPEC, DCB_ATTR_IEEE_APP); if (err) - goto nla_put_failure; + return -EMSGSIZE; } nla_nest_end(skb, ieee); if (dcbx >= 0) { err = nla_put_u8(skb, DCB_ATTR_DCBX, dcbx); if (err) - goto nla_put_failure; + return -EMSGSIZE; } return 0; - -nla_put_failure: - return err; } static int dcbnl_cee_pg_fill(struct sk_buff *skb, struct net_device *dev, @@ -1340,13 +1138,13 @@ static int dcbnl_cee_pg_fill(struct sk_buff *skb, struct net_device *dev, struct nlattr *pg = nla_nest_start(skb, i); if (!pg) - goto nla_put_failure; + return -EMSGSIZE; for (i = DCB_PG_ATTR_TC_0; i <= DCB_PG_ATTR_TC_7; i++) { struct nlattr *tc_nest = nla_nest_start(skb, i); if (!tc_nest) - goto nla_put_failure; + return -EMSGSIZE; pgid = DCB_ATTR_VALUE_UNDEFINED; prio = DCB_ATTR_VALUE_UNDEFINED; @@ -1364,7 +1162,7 @@ static int dcbnl_cee_pg_fill(struct sk_buff *skb, struct net_device *dev, nla_put_u8(skb, DCB_TC_ATTR_PARAM_UP_MAPPING, up_map) || nla_put_u8(skb, DCB_TC_ATTR_PARAM_STRICT_PRIO, prio) || nla_put_u8(skb, DCB_TC_ATTR_PARAM_BW_PCT, tc_pct)) - goto nla_put_failure; + return -EMSGSIZE; nla_nest_end(skb, tc_nest); } @@ -1378,13 +1176,10 @@ static int dcbnl_cee_pg_fill(struct sk_buff *skb, struct net_device *dev, ops->getpgbwgcfgtx(dev, i - DCB_PG_ATTR_BW_ID_0, &tc_pct); if (nla_put_u8(skb, i, tc_pct)) - goto nla_put_failure; + return -EMSGSIZE; } nla_nest_end(skb, pg); return 0; - -nla_put_failure: - return -EMSGSIZE; } static int dcbnl_cee_fill(struct sk_buff *skb, struct net_device *netdev) @@ -1531,27 +1326,16 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, struct net *net = dev_net(dev); struct sk_buff *skb; struct nlmsghdr *nlh; - struct dcbmsg *dcb; const struct dcbnl_rtnl_ops *ops = dev->dcbnl_ops; int err; if (!ops) return -EOPNOTSUPP; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + skb = dcbnl_newmsg(event, cmd, pid, seq, 0, &nlh); if (!skb) return -ENOBUFS; - nlh = nlmsg_put(skb, pid, 0, event, sizeof(*dcb), 0); - if (nlh == NULL) { - nlmsg_free(skb); - return -EMSGSIZE; - } - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = cmd; - if (dcbx_ver == DCB_CAP_DCBX_VER_IEEE) err = dcbnl_ieee_fill(skb, dev); else @@ -1559,8 +1343,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, if (err < 0) { /* Report error to broadcast listeners */ - nlmsg_cancel(skb, nlh); - kfree_skb(skb); + nlmsg_free(skb); rtnl_set_sk_err(net, RTNLGRP_DCB, err); } else { /* End nlmsg and notify broadcast listeners */ @@ -1590,15 +1373,15 @@ EXPORT_SYMBOL(dcbnl_cee_notify); * No attempt is made to reconcile the case where only part of the * cmd can be completed. */ -static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; - int err = -EOPNOTSUPP; + int err; if (!ops) - return err; + return -EOPNOTSUPP; if (!tb[DCB_ATTR_IEEE]) return -EINVAL; @@ -1649,58 +1432,28 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlattr **tb, } err: - dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_SET, DCB_ATTR_IEEE, - pid, seq, flags); + err = nla_put_u8(skb, DCB_ATTR_IEEE, err); dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_SET, seq, 0); return err; } -static int dcbnl_ieee_get(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_ieee_get(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct net *net = dev_net(netdev); - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; - int err; if (!ops) return -EOPNOTSUPP; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - nlh = nlmsg_put(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - if (nlh == NULL) { - nlmsg_free(skb); - return -EMSGSIZE; - } - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_IEEE_GET; - - err = dcbnl_ieee_fill(skb, netdev); - - if (err < 0) { - nlmsg_cancel(skb, nlh); - kfree_skb(skb); - } else { - nlmsg_end(skb, nlh); - err = rtnl_unicast(skb, net, pid); - } - - return err; + return dcbnl_ieee_fill(skb, netdev); } -static int dcbnl_ieee_del(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; - int err = -EOPNOTSUPP; + int err; if (!ops) return -EOPNOTSUPP; @@ -1733,32 +1486,26 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlattr **tb, } err: - dcbnl_reply(err, RTM_SETDCB, DCB_CMD_IEEE_DEL, DCB_ATTR_IEEE, - pid, seq, flags); + err = nla_put_u8(skb, DCB_ATTR_IEEE, err); dcbnl_ieee_notify(netdev, RTM_SETDCB, DCB_CMD_IEEE_DEL, seq, 0); return err; } /* DCBX configuration */ -static int dcbnl_getdcbx(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getdcbx(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - int ret; - if (!netdev->dcbnl_ops->getdcbx) return -EOPNOTSUPP; - ret = dcbnl_reply(netdev->dcbnl_ops->getdcbx(netdev), RTM_GETDCB, - DCB_CMD_GDCBX, DCB_ATTR_DCBX, pid, seq, flags); - - return ret; + return nla_put_u8(skb, DCB_ATTR_DCBX, + netdev->dcbnl_ops->getdcbx(netdev)); } -static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_setdcbx(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - int ret; u8 value; if (!netdev->dcbnl_ops->setdcbx) @@ -1769,19 +1516,13 @@ static int dcbnl_setdcbx(struct net_device *netdev, struct nlattr **tb, value = nla_get_u8(tb[DCB_ATTR_DCBX]); - ret = dcbnl_reply(netdev->dcbnl_ops->setdcbx(netdev, value), - RTM_SETDCB, DCB_CMD_SDCBX, DCB_ATTR_DCBX, - pid, seq, flags); - - return ret; + return nla_put_u8(skb, DCB_ATTR_DCBX, + netdev->dcbnl_ops->setdcbx(netdev, value)); } -static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct sk_buff *dcbnl_skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1], *nest; u8 value; int ret, i; @@ -1796,25 +1537,11 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb, ret = nla_parse_nested(data, DCB_FEATCFG_ATTR_MAX, tb[DCB_ATTR_FEATCFG], dcbnl_featcfg_nest); if (ret) - goto err_out; - - dcbnl_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!dcbnl_skb) { - ret = -ENOBUFS; - goto err_out; - } - - nlh = NLMSG_NEW(dcbnl_skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_GFEATCFG; + return ret; - nest = nla_nest_start(dcbnl_skb, DCB_ATTR_FEATCFG); - if (!nest) { - ret = -EMSGSIZE; - goto nla_put_failure; - } + nest = nla_nest_start(skb, DCB_ATTR_FEATCFG); + if (!nest) + return -EMSGSIZE; if (data[DCB_FEATCFG_ATTR_ALL]) getall = 1; @@ -1825,28 +1552,21 @@ static int dcbnl_getfeatcfg(struct net_device *netdev, struct nlattr **tb, ret = netdev->dcbnl_ops->getfeatcfg(netdev, i, &value); if (!ret) - ret = nla_put_u8(dcbnl_skb, i, value); + ret = nla_put_u8(skb, i, value); if (ret) { - nla_nest_cancel(dcbnl_skb, nest); + nla_nest_cancel(skb, nest); goto nla_put_failure; } } - nla_nest_end(dcbnl_skb, nest); + nla_nest_end(skb, nest); - nlmsg_end(dcbnl_skb, nlh); - - return rtnl_unicast(dcbnl_skb, &init_net, pid); nla_put_failure: - nlmsg_cancel(dcbnl_skb, nlh); -nlmsg_failure: - kfree_skb(dcbnl_skb); -err_out: return ret; } -static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { struct nlattr *data[DCB_FEATCFG_ATTR_MAX + 1]; int ret, i; @@ -1876,60 +1596,73 @@ static int dcbnl_setfeatcfg(struct net_device *netdev, struct nlattr **tb, goto err; } err: - dcbnl_reply(ret, RTM_SETDCB, DCB_CMD_SFEATCFG, DCB_ATTR_FEATCFG, - pid, seq, flags); + ret = nla_put_u8(skb, DCB_ATTR_FEATCFG, ret); return ret; } /* Handle CEE DCBX GET commands. */ -static int dcbnl_cee_get(struct net_device *netdev, struct nlattr **tb, - u32 pid, u32 seq, u16 flags) +static int dcbnl_cee_get(struct net_device *netdev, struct nlmsghdr *nlh, + u32 seq, struct nlattr **tb, struct sk_buff *skb) { - struct net *net = dev_net(netdev); - struct sk_buff *skb; - struct nlmsghdr *nlh; - struct dcbmsg *dcb; const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; - int err; if (!ops) return -EOPNOTSUPP; - skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - nlh = nlmsg_put(skb, pid, seq, RTM_GETDCB, sizeof(*dcb), flags); - if (nlh == NULL) { - nlmsg_free(skb); - return -EMSGSIZE; - } + return dcbnl_cee_fill(skb, netdev); +} - dcb = NLMSG_DATA(nlh); - dcb->dcb_family = AF_UNSPEC; - dcb->cmd = DCB_CMD_CEE_GET; +struct reply_func { + /* reply netlink message type */ + int type; - err = dcbnl_cee_fill(skb, netdev); + /* function to fill message contents */ + int (*cb)(struct net_device *, struct nlmsghdr *, u32, + struct nlattr **, struct sk_buff *); +}; - if (err < 0) { - nlmsg_cancel(skb, nlh); - nlmsg_free(skb); - } else { - nlmsg_end(skb, nlh); - err = rtnl_unicast(skb, net, pid); - } - return err; -} +static const struct reply_func reply_funcs[DCB_CMD_MAX+1] = { + [DCB_CMD_GSTATE] = { RTM_GETDCB, dcbnl_getstate }, + [DCB_CMD_SSTATE] = { RTM_SETDCB, dcbnl_setstate }, + [DCB_CMD_PFC_GCFG] = { RTM_GETDCB, dcbnl_getpfccfg }, + [DCB_CMD_PFC_SCFG] = { RTM_SETDCB, dcbnl_setpfccfg }, + [DCB_CMD_GPERM_HWADDR] = { RTM_GETDCB, dcbnl_getperm_hwaddr }, + [DCB_CMD_GCAP] = { RTM_GETDCB, dcbnl_getcap }, + [DCB_CMD_GNUMTCS] = { RTM_GETDCB, dcbnl_getnumtcs }, + [DCB_CMD_SNUMTCS] = { RTM_SETDCB, dcbnl_setnumtcs }, + [DCB_CMD_PFC_GSTATE] = { RTM_GETDCB, dcbnl_getpfcstate }, + [DCB_CMD_PFC_SSTATE] = { RTM_SETDCB, dcbnl_setpfcstate }, + [DCB_CMD_GAPP] = { RTM_GETDCB, dcbnl_getapp }, + [DCB_CMD_SAPP] = { RTM_SETDCB, dcbnl_setapp }, + [DCB_CMD_PGTX_GCFG] = { RTM_GETDCB, dcbnl_pgtx_getcfg }, + [DCB_CMD_PGTX_SCFG] = { RTM_SETDCB, dcbnl_pgtx_setcfg }, + [DCB_CMD_PGRX_GCFG] = { RTM_GETDCB, dcbnl_pgrx_getcfg }, + [DCB_CMD_PGRX_SCFG] = { RTM_SETDCB, dcbnl_pgrx_setcfg }, + [DCB_CMD_SET_ALL] = { RTM_SETDCB, dcbnl_setall }, + [DCB_CMD_BCN_GCFG] = { RTM_GETDCB, dcbnl_bcn_getcfg }, + [DCB_CMD_BCN_SCFG] = { RTM_SETDCB, dcbnl_bcn_setcfg }, + [DCB_CMD_IEEE_GET] = { RTM_GETDCB, dcbnl_ieee_get }, + [DCB_CMD_IEEE_SET] = { RTM_SETDCB, dcbnl_ieee_set }, + [DCB_CMD_IEEE_DEL] = { RTM_SETDCB, dcbnl_ieee_del }, + [DCB_CMD_GDCBX] = { RTM_GETDCB, dcbnl_getdcbx }, + [DCB_CMD_SDCBX] = { RTM_SETDCB, dcbnl_setdcbx }, + [DCB_CMD_GFEATCFG] = { RTM_GETDCB, dcbnl_getfeatcfg }, + [DCB_CMD_SFEATCFG] = { RTM_SETDCB, dcbnl_setfeatcfg }, + [DCB_CMD_CEE_GET] = { RTM_GETDCB, dcbnl_cee_get }, +}; static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct net_device *netdev; - struct dcbmsg *dcb = (struct dcbmsg *)NLMSG_DATA(nlh); + struct dcbmsg *dcb = nlmsg_data(nlh); struct nlattr *tb[DCB_ATTR_MAX + 1]; u32 pid = skb ? NETLINK_CB(skb).pid : 0; int ret = -EINVAL; + struct sk_buff *reply_skb; + struct nlmsghdr *reply_nlh = NULL; + const struct reply_func *fn; if (!net_eq(net, &init_net)) return -EINVAL; @@ -1939,136 +1672,78 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (ret < 0) return ret; + if (dcb->cmd > DCB_CMD_MAX) + return -EINVAL; + + /* check if a reply function has been defined for the command */ + fn = &reply_funcs[dcb->cmd]; + if (!fn->cb) + return -EOPNOTSUPP; + if (!tb[DCB_ATTR_IFNAME]) return -EINVAL; netdev = dev_get_by_name(&init_net, nla_data(tb[DCB_ATTR_IFNAME])); if (!netdev) - return -EINVAL; + return -ENODEV; - if (!netdev->dcbnl_ops) - goto errout; - - switch (dcb->cmd) { - case DCB_CMD_GSTATE: - ret = dcbnl_getstate(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_PFC_GCFG: - ret = dcbnl_getpfccfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_GPERM_HWADDR: - ret = dcbnl_getperm_hwaddr(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_PGTX_GCFG: - ret = dcbnl_pgtx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_PGRX_GCFG: - ret = dcbnl_pgrx_getcfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_BCN_GCFG: - ret = dcbnl_bcn_getcfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_SSTATE: - ret = dcbnl_setstate(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_PFC_SCFG: - ret = dcbnl_setpfccfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + if (!netdev->dcbnl_ops) { + ret = -EOPNOTSUPP; goto out; + } - case DCB_CMD_SET_ALL: - ret = dcbnl_setall(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_PGTX_SCFG: - ret = dcbnl_pgtx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_PGRX_SCFG: - ret = dcbnl_pgrx_setcfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_GCAP: - ret = dcbnl_getcap(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_GNUMTCS: - ret = dcbnl_getnumtcs(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_SNUMTCS: - ret = dcbnl_setnumtcs(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_PFC_GSTATE: - ret = dcbnl_getpfcstate(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_PFC_SSTATE: - ret = dcbnl_setpfcstate(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_BCN_SCFG: - ret = dcbnl_bcn_setcfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_GAPP: - ret = dcbnl_getapp(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_SAPP: - ret = dcbnl_setapp(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_IEEE_SET: - ret = dcbnl_ieee_set(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_IEEE_GET: - ret = dcbnl_ieee_get(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_IEEE_DEL: - ret = dcbnl_ieee_del(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_GDCBX: - ret = dcbnl_getdcbx(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_SDCBX: - ret = dcbnl_setdcbx(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_GFEATCFG: - ret = dcbnl_getfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); - goto out; - case DCB_CMD_SFEATCFG: - ret = dcbnl_setfeatcfg(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, pid, nlh->nlmsg_seq, + nlh->nlmsg_flags, &reply_nlh); + if (!reply_skb) { + ret = -ENOBUFS; goto out; - case DCB_CMD_CEE_GET: - ret = dcbnl_cee_get(netdev, tb, pid, nlh->nlmsg_seq, - nlh->nlmsg_flags); + } + + ret = fn->cb(netdev, nlh, nlh->nlmsg_seq, tb, reply_skb); + if (ret < 0) { + nlmsg_free(reply_skb); goto out; - default: - goto errout; } -errout: - ret = -EINVAL; + + nlmsg_end(reply_skb, reply_nlh); + + ret = rtnl_unicast(reply_skb, &init_net, pid); out: dev_put(netdev); return ret; } +static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app, + int ifindex, int prio) +{ + struct dcb_app_type *itr; + + list_for_each_entry(itr, &dcb_app_list, list) { + if (itr->app.selector == app->selector && + itr->app.protocol == app->protocol && + itr->ifindex == ifindex && + (!prio || itr->app.priority == prio)) + return itr; + } + + return NULL; +} + +static int dcb_app_add(const struct dcb_app *app, int ifindex) +{ + struct dcb_app_type *entry; + + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return -ENOMEM; + + memcpy(&entry->app, app, sizeof(*app)); + entry->ifindex = ifindex; + list_add(&entry->list, &dcb_app_list); + + return 0; +} + /** * dcb_getapp - retrieve the DCBX application user priority * @@ -2082,14 +1757,8 @@ u8 dcb_getapp(struct net_device *dev, struct dcb_app *app) u8 prio = 0; spin_lock(&dcb_lock); - list_for_each_entry(itr, &dcb_app_list, list) { - if (itr->app.selector == app->selector && - itr->app.protocol == app->protocol && - itr->ifindex == dev->ifindex) { - prio = itr->app.priority; - break; - } - } + if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) + prio = itr->app.priority; spin_unlock(&dcb_lock); return prio; @@ -2107,6 +1776,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) { struct dcb_app_type *itr; struct dcb_app_type event; + int err = 0; event.ifindex = dev->ifindex; memcpy(&event.app, new, sizeof(event.app)); @@ -2115,36 +1785,23 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new) spin_lock(&dcb_lock); /* Search for existing match and replace */ - list_for_each_entry(itr, &dcb_app_list, list) { - if (itr->app.selector == new->selector && - itr->app.protocol == new->protocol && - itr->ifindex == dev->ifindex) { - if (new->priority) - itr->app.priority = new->priority; - else { - list_del(&itr->list); - kfree(itr); - } - goto out; + if ((itr = dcb_app_lookup(new, dev->ifindex, 0))) { + if (new->priority) + itr->app.priority = new->priority; + else { + list_del(&itr->list); + kfree(itr); } + goto out; } /* App type does not exist add new application type */ - if (new->priority) { - struct dcb_app_type *entry; - entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC); - if (!entry) { - spin_unlock(&dcb_lock); - return -ENOMEM; - } - - memcpy(&entry->app, new, sizeof(*new)); - entry->ifindex = dev->ifindex; - list_add(&entry->list, &dcb_app_list); - } + if (new->priority) + err = dcb_app_add(new, dev->ifindex); out: spin_unlock(&dcb_lock); - call_dcbevent_notifiers(DCB_APP_EVENT, &event); - return 0; + if (!err) + call_dcbevent_notifiers(DCB_APP_EVENT, &event); + return err; } EXPORT_SYMBOL(dcb_setapp); @@ -2161,13 +1818,8 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app) u8 prio = 0; spin_lock(&dcb_lock); - list_for_each_entry(itr, &dcb_app_list, list) { - if (itr->app.selector == app->selector && - itr->app.protocol == app->protocol && - itr->ifindex == dev->ifindex) { - prio |= 1 << itr->app.priority; - } - } + if ((itr = dcb_app_lookup(app, dev->ifindex, 0))) + prio |= 1 << itr->app.priority; spin_unlock(&dcb_lock); return prio; @@ -2183,7 +1835,6 @@ EXPORT_SYMBOL(dcb_ieee_getapp_mask); */ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) { - struct dcb_app_type *itr, *entry; struct dcb_app_type event; int err = 0; @@ -2194,26 +1845,12 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new) spin_lock(&dcb_lock); /* Search for existing match and abort if found */ - list_for_each_entry(itr, &dcb_app_list, list) { - if (itr->app.selector == new->selector && - itr->app.protocol == new->protocol && - itr->app.priority == new->priority && - itr->ifindex == dev->ifindex) { - err = -EEXIST; - goto out; - } - } - - /* App entry does not exist add new entry */ - entry = kmalloc(sizeof(struct dcb_app_type), GFP_ATOMIC); - if (!entry) { - err = -ENOMEM; + if (dcb_app_lookup(new, dev->ifindex, new->priority)) { + err = -EEXIST; goto out; } - memcpy(&entry->app, new, sizeof(*new)); - entry->ifindex = dev->ifindex; - list_add(&entry->list, &dcb_app_list); + err = dcb_app_add(new, dev->ifindex); out: spin_unlock(&dcb_lock); if (!err) @@ -2240,19 +1877,12 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del) spin_lock(&dcb_lock); /* Search for existing match and remove it. */ - list_for_each_entry(itr, &dcb_app_list, list) { - if (itr->app.selector == del->selector && - itr->app.protocol == del->protocol && - itr->app.priority == del->priority && - itr->ifindex == dev->ifindex) { - list_del(&itr->list); - kfree(itr); - err = 0; - goto out; - } + if ((itr = dcb_app_lookup(del, dev->ifindex, del->priority))) { + list_del(&itr->list); + kfree(itr); + err = 0; } -out: spin_unlock(&dcb_lock); if (!err) call_dcbevent_notifiers(DCB_APP_EVENT, &event); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index fa9512d..9991be0 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -165,6 +165,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); + dst->ops->update_pmtu(dst, ntohl(info)); + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { dccp_sync_mss(sk, dst_mtu(dst)); } /* else let the usual retransmit timer handle it */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index e8f2617..916d5ec 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -408,6 +408,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) return; pr_debug("pmtu discovery on SA AH/%08x/%08x\n", ntohl(ah->spi), ntohl(iph->daddr)); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index cb982a6..7b95b49 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -494,6 +494,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", ntohl(esph->spi), ntohl(iph->daddr)); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e5b7182..415f823 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -779,9 +779,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg) int type = nla_type(nla); if (type) { + u32 val; + if (type > RTAX_MAX) goto err_inval; - fi->fib_metrics[type - 1] = nla_get_u32(nla); + val = nla_get_u32(nla); + if (type == RTAX_ADVMSS && val > 65535 - 40) + val = 65535 - 40; + fi->fib_metrics[type - 1] = val; } } } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f49047b..594cec3 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -516,9 +516,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -538,7 +535,16 @@ static void ipgre_err(struct sk_buff *skb, u32 info) flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); - if (t == NULL || t->parms.iph.daddr == 0 || + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->parms.link, 0, IPPROTO_GRE, 0); + goto out; + } + + if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) goto out; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 63b64c4..b913754 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -42,6 +42,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%pI4\n", spi, &iph->daddr); + ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_COMP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2d0f99b..715338a 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -348,9 +348,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -369,7 +366,17 @@ static int ipip_err(struct sk_buff *skb, u32 info) rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); - if (t == NULL || t->parms.iph.daddr == 0) + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->dev->ifindex, 0, IPPROTO_IPIP, 0); + err = 0; + goto out; + } + + if (t->parms.iph.daddr == 0) goto out; err = 0; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d79b961..e7ff2dc 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -95,11 +95,11 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv4_confirm(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ipv4_helper(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -110,24 +110,38 @@ static unsigned int ipv4_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return NF_ACCEPT; help = nfct_help(ct); if (!help) - goto out; + return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) - goto out; + return NF_ACCEPT; ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), ct, ctinfo); - if (ret != NF_ACCEPT) { + if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL, "nf_ct_%s: dropping packet", helper->name); - return ret; } + return ret; +} + +static unsigned int ipv4_confirm(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; /* adjust seqs for loopback traffic only in outgoing direction */ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && @@ -185,6 +199,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK, }, { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, + { .hook = ipv4_confirm, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, @@ -192,6 +213,13 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK_CONFIRM, }, { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, + { .hook = ipv4_confirm, .owner = THIS_MODULE, .pf = NFPROTO_IPV4, diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 7b22382..3c04d24 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -13,10 +13,10 @@ #include <linux/skbuff.h> #include <linux/udp.h> -#include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> #include <linux/netfilter/nf_conntrack_amanda.h> MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index cad29c1..c6784a1 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -95,7 +95,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int count) { - const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + const struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int i; __be16 port; @@ -178,7 +178,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int i; u_int16_t nated_port; @@ -330,7 +330,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = ntohs(port); @@ -419,7 +419,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); u_int16_t nated_port = ntohs(port); union nf_inet_addr addr; diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index af65958..2e59ad0 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -153,6 +153,19 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo, } EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust); +void nf_nat_tcp_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + u32 ctinfo, int off) +{ + const struct tcphdr *th; + + if (nf_ct_protonum(ct) != IPPROTO_TCP) + return; + + th = (struct tcphdr *)(skb_network_header(skb)+ ip_hdrlen(skb)); + nf_nat_set_seq_adjust(ct, ctinfo, th->seq, off); +} +EXPORT_SYMBOL_GPL(nf_nat_tcp_seq_adjust); + static void nf_nat_csum(struct sk_buff *skb, const struct iphdr *iph, void *data, int datalen, __sum16 *check, int oldlen) { diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index c273d58..3881408 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -49,7 +49,7 @@ static void pptp_nat_expected(struct nf_conn *ct, const struct nf_nat_pptp *nat_pptp_info; struct nf_nat_ipv4_range range; - ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(master); nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; /* And here goes the grand finale of corrosion... */ @@ -123,7 +123,7 @@ pptp_outbound_pkt(struct sk_buff *skb, __be16 new_callid; unsigned int cid_off; - ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(ct); nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; new_callid = ct_pptp_info->pns_call_id; @@ -192,7 +192,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, struct nf_ct_pptp_master *ct_pptp_info; struct nf_nat_pptp *nat_pptp_info; - ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info; + ct_pptp_info = nfct_help_data(ct); nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; /* save original PAC call ID in nat_info */ diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index a2901bf..9dbb8d2 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -8,10 +8,10 @@ #include <linux/module.h> #include <linux/udp.h> -#include <net/netfilter/nf_nat_helper.h> -#include <net/netfilter/nf_nat_rule.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_nat_rule.h> #include <linux/netfilter/nf_conntrack_tftp.h> MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2c00e8b..340fcf2 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -371,6 +371,7 @@ void ping_err(struct sk_buff *skb, u32 info) break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); if (inet_sock->pmtudisc != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4032b81..659ddfb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -216,6 +216,9 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) int err = 0; int harderr = 0; + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) + ipv4_sk_update_pmtu(skb, sk, info); + /* Report error on raw socket, if: 1. User requested ip_recverr. 2. Socket is connected (otherwise the error indication diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 655506a..a91f6d3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1711,6 +1711,34 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } +void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, + int oif, u32 mark, u8 protocol, int flow_flags) +{ + const struct iphdr *iph = (const struct iphdr *)skb->data; + struct flowi4 fl4; + struct rtable *rt; + + flowi4_init_output(&fl4, oif, mark, RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, + protocol, flow_flags | FLOWI_FLAG_PRECOW_METRICS, + iph->daddr, iph->saddr, 0, 0); + rt = __ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) { + ip_rt_update_pmtu(&rt->dst, mtu); + ip_rt_put(rt); + } +} +EXPORT_SYMBOL_GPL(ipv4_update_pmtu); + +void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) +{ + const struct inet_sock *inet = inet_sk(sk); + + return ipv4_update_pmtu(skb, sock_net(sk), mtu, + sk->sk_bound_dev_if, sk->sk_mark, + inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, + inet_sk_flowi_flags(sk)); +} +EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); static void ipv4_validate_peer(struct rtable *rt) { @@ -1923,8 +1951,6 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4, if (dst_mtu(dst) > IP_MAX_MTU) dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU); - if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40) - dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index eaca736..db017ef 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -615,6 +615,7 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + ipv4_sk_update_pmtu(skb, sk, info); if (inet->pmtudisc != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index f1a4a2c..49d4d26 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -35,6 +35,7 @@ #include <linux/pfkeyv2.h> #include <linux/string.h> #include <linux/scatterlist.h> +#include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -621,7 +622,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, NETDEBUG(KERN_DEBUG "pmtu discovery on SA AH/%08x/%pI6\n", ntohl(ah->spi), &iph->daddr); - + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index db1521f..89a615b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -39,6 +39,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/spinlock.h> +#include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -442,6 +443,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; pr_debug("pmtu discovery on SA ESP/%08x/%pI6\n", ntohl(esph->spi), &iph->daddr); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index ed89bba..5247d5c 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -649,7 +649,6 @@ static int icmpv6_rcv(struct sk_buff *skb) struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); const struct in6_addr *saddr, *daddr; - const struct ipv6hdr *orig_hdr; struct icmp6hdr *hdr; u8 type; @@ -661,7 +660,7 @@ static int icmpv6_rcv(struct sk_buff *skb) XFRM_STATE_ICMP)) goto drop_no_count; - if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(*orig_hdr))) + if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr))) goto drop_no_count; nh = skb_network_offset(skb); @@ -722,9 +721,6 @@ static int icmpv6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto discard_it; hdr = icmp6_hdr(skb); - orig_hdr = (struct ipv6hdr *) (hdr + 1); - rt6_pmtu_discovery(&orig_hdr->daddr, &orig_hdr->saddr, dev, - ntohl(hdr->icmp6_mtu)); /* * Drop through to notify diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 5cb75bf..9283238 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -46,6 +46,7 @@ #include <linux/list.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> +#include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> @@ -74,6 +75,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, pr_debug("pmtu discovery on SA IPCOMP/%08x/%pI6\n", spi, &iph->daddr); + ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index fca10da..4794f96 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -143,11 +143,11 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, return NF_ACCEPT; } -static unsigned int ipv6_confirm(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ipv6_helper(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { struct nf_conn *ct; const struct nf_conn_help *help; @@ -161,15 +161,15 @@ static unsigned int ipv6_confirm(unsigned int hooknum, /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED_REPLY) - goto out; + return NF_ACCEPT; help = nfct_help(ct); if (!help) - goto out; + return NF_ACCEPT; /* rcu_read_lock()ed by nf_hook_slow */ helper = rcu_dereference(help->helper); if (!helper) - goto out; + return NF_ACCEPT; protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); @@ -179,12 +179,19 @@ static unsigned int ipv6_confirm(unsigned int hooknum, } ret = helper->help(skb, protoff, ct, ctinfo); - if (ret != NF_ACCEPT) { + if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) { nf_log_packet(NFPROTO_IPV6, hooknum, skb, in, out, NULL, "nf_ct_%s: dropping packet", helper->name); - return ret; } -out: + return ret; +} + +static unsigned int ipv6_confirm(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } @@ -254,6 +261,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .priority = NF_IP6_PRI_CONNTRACK, }, { + .hook = ipv6_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK_HELPER, + }, + { .hook = ipv6_confirm, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, @@ -261,6 +275,13 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .priority = NF_IP6_PRI_LAST, }, { + .hook = ipv6_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_CONNTRACK_HELPER, + }, + { .hook = ipv6_confirm, .owner = THIS_MODULE, .pf = NFPROTO_IPV6, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 93d6983..43b0042 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -328,9 +328,10 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, return; harderr = icmpv6_err_convert(type, code, &err); - if (type == ICMPV6_PKT_TOOBIG) + if (type == ICMPV6_PKT_TOOBIG) { + ip6_sk_update_pmtu(skb, sk, info); harderr = (np->pmtudisc == IPV6_PMTUDISC_DO); - + } if (np->recverr) { u8 *payload = skb->data; if (!inet->hdrincl) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 58a3ec2..e649cd7 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1049,7 +1049,10 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info*)dst; + dst_confirm(dst); if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { + struct net *net = dev_net(dst->dev); + rt6->rt6i_flags |= RTF_MODIFIED; if (mtu < IPV6_MIN_MTU) { u32 features = dst_metric(dst, RTAX_FEATURES); @@ -1058,9 +1061,39 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) dst_metric_set(dst, RTAX_FEATURES, features); } dst_metric_set(dst, RTAX_MTU, mtu); + rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } +void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, + int oif, u32 mark) +{ + const struct ipv6hdr *iph = (struct ipv6hdr *) skb->data; + struct dst_entry *dst; + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_oif = oif; + fl6.flowi6_mark = mark; + fl6.flowi6_flags = FLOWI_FLAG_PRECOW_METRICS; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = (*(__be32 *) iph) & IPV6_FLOWINFO_MASK; + + dst = ip6_route_output(net, NULL, &fl6); + if (!dst->error) + ip6_rt_update_pmtu(dst, ntohl(mtu)); + dst_release(dst); +} +EXPORT_SYMBOL_GPL(ip6_update_pmtu); + +void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu) +{ + ip6_update_pmtu(skb, sock_net(sk), mtu, + sk->sk_bound_dev_if, sk->sk_mark); +} +EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu); + static unsigned int ip6_default_advmss(const struct dst_entry *dst) { struct net_device *dev = dst->dev; @@ -1704,116 +1737,6 @@ out: } /* - * Handle ICMP "packet too big" messages - * i.e. Path MTU discovery - */ - -static void rt6_do_pmtu_disc(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net *net, u32 pmtu, int ifindex) -{ - struct rt6_info *rt, *nrt; - int allfrag = 0; -again: - rt = rt6_lookup(net, daddr, saddr, ifindex, 0); - if (!rt) - return; - - if (rt6_check_expired(rt)) { - ip6_del_rt(rt); - goto again; - } - - if (pmtu >= dst_mtu(&rt->dst)) - goto out; - - if (pmtu < IPV6_MIN_MTU) { - /* - * According to RFC2460, PMTU is set to the IPv6 Minimum Link - * MTU (1280) and a fragment header should always be included - * after a node receiving Too Big message reporting PMTU is - * less than the IPv6 Minimum Link MTU. - */ - pmtu = IPV6_MIN_MTU; - allfrag = 1; - } - - /* New mtu received -> path was valid. - They are sent only in response to data packets, - so that this nexthop apparently is reachable. --ANK - */ - dst_confirm(&rt->dst); - - /* Host route. If it is static, it would be better - not to override it, but add new one, so that - when cache entry will expire old pmtu - would return automatically. - */ - if (rt->rt6i_flags & RTF_CACHE) { - dst_metric_set(&rt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&rt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&rt->dst, RTAX_FEATURES, features); - } - rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); - rt->rt6i_flags |= RTF_MODIFIED; - goto out; - } - - /* Network route. - Two cases are possible: - 1. It is connected route. Action: COW - 2. It is gatewayed route or NONEXTHOP route. Action: clone it. - */ - if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, daddr, saddr); - else - nrt = rt6_alloc_clone(rt, daddr); - - if (nrt) { - dst_metric_set(&nrt->dst, RTAX_MTU, pmtu); - if (allfrag) { - u32 features = dst_metric(&nrt->dst, RTAX_FEATURES); - features |= RTAX_FEATURE_ALLFRAG; - dst_metric_set(&nrt->dst, RTAX_FEATURES, features); - } - - /* According to RFC 1981, detecting PMTU increase shouldn't be - * happened within 5 mins, the recommended timer is 10 mins. - * Here this route expiration time is set to ip6_rt_mtu_expires - * which is 10 mins. After 10 mins the decreased pmtu is expired - * and detecting PMTU increase will be automatically happened. - */ - rt6_update_expires(nrt, net->ipv6.sysctl.ip6_rt_mtu_expires); - nrt->rt6i_flags |= RTF_DYNAMIC; - ip6_ins_rt(nrt); - } -out: - dst_release(&rt->dst); -} - -void rt6_pmtu_discovery(const struct in6_addr *daddr, const struct in6_addr *saddr, - struct net_device *dev, u32 pmtu) -{ - struct net *net = dev_net(dev); - - /* - * RFC 1981 states that a node "MUST reduce the size of the packets it - * is sending along the path" that caused the Packet Too Big message. - * Since it's not possible in the general case to determine which - * interface was used to send the original packet, we update the MTU - * on the interface that will be used to send future packets. We also - * update the MTU on the interface that received the Packet Too Big in - * case the original packet was forced out that interface with - * SO_BINDTODEVICE or similar. This is the next best thing to the - * correct behaviour, which would be to update the MTU on all - * interfaces. - */ - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, 0); - rt6_do_pmtu_disc(daddr, saddr, net, pmtu, dev->ifindex); -} - -/* * Misc support functions */ @@ -3051,13 +2974,13 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; - ret = register_pernet_subsys(&ip6_route_net_ops); + ret = register_pernet_subsys(&ipv6_inetpeer_ops); if (ret) goto out_dst_entries; - ret = register_pernet_subsys(&ipv6_inetpeer_ops); + ret = register_pernet_subsys(&ip6_route_net_ops); if (ret) - goto out_register_subsys; + goto out_register_inetpeer; ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; @@ -3074,7 +2997,7 @@ int __init ip6_route_init(void) #endif ret = fib6_init(); if (ret) - goto out_register_inetpeer; + goto out_register_subsys; ret = xfrm6_init(); if (ret) @@ -3103,10 +3026,10 @@ xfrm6_init: xfrm6_fini(); out_fib6_init: fib6_gc_cleanup(); -out_register_inetpeer: - unregister_pernet_subsys(&ipv6_inetpeer_ops); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); +out_register_inetpeer: + unregister_pernet_subsys(&ipv6_inetpeer_ops); out_dst_entries: dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6041571..49aea94 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -527,9 +527,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; - case ICMP_FRAG_NEEDED: - /* Soft state for pmtu is maintained by IP core. */ - return 0; default: /* All others are translated to HOST_UNREACH. rfc2003 contains "deep thoughts" about NET_UNREACH, @@ -551,7 +548,17 @@ static int ipip6_err(struct sk_buff *skb, u32 info) skb->dev, iph->daddr, iph->saddr); - if (t == NULL || t->parms.iph.daddr == 0) + if (t == NULL) + goto out; + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + t->dev->ifindex, 0, IPPROTO_IPV6, 0); + err = 0; + goto out; + } + + if (t->parms.iph.daddr == 0) goto out; err = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f91b0bf..26a8862 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -415,6 +415,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } else dst_hold(dst); + dst->ops->update_pmtu(dst, ntohl(info)); + if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f05099f..051ad48 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -479,6 +479,9 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; + if (type == ICMPV6_PKT_TOOBIG) + ip6_sk_update_pmtu(skb, sk, info); + np = inet6_sk(sk); if (!icmpv6_err_convert(type, code, &err) && !np->recverr) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 07d7d55..cd6f7a9 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -372,7 +372,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, skb_trim(skb, skb->dev->mtu); } skb->protocol = ETH_P_AF_IUCV; - skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 209c1ed..c19b214 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -335,6 +335,27 @@ config NF_CT_NETLINK_TIMEOUT If unsure, say `N'. +config NF_CT_NETLINK_HELPER + tristate 'Connection tracking helpers in user-space via Netlink' + select NETFILTER_NETLINK + depends on NF_CT_NETLINK + depends on NETFILTER_NETLINK_QUEUE + depends on NETFILTER_NETLINK_QUEUE_CT + depends on NETFILTER_ADVANCED + help + This option enables the user-space connection tracking helpers + infrastructure. + + If unsure, say `N'. + +config NETFILTER_NETLINK_QUEUE_CT + bool "NFQUEUE integration with Connection Tracking" + default n + depends on NETFILTER_NETLINK_QUEUE + help + If this option is enabled, NFQUEUE can include Connection Tracking + information together with the packet is the enqueued via NFNETLINK. + endif # NF_CONNTRACK # transparent proxy support diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 4e7960c..1c5160f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -9,6 +9,8 @@ obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o +nfnetlink_queue-y := nfnetlink_queue_core.o +nfnetlink_queue-$(CONFIG_NETFILTER_NETLINK_QUEUE_CT) += nfnetlink_queue_ct.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o @@ -24,6 +26,7 @@ obj-$(CONFIG_NF_CT_PROTO_UDPLITE) += nf_conntrack_proto_udplite.o # netlink interface for nf_conntrack obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o +obj-$(CONFIG_NF_CT_NETLINK_HELPER) += nfnetlink_cthelper.o # connection tracking helpers nf_conntrack_h323-objs := nf_conntrack_h323_main.o nf_conntrack_h323_asn1.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index e19f365..7eef845 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -264,6 +264,10 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct) rcu_read_unlock(); } EXPORT_SYMBOL(nf_conntrack_destroy); + +struct nfq_ct_hook *nfq_ct_hook; +EXPORT_SYMBOL_GPL(nfq_ct_hook); + #endif /* CONFIG_NF_CONNTRACK */ #ifdef CONFIG_PROC_FS diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1ee2082..cf48755 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -819,7 +819,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = exp->master; if (exp->helper) { - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, exp->helper, + GFP_ATOMIC); if (help) rcu_assign_pointer(help->helper, exp->helper); } diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 641ff5f..1a95459 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -44,7 +44,8 @@ void __nf_ct_ext_destroy(struct nf_conn *ct) EXPORT_SYMBOL(__nf_ct_ext_destroy); static void * -nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) +nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp) { unsigned int off, len; struct nf_ct_ext_type *t; @@ -54,8 +55,8 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) t = rcu_dereference(nf_ct_ext_types[id]); BUG_ON(t == NULL); off = ALIGN(sizeof(struct nf_ct_ext), t->align); - len = off + t->len; - alloc_size = t->alloc_size; + len = off + t->len + var_alloc_len; + alloc_size = t->alloc_size + var_alloc_len; rcu_read_unlock(); *ext = kzalloc(alloc_size, gfp); @@ -68,7 +69,8 @@ nf_ct_ext_create(struct nf_ct_ext **ext, enum nf_ct_ext_id id, gfp_t gfp) return (void *)(*ext) + off; } -void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) +void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, + size_t var_alloc_len, gfp_t gfp) { struct nf_ct_ext *old, *new; int i, newlen, newoff; @@ -79,7 +81,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) old = ct->ext; if (!old) - return nf_ct_ext_create(&ct->ext, id, gfp); + return nf_ct_ext_create(&ct->ext, id, var_alloc_len, gfp); if (__nf_ct_ext_exist(old, id)) return NULL; @@ -89,7 +91,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) BUG_ON(t == NULL); newoff = ALIGN(old->len, t->align); - newlen = newoff + t->len; + newlen = newoff + t->len + var_alloc_len; rcu_read_unlock(); new = __krealloc(old, newlen, gfp); @@ -117,7 +119,7 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) memset((void *)new + newoff, 0, newlen - newoff); return (void *)new + newoff; } -EXPORT_SYMBOL(__nf_ct_ext_add); +EXPORT_SYMBOL(__nf_ct_ext_add_length); static void update_alloc_size(struct nf_ct_ext_type *type) { diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 8c5c95c..4bb771d 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -358,7 +358,7 @@ static int help(struct sk_buff *skb, u32 seq; int dir = CTINFO2DIR(ctinfo); unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff); - struct nf_ct_ftp_master *ct_ftp_info = &nfct_help(ct)->help.ct_ftp_info; + struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; union nf_inet_addr *daddr; struct nf_conntrack_man cmd = {}; @@ -512,7 +512,6 @@ out_update_nl: } static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; -static char ftp_names[MAX_PORTS][2][sizeof("ftp-65535")] __read_mostly; static const struct nf_conntrack_expect_policy ftp_exp_policy = { .max_expected = 1, @@ -541,7 +540,6 @@ static void nf_conntrack_ftp_fini(void) static int __init nf_conntrack_ftp_init(void) { int i, j = -1, ret = 0; - char *tmpname; ftp_buffer = kmalloc(65536, GFP_KERNEL); if (!ftp_buffer) @@ -556,17 +554,16 @@ static int __init nf_conntrack_ftp_init(void) ftp[i][0].tuple.src.l3num = PF_INET; ftp[i][1].tuple.src.l3num = PF_INET6; for (j = 0; j < 2; j++) { + ftp[i][j].data_len = sizeof(struct nf_ct_ftp_master); ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; ftp[i][j].expect_policy = &ftp_exp_policy; ftp[i][j].me = THIS_MODULE; ftp[i][j].help = help; - tmpname = &ftp_names[i][j][0]; if (ports[i] == FTP_PORT) - sprintf(tmpname, "ftp"); + sprintf(ftp[i][j].name, "ftp"); else - sprintf(tmpname, "ftp-%d", ports[i]); - ftp[i][j].name = tmpname; + sprintf(ftp[i][j].name, "ftp-%d", ports[i]); pr_debug("nf_ct_ftp: registering helper for pf: %d " "port: %d\n", diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 31f50bc..4283b20 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -114,7 +114,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int *datalen, int *dataoff) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); const struct tcphdr *th; struct tcphdr _tcph; @@ -617,6 +617,7 @@ static const struct nf_conntrack_expect_policy h245_exp_policy = { static struct nf_conntrack_helper nf_conntrack_helper_h245 __read_mostly = { .name = "H.245", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_UNSPEC, .tuple.dst.protonum = IPPROTO_UDP, .help = h245_help, @@ -1169,6 +1170,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { { .name = "Q.931", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, @@ -1244,7 +1246,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, unsigned char **data, TransportAddress *taddr, int count) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret = 0; int i; @@ -1359,7 +1361,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationRequest *rrq) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int ret; typeof(set_ras_addr_hook) set_ras_addr; @@ -1394,7 +1396,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationConfirm *rcf) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret; struct nf_conntrack_expect *exp; @@ -1443,7 +1445,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, UnregistrationRequest *urq) { - struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); int ret; typeof(set_sig_addr_hook) set_sig_addr; @@ -1475,7 +1477,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionRequest *arq) { - const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info; + const struct nf_ct_h323_master *info = nfct_help_data(ct); int dir = CTINFO2DIR(ctinfo); __be16 port; union nf_inet_addr addr; @@ -1742,6 +1744,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, @@ -1751,6 +1754,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { { .name = "RAS", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_h323_master), .tuple.src.l3num = AF_INET6, .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 4fa2ff9..c4bc637 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -30,8 +30,10 @@ #include <net/netfilter/nf_conntrack_extend.h> static DEFINE_MUTEX(nf_ct_helper_mutex); -static struct hlist_head *nf_ct_helper_hash __read_mostly; -static unsigned int nf_ct_helper_hsize __read_mostly; +struct hlist_head *nf_ct_helper_hash __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_helper_hash); +unsigned int nf_ct_helper_hsize __read_mostly; +EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; static bool nf_ct_auto_assign_helper __read_mostly = true; @@ -161,11 +163,14 @@ nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_try_module_get); -struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) +struct nf_conn_help * +nf_ct_helper_ext_add(struct nf_conn *ct, + struct nf_conntrack_helper *helper, gfp_t gfp) { struct nf_conn_help *help; - help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp); + help = nf_ct_ext_add_length(ct, NF_CT_EXT_HELPER, + helper->data_len, gfp); if (help) INIT_HLIST_HEAD(&help->expectations); else @@ -218,13 +223,19 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, } if (help == NULL) { - help = nf_ct_helper_ext_add(ct, flags); + help = nf_ct_helper_ext_add(ct, helper, flags); if (help == NULL) { ret = -ENOMEM; goto out; } } else { - memset(&help->help, 0, sizeof(help->help)); + /* We only allow helper re-assignment of the same sort since + * we cannot reallocate the helper extension area. + */ + if (help->helper != helper) { + RCU_INIT_POINTER(help->helper, NULL); + goto out; + } } rcu_assign_pointer(help->helper, helper); @@ -319,6 +330,9 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_expectfn_find_by_symbol); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { + int ret = 0; + struct nf_conntrack_helper *cur; + struct hlist_node *n; unsigned int h = helper_hash(&me->tuple); BUG_ON(me->expect_policy == NULL); @@ -326,11 +340,19 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); mutex_lock(&nf_ct_helper_mutex); + hlist_for_each_entry(cur, n, &nf_ct_helper_hash[h], hnode) { + if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && + cur->tuple.src.l3num == me->tuple.src.l3num && + cur->tuple.dst.protonum == me->tuple.dst.protonum) { + ret = -EEXIST; + goto out; + } + } hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); nf_ct_helper_count++; +out: mutex_unlock(&nf_ct_helper_mutex); - - return 0; + return ret; } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 81366c1..009c52c 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -221,7 +221,6 @@ static int help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper irc[MAX_PORTS] __read_mostly; -static char irc_names[MAX_PORTS][sizeof("irc-65535")] __read_mostly; static struct nf_conntrack_expect_policy irc_exp_policy; static void nf_conntrack_irc_fini(void); @@ -229,7 +228,6 @@ static void nf_conntrack_irc_fini(void); static int __init nf_conntrack_irc_init(void) { int i, ret; - char *tmpname; if (max_dcc_channels < 1) { printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n"); @@ -255,12 +253,10 @@ static int __init nf_conntrack_irc_init(void) irc[i].me = THIS_MODULE; irc[i].help = help; - tmpname = &irc_names[i][0]; if (ports[i] == IRC_PORT) - sprintf(tmpname, "irc"); + sprintf(irc[i].name, "irc"); else - sprintf(tmpname, "irc-%u", i); - irc[i].name = tmpname; + sprintf(irc[i].name, "irc-%u", i); ret = nf_conntrack_helper_register(&irc[i]); if (ret) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 6f4b00a8..31d1d8f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -46,6 +46,7 @@ #ifdef CONFIG_NF_NAT_NEEDED #include <net/netfilter/nf_nat_core.h> #include <net/netfilter/nf_nat_protocol.h> +#include <net/netfilter/nf_nat_helper.h> #endif #include <linux/netfilter/nfnetlink.h> @@ -901,7 +902,8 @@ static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = { }; static inline int -ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) +ctnetlink_parse_help(const struct nlattr *attr, char **helper_name, + struct nlattr **helpinfo) { struct nlattr *tb[CTA_HELP_MAX+1]; @@ -912,6 +914,9 @@ ctnetlink_parse_help(const struct nlattr *attr, char **helper_name) *helper_name = nla_data(tb[CTA_HELP_NAME]); + if (tb[CTA_HELP_INFO]) + *helpinfo = tb[CTA_HELP_INFO]; + return 0; } @@ -1172,13 +1177,14 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) struct nf_conntrack_helper *helper; struct nf_conn_help *help = nfct_help(ct); char *helpname = NULL; + struct nlattr *helpinfo = NULL; int err; /* don't change helper of sibling connections */ if (ct->master) return -EBUSY; - err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo); if (err < 0) return err; @@ -1213,20 +1219,17 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[]) } if (help) { - if (help->helper == helper) + if (help->helper == helper) { + /* update private helper data if allowed. */ + if (helper->from_nlattr && helpinfo) + helper->from_nlattr(helpinfo, ct); return 0; - if (help->helper) + } else return -EBUSY; - /* need to zero data of old helper */ - memset(&help->help, 0, sizeof(help->help)); - } else { - /* we cannot set a helper for an existing conntrack */ - return -EOPNOTSUPP; } - rcu_assign_pointer(help->helper, helper); - - return 0; + /* we cannot set a helper for an existing conntrack */ + return -EOPNOTSUPP; } static inline int @@ -1410,8 +1413,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, rcu_read_lock(); if (cda[CTA_HELP]) { char *helpname = NULL; - - err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); + struct nlattr *helpinfo = NULL; + + err = ctnetlink_parse_help(cda[CTA_HELP], &helpname, &helpinfo); if (err < 0) goto err2; @@ -1440,11 +1444,14 @@ ctnetlink_create_conntrack(struct net *net, u16 zone, } else { struct nf_conn_help *help; - help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); + help = nf_ct_helper_ext_add(ct, helper, GFP_ATOMIC); if (help == NULL) { err = -ENOMEM; goto err2; } + /* set private helper data if allowed. */ + if (helper->from_nlattr && helpinfo) + helper->from_nlattr(helpinfo, ct); /* not in hash table yet so not strictly necessary */ RCU_INIT_POINTER(help->helper, helper); @@ -1620,6 +1627,142 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, return err; } +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT +static size_t +ctnetlink_nfqueue_build_size(const struct nf_conn *ct) +{ + return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ + + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ + + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */ + + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + + nla_total_size(0) /* CTA_PROTOINFO */ + + nla_total_size(0) /* CTA_HELP */ + + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + + ctnetlink_secctx_size(ct) +#ifdef CONFIG_NF_NAT_NEEDED + + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ +#endif +#ifdef CONFIG_NF_CONNTRACK_MARK + + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ +#endif + + ctnetlink_proto_size(ct) + ; +} + +static int +ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct) +{ + struct nlattr *nest_parms; + + rcu_read_lock(); + nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest_parms); + + nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest_parms); + + if (nf_ct_zone(ct)) { + if (nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct)))) + goto nla_put_failure; + } + + if (ctnetlink_dump_id(skb, ct) < 0) + goto nla_put_failure; + + if (ctnetlink_dump_status(skb, ct) < 0) + goto nla_put_failure; + + if (ctnetlink_dump_timeout(skb, ct) < 0) + goto nla_put_failure; + + if (ctnetlink_dump_protoinfo(skb, ct) < 0) + goto nla_put_failure; + + if (ctnetlink_dump_helpinfo(skb, ct) < 0) + goto nla_put_failure; + +#ifdef CONFIG_NF_CONNTRACK_SECMARK + if (ct->secmark && ctnetlink_dump_secctx(skb, ct) < 0) + goto nla_put_failure; +#endif + if (ct->master && ctnetlink_dump_master(skb, ct) < 0) + goto nla_put_failure; + + if ((ct->status & IPS_SEQ_ADJUST) && + ctnetlink_dump_nat_seq_adj(skb, ct) < 0) + goto nla_put_failure; + +#ifdef CONFIG_NF_CONNTRACK_MARK + if (ct->mark && ctnetlink_dump_mark(skb, ct) < 0) + goto nla_put_failure; +#endif + rcu_read_unlock(); + return 0; + +nla_put_failure: + rcu_read_unlock(); + return -ENOSPC; +} + +static int +ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct) +{ + int err; + + if (cda[CTA_TIMEOUT]) { + err = ctnetlink_change_timeout(ct, cda); + if (err < 0) + return err; + } + if (cda[CTA_STATUS]) { + err = ctnetlink_change_status(ct, cda); + if (err < 0) + return err; + } + if (cda[CTA_HELP]) { + err = ctnetlink_change_helper(ct, cda); + if (err < 0) + return err; + } +#if defined(CONFIG_NF_CONNTRACK_MARK) + if (cda[CTA_MARK]) + ct->mark = ntohl(nla_get_be32(cda[CTA_MARK])); +#endif + return 0; +} + +static int +ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct) +{ + struct nlattr *cda[CTA_MAX+1]; + + nla_parse_nested(cda, CTA_MAX, attr, ct_nla_policy); + + return ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct); +} + +static struct nfq_ct_hook ctnetlink_nfqueue_hook = { + .build_size = ctnetlink_nfqueue_build_size, + .build = ctnetlink_nfqueue_build, + .parse = ctnetlink_nfqueue_parse, +#ifdef CONFIG_NF_NAT_NEEDED + .seq_adjust = nf_nat_tcp_seq_adjust, +#endif +}; +#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */ + /*********************************************************************** * EXPECT ***********************************************************************/ @@ -2424,7 +2567,10 @@ static int __init ctnetlink_init(void) pr_err("ctnetlink_init: cannot register pernet operations\n"); goto err_unreg_exp_subsys; } - +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT + /* setup interaction between nf_queue and nf_conntrack_netlink. */ + RCU_INIT_POINTER(nfq_ct_hook, &ctnetlink_nfqueue_hook); +#endif return 0; err_unreg_exp_subsys: @@ -2442,6 +2588,9 @@ static void __exit ctnetlink_exit(void) unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); +#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT + RCU_INIT_POINTER(nfq_ct_hook, NULL); +#endif } module_init(ctnetlink_init); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 31d56b2..6fed9ec 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -174,7 +174,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, static void pptp_destroy_siblings(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); - const struct nf_conn_help *help = nfct_help(ct); + const struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_conntrack_tuple t; nf_ct_gre_keymap_destroy(ct); @@ -182,16 +182,16 @@ static void pptp_destroy_siblings(struct nf_conn *ct) /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; - t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; + t.src.u.gre.key = ct_pptp_info->pns_call_id; + t.dst.u.gre.key = ct_pptp_info->pac_call_id; if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; - t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; + t.src.u.gre.key = ct_pptp_info->pac_call_id; + t.dst.u.gre.key = ct_pptp_info->pns_call_id; if (!destroy_sibling_or_exp(net, ct, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } @@ -269,7 +269,7 @@ pptp_inbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + struct nf_ct_pptp_master *info = nfct_help_data(ct); u_int16_t msg; __be16 cid = 0, pcid = 0; typeof(nf_nat_pptp_hook_inbound) nf_nat_pptp_inbound; @@ -396,7 +396,7 @@ pptp_outbound_pkt(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + struct nf_ct_pptp_master *info = nfct_help_data(ct); u_int16_t msg; __be16 cid = 0, pcid = 0; typeof(nf_nat_pptp_hook_outbound) nf_nat_pptp_outbound; @@ -506,7 +506,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, { int dir = CTINFO2DIR(ctinfo); - const struct nf_ct_pptp_master *info = &nfct_help(ct)->help.ct_pptp_info; + const struct nf_ct_pptp_master *info = nfct_help_data(ct); const struct tcphdr *tcph; struct tcphdr _tcph; const struct pptp_pkt_hdr *pptph; @@ -592,6 +592,7 @@ static const struct nf_conntrack_expect_policy pptp_exp_policy = { static struct nf_conntrack_helper pptp __read_mostly = { .name = "pptp", .me = THIS_MODULE, + .data_len = sizeof(struct nf_ct_pptp_master), .tuple.src.l3num = AF_INET, .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT), .tuple.dst.protonum = IPPROTO_TCP, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 25ba5a2..5cac41c 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -117,10 +117,10 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, { struct net *net = nf_ct_net(ct); struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); struct nf_ct_gre_keymap **kmp, *km; - kmp = &help->help.ct_pptp_info.keymap[dir]; + kmp = &ct_pptp_info->keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ read_lock_bh(&net_gre->keymap_lock); @@ -158,19 +158,19 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); struct netns_proto_gre *net_gre = gre_pernet(net); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); write_lock_bh(&net_gre->keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { - if (help->help.ct_pptp_info.keymap[dir]) { + if (ct_pptp_info->keymap[dir]) { pr_debug("removing %p from list\n", - help->help.ct_pptp_info.keymap[dir]); - list_del(&help->help.ct_pptp_info.keymap[dir]->list); - kfree(help->help.ct_pptp_info.keymap[dir]); - help->help.ct_pptp_info.keymap[dir] = NULL; + ct_pptp_info->keymap[dir]); + list_del(&ct_pptp_info->keymap[dir]->list); + kfree(ct_pptp_info->keymap[dir]); + ct_pptp_info->keymap[dir] = NULL; } } write_unlock_bh(&net_gre->keymap_lock); diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 8501823..295429f 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -69,13 +69,12 @@ static int help(struct sk_buff *skb, void *sb_ptr; int ret = NF_ACCEPT; int dir = CTINFO2DIR(ctinfo); - struct nf_ct_sane_master *ct_sane_info; + struct nf_ct_sane_master *ct_sane_info = nfct_help_data(ct); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; struct sane_request *req; struct sane_reply_net_start *reply; - ct_sane_info = &nfct_help(ct)->help.ct_sane_info; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) @@ -163,7 +162,6 @@ out: } static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly; -static char sane_names[MAX_PORTS][2][sizeof("sane-65535")] __read_mostly; static const struct nf_conntrack_expect_policy sane_exp_policy = { .max_expected = 1, @@ -190,7 +188,6 @@ static void nf_conntrack_sane_fini(void) static int __init nf_conntrack_sane_init(void) { int i, j = -1, ret = 0; - char *tmpname; sane_buffer = kmalloc(65536, GFP_KERNEL); if (!sane_buffer) @@ -205,17 +202,16 @@ static int __init nf_conntrack_sane_init(void) sane[i][0].tuple.src.l3num = PF_INET; sane[i][1].tuple.src.l3num = PF_INET6; for (j = 0; j < 2; j++) { + sane[i][j].data_len = sizeof(struct nf_ct_sane_master); sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); sane[i][j].tuple.dst.protonum = IPPROTO_TCP; sane[i][j].expect_policy = &sane_exp_policy; sane[i][j].me = THIS_MODULE; sane[i][j].help = help; - tmpname = &sane_names[i][j][0]; if (ports[i] == SANE_PORT) - sprintf(tmpname, "sane"); + sprintf(sane[i][j].name, "sane"); else - sprintf(tmpname, "sane-%d", ports[i]); - sane[i][j].name = tmpname; + sprintf(sane[i][j].name, "sane-%d", ports[i]); pr_debug("nf_ct_sane: registering helper for pf: %d " "port: %d\n", diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 93faf6a..758a1ba 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1075,12 +1075,12 @@ static int process_invite_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1091,12 +1091,12 @@ static int process_update_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1107,12 +1107,12 @@ static int process_prack_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); if ((code >= 100 && code <= 199) || (code >= 200 && code <= 299)) return process_sdp(skb, dataoff, dptr, datalen, cseq); - else if (help->help.ct_sip_info.invite_cseq == cseq) + else if (ct_sip_info->invite_cseq == cseq) flush_expectations(ct, true); return NF_ACCEPT; } @@ -1123,13 +1123,13 @@ static int process_invite_request(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); unsigned int ret; flush_expectations(ct, true); ret = process_sdp(skb, dataoff, dptr, datalen, cseq); if (ret == NF_ACCEPT) - help->help.ct_sip_info.invite_cseq = cseq; + ct_sip_info->invite_cseq = cseq; return ret; } @@ -1154,7 +1154,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned int matchoff, matchlen; struct nf_conntrack_expect *exp; @@ -1235,7 +1235,7 @@ static int process_register_request(struct sk_buff *skb, unsigned int dataoff, store_cseq: if (ret == NF_ACCEPT) - help->help.ct_sip_info.register_cseq = cseq; + ct_sip_info->register_cseq = cseq; return ret; } @@ -1245,7 +1245,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, { enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); - struct nf_conn_help *help = nfct_help(ct); + struct nf_ct_sip_master *ct_sip_info = nfct_help_data(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr addr; __be16 port; @@ -1262,7 +1262,7 @@ static int process_register_response(struct sk_buff *skb, unsigned int dataoff, * responses, so we store the sequence number of the last valid * request and compare it here. */ - if (help->help.ct_sip_info.register_cseq != cseq) + if (ct_sip_info->register_cseq != cseq) return NF_ACCEPT; if (code >= 100 && code <= 199) @@ -1556,7 +1556,6 @@ static void nf_conntrack_sip_fini(void) static int __init nf_conntrack_sip_init(void) { int i, j, ret; - char *tmpname; if (ports_c == 0) ports[ports_c++] = SIP_PORT; @@ -1579,17 +1578,16 @@ static int __init nf_conntrack_sip_init(void) sip[i][3].help = sip_help_tcp; for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { + sip[i][j].data_len = sizeof(struct nf_ct_sip_master); sip[i][j].tuple.src.u.udp.port = htons(ports[i]); sip[i][j].expect_policy = sip_exp_policy; sip[i][j].expect_class_max = SIP_EXPECT_MAX; sip[i][j].me = THIS_MODULE; - tmpname = &sip_names[i][j][0]; if (ports[i] == SIP_PORT) - sprintf(tmpname, "sip"); + sprintf(sip_names[i][j], "sip"); else - sprintf(tmpname, "sip-%u", i); - sip[i][j].name = tmpname; + sprintf(sip_names[i][j], "sip-%u", i); pr_debug("port #%u: %u\n", i, ports[i]); diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 75466fd..81fc61c 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -92,7 +92,6 @@ static int tftp_help(struct sk_buff *skb, } static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly; -static char tftp_names[MAX_PORTS][2][sizeof("tftp-65535")] __read_mostly; static const struct nf_conntrack_expect_policy tftp_exp_policy = { .max_expected = 1, @@ -112,7 +111,6 @@ static void nf_conntrack_tftp_fini(void) static int __init nf_conntrack_tftp_init(void) { int i, j, ret; - char *tmpname; if (ports_c == 0) ports[ports_c++] = TFTP_PORT; @@ -129,12 +127,10 @@ static int __init nf_conntrack_tftp_init(void) tftp[i][j].me = THIS_MODULE; tftp[i][j].help = tftp_help; - tmpname = &tftp_names[i][j][0]; if (ports[i] == TFTP_PORT) - sprintf(tmpname, "tftp"); + sprintf(tftp[i][j].name, "tftp"); else - sprintf(tmpname, "tftp-%u", i); - tftp[i][j].name = tmpname; + sprintf(tftp[i][j].name, "tftp-%u", i); ret = nf_conntrack_helper_register(&tftp[i][j]); if (ret) { diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c new file mode 100644 index 0000000..d683619 --- /dev/null +++ b/net/netfilter/nfnetlink_cthelper.c @@ -0,0 +1,672 @@ +/* + * (C) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + * + * This software has been sponsored by Vyatta Inc. <http://www.vyatta.com> + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/rculist.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/list.h> +#include <linux/errno.h> +#include <net/netlink.h> +#include <net/sock.h> + +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_ecache.h> + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> +#include <linux/netfilter/nfnetlink_cthelper.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("nfnl_cthelper: User-space connection tracking helpers"); + +static int +nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) +{ + const struct nf_conn_help *help; + struct nf_conntrack_helper *helper; + + help = nfct_help(ct); + if (help == NULL) + return NF_DROP; + + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (helper == NULL) + return NF_DROP; + + /* This is an user-space helper not yet configured, skip. */ + if ((helper->flags & + (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) == + NF_CT_HELPER_F_USERSPACE) + return NF_ACCEPT; + + /* If the user-space helper is not available, don't block traffic. */ + return NF_QUEUE_NR(helper->queue_num) | NF_VERDICT_FLAG_QUEUE_BYPASS; +} + +static const struct nla_policy nfnl_cthelper_tuple_pol[NFCTH_TUPLE_MAX+1] = { + [NFCTH_TUPLE_L3PROTONUM] = { .type = NLA_U16, }, + [NFCTH_TUPLE_L4PROTONUM] = { .type = NLA_U8, }, +}; + +static int +nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_TUPLE_MAX+1]; + + nla_parse_nested(tb, NFCTH_TUPLE_MAX, attr, nfnl_cthelper_tuple_pol); + + if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) + return -EINVAL; + + tuple->src.l3num = ntohs(nla_get_u16(tb[NFCTH_TUPLE_L3PROTONUM])); + tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); + + return 0; +} + +static int +nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) +{ + const struct nf_conn_help *help = nfct_help(ct); + + if (help->helper->data_len == 0) + return -EINVAL; + + memcpy(&help->data, nla_data(attr), help->helper->data_len); + return 0; +} + +static int +nfnl_cthelper_to_nlattr(struct sk_buff *skb, const struct nf_conn *ct) +{ + const struct nf_conn_help *help = nfct_help(ct); + + if (help->helper->data_len && + nla_put(skb, CTA_HELP_INFO, help->helper->data_len, &help->data)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -ENOSPC; +} + +static const struct nla_policy nfnl_cthelper_expect_pol[NFCTH_POLICY_MAX+1] = { + [NFCTH_POLICY_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN-1 }, + [NFCTH_POLICY_EXPECT_MAX] = { .type = NLA_U32, }, + [NFCTH_POLICY_EXPECT_TIMEOUT] = { .type = NLA_U32, }, +}; + +static int +nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, + const struct nlattr *attr) +{ + struct nlattr *tb[NFCTH_POLICY_MAX+1]; + + nla_parse_nested(tb, NFCTH_POLICY_MAX, attr, nfnl_cthelper_expect_pol); + + if (!tb[NFCTH_POLICY_NAME] || + !tb[NFCTH_POLICY_EXPECT_MAX] || + !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) + return -EINVAL; + + strncpy(expect_policy->name, + nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); + expect_policy->max_expected = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); + expect_policy->timeout = + ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_TIMEOUT])); + + return 0; +} + +static const struct nla_policy +nfnl_cthelper_expect_policy_set[NFCTH_POLICY_SET_MAX+1] = { + [NFCTH_POLICY_SET_NUM] = { .type = NLA_U32, }, +}; + +static int +nfnl_cthelper_parse_expect_policy(struct nf_conntrack_helper *helper, + const struct nlattr *attr) +{ + int i, ret; + struct nf_conntrack_expect_policy *expect_policy; + struct nlattr *tb[NFCTH_POLICY_SET_MAX+1]; + + nla_parse_nested(tb, NFCTH_POLICY_SET_MAX, attr, + nfnl_cthelper_expect_policy_set); + + if (!tb[NFCTH_POLICY_SET_NUM]) + return -EINVAL; + + helper->expect_class_max = + ntohl(nla_get_be32(tb[NFCTH_POLICY_SET_NUM])); + + if (helper->expect_class_max != 0 && + helper->expect_class_max > NF_CT_MAX_EXPECT_CLASSES) + return -EOVERFLOW; + + expect_policy = kzalloc(sizeof(struct nf_conntrack_expect_policy) * + helper->expect_class_max, GFP_KERNEL); + if (expect_policy == NULL) + return -ENOMEM; + + for (i=0; i<helper->expect_class_max; i++) { + if (!tb[NFCTH_POLICY_SET+i]) + goto err; + + ret = nfnl_cthelper_expect_policy(&expect_policy[i], + tb[NFCTH_POLICY_SET+i]); + if (ret < 0) + goto err; + } + helper->expect_policy = expect_policy; + return 0; +err: + kfree(expect_policy); + return -EINVAL; +} + +static int +nfnl_cthelper_create(const struct nlattr * const tb[], + struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_helper *helper; + int ret; + + if (!tb[NFCTH_TUPLE] || !tb[NFCTH_POLICY] || !tb[NFCTH_PRIV_DATA_LEN]) + return -EINVAL; + + helper = kzalloc(sizeof(struct nf_conntrack_helper), GFP_KERNEL); + if (helper == NULL) + return -ENOMEM; + + ret = nfnl_cthelper_parse_expect_policy(helper, tb[NFCTH_POLICY]); + if (ret < 0) + goto err; + + strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); + helper->data_len = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); + helper->flags |= NF_CT_HELPER_F_USERSPACE; + memcpy(&helper->tuple, tuple, sizeof(struct nf_conntrack_tuple)); + + helper->me = THIS_MODULE; + helper->help = nfnl_userspace_cthelper; + helper->from_nlattr = nfnl_cthelper_from_nlattr; + helper->to_nlattr = nfnl_cthelper_to_nlattr; + + /* Default to queue number zero, this can be updated at any time. */ + if (tb[NFCTH_QUEUE_NUM]) + helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); + + if (tb[NFCTH_STATUS]) { + int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); + + switch(status) { + case NFCT_HELPER_STATUS_ENABLED: + helper->flags |= NF_CT_HELPER_F_CONFIGURED; + break; + case NFCT_HELPER_STATUS_DISABLED: + helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; + break; + } + } + + ret = nf_conntrack_helper_register(helper); + if (ret < 0) + goto err; + + return 0; +err: + kfree(helper); + return ret; +} + +static int +nfnl_cthelper_update(const struct nlattr * const tb[], + struct nf_conntrack_helper *helper) +{ + int ret; + + if (tb[NFCTH_PRIV_DATA_LEN]) + return -EBUSY; + + if (tb[NFCTH_POLICY]) { + ret = nfnl_cthelper_parse_expect_policy(helper, + tb[NFCTH_POLICY]); + if (ret < 0) + return ret; + } + if (tb[NFCTH_QUEUE_NUM]) + helper->queue_num = ntohl(nla_get_be32(tb[NFCTH_QUEUE_NUM])); + + if (tb[NFCTH_STATUS]) { + int status = ntohl(nla_get_be32(tb[NFCTH_STATUS])); + + switch(status) { + case NFCT_HELPER_STATUS_ENABLED: + helper->flags |= NF_CT_HELPER_F_CONFIGURED; + break; + case NFCT_HELPER_STATUS_DISABLED: + helper->flags &= ~NF_CT_HELPER_F_CONFIGURED; + break; + } + } + return 0; +} + +static int +nfnl_cthelper_new(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + const char *helper_name; + struct nf_conntrack_helper *cur, *helper = NULL; + struct nf_conntrack_tuple tuple; + struct hlist_node *n; + int ret = 0, i; + + if (!tb[NFCTH_NAME] || !tb[NFCTH_TUPLE]) + return -EINVAL; + + helper_name = nla_data(tb[NFCTH_NAME]); + + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + rcu_read_lock(); + for (i = 0; i < nf_ct_helper_hsize && !helper; i++) { + hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) + continue; + + if ((tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + if (nlh->nlmsg_flags & NLM_F_EXCL) { + ret = -EEXIST; + goto err; + } + helper = cur; + break; + } + } + rcu_read_unlock(); + + if (helper == NULL) + ret = nfnl_cthelper_create(tb, &tuple); + else + ret = nfnl_cthelper_update(tb, helper); + + return ret; +err: + rcu_read_unlock(); + return ret; +} + +static int +nfnl_cthelper_dump_tuple(struct sk_buff *skb, + struct nf_conntrack_helper *helper) +{ + struct nlattr *nest_parms; + + nest_parms = nla_nest_start(skb, NFCTH_TUPLE | NLA_F_NESTED); + if (nest_parms == NULL) + goto nla_put_failure; + + if (nla_put_be16(skb, NFCTH_TUPLE_L3PROTONUM, + htons(helper->tuple.src.l3num))) + goto nla_put_failure; + + if (nla_put_u8(skb, NFCTH_TUPLE_L4PROTONUM, helper->tuple.dst.protonum)) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + return 0; + +nla_put_failure: + return -1; +} + +static int +nfnl_cthelper_dump_policy(struct sk_buff *skb, + struct nf_conntrack_helper *helper) +{ + int i; + struct nlattr *nest_parms1, *nest_parms2; + + nest_parms1 = nla_nest_start(skb, NFCTH_POLICY | NLA_F_NESTED); + if (nest_parms1 == NULL) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_SET_NUM, + htonl(helper->expect_class_max))) + goto nla_put_failure; + + for (i=0; i<helper->expect_class_max; i++) { + nest_parms2 = nla_nest_start(skb, + (NFCTH_POLICY_SET+i) | NLA_F_NESTED); + if (nest_parms2 == NULL) + goto nla_put_failure; + + if (nla_put_string(skb, NFCTH_POLICY_NAME, + helper->expect_policy[i].name)) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_MAX, + htonl(helper->expect_policy[i].max_expected))) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_POLICY_EXPECT_TIMEOUT, + htonl(helper->expect_policy[i].timeout))) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms2); + } + nla_nest_end(skb, nest_parms1); + return 0; + +nla_put_failure: + return -1; +} + +static int +nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + int event, struct nf_conntrack_helper *helper) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0; + int status; + + event |= NFNL_SUBSYS_CTHELPER << 8; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (nla_put_string(skb, NFCTH_NAME, helper->name)) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_QUEUE_NUM, htonl(helper->queue_num))) + goto nla_put_failure; + + if (nfnl_cthelper_dump_tuple(skb, helper) < 0) + goto nla_put_failure; + + if (nfnl_cthelper_dump_policy(skb, helper) < 0) + goto nla_put_failure; + + if (nla_put_be32(skb, NFCTH_PRIV_DATA_LEN, htonl(helper->data_len))) + goto nla_put_failure; + + if (helper->flags & NF_CT_HELPER_F_CONFIGURED) + status = NFCT_HELPER_STATUS_ENABLED; + else + status = NFCT_HELPER_STATUS_DISABLED; + + if (nla_put_be32(skb, NFCTH_STATUS, htonl(status))) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_cthelper_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conntrack_helper *cur, *last; + struct hlist_node *n; + + rcu_read_lock(); + last = (struct nf_conntrack_helper *)cb->args[1]; + for (; cb->args[0] < nf_ct_helper_hsize; cb->args[0]++) { +restart: + hlist_for_each_entry_rcu(cur, n, + &nf_ct_helper_hash[cb->args[0]], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (cb->args[1]) { + if (cur != last) + continue; + cb->args[1] = 0; + } + if (nfnl_cthelper_fill_info(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur) < 0) { + cb->args[1] = (unsigned long)cur; + goto out; + } + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } +out: + rcu_read_unlock(); + return skb->len; +} + +static int +nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = -ENOENT, i; + struct nf_conntrack_helper *cur; + struct hlist_node *n; + struct sk_buff *skb2; + char *helper_name = NULL; + struct nf_conntrack_tuple tuple; + bool tuple_set = false; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nfnl_cthelper_dump_table, + }; + return netlink_dump_start(nfnl, skb, nlh, &c); + } + + if (tb[NFCTH_NAME]) + helper_name = nla_data(tb[NFCTH_NAME]); + + if (tb[NFCTH_TUPLE]) { + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + tuple_set = true; + } + + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry_rcu(cur, n, &nf_ct_helper_hash[i], hnode) { + + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + if (helper_name && strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) { + continue; + } + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { + ret = -ENOMEM; + break; + } + + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_CTHELPER_NEW, cur); + if (ret <= 0) { + kfree_skb(skb2); + break; + } + + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + MSG_DONTWAIT); + if (ret > 0) + ret = 0; + + /* this avoids a loop in nfnetlink. */ + return ret == -EAGAIN ? -ENOBUFS : ret; + } + } + return ret; +} + +static int +nfnl_cthelper_del(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + char *helper_name = NULL; + struct nf_conntrack_helper *cur; + struct hlist_node *n, *tmp; + struct nf_conntrack_tuple tuple; + bool tuple_set = false, found = false; + int i, j = 0, ret; + + if (tb[NFCTH_NAME]) + helper_name = nla_data(tb[NFCTH_NAME]); + + if (tb[NFCTH_TUPLE]) { + ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); + if (ret < 0) + return ret; + + tuple_set = true; + } + + for (i = 0; i < nf_ct_helper_hsize; i++) { + hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hnode) { + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + j++; + + if (helper_name && strncmp(cur->name, helper_name, + NF_CT_HELPER_NAME_LEN) != 0) { + continue; + } + if (tuple_set && + (tuple.src.l3num != cur->tuple.src.l3num || + tuple.dst.protonum != cur->tuple.dst.protonum)) + continue; + + found = true; + nf_conntrack_helper_unregister(cur); + } + } + /* Make sure we return success if we flush and there is no helpers */ + return (found || j == 0) ? 0 : -ENOENT; +} + +static const struct nla_policy nfnl_cthelper_policy[NFCTH_MAX+1] = { + [NFCTH_NAME] = { .type = NLA_NUL_STRING, + .len = NF_CT_HELPER_NAME_LEN-1 }, + [NFCTH_QUEUE_NUM] = { .type = NLA_U32, }, +}; + +static const struct nfnl_callback nfnl_cthelper_cb[NFNL_MSG_CTHELPER_MAX] = { + [NFNL_MSG_CTHELPER_NEW] = { .call = nfnl_cthelper_new, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, + [NFNL_MSG_CTHELPER_GET] = { .call = nfnl_cthelper_get, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, + [NFNL_MSG_CTHELPER_DEL] = { .call = nfnl_cthelper_del, + .attr_count = NFCTH_MAX, + .policy = nfnl_cthelper_policy }, +}; + +static const struct nfnetlink_subsystem nfnl_cthelper_subsys = { + .name = "cthelper", + .subsys_id = NFNL_SUBSYS_CTHELPER, + .cb_count = NFNL_MSG_CTHELPER_MAX, + .cb = nfnl_cthelper_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTHELPER); + +static int __init nfnl_cthelper_init(void) +{ + int ret; + + ret = nfnetlink_subsys_register(&nfnl_cthelper_subsys); + if (ret < 0) { + pr_err("nfnl_cthelper: cannot register with nfnetlink.\n"); + goto err_out; + } + return 0; +err_out: + return ret; +} + +static void __exit nfnl_cthelper_exit(void) +{ + struct nf_conntrack_helper *cur; + struct hlist_node *n, *tmp; + int i; + + nfnetlink_subsys_unregister(&nfnl_cthelper_subsys); + + for (i=0; i<nf_ct_helper_hsize; i++) { + hlist_for_each_entry_safe(cur, n, tmp, &nf_ct_helper_hash[i], + hnode) { + /* skip non-userspace conntrack helpers. */ + if (!(cur->flags & NF_CT_HELPER_F_USERSPACE)) + continue; + + nf_conntrack_helper_unregister(cur); + } + } +} + +module_init(nfnl_cthelper_init); +module_exit(nfnl_cthelper_exit); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue_core.c index 630da3d..d36b95e 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -30,6 +30,7 @@ #include <linux/list.h> #include <net/sock.h> #include <net/netfilter/nf_queue.h> +#include <net/netfilter/nfnetlink_queue.h> #include <linux/atomic.h> @@ -233,6 +234,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, struct sk_buff *entskb = entry->skb; struct net_device *indev; struct net_device *outdev; + struct nf_conn *ct = NULL; + enum ip_conntrack_info uninitialized_var(ctinfo); size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) @@ -266,6 +269,8 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; } + if (queue->flags & NFQA_CFG_F_CONNTRACK) + ct = nfqnl_ct_get(entskb, &size, &ctinfo); skb = alloc_skb(size, GFP_ATOMIC); if (!skb) @@ -389,6 +394,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, BUG(); } + if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0) + goto nla_put_failure; + nlh->nlmsg_len = skb->tail - old_tail; return skb; @@ -469,12 +477,10 @@ err_out: } static int -nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e) +nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff) { struct sk_buff *nskb; - int diff; - diff = data_len - e->skb->len; if (diff < 0) { if (pskb_trim(e->skb, data_len)) return -ENOMEM; @@ -632,6 +638,7 @@ static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = { [NFQA_VERDICT_HDR] = { .len = sizeof(struct nfqnl_msg_verdict_hdr) }, [NFQA_MARK] = { .type = NLA_U32 }, [NFQA_PAYLOAD] = { .type = NLA_UNSPEC }, + [NFQA_CT] = { .type = NLA_UNSPEC }, }; static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { @@ -732,6 +739,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_instance *queue; unsigned int verdict; struct nf_queue_entry *entry; + enum ip_conntrack_info uninitialized_var(ctinfo); + struct nf_conn *ct = NULL; queue = instance_lookup(queue_num); if (!queue) @@ -750,11 +759,22 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, if (entry == NULL) return -ENOENT; + rcu_read_lock(); + if (nfqa[NFQA_CT] && (queue->flags & NFQA_CFG_F_CONNTRACK)) + ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo); + if (nfqa[NFQA_PAYLOAD]) { + u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]); + int diff = payload_len - entry->skb->len; + if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]), - nla_len(nfqa[NFQA_PAYLOAD]), entry) < 0) + payload_len, entry, diff) < 0) verdict = NF_DROP; + + if (ct) + nfqnl_ct_seq_adjust(skb, ct, ctinfo, diff); } + rcu_read_unlock(); if (nfqa[NFQA_MARK]) entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK])); diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c new file mode 100644 index 0000000..68ef550 --- /dev/null +++ b/net/netfilter/nfnetlink_queue_ct.c @@ -0,0 +1,97 @@ +/* + * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/skbuff.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_queue.h> +#include <net/netfilter/nf_conntrack.h> + +struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size, + enum ip_conntrack_info *ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nf_conn *ct; + + /* rcu_read_lock()ed by __nf_queue already. */ + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return NULL; + + ct = nf_ct_get(entskb, ctinfo); + if (ct) { + if (!nf_ct_is_untracked(ct)) + *size += nfq_ct->build_size(ct); + else + ct = NULL; + } + return ct; +} + +struct nf_conn * +nfqnl_ct_parse(const struct sk_buff *skb, const struct nlattr *attr, + enum ip_conntrack_info *ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nf_conn *ct; + + /* rcu_read_lock()ed by __nf_queue already. */ + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return NULL; + + ct = nf_ct_get(skb, ctinfo); + if (ct && !nf_ct_is_untracked(ct)) + nfq_ct->parse(attr, ct); + + return ct; +} + +int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo) +{ + struct nfq_ct_hook *nfq_ct; + struct nlattr *nest_parms; + u_int32_t tmp; + + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return 0; + + nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + if (nfq_ct->build(skb, ct) < 0) + goto nla_put_failure; + + nla_nest_end(skb, nest_parms); + + tmp = ctinfo; + if (nla_put_be32(skb, NFQA_CT_INFO, htonl(tmp))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int diff) +{ + struct nfq_ct_hook *nfq_ct; + + nfq_ct = rcu_dereference(nfq_ct_hook); + if (nfq_ct == NULL) + return; + + if ((ct->status & IPS_NAT_MASK) && diff) + nfq_ct->seq_adjust(skb, ct, ctinfo, diff); +} diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index a51de9b..1160185 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -112,6 +112,8 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { + struct nf_conntrack_helper *helper; + ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { @@ -120,19 +122,21 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) goto err3; } - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); - if (help == NULL) - goto err3; - ret = -ENOENT; - help->helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (help->helper == NULL) { + helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (helper == NULL) { pr_info("No such helper \"%s\"\n", info->helper); goto err3; } + + ret = -ENOMEM; + help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = helper; } __set_bit(IPS_TEMPLATE_BIT, &ct->status); @@ -202,6 +206,8 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err3; if (info->helper[0]) { + struct nf_conntrack_helper *helper; + ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { @@ -210,19 +216,21 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) goto err3; } - ret = -ENOMEM; - help = nf_ct_helper_ext_add(ct, GFP_KERNEL); - if (help == NULL) - goto err3; - ret = -ENOENT; - help->helper = nf_conntrack_helper_try_module_get(info->helper, - par->family, - proto); - if (help->helper == NULL) { + helper = nf_conntrack_helper_try_module_get(info->helper, + par->family, + proto); + if (helper == NULL) { pr_info("No such helper \"%s\"\n", info->helper); goto err3; } + + ret = -ENOMEM; + help = nf_ct_helper_ext_add(ct, helper, GFP_KERNEL); + if (help == NULL) + goto err3; + + help->helper = helper; } #ifdef CONFIG_NF_CONNTRACK_TIMEOUT diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index a68aed7..ec2118d 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -502,10 +502,8 @@ static int snd_compr_pause(struct snd_compr_stream *stream) if (stream->runtime->state != SNDRV_PCM_STATE_RUNNING) return -EPERM; retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_PAUSE_PUSH); - if (!retval) { + if (!retval) stream->runtime->state = SNDRV_PCM_STATE_PAUSED; - wake_up(&stream->runtime->sleep); - } return retval; } @@ -544,6 +542,10 @@ static int snd_compr_stop(struct snd_compr_stream *stream) if (!retval) { stream->runtime->state = SNDRV_PCM_STATE_SETUP; wake_up(&stream->runtime->sleep); + stream->runtime->hw_pointer = 0; + stream->runtime->app_pointer = 0; + stream->runtime->total_bytes_available = 0; + stream->runtime->total_bytes_transferred = 0; } return retval; } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2b6392b..0276382 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2484,9 +2484,9 @@ static void azx_notifier_unregister(struct azx *chip) static int DELAYED_INIT_MARK azx_first_init(struct azx *chip); static int DELAYED_INIT_MARK azx_probe_continue(struct azx *chip); +#ifdef SUPPORT_VGA_SWITCHEROO static struct pci_dev __devinit *get_bound_vga(struct pci_dev *pci); -#ifdef SUPPORT_VGA_SWITCHEROO static void azx_vs_set_state(struct pci_dev *pci, enum vga_switcheroo_state state) { @@ -2578,6 +2578,7 @@ static int __devinit register_vga_switcheroo(struct azx *chip) #else #define init_vga_switcheroo(chip) /* NOP */ #define register_vga_switcheroo(chip) 0 +#define check_hdmi_disabled(pci) false #endif /* SUPPORT_VGA_SWITCHER */ /* @@ -2638,6 +2639,7 @@ static int azx_dev_free(struct snd_device *device) return azx_free(device->device_data); } +#ifdef SUPPORT_VGA_SWITCHEROO /* * Check of disabled HDMI controller by vga-switcheroo */ @@ -2670,12 +2672,13 @@ static bool __devinit check_hdmi_disabled(struct pci_dev *pci) struct pci_dev *p = get_bound_vga(pci); if (p) { - if (vga_default_device() && p != vga_default_device()) + if (vga_switcheroo_get_client_state(p) == VGA_SWITCHEROO_OFF) vga_inactive = true; pci_dev_put(p); } return vga_inactive; } +#endif /* SUPPORT_VGA_SWITCHEROO */ /* * white/black-listing for position_fix @@ -3351,6 +3354,11 @@ static DEFINE_PCI_DEVICE_TABLE(azx_ids) = { { PCI_DEVICE(0x6549, 0x1200), .driver_data = AZX_DRIVER_TERA | AZX_DCAPS_NO_64BIT }, /* Creative X-Fi (CA0110-IBG) */ + /* CTHDA chips */ + { PCI_DEVICE(0x1102, 0x0010), + .driver_data = AZX_DRIVER_CTHDA | AZX_DCAPS_PRESET_CTHDA }, + { PCI_DEVICE(0x1102, 0x0012), + .driver_data = AZX_DRIVER_CTHDA | AZX_DCAPS_PRESET_CTHDA }, #if !defined(CONFIG_SND_CTXFI) && !defined(CONFIG_SND_CTXFI_MODULE) /* the following entry conflicts with snd-ctxfi driver, * as ctxfi driver mutates from HD-audio to native mode with @@ -3367,11 +3375,6 @@ static DEFINE_PCI_DEVICE_TABLE(azx_ids) = { .driver_data = AZX_DRIVER_CTX | AZX_DCAPS_CTX_WORKAROUND | AZX_DCAPS_RIRB_PRE_DELAY | AZX_DCAPS_POSFIX_LPIB }, #endif - /* CTHDA chips */ - { PCI_DEVICE(0x1102, 0x0010), - .driver_data = AZX_DRIVER_CTHDA | AZX_DCAPS_PRESET_CTHDA }, - { PCI_DEVICE(0x1102, 0x0012), - .driver_data = AZX_DRIVER_CTHDA | AZX_DCAPS_PRESET_CTHDA }, /* Vortex86MX */ { PCI_DEVICE(0x17f3, 0x3010), .driver_data = AZX_DRIVER_GENERIC }, /* VMware HDAudio */ diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 3acb582..172370b 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -4061,7 +4061,7 @@ static void cx_auto_init_digital(struct hda_codec *codec) static int cx_auto_init(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; - /*snd_hda_sequence_write(codec, cx_auto_init_verbs);*/ + snd_hda_gen_apply_verbs(codec); cx_auto_init_output(codec); cx_auto_init_input(codec); cx_auto_init_digital(codec); diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 224410e..f8f4906 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1896,6 +1896,7 @@ static int alc_init(struct hda_codec *codec) alc_fix_pll(codec); alc_auto_init_amp(codec, spec->init_amp); + snd_hda_gen_apply_verbs(codec); alc_init_special_input_src(codec); alc_auto_init_std(codec); @@ -6439,6 +6440,7 @@ enum { ALC662_FIXUP_ASUS_MODE7, ALC662_FIXUP_ASUS_MODE8, ALC662_FIXUP_NO_JACK_DETECT, + ALC662_FIXUP_ZOTAC_Z68, }; static const struct alc_fixup alc662_fixups[] = { @@ -6588,6 +6590,13 @@ static const struct alc_fixup alc662_fixups[] = { .type = ALC_FIXUP_FUNC, .v.func = alc_fixup_no_jack_detect, }, + [ALC662_FIXUP_ZOTAC_Z68] = { + .type = ALC_FIXUP_PINS, + .v.pins = (const struct alc_pincfg[]) { + { 0x1b, 0x02214020 }, /* Front HP */ + { } + } + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -6601,6 +6610,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), + SND_PCI_QUIRK(0x19da, 0xa130, "Zotac Z68", ALC662_FIXUP_ZOTAC_Z68), SND_PCI_QUIRK(0x1b35, 0x2206, "CZC P10T", ALC662_FIXUP_CZC_P10T), #if 0 diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index a75c376..0418fa1 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -99,8 +99,9 @@ static void wm2000_reset(struct wm2000_priv *wm2000) } static int wm2000_poll_bit(struct i2c_client *i2c, - unsigned int reg, u8 mask, int timeout) + unsigned int reg, u8 mask) { + int timeout = 4000; int val; val = wm2000_read(i2c, reg); @@ -119,7 +120,7 @@ static int wm2000_poll_bit(struct i2c_client *i2c, static int wm2000_power_up(struct i2c_client *i2c, int analogue) { struct wm2000_priv *wm2000 = dev_get_drvdata(&i2c->dev); - int ret, timeout; + int ret; BUG_ON(wm2000->anc_mode != ANC_OFF); @@ -140,13 +141,13 @@ static int wm2000_power_up(struct i2c_client *i2c, int analogue) /* Wait for ANC engine to become ready */ if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, - WM2000_ANC_ENG_IDLE, 1)) { + WM2000_ANC_ENG_IDLE)) { dev_err(&i2c->dev, "ANC engine failed to reset\n"); return -ETIMEDOUT; } if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_BOOT_COMPLETE, 1)) { + WM2000_STATUS_BOOT_COMPLETE)) { dev_err(&i2c->dev, "ANC engine failed to initialise\n"); return -ETIMEDOUT; } @@ -173,16 +174,13 @@ static int wm2000_power_up(struct i2c_client *i2c, int analogue) dev_dbg(&i2c->dev, "Download complete\n"); if (analogue) { - timeout = 248; - wm2000_write(i2c, WM2000_REG_ANA_VMID_PU_TIME, timeout / 4); + wm2000_write(i2c, WM2000_REG_ANA_VMID_PU_TIME, 248 / 4); wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_ANA_SEQ_INCLUDE | WM2000_MODE_MOUSE_ENABLE | WM2000_MODE_THERMAL_ENABLE); } else { - timeout = 10; - wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_MOUSE_ENABLE | WM2000_MODE_THERMAL_ENABLE); @@ -201,9 +199,8 @@ static int wm2000_power_up(struct i2c_client *i2c, int analogue) wm2000_write(i2c, WM2000_REG_SYS_CTL2, WM2000_ANC_INT_N_CLR); if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_MOUSE_ACTIVE, timeout)) { - dev_err(&i2c->dev, "Timed out waiting for device after %dms\n", - timeout * 10); + WM2000_STATUS_MOUSE_ACTIVE)) { + dev_err(&i2c->dev, "Timed out waiting for device\n"); return -ETIMEDOUT; } @@ -218,28 +215,25 @@ static int wm2000_power_up(struct i2c_client *i2c, int analogue) static int wm2000_power_down(struct i2c_client *i2c, int analogue) { struct wm2000_priv *wm2000 = dev_get_drvdata(&i2c->dev); - int timeout; if (analogue) { - timeout = 248; - wm2000_write(i2c, WM2000_REG_ANA_VMID_PD_TIME, timeout / 4); + wm2000_write(i2c, WM2000_REG_ANA_VMID_PD_TIME, 248 / 4); wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_ANA_SEQ_INCLUDE | WM2000_MODE_POWER_DOWN); } else { - timeout = 10; wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_POWER_DOWN); } if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_POWER_DOWN_COMPLETE, timeout)) { + WM2000_STATUS_POWER_DOWN_COMPLETE)) { dev_err(&i2c->dev, "Timeout waiting for ANC power down\n"); return -ETIMEDOUT; } if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, - WM2000_ANC_ENG_IDLE, 1)) { + WM2000_ANC_ENG_IDLE)) { dev_err(&i2c->dev, "Timeout waiting for ANC engine idle\n"); return -ETIMEDOUT; } @@ -268,13 +262,13 @@ static int wm2000_enter_bypass(struct i2c_client *i2c, int analogue) } if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_ANC_DISABLED, 10)) { + WM2000_STATUS_ANC_DISABLED)) { dev_err(&i2c->dev, "Timeout waiting for ANC disable\n"); return -ETIMEDOUT; } if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, - WM2000_ANC_ENG_IDLE, 1)) { + WM2000_ANC_ENG_IDLE)) { dev_err(&i2c->dev, "Timeout waiting for ANC engine idle\n"); return -ETIMEDOUT; } @@ -311,7 +305,7 @@ static int wm2000_exit_bypass(struct i2c_client *i2c, int analogue) wm2000_write(i2c, WM2000_REG_SYS_CTL2, WM2000_ANC_INT_N_CLR); if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_MOUSE_ACTIVE, 10)) { + WM2000_STATUS_MOUSE_ACTIVE)) { dev_err(&i2c->dev, "Timed out waiting for MOUSE\n"); return -ETIMEDOUT; } @@ -325,38 +319,32 @@ static int wm2000_exit_bypass(struct i2c_client *i2c, int analogue) static int wm2000_enter_standby(struct i2c_client *i2c, int analogue) { struct wm2000_priv *wm2000 = dev_get_drvdata(&i2c->dev); - int timeout; BUG_ON(wm2000->anc_mode != ANC_ACTIVE); if (analogue) { - timeout = 248; - wm2000_write(i2c, WM2000_REG_ANA_VMID_PD_TIME, timeout / 4); + wm2000_write(i2c, WM2000_REG_ANA_VMID_PD_TIME, 248 / 4); wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_ANA_SEQ_INCLUDE | WM2000_MODE_THERMAL_ENABLE | WM2000_MODE_STANDBY_ENTRY); } else { - timeout = 10; - wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_THERMAL_ENABLE | WM2000_MODE_STANDBY_ENTRY); } if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_ANC_DISABLED, timeout)) { + WM2000_STATUS_ANC_DISABLED)) { dev_err(&i2c->dev, "Timed out waiting for ANC disable after 1ms\n"); return -ETIMEDOUT; } - if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, WM2000_ANC_ENG_IDLE, - 1)) { + if (!wm2000_poll_bit(i2c, WM2000_REG_ANC_STAT, WM2000_ANC_ENG_IDLE)) { dev_err(&i2c->dev, - "Timed out waiting for standby after %dms\n", - timeout * 10); + "Timed out waiting for standby\n"); return -ETIMEDOUT; } @@ -374,23 +362,19 @@ static int wm2000_enter_standby(struct i2c_client *i2c, int analogue) static int wm2000_exit_standby(struct i2c_client *i2c, int analogue) { struct wm2000_priv *wm2000 = dev_get_drvdata(&i2c->dev); - int timeout; BUG_ON(wm2000->anc_mode != ANC_STANDBY); wm2000_write(i2c, WM2000_REG_SYS_CTL1, 0); if (analogue) { - timeout = 248; - wm2000_write(i2c, WM2000_REG_ANA_VMID_PU_TIME, timeout / 4); + wm2000_write(i2c, WM2000_REG_ANA_VMID_PU_TIME, 248 / 4); wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_ANA_SEQ_INCLUDE | WM2000_MODE_THERMAL_ENABLE | WM2000_MODE_MOUSE_ENABLE); } else { - timeout = 10; - wm2000_write(i2c, WM2000_REG_SYS_MODE_CNTRL, WM2000_MODE_THERMAL_ENABLE | WM2000_MODE_MOUSE_ENABLE); @@ -400,9 +384,8 @@ static int wm2000_exit_standby(struct i2c_client *i2c, int analogue) wm2000_write(i2c, WM2000_REG_SYS_CTL2, WM2000_ANC_INT_N_CLR); if (!wm2000_poll_bit(i2c, WM2000_REG_SYS_STATUS, - WM2000_STATUS_MOUSE_ACTIVE, timeout)) { - dev_err(&i2c->dev, "Timed out waiting for MOUSE after %dms\n", - timeout * 10); + WM2000_STATUS_MOUSE_ACTIVE)) { + dev_err(&i2c->dev, "Timed out waiting for MOUSE\n"); return -ETIMEDOUT; } diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 993639d..aa8c98b 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -46,6 +46,39 @@ #define WM8994_NUM_DRC 3 #define WM8994_NUM_EQ 3 +static struct { + unsigned int reg; + unsigned int mask; +} wm8994_vu_bits[] = { + { WM8994_LEFT_LINE_INPUT_1_2_VOLUME, WM8994_IN1_VU }, + { WM8994_RIGHT_LINE_INPUT_1_2_VOLUME, WM8994_IN1_VU }, + { WM8994_LEFT_LINE_INPUT_3_4_VOLUME, WM8994_IN2_VU }, + { WM8994_RIGHT_LINE_INPUT_3_4_VOLUME, WM8994_IN2_VU }, + { WM8994_SPEAKER_VOLUME_LEFT, WM8994_SPKOUT_VU }, + { WM8994_SPEAKER_VOLUME_RIGHT, WM8994_SPKOUT_VU }, + { WM8994_LEFT_OUTPUT_VOLUME, WM8994_HPOUT1_VU }, + { WM8994_RIGHT_OUTPUT_VOLUME, WM8994_HPOUT1_VU }, + { WM8994_LEFT_OPGA_VOLUME, WM8994_MIXOUT_VU }, + { WM8994_RIGHT_OPGA_VOLUME, WM8994_MIXOUT_VU }, + + { WM8994_AIF1_DAC1_LEFT_VOLUME, WM8994_AIF1DAC1_VU }, + { WM8994_AIF1_DAC1_RIGHT_VOLUME, WM8994_AIF1DAC1_VU }, + { WM8994_AIF1_DAC2_LEFT_VOLUME, WM8994_AIF1DAC2_VU }, + { WM8994_AIF1_DAC2_RIGHT_VOLUME, WM8994_AIF1DAC2_VU }, + { WM8994_AIF2_DAC_LEFT_VOLUME, WM8994_AIF2DAC_VU }, + { WM8994_AIF2_DAC_RIGHT_VOLUME, WM8994_AIF2DAC_VU }, + { WM8994_AIF1_ADC1_LEFT_VOLUME, WM8994_AIF1ADC1_VU }, + { WM8994_AIF1_ADC1_RIGHT_VOLUME, WM8994_AIF1ADC1_VU }, + { WM8994_AIF1_ADC2_LEFT_VOLUME, WM8994_AIF1ADC2_VU }, + { WM8994_AIF1_ADC2_RIGHT_VOLUME, WM8994_AIF1ADC2_VU }, + { WM8994_AIF2_ADC_LEFT_VOLUME, WM8994_AIF2ADC_VU }, + { WM8994_AIF2_ADC_RIGHT_VOLUME, WM8994_AIF1ADC2_VU }, + { WM8994_DAC1_LEFT_VOLUME, WM8994_DAC1_VU }, + { WM8994_DAC1_RIGHT_VOLUME, WM8994_DAC1_VU }, + { WM8994_DAC2_LEFT_VOLUME, WM8994_DAC2_VU }, + { WM8994_DAC2_RIGHT_VOLUME, WM8994_DAC2_VU }, +}; + static int wm8994_drc_base[] = { WM8994_AIF1_DRC1_1, WM8994_AIF1_DRC2_1, @@ -989,6 +1022,7 @@ static int aif1clk_ev(struct snd_soc_dapm_widget *w, struct snd_soc_codec *codec = w->codec; struct wm8994 *control = codec->control_data; int mask = WM8994_AIF1DAC1L_ENA | WM8994_AIF1DAC1R_ENA; + int i; int dac; int adc; int val; @@ -1047,6 +1081,13 @@ static int aif1clk_ev(struct snd_soc_dapm_widget *w, WM8994_AIF1DAC2L_ENA); break; + case SND_SOC_DAPM_POST_PMU: + for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) + snd_soc_write(codec, wm8994_vu_bits[i].reg, + snd_soc_read(codec, + wm8994_vu_bits[i].reg)); + break; + case SND_SOC_DAPM_PRE_PMD: case SND_SOC_DAPM_POST_PMD: snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5, @@ -1072,6 +1113,7 @@ static int aif2clk_ev(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_codec *codec = w->codec; + int i; int dac; int adc; int val; @@ -1122,6 +1164,13 @@ static int aif2clk_ev(struct snd_soc_dapm_widget *w, WM8994_AIF2DACR_ENA); break; + case SND_SOC_DAPM_POST_PMU: + for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) + snd_soc_write(codec, wm8994_vu_bits[i].reg, + snd_soc_read(codec, + wm8994_vu_bits[i].reg)); + break; + case SND_SOC_DAPM_PRE_PMD: case SND_SOC_DAPM_POST_PMD: snd_soc_update_bits(codec, WM8994_POWER_MANAGEMENT_5, @@ -1190,17 +1239,19 @@ static int late_enable_ev(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_PRE_PMU: if (wm8994->aif1clk_enable) { - aif1clk_ev(w, kcontrol, event); + aif1clk_ev(w, kcontrol, SND_SOC_DAPM_PRE_PMU); snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1, WM8994_AIF1CLK_ENA_MASK, WM8994_AIF1CLK_ENA); + aif1clk_ev(w, kcontrol, SND_SOC_DAPM_POST_PMU); wm8994->aif1clk_enable = 0; } if (wm8994->aif2clk_enable) { - aif2clk_ev(w, kcontrol, event); + aif2clk_ev(w, kcontrol, SND_SOC_DAPM_PRE_PMU); snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1, WM8994_AIF2CLK_ENA_MASK, WM8994_AIF2CLK_ENA); + aif2clk_ev(w, kcontrol, SND_SOC_DAPM_POST_PMU); wm8994->aif2clk_enable = 0; } break; @@ -1221,15 +1272,17 @@ static int late_disable_ev(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_POST_PMD: if (wm8994->aif1clk_disable) { + aif1clk_ev(w, kcontrol, SND_SOC_DAPM_PRE_PMD); snd_soc_update_bits(codec, WM8994_AIF1_CLOCKING_1, WM8994_AIF1CLK_ENA_MASK, 0); - aif1clk_ev(w, kcontrol, event); + aif1clk_ev(w, kcontrol, SND_SOC_DAPM_POST_PMD); wm8994->aif1clk_disable = 0; } if (wm8994->aif2clk_disable) { + aif2clk_ev(w, kcontrol, SND_SOC_DAPM_PRE_PMD); snd_soc_update_bits(codec, WM8994_AIF2_CLOCKING_1, WM8994_AIF2CLK_ENA_MASK, 0); - aif2clk_ev(w, kcontrol, event); + aif2clk_ev(w, kcontrol, SND_SOC_DAPM_POST_PMD); wm8994->aif2clk_disable = 0; } break; @@ -1527,9 +1580,11 @@ SND_SOC_DAPM_POST("Late Disable PGA", late_disable_ev) static const struct snd_soc_dapm_widget wm8994_lateclk_widgets[] = { SND_SOC_DAPM_SUPPLY("AIF1CLK", WM8994_AIF1_CLOCKING_1, 0, 0, aif1clk_ev, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | + SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_SUPPLY("AIF2CLK", WM8994_AIF2_CLOCKING_1, 0, 0, aif2clk_ev, - SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_PRE_PMD), + SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMU | + SND_SOC_DAPM_PRE_PMD), SND_SOC_DAPM_PGA("Direct Voice", SND_SOC_NOPM, 0, 0, NULL, 0), SND_SOC_DAPM_MIXER("SPKL", WM8994_POWER_MANAGEMENT_3, 8, 0, left_speaker_mixer, ARRAY_SIZE(left_speaker_mixer)), @@ -3879,39 +3934,11 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec) pm_runtime_put(codec->dev); - /* Latch volume updates (right only; we always do left then right). */ - snd_soc_update_bits(codec, WM8994_AIF1_DAC1_LEFT_VOLUME, - WM8994_AIF1DAC1_VU, WM8994_AIF1DAC1_VU); - snd_soc_update_bits(codec, WM8994_AIF1_DAC1_RIGHT_VOLUME, - WM8994_AIF1DAC1_VU, WM8994_AIF1DAC1_VU); - snd_soc_update_bits(codec, WM8994_AIF1_DAC2_LEFT_VOLUME, - WM8994_AIF1DAC2_VU, WM8994_AIF1DAC2_VU); - snd_soc_update_bits(codec, WM8994_AIF1_DAC2_RIGHT_VOLUME, - WM8994_AIF1DAC2_VU, WM8994_AIF1DAC2_VU); - snd_soc_update_bits(codec, WM8994_AIF2_DAC_LEFT_VOLUME, - WM8994_AIF2DAC_VU, WM8994_AIF2DAC_VU); - snd_soc_update_bits(codec, WM8994_AIF2_DAC_RIGHT_VOLUME, - WM8994_AIF2DAC_VU, WM8994_AIF2DAC_VU); - snd_soc_update_bits(codec, WM8994_AIF1_ADC1_LEFT_VOLUME, - WM8994_AIF1ADC1_VU, WM8994_AIF1ADC1_VU); - snd_soc_update_bits(codec, WM8994_AIF1_ADC1_RIGHT_VOLUME, - WM8994_AIF1ADC1_VU, WM8994_AIF1ADC1_VU); - snd_soc_update_bits(codec, WM8994_AIF1_ADC2_LEFT_VOLUME, - WM8994_AIF1ADC2_VU, WM8994_AIF1ADC2_VU); - snd_soc_update_bits(codec, WM8994_AIF1_ADC2_RIGHT_VOLUME, - WM8994_AIF1ADC2_VU, WM8994_AIF1ADC2_VU); - snd_soc_update_bits(codec, WM8994_AIF2_ADC_LEFT_VOLUME, - WM8994_AIF2ADC_VU, WM8994_AIF1ADC2_VU); - snd_soc_update_bits(codec, WM8994_AIF2_ADC_RIGHT_VOLUME, - WM8994_AIF2ADC_VU, WM8994_AIF1ADC2_VU); - snd_soc_update_bits(codec, WM8994_DAC1_LEFT_VOLUME, - WM8994_DAC1_VU, WM8994_DAC1_VU); - snd_soc_update_bits(codec, WM8994_DAC1_RIGHT_VOLUME, - WM8994_DAC1_VU, WM8994_DAC1_VU); - snd_soc_update_bits(codec, WM8994_DAC2_LEFT_VOLUME, - WM8994_DAC2_VU, WM8994_DAC2_VU); - snd_soc_update_bits(codec, WM8994_DAC2_RIGHT_VOLUME, - WM8994_DAC2_VU, WM8994_DAC2_VU); + /* Latch volume update bits */ + for (i = 0; i < ARRAY_SIZE(wm8994_vu_bits); i++) + snd_soc_update_bits(codec, wm8994_vu_bits[i].reg, + wm8994_vu_bits[i].mask, + wm8994_vu_bits[i].mask); /* Set the low bit of the 3D stereo depth so TLV matches */ snd_soc_update_bits(codec, WM8994_AIF1_DAC1_FILTERS_2, diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index f237003..0803274 100644 --- a/sound/soc/fsl/imx-audmux.c +++ b/sound/soc/fsl/imx-audmux.c @@ -26,6 +26,7 @@ #include <linux/of_device.h> #include <linux/platform_device.h> #include <linux/slab.h> +#include <linux/pinctrl/consumer.h> #include "imx-audmux.h" @@ -249,6 +250,7 @@ EXPORT_SYMBOL_GPL(imx_audmux_v2_configure_port); static int __devinit imx_audmux_probe(struct platform_device *pdev) { struct resource *res; + struct pinctrl *pinctrl; const struct of_device_id *of_id = of_match_device(imx_audmux_dt_ids, &pdev->dev); @@ -257,6 +259,12 @@ static int __devinit imx_audmux_probe(struct platform_device *pdev) if (!audmux_base) return -EADDRNOTAVAIL; + pinctrl = devm_pinctrl_get_select_default(&pdev->dev); + if (IS_ERR(pinctrl)) { + dev_err(&pdev->dev, "setup pinctrl failed!"); + return PTR_ERR(pinctrl); + } + audmux_clk = clk_get(&pdev->dev, "audmux"); if (IS_ERR(audmux_clk)) { dev_dbg(&pdev->dev, "cannot get clock: %ld\n", diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 90ee77d..89eae93 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -913,7 +913,7 @@ static int is_connected_input_ep(struct snd_soc_dapm_widget *widget, /* do we need to add this widget to the list ? */ if (list) { int err; - err = dapm_list_add_widget(list, path->sink); + err = dapm_list_add_widget(list, path->source); if (err < 0) { dev_err(widget->dapm->dev, "could not add widget %s\n", widget->name); @@ -954,7 +954,7 @@ int snd_soc_dapm_dai_get_connected_widgets(struct snd_soc_dai *dai, int stream, if (stream == SNDRV_PCM_STREAM_PLAYBACK) paths = is_connected_output_ep(dai->playback_widget, list); else - paths = is_connected_input_ep(dai->playback_widget, list); + paths = is_connected_input_ep(dai->capture_widget, list); trace_snd_soc_dapm_connected(paths, stream); dapm_clear_walk(&card->dapm); diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index bedd171..48fd15b 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -794,6 +794,9 @@ static struct snd_soc_pcm_runtime *dpcm_get_be(struct snd_soc_card *card, for (i = 0; i < card->num_links; i++) { be = &card->rtd[i]; + if (!be->dai_link->no_pcm) + continue; + if (be->cpu_dai->playback_widget == widget || be->codec_dai->playback_widget == widget) return be; @@ -803,6 +806,9 @@ static struct snd_soc_pcm_runtime *dpcm_get_be(struct snd_soc_card *card, for (i = 0; i < card->num_links; i++) { be = &card->rtd[i]; + if (!be->dai_link->no_pcm) + continue; + if (be->cpu_dai->capture_widget == widget || be->codec_dai->capture_widget == widget) return be; diff --git a/sound/soc/tegra/tegra30_ahub.c b/sound/soc/tegra/tegra30_ahub.c index 57cd419..f43edb3 100644 --- a/sound/soc/tegra/tegra30_ahub.c +++ b/sound/soc/tegra/tegra30_ahub.c @@ -629,3 +629,4 @@ MODULE_AUTHOR("Stephen Warren <swarren@nvidia.com>"); MODULE_DESCRIPTION("Tegra30 AHUB driver"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS("platform:" DRV_NAME); +MODULE_DEVICE_TABLE(of, tegra30_ahub_of_match); diff --git a/sound/usb/card.h b/sound/usb/card.h index 0d37238..2b9ffff 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -119,6 +119,7 @@ struct snd_usb_substream { unsigned long unlink_mask; /* bitmask of unlinked urbs */ /* data and sync endpoints for this stream */ + unsigned int ep_num; /* the endpoint number */ struct snd_usb_endpoint *data_endpoint; struct snd_usb_endpoint *sync_endpoint; unsigned long flags; diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 6b7d7a2..083ed81 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -97,6 +97,7 @@ static void snd_usb_init_substream(struct snd_usb_stream *as, subs->formats |= fp->formats; subs->num_formats++; subs->fmt_type = fp->fmt_type; + subs->ep_num = fp->endpoint; } /* @@ -119,9 +120,7 @@ int snd_usb_add_audio_stream(struct snd_usb_audio *chip, if (as->fmt_type != fp->fmt_type) continue; subs = &as->substream[stream]; - if (!subs->data_endpoint) - continue; - if (subs->data_endpoint->ep_num == fp->endpoint) { + if (subs->ep_num == fp->endpoint) { list_add_tail(&fp->list, &subs->fmt_list); subs->num_formats++; subs->formats |= fp->formats; @@ -134,7 +133,7 @@ int snd_usb_add_audio_stream(struct snd_usb_audio *chip, if (as->fmt_type != fp->fmt_type) continue; subs = &as->substream[stream]; - if (subs->data_endpoint) + if (subs->ep_num) continue; err = snd_pcm_new_stream(as->pcm, stream, 1); if (err < 0) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 5476bc0..b4b572e 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,4 +1,6 @@ tools/perf +tools/scripts +tools/lib/traceevent include/linux/const.h include/linux/perf_event.h include/linux/rbtree.h diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8c767c6..25249f7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -152,7 +152,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, if (symbol_conf.use_callchain) { err = callchain_append(he->callchain, - &evsel->hists.callchain_cursor, + &callchain_cursor, sample->period); if (err) return err; @@ -162,7 +162,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel, * so we don't allocated the extra space needed because the stdio * code will not use it. */ - if (al->sym != NULL && use_browser > 0) { + if (he->ms.sym != NULL && use_browser > 0) { struct annotation *notes = symbol__annotation(he->ms.sym); assert(evsel != NULL); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 62ae30d..2625899 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1129,7 +1129,7 @@ static int add_default_attributes(void) return 0; if (!evsel_list->nr_entries) { - if (perf_evlist__add_attrs_array(evsel_list, default_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0) return -1; } @@ -1139,21 +1139,21 @@ static int add_default_attributes(void) return 0; /* Append detailed run extra attributes: */ - if (perf_evlist__add_attrs_array(evsel_list, detailed_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) return -1; if (detailed_run < 2) return 0; /* Append very detailed run extra attributes: */ - if (perf_evlist__add_attrs_array(evsel_list, very_detailed_attrs) < 0) + if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) return -1; if (detailed_run < 3) return 0; /* Append very, very detailed run extra attributes: */ - return perf_evlist__add_attrs_array(evsel_list, very_very_detailed_attrs); + return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); } int cmd_stat(int argc, const char **argv, const char *prefix __used) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 871b540..6bb0277 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -787,7 +787,7 @@ static void perf_event__process_sample(struct perf_tool *tool, } if (symbol_conf.use_callchain) { - err = callchain_append(he->callchain, &evsel->hists.callchain_cursor, + err = callchain_append(he->callchain, &callchain_cursor, sample->period); if (err) return; diff --git a/tools/perf/design.txt b/tools/perf/design.txt index bd0bb1b..67e5d0c 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt @@ -409,14 +409,15 @@ Counters can be enabled and disabled in two ways: via ioctl and via prctl. When a counter is disabled, it doesn't count or generate events but does continue to exist and maintain its count value. -An individual counter or counter group can be enabled with +An individual counter can be enabled with - ioctl(fd, PERF_EVENT_IOC_ENABLE); + ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); or disabled with - ioctl(fd, PERF_EVENT_IOC_DISABLE); + ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); +For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument. Enabling or disabling the leader of a group enables or disables the whole group; that is, while the group leader is disabled, none of the counters in the group will count. Enabling or disabling a member of a diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4deea6a..34b1c46 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -668,7 +668,7 @@ static int annotate_browser__run(struct annotate_browser *browser, int evidx, "q/ESC/CTRL+C Exit\n\n" "-> Go to target\n" "<- Exit\n" - "h Cycle thru hottest instructions\n" + "H Cycle thru hottest instructions\n" "j Toggle showing jump to target arrows\n" "J Toggle showing number of jump sources on targets\n" "n Search next string\n" diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN index ad73300..95264f3 100755 --- a/tools/perf/util/PERF-VERSION-GEN +++ b/tools/perf/util/PERF-VERSION-GEN @@ -12,7 +12,7 @@ LF=' # First check if there is a .git to get the version from git describe # otherwise try to get the version from the kernel makefile if test -d ../../.git -o -f ../../.git && - VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && + VN=$(git describe --match 'v[0-9].[0-9]*' --abbrev=4 HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9f7106a..3a6bff4 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -18,6 +18,8 @@ #include "util.h" #include "callchain.h" +__thread struct callchain_cursor callchain_cursor; + bool ip_callchain__valid(struct ip_callchain *chain, const union perf_event *event) { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 7f9c0f1..3bdb407 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -76,6 +76,8 @@ struct callchain_cursor { struct callchain_cursor_node *curr; }; +extern __thread struct callchain_cursor callchain_cursor; + static inline void callchain_init(struct callchain_root *root) { INIT_LIST_HEAD(&root->node.siblings); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4ac5f5a..7400fb3 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -159,6 +159,17 @@ out_delete_partial_list: return -1; } +int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs) +{ + size_t i; + + for (i = 0; i < nr_attrs; i++) + event_attr_init(attrs + i); + + return perf_evlist__add_attrs(evlist, attrs, nr_attrs); +} + static int trace_event__id(const char *evname) { char *filename, *colon; @@ -263,7 +274,8 @@ void perf_evlist__disable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) - ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_DISABLE); + ioctl(FD(pos, cpu, thread), + PERF_EVENT_IOC_DISABLE, 0); } } } @@ -276,7 +288,8 @@ void perf_evlist__enable(struct perf_evlist *evlist) for (cpu = 0; cpu < evlist->cpus->nr; cpu++) { list_for_each_entry(pos, &evlist->entries, node) { for (thread = 0; thread < evlist->threads->nr; thread++) - ioctl(FD(pos, cpu, thread), PERF_EVENT_IOC_ENABLE); + ioctl(FD(pos, cpu, thread), + PERF_EVENT_IOC_ENABLE, 0); } } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 58abb63..989bee9 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -54,6 +54,8 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); int perf_evlist__add_default(struct perf_evlist *evlist); int perf_evlist__add_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); +int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, + struct perf_event_attr *attrs, size_t nr_attrs); int perf_evlist__add_tracepoints(struct perf_evlist *evlist, const char *tracepoints[], size_t nr_tracepoints); int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, @@ -62,6 +64,8 @@ int perf_evlist__set_tracepoints_handlers(struct perf_evlist *evlist, #define perf_evlist__add_attrs_array(evlist, array) \ perf_evlist__add_attrs(evlist, array, ARRAY_SIZE(array)) +#define perf_evlist__add_default_attrs(evlist, array) \ + __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) #define perf_evlist__add_tracepoints_array(evlist, array) \ perf_evlist__add_tracepoints(evlist, array, ARRAY_SIZE(array)) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 91d1913..9f6cebd 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -494,16 +494,24 @@ int perf_evsel__open_per_thread(struct perf_evsel *evsel, } static int perf_event__parse_id_sample(const union perf_event *event, u64 type, - struct perf_sample *sample) + struct perf_sample *sample, + bool swapped) { const u64 *array = event->sample.array; + union u64_swap u; array += ((event->header.size - sizeof(event->header)) / sizeof(u64)) - 1; if (type & PERF_SAMPLE_CPU) { - u32 *p = (u32 *)array; - sample->cpu = *p; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + } + + sample->cpu = u.val32[0]; array--; } @@ -523,9 +531,16 @@ static int perf_event__parse_id_sample(const union perf_event *event, u64 type, } if (type & PERF_SAMPLE_TID) { - u32 *p = (u32 *)array; - sample->pid = p[0]; - sample->tid = p[1]; + u.val64 = *array; + if (swapped) { + /* undo swap of u64, then swap on individual u32s */ + u.val64 = bswap_64(u.val64); + u.val32[0] = bswap_32(u.val32[0]); + u.val32[1] = bswap_32(u.val32[1]); + } + + sample->pid = u.val32[0]; + sample->tid = u.val32[1]; } return 0; @@ -562,7 +577,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type, if (event->header.type != PERF_RECORD_SAMPLE) { if (!sample_id_all) return 0; - return perf_event__parse_id_sample(event, type, data); + return perf_event__parse_id_sample(event, type, data, swapped); } array = event->sample.array; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1293b5e..514e2a4 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -378,7 +378,7 @@ void hist_entry__free(struct hist_entry *he) * collapse the histogram */ -static bool hists__collapse_insert_entry(struct hists *hists, +static bool hists__collapse_insert_entry(struct hists *hists __used, struct rb_root *root, struct hist_entry *he) { @@ -397,8 +397,9 @@ static bool hists__collapse_insert_entry(struct hists *hists, iter->period += he->period; iter->nr_events += he->nr_events; if (symbol_conf.use_callchain) { - callchain_cursor_reset(&hists->callchain_cursor); - callchain_merge(&hists->callchain_cursor, iter->callchain, + callchain_cursor_reset(&callchain_cursor); + callchain_merge(&callchain_cursor, + iter->callchain, he->callchain); } hist_entry__free(he); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index cfc64e2..34bb556 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -67,8 +67,6 @@ struct hists { struct events_stats stats; u64 event_stream; u16 col_len[HISTC_NR_COLS]; - /* Best would be to reuse the session callchain cursor */ - struct callchain_cursor callchain_cursor; }; struct hist_entry *__hists__add_entry(struct hists *self, diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 1915de2..3322b84 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -57,6 +57,10 @@ void setup_pager(void) } if (!pager) pager = getenv("PAGER"); + if (!pager) { + if (!access("/usr/bin/pager", X_OK)) + pager = "/usr/bin/pager"; + } if (!pager) pager = "less"; else if (!*pager || !strcmp(pager, "cat")) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 59dccc9..0dda25d 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2164,16 +2164,12 @@ int del_perf_probe_events(struct strlist *dellist) error: if (kfd >= 0) { - if (namelist) - strlist__delete(namelist); - + strlist__delete(namelist); close(kfd); } if (ufd >= 0) { - if (unamelist) - strlist__delete(unamelist); - + strlist__delete(unamelist); close(ufd); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 93d355d..2600916 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -288,7 +288,8 @@ struct branch_info *machine__resolve_bstack(struct machine *self, return bi; } -int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, +int machine__resolve_callchain(struct machine *self, + struct perf_evsel *evsel __used, struct thread *thread, struct ip_callchain *chain, struct symbol **parent) @@ -297,7 +298,12 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, unsigned int i; int err; - callchain_cursor_reset(&evsel->hists.callchain_cursor); + callchain_cursor_reset(&callchain_cursor); + + if (chain->nr > PERF_MAX_STACK_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } for (i = 0; i < chain->nr; i++) { u64 ip; @@ -317,7 +323,14 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, case PERF_CONTEXT_USER: cpumode = PERF_RECORD_MISC_USER; break; default: - break; + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. + */ + callchain_cursor_reset(&callchain_cursor); + return 0; } continue; } @@ -333,7 +346,7 @@ int machine__resolve_callchain(struct machine *self, struct perf_evsel *evsel, break; } - err = callchain_cursor_append(&evsel->hists.callchain_cursor, + err = callchain_cursor_append(&callchain_cursor, ip, al.map, al.sym); if (err) return err; @@ -441,37 +454,65 @@ void mem_bswap_64(void *src, int byte_size) } } -static void perf_event__all64_swap(union perf_event *event) +static void swap_sample_id_all(union perf_event *event, void *data) +{ + void *end = (void *) event + event->header.size; + int size = end - data; + + BUG_ON(size % sizeof(u64)); + mem_bswap_64(data, size); +} + +static void perf_event__all64_swap(union perf_event *event, + bool sample_id_all __used) { struct perf_event_header *hdr = &event->header; mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr)); } -static void perf_event__comm_swap(union perf_event *event) +static void perf_event__comm_swap(union perf_event *event, bool sample_id_all) { event->comm.pid = bswap_32(event->comm.pid); event->comm.tid = bswap_32(event->comm.tid); + + if (sample_id_all) { + void *data = &event->comm.comm; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__mmap_swap(union perf_event *event) +static void perf_event__mmap_swap(union perf_event *event, + bool sample_id_all) { event->mmap.pid = bswap_32(event->mmap.pid); event->mmap.tid = bswap_32(event->mmap.tid); event->mmap.start = bswap_64(event->mmap.start); event->mmap.len = bswap_64(event->mmap.len); event->mmap.pgoff = bswap_64(event->mmap.pgoff); + + if (sample_id_all) { + void *data = &event->mmap.filename; + + data += ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } } -static void perf_event__task_swap(union perf_event *event) +static void perf_event__task_swap(union perf_event *event, bool sample_id_all) { event->fork.pid = bswap_32(event->fork.pid); event->fork.tid = bswap_32(event->fork.tid); event->fork.ppid = bswap_32(event->fork.ppid); event->fork.ptid = bswap_32(event->fork.ptid); event->fork.time = bswap_64(event->fork.time); + + if (sample_id_all) + swap_sample_id_all(event, &event->fork + 1); } -static void perf_event__read_swap(union perf_event *event) +static void perf_event__read_swap(union perf_event *event, bool sample_id_all) { event->read.pid = bswap_32(event->read.pid); event->read.tid = bswap_32(event->read.tid); @@ -479,6 +520,9 @@ static void perf_event__read_swap(union perf_event *event) event->read.time_enabled = bswap_64(event->read.time_enabled); event->read.time_running = bswap_64(event->read.time_running); event->read.id = bswap_64(event->read.id); + + if (sample_id_all) + swap_sample_id_all(event, &event->read + 1); } static u8 revbyte(u8 b) @@ -530,7 +574,8 @@ void perf_event__attr_swap(struct perf_event_attr *attr) swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); } -static void perf_event__hdr_attr_swap(union perf_event *event) +static void perf_event__hdr_attr_swap(union perf_event *event, + bool sample_id_all __used) { size_t size; @@ -541,18 +586,21 @@ static void perf_event__hdr_attr_swap(union perf_event *event) mem_bswap_64(event->attr.id, size); } -static void perf_event__event_type_swap(union perf_event *event) +static void perf_event__event_type_swap(union perf_event *event, + bool sample_id_all __used) { event->event_type.event_type.event_id = bswap_64(event->event_type.event_type.event_id); } -static void perf_event__tracing_data_swap(union perf_event *event) +static void perf_event__tracing_data_swap(union perf_event *event, + bool sample_id_all __used) { event->tracing_data.size = bswap_32(event->tracing_data.size); } -typedef void (*perf_event__swap_op)(union perf_event *event); +typedef void (*perf_event__swap_op)(union perf_event *event, + bool sample_id_all); static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_MMAP] = perf_event__mmap_swap, @@ -986,6 +1034,15 @@ static int perf_session__process_user_event(struct perf_session *session, union } } +static void event_swap(union perf_event *event, bool sample_id_all) +{ + perf_event__swap_op swap; + + swap = perf_event__swap_ops[event->header.type]; + if (swap) + swap(event, sample_id_all); +} + static int perf_session__process_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool, @@ -994,9 +1051,8 @@ static int perf_session__process_event(struct perf_session *session, struct perf_sample sample; int ret; - if (session->header.needs_swap && - perf_event__swap_ops[event->header.type]) - perf_event__swap_ops[event->header.type](event); + if (session->header.needs_swap) + event_swap(event, session->sample_id_all); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; @@ -1428,7 +1484,6 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, int print_sym, int print_dso, int print_symoffset) { struct addr_location al; - struct callchain_cursor *cursor = &evsel->hists.callchain_cursor; struct callchain_cursor_node *node; if (perf_event__preprocess_sample(event, machine, &al, sample, @@ -1446,10 +1501,10 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, error("Failed to resolve callchain. Skipping\n"); return; } - callchain_cursor_commit(cursor); + callchain_cursor_commit(&callchain_cursor); while (1) { - node = callchain_cursor_current(cursor); + node = callchain_cursor_current(&callchain_cursor); if (!node) break; @@ -1460,12 +1515,12 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample, } if (print_dso) { printf(" ("); - map__fprintf_dsoname(al.map, stdout); + map__fprintf_dsoname(node->map, stdout); printf(")"); } printf("\n"); - callchain_cursor_advance(cursor); + callchain_cursor_advance(&callchain_cursor); } } else { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e2ba885..3e2e5ea 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -323,6 +323,7 @@ struct dso *dso__new(const char *name) dso->sorted_by_name = 0; dso->has_build_id = 0; dso->kernel = DSO_TYPE_USER; + dso->needs_swap = DSO_SWAP__UNSET; INIT_LIST_HEAD(&dso->node); } @@ -1156,6 +1157,33 @@ static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr) return -1; } +static int dso__swap_init(struct dso *dso, unsigned char eidata) +{ + static unsigned int const endian = 1; + + dso->needs_swap = DSO_SWAP__NO; + + switch (eidata) { + case ELFDATA2LSB: + /* We are big endian, DSO is little endian. */ + if (*(unsigned char const *)&endian != 1) + dso->needs_swap = DSO_SWAP__YES; + break; + + case ELFDATA2MSB: + /* We are little endian, DSO is big endian. */ + if (*(unsigned char const *)&endian != 0) + dso->needs_swap = DSO_SWAP__YES; + break; + + default: + pr_err("unrecognized DSO data encoding %d\n", eidata); + return -EINVAL; + } + + return 0; +} + static int dso__load_sym(struct dso *dso, struct map *map, const char *name, int fd, symbol_filter_t filter, int kmodule, int want_symtab) @@ -1187,6 +1215,9 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, goto out_elf_end; } + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + goto out_elf_end; + /* Always reject images with a mismatched build-id: */ if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; @@ -1272,7 +1303,7 @@ static int dso__load_sym(struct dso *dso, struct map *map, const char *name, if (opdsec && sym.st_shndx == opdidx) { u32 offset = sym.st_value - opdshdr.sh_addr; u64 *opd = opddata->d_buf + offset; - sym.st_value = *opd; + sym.st_value = DSO__SWAP(dso, u64, *opd); sym.st_shndx = elf_addr_to_index(elf, sym.st_value); } @@ -2786,8 +2817,11 @@ int machine__load_vmlinux_path(struct machine *machine, enum map_type type, struct map *dso__new_map(const char *name) { + struct map *map = NULL; struct dso *dso = dso__new(name); - struct map *map = map__new2(0, dso, MAP__FUNCTION); + + if (dso) + map = map__new2(0, dso, MAP__FUNCTION); return map; } diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 5649d63..af0752b 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -9,6 +9,7 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <stdio.h> +#include <byteswap.h> #ifdef HAVE_CPLUS_DEMANGLE extern char *cplus_demangle(const char *, int); @@ -160,11 +161,18 @@ enum dso_kernel_type { DSO_TYPE_GUEST_KERNEL }; +enum dso_swap_type { + DSO_SWAP__UNSET, + DSO_SWAP__NO, + DSO_SWAP__YES, +}; + struct dso { struct list_head node; struct rb_root symbols[MAP__NR_TYPES]; struct rb_root symbol_names[MAP__NR_TYPES]; enum dso_kernel_type kernel; + enum dso_swap_type needs_swap; u8 adjust_symbols:1; u8 has_build_id:1; u8 hit:1; @@ -182,6 +190,28 @@ struct dso { char name[0]; }; +#define DSO__SWAP(dso, type, val) \ +({ \ + type ____r = val; \ + BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \ + if (dso->needs_swap == DSO_SWAP__YES) { \ + switch (sizeof(____r)) { \ + case 2: \ + ____r = bswap_16(val); \ + break; \ + case 4: \ + ____r = bswap_32(val); \ + break; \ + case 8: \ + ____r = bswap_64(val); \ + break; \ + default: \ + BUG_ON(1); \ + } \ + } \ + ____r; \ +}) + struct dso *dso__new(const char *name); void dso__delete(struct dso *dso); diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ab2f682..16de7ad 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -73,8 +73,8 @@ int backwards_count; char *progname; int num_cpus; -cpu_set_t *cpu_mask; -size_t cpu_mask_size; +cpu_set_t *cpu_present_set, *cpu_mask; +size_t cpu_present_setsize, cpu_mask_size; struct counters { unsigned long long tsc; /* per thread */ @@ -103,6 +103,12 @@ struct timeval tv_even; struct timeval tv_odd; struct timeval tv_delta; +int mark_cpu_present(int pkg, int core, int cpu) +{ + CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); + return 0; +} + /* * cpu_mask_init(ncpus) * @@ -118,6 +124,18 @@ void cpu_mask_init(int ncpus) } cpu_mask_size = CPU_ALLOC_SIZE(ncpus); CPU_ZERO_S(cpu_mask_size, cpu_mask); + + /* + * Allocate and initialize cpu_present_set + */ + cpu_present_set = CPU_ALLOC(ncpus); + if (cpu_present_set == NULL) { + perror("CPU_ALLOC"); + exit(3); + } + cpu_present_setsize = CPU_ALLOC_SIZE(ncpus); + CPU_ZERO_S(cpu_present_setsize, cpu_present_set); + for_all_cpus(mark_cpu_present); } void cpu_mask_uninit() @@ -125,6 +143,9 @@ void cpu_mask_uninit() CPU_FREE(cpu_mask); cpu_mask = NULL; cpu_mask_size = 0; + CPU_FREE(cpu_present_set); + cpu_present_set = NULL; + cpu_present_setsize = 0; } int cpu_migrate(int cpu) @@ -912,6 +933,8 @@ int is_snb(unsigned int family, unsigned int model) switch (model) { case 0x2A: case 0x2D: + case 0x3A: /* IVB */ + case 0x3D: /* IVB Xeon */ return 1; } return 0; @@ -1047,6 +1070,9 @@ int fork_it(char **argv) int retval; pid_t child_pid; get_counters(cnt_even); + + /* clear affinity side-effect of get_counters() */ + sched_setaffinity(0, cpu_present_setsize, cpu_present_set); gettimeofday(&tv_even, (struct timezone *)NULL); child_pid = fork(); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a6a0365..5afb431 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -332,6 +332,7 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, */ hlist_for_each_entry(ei, n, &rt->map[ue->gsi], link) if (ei->type == KVM_IRQ_ROUTING_MSI || + ue->type == KVM_IRQ_ROUTING_MSI || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return r; |