summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-block-rssd21
-rw-r--r--Documentation/device-mapper/verity.txt131
-rw-r--r--Documentation/prctl/no_new_privs.txt50
-rw-r--r--Documentation/stable_kernel_rules.txt6
-rw-r--r--MAINTAINERS4
-rw-r--r--Makefile2
-rw-r--r--arch/arm/kernel/vmlinux.lds.S2
-rw-r--r--arch/arm/mach-exynos/Kconfig8
-rw-r--r--arch/arm/mach-imx/clk-imx6q.c6
-rw-r--r--arch/arm/mach-omap2/board-flash.c5
-rw-r--r--arch/arm/mach-omap2/clock44xx_data.c5
-rw-r--r--arch/arm/mach-shmobile/board-armadillo800eva.c1
-rw-r--r--arch/arm/mach-shmobile/board-kzm9d.c1
-rw-r--r--arch/arm/mach-shmobile/board-kzm9g.c1
-rw-r--r--arch/arm/mach-shmobile/board-mackerel.c3
-rw-r--r--arch/arm/mach-shmobile/clock-sh73a0.c8
-rw-r--r--arch/arm/mach-shmobile/intc-r8a7779.c7
-rw-r--r--arch/arm/mach-shmobile/platsmp.c5
-rw-r--r--arch/arm/mach-shmobile/setup-sh7372.c2
-rw-r--r--arch/arm/mm/mmu.c74
-rw-r--r--arch/arm/plat-samsung/include/plat/map-s3c.h2
-rw-r--r--arch/arm/plat-samsung/include/plat/watchdog-reset.h2
-rw-r--r--arch/powerpc/include/asm/hw_irq.h5
-rw-r--r--arch/powerpc/kernel/entry_64.S97
-rw-r--r--arch/powerpc/kernel/irq.c2
-rw-r--r--arch/powerpc/kernel/prom_init.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S2
-rw-r--r--arch/powerpc/mm/numa.c2
-rw-r--r--arch/powerpc/net/bpf_jit_64.S2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c4
-rw-r--r--arch/powerpc/platforms/pseries/processor_idle.c2
-rw-r--r--arch/powerpc/xmon/xmon.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c27
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl25
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/kgdb.c8
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/lib/csum-wrappers_64.c2
-rw-r--r--block/blk-cgroup.c9
-rw-r--r--block/blk-core.c25
-rw-r--r--block/blk-timeout.c41
-rw-r--r--block/cfq-iosched.c30
-rw-r--r--block/scsi_ioctl.c5
-rw-r--r--drivers/acpi/acpi_pad.c7
-rw-r--r--drivers/acpi/apei/apei-base.c17
-rw-r--r--drivers/acpi/apei/apei-internal.h9
-rw-r--r--drivers/acpi/apei/ghes.c6
-rw-r--r--drivers/acpi/processor_idle.c32
-rw-r--r--drivers/acpi/sysfs.c4
-rw-r--r--drivers/acpi/video.c2
-rw-r--r--drivers/base/power/main.c6
-rw-r--r--drivers/block/drbd/drbd_bitmap.c11
-rw-r--r--drivers/block/drbd/drbd_req.c66
-rw-r--r--drivers/block/floppy.c1
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c166
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h5
-rw-r--r--drivers/block/umem.c40
-rw-r--r--drivers/block/xen-blkback/common.h2
-rw-r--r--drivers/block/xen-blkfront.c58
-rw-r--r--drivers/clk/clk.c28
-rw-r--r--drivers/gpu/drm/drm_edid.c27
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c37
-rw-r--r--drivers/gpu/drm/radeon/radeon_gart.c13
-rw-r--r--drivers/gpu/drm/radeon/radeon_gem.c10
-rw-r--r--drivers/gpu/drm/radeon/si.c4
-rw-r--r--drivers/hwmon/coretemp.c4
-rw-r--r--drivers/ieee802154/at86rf230.c3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c5
-rw-r--r--drivers/infiniband/hw/mlx4/main.c62
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h1
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c1
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c35
-rw-r--r--drivers/md/dm-thin.c7
-rw-r--r--drivers/md/md.c8
-rw-r--r--drivers/md/multipath.c3
-rw-r--r--drivers/md/persistent-data/dm-space-map-checker.c54
-rw-r--r--drivers/md/persistent-data/dm-space-map-disk.c11
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c11
-rw-r--r--drivers/md/raid1.c13
-rw-r--r--drivers/md/raid10.c26
-rw-r--r--drivers/md/raid5.c67
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h3
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c28
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/l2t.c6
-rw-r--r--drivers/net/ethernet/chelsio/cxgb3/l2t.h2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c4
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c10
-rw-r--r--drivers/net/ethernet/intel/e1000e/defines.h1
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c75
-rw-r--r--drivers/net/ethernet/intel/igbvf/ethtool.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c382
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c316
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_rx.c30
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c91
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c59
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mcg.c524
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h29
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4_en.h28
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/port.c107
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/profile.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c278
-rw-r--r--drivers/net/ethernet/myricom/myri10ge/myri10ge.c2
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-main.c3
-rw-r--r--drivers/net/ethernet/qlogic/qlge/qlge_main.c2
-rw-r--r--drivers/net/ethernet/rdc/r6040.c1
-rw-r--r--drivers/net/usb/asix.c28
-rw-r--r--drivers/oprofile/oprofile_perf.c2
-rw-r--r--drivers/s390/net/qeth_l3_main.c3
-rw-r--r--drivers/scsi/cxgbi/cxgb3i/cxgb3i.c3
-rw-r--r--drivers/scsi/cxgbi/cxgb4i/cxgb4i.c5
-rw-r--r--drivers/scsi/cxgbi/libcxgbi.c12
-rw-r--r--fs/splice.c35
-rw-r--r--include/linux/blkdev.h1
-rw-r--r--include/linux/irq.h2
-rw-r--r--include/linux/mlx4/cmd.h4
-rw-r--r--include/linux/mlx4/device.h135
-rw-r--r--include/linux/netdevice.h3
-rw-r--r--include/linux/netfilter/nfnetlink_conntrack.h38
-rw-r--r--include/linux/netfilter/nfnetlink_queue.h1
-rw-r--r--include/linux/splice.h8
-rw-r--r--include/net/arp.h28
-rw-r--r--include/net/dn_route.h2
-rw-r--r--include/net/dst.h54
-rw-r--r--include/net/dst_ops.h4
-rw-r--r--include/net/ip6_fib.h2
-rw-r--r--include/net/ip_fib.h36
-rw-r--r--include/net/neighbour.h26
-rw-r--r--include/net/netevent.h4
-rw-r--r--include/net/netfilter/nf_conntrack_l4proto.h13
-rw-r--r--include/net/netns/ipv4.h8
-rw-r--r--include/net/sctp/structs.h4
-rw-r--r--include/net/sctp/tsnmap.h3
-rw-r--r--kernel/printk.c299
-rw-r--r--kernel/rcutree.c14
-rw-r--r--kernel/relay.c5
-rw-r--r--kernel/trace/trace.c6
-rw-r--r--mm/shmem.c3
-rw-r--r--net/bridge/br_netfilter.c40
-rw-r--r--net/core/dev.c11
-rw-r--r--net/core/dst.c21
-rw-r--r--net/core/neighbour.c31
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/dccp/ipv6.c21
-rw-r--r--net/decnet/dn_neigh.c2
-rw-r--r--net/decnet/dn_route.c44
-rw-r--r--net/ieee802154/6lowpan.c16
-rw-r--r--net/ipv4/fib_frontend.c27
-rw-r--r--net/ipv4/fib_rules.c12
-rw-r--r--net/ipv4/fib_semantics.c6
-rw-r--r--net/ipv4/ip_options.c20
-rw-r--r--net/ipv4/ip_output.c14
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c47
-rw-r--r--net/ipv4/route.c92
-rw-r--r--net/ipv4/tcp_input.c19
-rw-r--r--net/ipv6/ip6_output.c10
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c23
-rw-r--r--net/ipv6/route.c63
-rw-r--r--net/ipv6/syncookies.c3
-rw-r--r--net/ipv6/tcp_ipv6.c16
-rw-r--r--net/ipv6/xfrm6_policy.c1
-rw-r--r--net/netfilter/ipset/ip_set_core.c12
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c32
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c14
-rw-r--r--net/netfilter/nf_conntrack_netlink.c227
-rw-r--r--net/netfilter/nf_conntrack_proto.c139
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c56
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c45
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c65
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c74
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c66
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c43
-rw-r--r--net/netfilter/nfnetlink.c8
-rw-r--r--net/netfilter/nfnetlink_queue_core.c5
-rw-r--r--net/sched/sch_teql.c47
-rw-r--r--net/sctp/associola.c1
-rw-r--r--net/sctp/output.c5
-rw-r--r--net/sctp/sm_make_chunk.c16
-rw-r--r--net/sctp/sm_sideeffect.c2
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/sctp/tsnmap.c6
-rw-r--r--net/sctp/ulpevent.c3
-rw-r--r--net/sctp/ulpqueue.c2
-rw-r--r--net/sunrpc/xprtsock.c3
-rw-r--r--net/xfrm/xfrm_policy.c9
-rw-r--r--security/security.c1
-rw-r--r--sound/pci/hda/patch_realtek.c28
-rw-r--r--sound/soc/codecs/tlv320aic3x.c4
-rw-r--r--sound/soc/codecs/tlv320aic3x.h1
-rw-r--r--sound/soc/codecs/wm2200.c1
198 files changed, 4241 insertions, 1579 deletions
diff --git a/Documentation/ABI/testing/sysfs-block-rssd b/Documentation/ABI/testing/sysfs-block-rssd
index 679ce35..beef30c 100644
--- a/Documentation/ABI/testing/sysfs-block-rssd
+++ b/Documentation/ABI/testing/sysfs-block-rssd
@@ -1,26 +1,5 @@
-What: /sys/block/rssd*/registers
-Date: March 2012
-KernelVersion: 3.3
-Contact: Asai Thambi S P <asamymuthupa@micron.com>
-Description: This is a read-only file. Dumps below driver information and
- hardware registers.
- - S ACTive
- - Command Issue
- - Completed
- - PORT IRQ STAT
- - HOST IRQ STAT
- - Allocated
- - Commands in Q
-
What: /sys/block/rssd*/status
Date: April 2012
KernelVersion: 3.4
Contact: Asai Thambi S P <asamymuthupa@micron.com>
Description: This is a read-only file. Indicates the status of the device.
-
-What: /sys/block/rssd*/flags
-Date: May 2012
-KernelVersion: 3.5
-Contact: Asai Thambi S P <asamymuthupa@micron.com>
-Description: This is a read-only file. Dumps the flags in port and driver
- data structure
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
index 32e4879..9884681 100644
--- a/Documentation/device-mapper/verity.txt
+++ b/Documentation/device-mapper/verity.txt
@@ -7,39 +7,39 @@ This target is read-only.
Construction Parameters
=======================
- <version> <dev> <hash_dev> <hash_start>
+ <version> <dev> <hash_dev>
<data_block_size> <hash_block_size>
<num_data_blocks> <hash_start_block>
<algorithm> <digest> <salt>
<version>
- This is the version number of the on-disk format.
+ This is the type of the on-disk hash format.
0 is the original format used in the Chromium OS.
- The salt is appended when hashing, digests are stored continuously and
- the rest of the block is padded with zeros.
+ The salt is appended when hashing, digests are stored continuously and
+ the rest of the block is padded with zeros.
1 is the current format that should be used for new devices.
- The salt is prepended when hashing and each digest is
- padded with zeros to the power of two.
+ The salt is prepended when hashing and each digest is
+ padded with zeros to the power of two.
<dev>
- This is the device containing the data the integrity of which needs to be
+ This is the device containing data, the integrity of which needs to be
checked. It may be specified as a path, like /dev/sdaX, or a device number,
<major>:<minor>.
<hash_dev>
- This is the device that that supplies the hash tree data. It may be
+ This is the device that supplies the hash tree data. It may be
specified similarly to the device path and may be the same device. If the
- same device is used, the hash_start should be outside of the dm-verity
- configured device size.
+ same device is used, the hash_start should be outside the configured
+ dm-verity device.
<data_block_size>
- The block size on a data device. Each block corresponds to one digest on
- the hash device.
+ The block size on a data device in bytes.
+ Each block corresponds to one digest on the hash device.
<hash_block_size>
- The size of a hash block.
+ The size of a hash block in bytes.
<num_data_blocks>
The number of data blocks on the data device. Additional blocks are
@@ -65,7 +65,7 @@ Construction Parameters
Theory of operation
===================
-dm-verity is meant to be setup as part of a verified boot path. This
+dm-verity is meant to be set up as part of a verified boot path. This
may be anything ranging from a boot using tboot or trustedgrub to just
booting from a known-good device (like a USB drive or CD).
@@ -73,20 +73,20 @@ When a dm-verity device is configured, it is expected that the caller
has been authenticated in some way (cryptographic signatures, etc).
After instantiation, all hashes will be verified on-demand during
disk access. If they cannot be verified up to the root node of the
-tree, the root hash, then the I/O will fail. This should identify
+tree, the root hash, then the I/O will fail. This should detect
tampering with any data on the device and the hash data.
Cryptographic hashes are used to assert the integrity of the device on a
-per-block basis. This allows for a lightweight hash computation on first read
-into the page cache. Block hashes are stored linearly-aligned to the nearest
-block the size of a page.
+per-block basis. This allows for a lightweight hash computation on first read
+into the page cache. Block hashes are stored linearly, aligned to the nearest
+block size.
Hash Tree
---------
Each node in the tree is a cryptographic hash. If it is a leaf node, the hash
-is of some block data on disk. If it is an intermediary node, then the hash is
-of a number of child nodes.
+of some data block on disk is calculated. If it is an intermediary node,
+the hash of a number of child nodes is calculated.
Each entry in the tree is a collection of neighboring nodes that fit in one
block. The number is determined based on block_size and the size of the
@@ -110,63 +110,23 @@ alg = sha256, num_blocks = 32768, block_size = 4096
On-disk format
==============
-Below is the recommended on-disk format. The verity kernel code does not
-read the on-disk header. It only reads the hash blocks which directly
-follow the header. It is expected that a user-space tool will verify the
-integrity of the verity_header and then call dmsetup with the correct
-parameters. Alternatively, the header can be omitted and the dmsetup
-parameters can be passed via the kernel command-line in a rooted chain
-of trust where the command-line is verified.
+The verity kernel code does not read the verity metadata on-disk header.
+It only reads the hash blocks which directly follow the header.
+It is expected that a user-space tool will verify the integrity of the
+verity header.
-The on-disk format is especially useful in cases where the hash blocks
-are on a separate partition. The magic number allows easy identification
-of the partition contents. Alternatively, the hash blocks can be stored
-in the same partition as the data to be verified. In such a configuration
-the filesystem on the partition would be sized a little smaller than
-the full-partition, leaving room for the hash blocks.
-
-struct superblock {
- uint8_t signature[8]
- "verity\0\0";
-
- uint8_t version;
- 1 - current format
-
- uint8_t data_block_bits;
- log2(data block size)
-
- uint8_t hash_block_bits;
- log2(hash block size)
-
- uint8_t pad1[1];
- zero padding
-
- uint16_t salt_size;
- big-endian salt size
-
- uint8_t pad2[2];
- zero padding
-
- uint32_t data_blocks_hi;
- big-endian high 32 bits of the 64-bit number of data blocks
-
- uint32_t data_blocks_lo;
- big-endian low 32 bits of the 64-bit number of data blocks
-
- uint8_t algorithm[16];
- cryptographic algorithm
-
- uint8_t salt[384];
- salt (the salt size is specified above)
-
- uint8_t pad3[88];
- zero padding to 512-byte boundary
-}
+Alternatively, the header can be omitted and the dmsetup parameters can
+be passed via the kernel command-line in a rooted chain of trust where
+the command-line is verified.
Directly following the header (and with sector number padded to the next hash
block boundary) are the hash blocks which are stored a depth at a time
(starting from the root), sorted in order of increasing index.
+The full specification of kernel parameters and on-disk metadata format
+is available at the cryptsetup project's wiki page
+ http://code.google.com/p/cryptsetup/wiki/DMVerity
+
Status
======
V (for Valid) is returned if every check performed so far was valid.
@@ -174,21 +134,22 @@ If any check failed, C (for Corruption) is returned.
Example
=======
-
-Setup a device:
- dmsetup create vroot --table \
- "0 2097152 "\
- "verity 1 /dev/sda1 /dev/sda2 4096 4096 2097152 1 "\
+Set up a device:
+ # dmsetup create vroot --readonly --table \
+ "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\
"4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
"1234000000000000000000000000000000000000000000000000000000000000"
A command line tool veritysetup is available to compute or verify
-the hash tree or activate the kernel driver. This is available from
-the LVM2 upstream repository and may be supplied as a package called
-device-mapper-verity-tools:
- git://sources.redhat.com/git/lvm2
- http://sourceware.org/git/?p=lvm2.git
- http://sourceware.org/cgi-bin/cvsweb.cgi/LVM2/verity?cvsroot=lvm2
-
-veritysetup -a vroot /dev/sda1 /dev/sda2 \
- 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
+the hash tree or activate the kernel device. This is available from
+the cryptsetup upstream repository http://code.google.com/p/cryptsetup/
+(as a libcryptsetup extension).
+
+Create hash on the device:
+ # veritysetup format /dev/sda1 /dev/sda2
+ ...
+ Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
+
+Activate the device:
+ # veritysetup create vroot /dev/sda1 /dev/sda2 \
+ 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/prctl/no_new_privs.txt b/Documentation/prctl/no_new_privs.txt
new file mode 100644
index 0000000..cb705ec
--- /dev/null
+++ b/Documentation/prctl/no_new_privs.txt
@@ -0,0 +1,50 @@
+The execve system call can grant a newly-started program privileges that
+its parent did not have. The most obvious examples are setuid/setgid
+programs and file capabilities. To prevent the parent program from
+gaining these privileges as well, the kernel and user code must be
+careful to prevent the parent from doing anything that could subvert the
+child. For example:
+
+ - The dynamic loader handles LD_* environment variables differently if
+ a program is setuid.
+
+ - chroot is disallowed to unprivileged processes, since it would allow
+ /etc/passwd to be replaced from the point of view of a process that
+ inherited chroot.
+
+ - The exec code has special handling for ptrace.
+
+These are all ad-hoc fixes. The no_new_privs bit (since Linux 3.5) is a
+new, generic mechanism to make it safe for a process to modify its
+execution environment in a manner that persists across execve. Any task
+can set no_new_privs. Once the bit is set, it is inherited across fork,
+clone, and execve and cannot be unset. With no_new_privs set, execve
+promises not to grant the privilege to do anything that could not have
+been done without the execve call. For example, the setuid and setgid
+bits will no longer change the uid or gid; file capabilities will not
+add to the permitted set, and LSMs will not relax constraints after
+execve.
+
+Note that no_new_privs does not prevent privilege changes that do not
+involve execve. An appropriately privileged task can still call
+setuid(2) and receive SCM_RIGHTS datagrams.
+
+There are two main use cases for no_new_privs so far:
+
+ - Filters installed for the seccomp mode 2 sandbox persist across
+ execve and can change the behavior of newly-executed programs.
+ Unprivileged users are therefore only allowed to install such filters
+ if no_new_privs is set.
+
+ - By itself, no_new_privs can be used to reduce the attack surface
+ available to an unprivileged user. If everything running with a
+ given uid has no_new_privs set, then that uid will be unable to
+ escalate its privileges by directly attacking setuid, setgid, and
+ fcap-using binaries; it will need to compromise something without the
+ no_new_privs bit set first.
+
+In the future, other potentially dangerous kernel features could become
+available to unprivileged tasks if no_new_privs is set. In principle,
+several options to unshare(2) and clone(2) would be safe when
+no_new_privs is set, and no_new_privs + chroot is considerable less
+dangerous than chroot by itself.
diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt
index f0ab5cf..4a7b54b 100644
--- a/Documentation/stable_kernel_rules.txt
+++ b/Documentation/stable_kernel_rules.txt
@@ -12,6 +12,12 @@ Rules on what kind of patches are accepted, and which ones are not, into the
marked CONFIG_BROKEN), an oops, a hang, data corruption, a real
security issue, or some "oh, that's not good" issue. In short, something
critical.
+ - Serious issues as reported by a user of a distribution kernel may also
+ be considered if they fix a notable performance or interactivity issue.
+ As these fixes are not as obvious and have a higher risk of a subtle
+ regression they should only be submitted by a distribution kernel
+ maintainer and include an addendum linking to a bugzilla entry if it
+ exists and additional information on the user-visible impact.
- New device IDs and quirks are also accepted.
- No "theoretical race condition" issues, unless an explanation of how the
race can be exploited is also provided.
diff --git a/MAINTAINERS b/MAINTAINERS
index d350dae..8da1373 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4655,8 +4655,8 @@ L: netfilter@vger.kernel.org
L: coreteam@netfilter.org
W: http://www.netfilter.org/
W: http://www.iptables.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-2.6.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next-2.6.git
+T: git git://1984.lsi.us.es/nf
+T: git git://1984.lsi.us.es/nf-next
S: Supported
F: include/linux/netfilter*
F: include/linux/netfilter/
diff --git a/Makefile b/Makefile
index 3fdfde2..81ea154 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 3
PATCHLEVEL = 5
SUBLEVEL = 0
-EXTRAVERSION = -rc4
+EXTRAVERSION = -rc5
NAME = Saber-toothed Squirrel
# *DOCUMENTATION*
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 43a31fb..36ff15b 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -183,7 +183,9 @@ SECTIONS
}
#endif
+#ifdef CONFIG_SMP
PERCPU_SECTION(L1_CACHE_BYTES)
+#endif
#ifdef CONFIG_XIP_KERNEL
__data_loc = ALIGN(4); /* location in binary */
diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index 573be57..6f6d13f 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig
@@ -212,7 +212,7 @@ config MACH_SMDKV310
select EXYNOS_DEV_SYSMMU
select EXYNOS4_DEV_AHCI
select SAMSUNG_DEV_KEYPAD
- select EXYNOS4_DEV_DMA
+ select EXYNOS_DEV_DMA
select SAMSUNG_DEV_PWM
select EXYNOS4_DEV_USB_OHCI
select EXYNOS4_SETUP_FIMD0
@@ -264,7 +264,7 @@ config MACH_UNIVERSAL_C210
select S5P_DEV_ONENAND
select S5P_DEV_TV
select EXYNOS_DEV_SYSMMU
- select EXYNOS4_DEV_DMA
+ select EXYNOS_DEV_DMA
select EXYNOS_DEV_DRM
select EXYNOS4_SETUP_FIMD0
select EXYNOS4_SETUP_I2C1
@@ -303,7 +303,7 @@ config MACH_NURI
select S5P_DEV_MFC
select S5P_DEV_USB_EHCI
select S5P_SETUP_MIPIPHY
- select EXYNOS4_DEV_DMA
+ select EXYNOS_DEV_DMA
select EXYNOS_DEV_DRM
select EXYNOS4_SETUP_FIMC
select EXYNOS4_SETUP_FIMD0
@@ -341,7 +341,7 @@ config MACH_ORIGEN
select SAMSUNG_DEV_PWM
select EXYNOS_DEV_DRM
select EXYNOS_DEV_SYSMMU
- select EXYNOS4_DEV_DMA
+ select EXYNOS_DEV_DMA
select EXYNOS4_DEV_USB_OHCI
select EXYNOS4_SETUP_FIMD0
select EXYNOS4_SETUP_SDHCI
diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c
index 17dc66a..e1a17ac 100644
--- a/arch/arm/mach-imx/clk-imx6q.c
+++ b/arch/arm/mach-imx/clk-imx6q.c
@@ -152,13 +152,14 @@ enum mx6q_clks {
ssi2, ssi3, uart_ipg, uart_serial, usboh3, usdhc1, usdhc2, usdhc3,
usdhc4, vdo_axi, vpu_axi, cko1, pll1_sys, pll2_bus, pll3_usb_otg,
pll4_audio, pll5_video, pll6_mlb, pll7_usb_host, pll8_enet, ssi1_ipg,
- ssi2_ipg, ssi3_ipg, clk_max
+ ssi2_ipg, ssi3_ipg, rom,
+ clk_max
};
static struct clk *clk[clk_max];
static enum mx6q_clks const clks_init_on[] __initconst = {
- mmdc_ch0_axi, mmdc_ch1_axi,
+ mmdc_ch0_axi, rom,
};
int __init mx6q_clocks_init(void)
@@ -364,6 +365,7 @@ int __init mx6q_clocks_init(void)
clk[gpmi_bch] = imx_clk_gate2("gpmi_bch", "usdhc4", base + 0x78, 26);
clk[gpmi_io] = imx_clk_gate2("gpmi_io", "enfc", base + 0x78, 28);
clk[gpmi_apb] = imx_clk_gate2("gpmi_apb", "usdhc3", base + 0x78, 30);
+ clk[rom] = imx_clk_gate2("rom", "ahb", base + 0x7c, 0);
clk[sata] = imx_clk_gate2("sata", "ipg", base + 0x7c, 4);
clk[sdma] = imx_clk_gate2("sdma", "ahb", base + 0x7c, 6);
clk[spba] = imx_clk_gate2("spba", "ipg", base + 0x7c, 12);
diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c
index 70a81f9..53c39d2 100644
--- a/arch/arm/mach-omap2/board-flash.c
+++ b/arch/arm/mach-omap2/board-flash.c
@@ -97,11 +97,6 @@ __init board_onenand_init(struct mtd_partition *onenand_parts,
gpmc_onenand_init(&board_onenand_data);
}
-#else
-void
-__init board_onenand_init(struct mtd_partition *nor_parts, u8 nr_parts, u8 cs)
-{
-}
#endif /* CONFIG_MTD_ONENAND_OMAP2 || CONFIG_MTD_ONENAND_OMAP2_MODULE */
#if defined(CONFIG_MTD_NAND_OMAP2) || \
diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c
index e2b701e..ba6f9a0 100644
--- a/arch/arm/mach-omap2/clock44xx_data.c
+++ b/arch/arm/mach-omap2/clock44xx_data.c
@@ -3417,9 +3417,12 @@ int __init omap4xxx_clk_init(void)
if (cpu_is_omap443x()) {
cpu_mask = RATE_IN_4430;
cpu_clkflg = CK_443X;
- } else if (cpu_is_omap446x()) {
+ } else if (cpu_is_omap446x() || cpu_is_omap447x()) {
cpu_mask = RATE_IN_4460 | RATE_IN_4430;
cpu_clkflg = CK_446X | CK_443X;
+
+ if (cpu_is_omap447x())
+ pr_warn("WARNING: OMAP4470 clock data incomplete!\n");
} else {
return 0;
}
diff --git a/arch/arm/mach-shmobile/board-armadillo800eva.c b/arch/arm/mach-shmobile/board-armadillo800eva.c
index 9e37026..9bd1355 100644
--- a/arch/arm/mach-shmobile/board-armadillo800eva.c
+++ b/arch/arm/mach-shmobile/board-armadillo800eva.c
@@ -779,6 +779,7 @@ DT_MACHINE_START(ARMADILLO800EVA_DT, "armadillo800eva")
.init_irq = r8a7740_init_irq,
.handle_irq = shmobile_handle_irq_intc,
.init_machine = eva_init,
+ .init_late = shmobile_init_late,
.timer = &shmobile_timer,
.dt_compat = eva_boards_compat_dt,
MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-kzm9d.c b/arch/arm/mach-shmobile/board-kzm9d.c
index 7bc5e7d..6a33cf3 100644
--- a/arch/arm/mach-shmobile/board-kzm9d.c
+++ b/arch/arm/mach-shmobile/board-kzm9d.c
@@ -80,6 +80,7 @@ DT_MACHINE_START(KZM9D_DT, "kzm9d")
.init_irq = emev2_init_irq,
.handle_irq = gic_handle_irq,
.init_machine = kzm9d_add_standard_devices,
+ .init_late = shmobile_init_late,
.timer = &shmobile_timer,
.dt_compat = kzm9d_boards_compat_dt,
MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-kzm9g.c b/arch/arm/mach-shmobile/board-kzm9g.c
index d8e33b6..c0ae815 100644
--- a/arch/arm/mach-shmobile/board-kzm9g.c
+++ b/arch/arm/mach-shmobile/board-kzm9g.c
@@ -455,6 +455,7 @@ DT_MACHINE_START(KZM9G_DT, "kzm9g")
.init_irq = sh73a0_init_irq,
.handle_irq = gic_handle_irq,
.init_machine = kzm_init,
+ .init_late = shmobile_init_late,
.timer = &shmobile_timer,
.dt_compat = kzm9g_boards_compat_dt,
MACHINE_END
diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c
index b577f7c..150122a 100644
--- a/arch/arm/mach-shmobile/board-mackerel.c
+++ b/arch/arm/mach-shmobile/board-mackerel.c
@@ -1512,6 +1512,9 @@ static void __init mackerel_init(void)
gpio_request(GPIO_FN_SDHID0_1, NULL);
gpio_request(GPIO_FN_SDHID0_0, NULL);
+ /* SDHI0 PORT172 card-detect IRQ26 */
+ gpio_request(GPIO_FN_IRQ26_172, NULL);
+
#if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE)
/* enable SDHI1 */
gpio_request(GPIO_FN_SDHICMD1, NULL);
diff --git a/arch/arm/mach-shmobile/clock-sh73a0.c b/arch/arm/mach-shmobile/clock-sh73a0.c
index 472d1f5..3946c4b 100644
--- a/arch/arm/mach-shmobile/clock-sh73a0.c
+++ b/arch/arm/mach-shmobile/clock-sh73a0.c
@@ -475,9 +475,9 @@ static struct clk *late_main_clks[] = {
enum { MSTP001,
MSTP129, MSTP128, MSTP127, MSTP126, MSTP125, MSTP118, MSTP116, MSTP100,
- MSTP219,
+ MSTP219, MSTP218,
MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200,
- MSTP331, MSTP329, MSTP325, MSTP323, MSTP318,
+ MSTP331, MSTP329, MSTP325, MSTP323,
MSTP314, MSTP313, MSTP312, MSTP311,
MSTP303, MSTP302, MSTP301, MSTP300,
MSTP411, MSTP410, MSTP403,
@@ -497,6 +497,7 @@ static struct clk mstp_clks[MSTP_NR] = {
[MSTP116] = MSTP(&div4_clks[DIV4_HP], SMSTPCR1, 16, 0), /* IIC0 */
[MSTP100] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */
[MSTP219] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 19, 0), /* SCIFA7 */
+ [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* SY-DMAC */
[MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */
[MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */
[MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */
@@ -508,7 +509,6 @@ static struct clk mstp_clks[MSTP_NR] = {
[MSTP329] = MSTP(&r_clk, SMSTPCR3, 29, 0), /* CMT10 */
[MSTP325] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 25, 0), /* IrDA */
[MSTP323] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 23, 0), /* IIC1 */
- [MSTP318] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 18, 0), /* SY-DMAC */
[MSTP314] = MSTP(&div6_clks[DIV6_SDHI0], SMSTPCR3, 14, 0), /* SDHI0 */
[MSTP313] = MSTP(&div6_clks[DIV6_SDHI1], SMSTPCR3, 13, 0), /* SDHI1 */
[MSTP312] = MSTP(&div4_clks[DIV4_HP], SMSTPCR3, 12, 0), /* MMCIF0 */
@@ -552,6 +552,7 @@ static struct clk_lookup lookups[] = {
CLKDEV_DEV_ID("i2c-sh_mobile.0", &mstp_clks[MSTP116]), /* I2C0 */
CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */
CLKDEV_DEV_ID("sh-sci.7", &mstp_clks[MSTP219]), /* SCIFA7 */
+ CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* SY-DMAC */
CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */
CLKDEV_DEV_ID("sh-sci.8", &mstp_clks[MSTP206]), /* SCIFB */
CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */
@@ -563,7 +564,6 @@ static struct clk_lookup lookups[] = {
CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */
CLKDEV_DEV_ID("sh_irda.0", &mstp_clks[MSTP325]), /* IrDA */
CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* I2C1 */
- CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP318]), /* SY-DMAC */
CLKDEV_DEV_ID("sh_mobile_sdhi.0", &mstp_clks[MSTP314]), /* SDHI0 */
CLKDEV_DEV_ID("sh_mobile_sdhi.1", &mstp_clks[MSTP313]), /* SDHI1 */
CLKDEV_DEV_ID("sh_mmcif.0", &mstp_clks[MSTP312]), /* MMCIF0 */
diff --git a/arch/arm/mach-shmobile/intc-r8a7779.c b/arch/arm/mach-shmobile/intc-r8a7779.c
index 550b23d..f04fad4 100644
--- a/arch/arm/mach-shmobile/intc-r8a7779.c
+++ b/arch/arm/mach-shmobile/intc-r8a7779.c
@@ -35,6 +35,9 @@
#define INT2SMSKCR3 0xfe7822ac
#define INT2SMSKCR4 0xfe7822b0
+#define INT2NTSR0 0xfe700060
+#define INT2NTSR1 0xfe700064
+
static int r8a7779_set_wake(struct irq_data *data, unsigned int on)
{
return 0; /* always allow wakeup */
@@ -49,6 +52,10 @@ void __init r8a7779_init_irq(void)
gic_init(0, 29, gic_dist_base, gic_cpu_base);
gic_arch_extn.irq_set_wake = r8a7779_set_wake;
+ /* route all interrupts to ARM */
+ __raw_writel(0xffffffff, INT2NTSR0);
+ __raw_writel(0x3fffffff, INT2NTSR1);
+
/* unmask all known interrupts in INTCS2 */
__raw_writel(0xfffffff0, INT2SMSKCR0);
__raw_writel(0xfff7ffff, INT2SMSKCR1);
diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c
index bacdd66..e859fcd 100644
--- a/arch/arm/mach-shmobile/platsmp.c
+++ b/arch/arm/mach-shmobile/platsmp.c
@@ -25,7 +25,12 @@
#define is_sh73a0() (machine_is_ag5evm() || machine_is_kota2() || \
of_machine_is_compatible("renesas,sh73a0"))
#define is_r8a7779() machine_is_marzen()
+
+#ifdef CONFIG_ARCH_EMEV2
#define is_emev2() of_machine_is_compatible("renesas,emev2")
+#else
+#define is_emev2() (0)
+#endif
static unsigned int __init shmobile_smp_get_core_count(void)
{
diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c
index 6a4bd58..fafce9c 100644
--- a/arch/arm/mach-shmobile/setup-sh7372.c
+++ b/arch/arm/mach-shmobile/setup-sh7372.c
@@ -484,7 +484,7 @@ static const struct sh_dmae_slave_config sh7372_dmae_slaves[] = {
},
};
-#define SH7372_CHCLR 0x220
+#define SH7372_CHCLR (0x220 - 0x20)
static const struct sh_dmae_channel sh7372_dmae_channels[] = {
{
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index e5dad60..cf4528d 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -791,6 +791,79 @@ void __init iotable_init(struct map_desc *io_desc, int nr)
}
}
+#ifndef CONFIG_ARM_LPAE
+
+/*
+ * The Linux PMD is made of two consecutive section entries covering 2MB
+ * (see definition in include/asm/pgtable-2level.h). However a call to
+ * create_mapping() may optimize static mappings by using individual
+ * 1MB section mappings. This leaves the actual PMD potentially half
+ * initialized if the top or bottom section entry isn't used, leaving it
+ * open to problems if a subsequent ioremap() or vmalloc() tries to use
+ * the virtual space left free by that unused section entry.
+ *
+ * Let's avoid the issue by inserting dummy vm entries covering the unused
+ * PMD halves once the static mappings are in place.
+ */
+
+static void __init pmd_empty_section_gap(unsigned long addr)
+{
+ struct vm_struct *vm;
+
+ vm = early_alloc_aligned(sizeof(*vm), __alignof__(*vm));
+ vm->addr = (void *)addr;
+ vm->size = SECTION_SIZE;
+ vm->flags = VM_IOREMAP | VM_ARM_STATIC_MAPPING;
+ vm->caller = pmd_empty_section_gap;
+ vm_area_add_early(vm);
+}
+
+static void __init fill_pmd_gaps(void)
+{
+ struct vm_struct *vm;
+ unsigned long addr, next = 0;
+ pmd_t *pmd;
+
+ /* we're still single threaded hence no lock needed here */
+ for (vm = vmlist; vm; vm = vm->next) {
+ if (!(vm->flags & VM_ARM_STATIC_MAPPING))
+ continue;
+ addr = (unsigned long)vm->addr;
+ if (addr < next)
+ continue;
+
+ /*
+ * Check if this vm starts on an odd section boundary.
+ * If so and the first section entry for this PMD is free
+ * then we block the corresponding virtual address.
+ */
+ if ((addr & ~PMD_MASK) == SECTION_SIZE) {
+ pmd = pmd_off_k(addr);
+ if (pmd_none(*pmd))
+ pmd_empty_section_gap(addr & PMD_MASK);
+ }
+
+ /*
+ * Then check if this vm ends on an odd section boundary.
+ * If so and the second section entry for this PMD is empty
+ * then we block the corresponding virtual address.
+ */
+ addr += vm->size;
+ if ((addr & ~PMD_MASK) == SECTION_SIZE) {
+ pmd = pmd_off_k(addr) + 1;
+ if (pmd_none(*pmd))
+ pmd_empty_section_gap(addr);
+ }
+
+ /* no need to look at any vm entry until we hit the next PMD */
+ next = (addr + PMD_SIZE - 1) & PMD_MASK;
+ }
+}
+
+#else
+#define fill_pmd_gaps() do { } while (0)
+#endif
+
static void * __initdata vmalloc_min =
(void *)(VMALLOC_END - (240 << 20) - VMALLOC_OFFSET);
@@ -1072,6 +1145,7 @@ static void __init devicemaps_init(struct machine_desc *mdesc)
*/
if (mdesc->map_io)
mdesc->map_io();
+ fill_pmd_gaps();
/*
* Finally flush the caches and tlb to ensure that we're in a
diff --git a/arch/arm/plat-samsung/include/plat/map-s3c.h b/arch/arm/plat-samsung/include/plat/map-s3c.h
index 7d04875..c0c70a8 100644
--- a/arch/arm/plat-samsung/include/plat/map-s3c.h
+++ b/arch/arm/plat-samsung/include/plat/map-s3c.h
@@ -22,7 +22,7 @@
#define S3C24XX_VA_WATCHDOG S3C_VA_WATCHDOG
#define S3C2412_VA_SSMC S3C_ADDR_CPU(0x00000000)
-#define S3C2412_VA_EBI S3C_ADDR_CPU(0x00010000)
+#define S3C2412_VA_EBI S3C_ADDR_CPU(0x00100000)
#define S3C2410_PA_UART (0x50000000)
#define S3C24XX_PA_UART S3C2410_PA_UART
diff --git a/arch/arm/plat-samsung/include/plat/watchdog-reset.h b/arch/arm/plat-samsung/include/plat/watchdog-reset.h
index f19aff1..bc4db9b 100644
--- a/arch/arm/plat-samsung/include/plat/watchdog-reset.h
+++ b/arch/arm/plat-samsung/include/plat/watchdog-reset.h
@@ -25,7 +25,7 @@ static inline void arch_wdt_reset(void)
__raw_writel(0, S3C2410_WTCON); /* disable watchdog, to be safe */
- if (s3c2410_wdtclk)
+ if (!IS_ERR(s3c2410_wdtclk))
clk_enable(s3c2410_wdtclk);
/* put initial values into count and data */
diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h
index 32b394f..6eb75b8 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -103,6 +103,11 @@ static inline void hard_irq_disable(void)
/* include/linux/interrupt.h needs hard_irq_disable to be a macro */
#define hard_irq_disable hard_irq_disable
+static inline bool lazy_irq_pending(void)
+{
+ return !!(get_paca()->irq_happened & ~PACA_IRQ_HARD_DIS);
+}
+
/*
* This is called by asynchronous interrupts to conditionally
* re-enable hard interrupts when soft-disabled after having
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index ed1718f..5971c85 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -558,27 +558,54 @@ _GLOBAL(ret_from_except_lite)
mtmsrd r10,1 /* Update machine state */
#endif /* CONFIG_PPC_BOOK3E */
-#ifdef CONFIG_PREEMPT
clrrdi r9,r1,THREAD_SHIFT /* current_thread_info() */
- li r0,_TIF_NEED_RESCHED /* bits to check */
ld r3,_MSR(r1)
ld r4,TI_FLAGS(r9)
- /* Move MSR_PR bit in r3 to _TIF_SIGPENDING position in r0 */
- rlwimi r0,r3,32+TIF_SIGPENDING-MSR_PR_LG,_TIF_SIGPENDING
- and. r0,r4,r0 /* check NEED_RESCHED and maybe SIGPENDING */
- bne do_work
-
-#else /* !CONFIG_PREEMPT */
- ld r3,_MSR(r1) /* Returning to user mode? */
andi. r3,r3,MSR_PR
- beq restore /* if not, just restore regs and return */
+ beq resume_kernel
/* Check current_thread_info()->flags */
+ andi. r0,r4,_TIF_USER_WORK_MASK
+ beq restore
+
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq 1f
+ bl .restore_interrupts
+ bl .schedule
+ b .ret_from_except_lite
+
+1: bl .save_nvgprs
+ bl .restore_interrupts
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl .do_notify_resume
+ b .ret_from_except
+
+resume_kernel:
+#ifdef CONFIG_PREEMPT
+ /* Check if we need to preempt */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ beq+ restore
+ /* Check that preempt_count() == 0 and interrupts are enabled */
+ lwz r8,TI_PREEMPT(r9)
+ cmpwi cr1,r8,0
+ ld r0,SOFTE(r1)
+ cmpdi r0,0
+ crandc eq,cr1*4+eq,eq
+ bne restore
+
+ /*
+ * Here we are preempting the current task. We want to make
+ * sure we are soft-disabled first
+ */
+ SOFT_DISABLE_INTS(r3,r4)
+1: bl .preempt_schedule_irq
+
+ /* Re-test flags and eventually loop */
clrrdi r9,r1,THREAD_SHIFT
ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_USER_WORK_MASK
- bne do_work
-#endif /* !CONFIG_PREEMPT */
+ andi. r0,r4,_TIF_NEED_RESCHED
+ bne 1b
+#endif /* CONFIG_PREEMPT */
.globl fast_exc_return_irq
fast_exc_return_irq:
@@ -759,50 +786,6 @@ restore_check_irq_replay:
#endif /* CONFIG_PPC_BOOK3E */
1: b .ret_from_except /* What else to do here ? */
-
-
-3:
-do_work:
-#ifdef CONFIG_PREEMPT
- andi. r0,r3,MSR_PR /* Returning to user mode? */
- bne user_work
- /* Check that preempt_count() == 0 and interrupts are enabled */
- lwz r8,TI_PREEMPT(r9)
- cmpwi cr1,r8,0
- ld r0,SOFTE(r1)
- cmpdi r0,0
- crandc eq,cr1*4+eq,eq
- bne restore
-
- /*
- * Here we are preempting the current task. We want to make
- * sure we are soft-disabled first
- */
- SOFT_DISABLE_INTS(r3,r4)
-1: bl .preempt_schedule_irq
-
- /* Re-test flags and eventually loop */
- clrrdi r9,r1,THREAD_SHIFT
- ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
- bne 1b
- b restore
-
-user_work:
-#endif /* CONFIG_PREEMPT */
-
- andi. r0,r4,_TIF_NEED_RESCHED
- beq 1f
- bl .restore_interrupts
- bl .schedule
- b .ret_from_except_lite
-
-1: bl .save_nvgprs
- bl .restore_interrupts
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl .do_notify_resume
- b .ret_from_except
-
unrecov_restore:
addi r3,r1,STACK_FRAME_OVERHEAD
bl .unrecoverable_exception
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 7835a5e..1b41502 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -277,7 +277,7 @@ EXPORT_SYMBOL(arch_local_irq_restore);
* NOTE: This is called with interrupts hard disabled but not marked
* as such in paca->irq_happened, so we need to resync this.
*/
-void restore_interrupts(void)
+void notrace restore_interrupts(void)
{
if (irqs_disabled()) {
local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 1b488e5..0794a30 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -1312,7 +1312,7 @@ static struct opal_secondary_data {
extern char opal_secondary_entry;
-static void prom_query_opal(void)
+static void __init prom_query_opal(void)
{
long rc;
@@ -1436,7 +1436,7 @@ static void __init prom_opal_hold_cpus(void)
prom_debug("prom_opal_hold_cpus: end...\n");
}
-static void prom_opal_takeover(void)
+static void __init prom_opal_takeover(void)
{
struct opal_secondary_data *data = &RELOC(opal_secondary_data);
struct opal_takeover_args *args = &data->args;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a84aafc..a1044f4 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -810,7 +810,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
lwz r3,VCORE_NAPPING_THREADS(r5)
lwz r4,VCPU_PTID(r9)
li r0,1
- sldi r0,r0,r4
+ sld r0,r0,r4
andc. r3,r3,r0 /* no sense IPI'ing ourselves */
beq 43f
mulli r4,r4,PACA_SIZE /* get paca for thread 0 */
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b6edbb3..6e8f677 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -635,7 +635,7 @@ static inline int __init read_usm_ranges(const u32 **usm)
*/
static void __init parse_drconf_memory(struct device_node *memory)
{
- const u32 *dm, *usm;
+ const u32 *uninitialized_var(dm), *usm;
unsigned int n, rc, ranges, is_kexec_kdump = 0;
unsigned long lmb_size, base, size, sz;
int nid;
diff --git a/arch/powerpc/net/bpf_jit_64.S b/arch/powerpc/net/bpf_jit_64.S
index 55ba385..7d3a3b5 100644
--- a/arch/powerpc/net/bpf_jit_64.S
+++ b/arch/powerpc/net/bpf_jit_64.S
@@ -105,6 +105,7 @@ sk_load_byte_msh_positive_offset:
mr r4, r_addr; \
li r6, SIZE; \
bl skb_copy_bits; \
+ nop; \
/* R3 = 0 on success */ \
addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
ld r0, 16(r1); \
@@ -156,6 +157,7 @@ bpf_slow_path_byte_msh:
mr r4, r_addr; \
li r5, SIZE; \
bl bpf_internal_load_pointer_neg_helper; \
+ nop; \
/* R3 != 0 on success */ \
addi r1, r1, BPF_PPC_SLOWPATH_FRAME; \
ld r0, 16(r1); \
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 0915b1a..2d311c0 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -106,7 +106,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
tcep++;
}
- if (tbl->it_type == TCE_PCI_SWINV_CREATE)
+ if (tbl->it_type & TCE_PCI_SWINV_CREATE)
tce_invalidate_pSeries_sw(tbl, tces, tcep - 1);
return 0;
}
@@ -121,7 +121,7 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
while (npages--)
*(tcep++) = 0;
- if (tbl->it_type == TCE_PCI_SWINV_FREE)
+ if (tbl->it_type & TCE_PCI_SWINV_FREE)
tce_invalidate_pSeries_sw(tbl, tces, tcep - 1);
}
diff --git a/arch/powerpc/platforms/pseries/processor_idle.c b/arch/powerpc/platforms/pseries/processor_idle.c
index 41a34bc..e61483e 100644
--- a/arch/powerpc/platforms/pseries/processor_idle.c
+++ b/arch/powerpc/platforms/pseries/processor_idle.c
@@ -106,7 +106,7 @@ static void check_and_cede_processor(void)
* we first hard disable then check.
*/
hard_irq_disable();
- if (get_paca()->irq_happened == 0)
+ if (!lazy_irq_pending())
cede_processor();
}
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 0f3ab06..eab3492 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -971,7 +971,7 @@ static int cpu_cmd(void)
/* print cpus waiting or in xmon */
printf("cpus stopped:");
count = 0;
- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ for_each_possible_cpu(cpu) {
if (cpumask_test_cpu(cpu, &cpus_in_xmon)) {
if (count == 0)
printf(" %x", cpu);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index daeca56..673ac9b 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -38,7 +38,7 @@
int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from)
{
int err = 0;
- bool ia32 = is_ia32_task();
+ bool ia32 = test_thread_flag(TIF_IA32);
if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
return -EFAULT;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 340ee49..f91e80f 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -176,7 +176,7 @@
#define X86_FEATURE_XSAVEOPT (7*32+ 4) /* Optimized Xsave */
#define X86_FEATURE_PLN (7*32+ 5) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_DTS (7*32+ 7) /* Digital Thermal Sensor */
+#define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
#define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
/* Virtualization flags: Linux defined, word 8 */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8afb693..b2297e5 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -422,12 +422,14 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
return 0;
}
- if (intsrc->source_irq == 0 && intsrc->global_irq == 2) {
+ if (intsrc->source_irq == 0) {
if (acpi_skip_timer_override) {
- printk(PREFIX "BIOS IRQ0 pin2 override ignored.\n");
+ printk(PREFIX "BIOS IRQ0 override ignored.\n");
return 0;
}
- if (acpi_fix_pin2_polarity && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
+
+ if ((intsrc->global_irq == 2) && acpi_fix_pin2_polarity
+ && (intsrc->inti_flags & ACPI_MADT_POLARITY_MASK)) {
intsrc->inti_flags &= ~ACPI_MADT_POLARITY_MASK;
printk(PREFIX "BIOS IRQ0 pin2 override: forcing polarity to high active.\n");
}
@@ -1334,17 +1336,12 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d)
}
/*
- * Force ignoring BIOS IRQ0 pin2 override
+ * Force ignoring BIOS IRQ0 override
*/
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
- /*
- * The ati_ixp4x0_rev() early PCI quirk should have set
- * the acpi_skip_timer_override flag already:
- */
if (!acpi_skip_timer_override) {
- WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
- pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
+ pr_notice("%s detected: Ignoring BIOS IRQ0 override\n",
d->ident);
acpi_skip_timer_override = 1;
}
@@ -1438,7 +1435,7 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
* is enabled. This input is incorrectly designated the
* ISA IRQ 0 via an interrupt source override even though
* it is wired to the output of the master 8259A and INTIN0
- * is not connected at all. Force ignoring BIOS IRQ0 pin2
+ * is not connected at all. Force ignoring BIOS IRQ0
* override in that cases.
*/
{
@@ -1473,6 +1470,14 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq 6715b"),
},
},
+ {
+ .callback = dmi_ignore_irq0_timer_override,
+ .ident = "FUJITSU SIEMENS",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "AMILO PRO V2030"),
+ },
+ },
{}
};
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
index dfea390..c7b3fe2 100644
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
#
# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
#
@@ -11,22 +11,35 @@ open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
print OUT "#include <asm/cpufeature.h>\n\n";
print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
+%features = ();
+$err = 0;
+
while (defined($line = <IN>)) {
if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
$macro = $1;
- $feature = $2;
+ $feature = "\L$2";
$tail = $3;
if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
- $feature = $1;
+ $feature = "\L$1";
}
- if ($feature ne '') {
- printf OUT "\t%-32s = \"%s\",\n",
- "[$macro]", "\L$feature";
+ next if ($feature eq '');
+
+ if ($features{$feature}++) {
+ print STDERR "$in: duplicate feature name: $feature\n";
+ $err++;
}
+ printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature;
}
}
print OUT "};\n";
close(IN);
close(OUT);
+
+if ($err) {
+ unlink($out);
+ exit(1);
+}
+
+exit(0);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index addf9e8..ee8e9ab 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -31,7 +31,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
const struct cpuid_bit *cb;
static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
- { X86_FEATURE_DTS, CR_EAX, 0, 0x00000006, 0 },
+ { X86_FEATURE_DTHERM, CR_EAX, 0, 0x00000006, 0 },
{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006, 0 },
{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006, 0 },
{ X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8bfb614..3f61904 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -444,12 +444,12 @@ void kgdb_roundup_cpus(unsigned long flags)
/**
* kgdb_arch_handle_exception - Handle architecture specific GDB packets.
- * @vector: The error vector of the exception that happened.
+ * @e_vector: The error vector of the exception that happened.
* @signo: The signal number of the exception that happened.
* @err_code: The error code of the exception that happened.
- * @remcom_in_buffer: The buffer of the packet we have read.
- * @remcom_out_buffer: The buffer of %BUFMAX bytes to write a packet into.
- * @regs: The &struct pt_regs of the current process.
+ * @remcomInBuffer: The buffer of the packet we have read.
+ * @remcomOutBuffer: The buffer of %BUFMAX bytes to write a packet into.
+ * @linux_regs: The &struct pt_regs of the current process.
*
* This function MUST handle the 'c' and 's' command packets,
* as well packets to set / remove a hardware breakpoint, if used.
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 25b48ed..5de92f1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -451,6 +451,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 990"),
},
},
+ { /* Handle problems with rebooting on the Precision M6600. */
+ .callback = set_pci_reboot,
+ .ident = "Dell OptiPlex 990",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
+ },
+ },
{ }
};
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 459b58a..25b7ae8 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -115,7 +115,7 @@ EXPORT_SYMBOL(csum_partial_copy_to_user);
* @src: source address
* @dst: destination address
* @len: number of bytes to be copied.
- * @isum: initial sum that is added into the result (32bit unfolded)
+ * @sum: initial sum that is added into the result (32bit unfolded)
*
* Returns an 32bit unfolded checksum of the buffer.
*/
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 02cf633..e7dee61 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -125,12 +125,8 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q)
blkg->pd[i] = pd;
pd->blkg = blkg;
- }
-
- /* invoke per-policy init */
- for (i = 0; i < BLKCG_MAX_POLS; i++) {
- struct blkcg_policy *pol = blkcg_policy[i];
+ /* invoke per-policy init */
if (blkcg_policy_enabled(blkg->q, pol))
pol->pd_init_fn(blkg);
}
@@ -245,10 +241,9 @@ EXPORT_SYMBOL_GPL(blkg_lookup_create);
static void blkg_destroy(struct blkcg_gq *blkg)
{
- struct request_queue *q = blkg->q;
struct blkcg *blkcg = blkg->blkcg;
- lockdep_assert_held(q->queue_lock);
+ lockdep_assert_held(blkg->q->queue_lock);
lockdep_assert_held(&blkcg->lock);
/* Something wrong if we are trying to remove same group twice */
diff --git a/block/blk-core.c b/block/blk-core.c
index 3c923a7..93eb3e4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -361,9 +361,10 @@ EXPORT_SYMBOL(blk_put_queue);
*/
void blk_drain_queue(struct request_queue *q, bool drain_all)
{
+ int i;
+
while (true) {
bool drain = false;
- int i;
spin_lock_irq(q->queue_lock);
@@ -408,6 +409,18 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
break;
msleep(10);
}
+
+ /*
+ * With queue marked dead, any woken up waiter will fail the
+ * allocation path, so the wakeup chaining is lost and we're
+ * left with hung waiters. We need to wake up those waiters.
+ */
+ if (q->request_fn) {
+ spin_lock_irq(q->queue_lock);
+ for (i = 0; i < ARRAY_SIZE(q->rq.wait); i++)
+ wake_up_all(&q->rq.wait[i]);
+ spin_unlock_irq(q->queue_lock);
+ }
}
/**
@@ -467,7 +480,6 @@ void blk_cleanup_queue(struct request_queue *q)
/* mark @q DEAD, no new request or merges will be allowed afterwards */
mutex_lock(&q->sysfs_lock);
queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
-
spin_lock_irq(lock);
/*
@@ -485,10 +497,6 @@ void blk_cleanup_queue(struct request_queue *q)
queue_flag_set(QUEUE_FLAG_NOMERGES, q);
queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
queue_flag_set(QUEUE_FLAG_DEAD, q);
-
- if (q->queue_lock != &q->__queue_lock)
- q->queue_lock = &q->__queue_lock;
-
spin_unlock_irq(lock);
mutex_unlock(&q->sysfs_lock);
@@ -499,6 +507,11 @@ void blk_cleanup_queue(struct request_queue *q)
del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
blk_sync_queue(q);
+ spin_lock_irq(lock);
+ if (q->queue_lock != &q->__queue_lock)
+ q->queue_lock = &q->__queue_lock;
+ spin_unlock_irq(lock);
+
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 7803548..6e4744c 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -197,44 +197,3 @@ void blk_add_timer(struct request *req)
mod_timer(&q->timeout, expiry);
}
-/**
- * blk_abort_queue -- Abort all request on given queue
- * @queue: pointer to queue
- *
- */
-void blk_abort_queue(struct request_queue *q)
-{
- unsigned long flags;
- struct request *rq, *tmp;
- LIST_HEAD(list);
-
- /*
- * Not a request based block device, nothing to abort
- */
- if (!q->request_fn)
- return;
-
- spin_lock_irqsave(q->queue_lock, flags);
-
- elv_abort_queue(q);
-
- /*
- * Splice entries to local list, to avoid deadlocking if entries
- * get readded to the timeout list by error handling
- */
- list_splice_init(&q->timeout_list, &list);
-
- list_for_each_entry_safe(rq, tmp, &list, timeout_list)
- blk_abort_request(rq);
-
- /*
- * Occasionally, blk_abort_request() will return without
- * deleting the element from the list. Make sure we add those back
- * instead of leaving them on the local stack list.
- */
- list_splice(&list, &q->timeout_list);
-
- spin_unlock_irqrestore(q->queue_lock, flags);
-
-}
-EXPORT_SYMBOL_GPL(blk_abort_queue);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 673c977..fb52df97 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -17,8 +17,6 @@
#include "blk.h"
#include "blk-cgroup.h"
-static struct blkcg_policy blkcg_policy_cfq __maybe_unused;
-
/*
* tunables
*/
@@ -418,11 +416,6 @@ static inline struct cfq_group *pd_to_cfqg(struct blkg_policy_data *pd)
return pd ? container_of(pd, struct cfq_group, pd) : NULL;
}
-static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
-{
- return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
-}
-
static inline struct blkcg_gq *cfqg_to_blkg(struct cfq_group *cfqg)
{
return pd_to_blkg(&cfqg->pd);
@@ -572,6 +565,13 @@ static inline void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) { }
#ifdef CONFIG_CFQ_GROUP_IOSCHED
+static struct blkcg_policy blkcg_policy_cfq;
+
+static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg)
+{
+ return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq));
+}
+
static inline void cfqg_get(struct cfq_group *cfqg)
{
return blkg_get(cfqg_to_blkg(cfqg));
@@ -3951,10 +3951,11 @@ static void cfq_exit_queue(struct elevator_queue *e)
cfq_shutdown_timer_wq(cfqd);
-#ifndef CONFIG_CFQ_GROUP_IOSCHED
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+ blkcg_deactivate_policy(q, &blkcg_policy_cfq);
+#else
kfree(cfqd->root_group);
#endif
- blkcg_deactivate_policy(q, &blkcg_policy_cfq);
kfree(cfqd);
}
@@ -4194,14 +4195,15 @@ static int __init cfq_init(void)
#ifdef CONFIG_CFQ_GROUP_IOSCHED
if (!cfq_group_idle)
cfq_group_idle = 1;
-#else
- cfq_group_idle = 0;
-#endif
ret = blkcg_policy_register(&blkcg_policy_cfq);
if (ret)
return ret;
+#else
+ cfq_group_idle = 0;
+#endif
+ ret = -ENOMEM;
cfq_pool = KMEM_CACHE(cfq_queue, 0);
if (!cfq_pool)
goto err_pol_unreg;
@@ -4215,13 +4217,17 @@ static int __init cfq_init(void)
err_free_pool:
kmem_cache_destroy(cfq_pool);
err_pol_unreg:
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
blkcg_policy_unregister(&blkcg_policy_cfq);
+#endif
return ret;
}
static void __exit cfq_exit(void)
{
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
blkcg_policy_unregister(&blkcg_policy_cfq);
+#endif
elv_unregister(&iosched_cfq);
kmem_cache_destroy(cfq_pool);
}
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 260fa80..9a87daa 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -721,11 +721,14 @@ int scsi_verify_blk_ioctl(struct block_device *bd, unsigned int cmd)
break;
}
+ if (capable(CAP_SYS_RAWIO))
+ return 0;
+
/* In particular, rule out all resets and host-specific ioctls. */
printk_ratelimited(KERN_WARNING
"%s: sending ioctl %x to a partition!\n", current->comm, cmd);
- return capable(CAP_SYS_RAWIO) ? 0 : -ENOIOCTLCMD;
+ return -ENOIOCTLCMD;
}
EXPORT_SYMBOL(scsi_verify_blk_ioctl);
diff --git a/drivers/acpi/acpi_pad.c b/drivers/acpi/acpi_pad.c
index a43fa1a..1502c502 100644
--- a/drivers/acpi/acpi_pad.c
+++ b/drivers/acpi/acpi_pad.c
@@ -36,6 +36,7 @@
#define ACPI_PROCESSOR_AGGREGATOR_DEVICE_NAME "Processor Aggregator"
#define ACPI_PROCESSOR_AGGREGATOR_NOTIFY 0x80
static DEFINE_MUTEX(isolated_cpus_lock);
+static DEFINE_MUTEX(round_robin_lock);
static unsigned long power_saving_mwait_eax;
@@ -107,7 +108,7 @@ static void round_robin_cpu(unsigned int tsk_index)
if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
return;
- mutex_lock(&isolated_cpus_lock);
+ mutex_lock(&round_robin_lock);
cpumask_clear(tmp);
for_each_cpu(cpu, pad_busy_cpus)
cpumask_or(tmp, tmp, topology_thread_cpumask(cpu));
@@ -116,7 +117,7 @@ static void round_robin_cpu(unsigned int tsk_index)
if (cpumask_empty(tmp))
cpumask_andnot(tmp, cpu_online_mask, pad_busy_cpus);
if (cpumask_empty(tmp)) {
- mutex_unlock(&isolated_cpus_lock);
+ mutex_unlock(&round_robin_lock);
return;
}
for_each_cpu(cpu, tmp) {
@@ -131,7 +132,7 @@ static void round_robin_cpu(unsigned int tsk_index)
tsk_in_cpu[tsk_index] = preferred_cpu;
cpumask_set_cpu(preferred_cpu, pad_busy_cpus);
cpu_weight[preferred_cpu]++;
- mutex_unlock(&isolated_cpus_lock);
+ mutex_unlock(&round_robin_lock);
set_cpus_allowed_ptr(current, cpumask_of(preferred_cpu));
}
diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c
index 5577762..6686b1e 100644
--- a/drivers/acpi/apei/apei-base.c
+++ b/drivers/acpi/apei/apei-base.c
@@ -243,7 +243,7 @@ static int pre_map_gar_callback(struct apei_exec_context *ctx,
u8 ins = entry->instruction;
if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
- return acpi_os_map_generic_address(&entry->register_region);
+ return apei_map_generic_address(&entry->register_region);
return 0;
}
@@ -276,7 +276,7 @@ static int post_unmap_gar_callback(struct apei_exec_context *ctx,
u8 ins = entry->instruction;
if (ctx->ins_table[ins].flags & APEI_EXEC_INS_ACCESS_REGISTER)
- acpi_os_unmap_generic_address(&entry->register_region);
+ apei_unmap_generic_address(&entry->register_region);
return 0;
}
@@ -606,6 +606,19 @@ static int apei_check_gar(struct acpi_generic_address *reg, u64 *paddr,
return 0;
}
+int apei_map_generic_address(struct acpi_generic_address *reg)
+{
+ int rc;
+ u32 access_bit_width;
+ u64 address;
+
+ rc = apei_check_gar(reg, &address, &access_bit_width);
+ if (rc)
+ return rc;
+ return acpi_os_map_generic_address(reg);
+}
+EXPORT_SYMBOL_GPL(apei_map_generic_address);
+
/* read GAR in interrupt (including NMI) or process context */
int apei_read(u64 *val, struct acpi_generic_address *reg)
{
diff --git a/drivers/acpi/apei/apei-internal.h b/drivers/acpi/apei/apei-internal.h
index cca240a..f220d64 100644
--- a/drivers/acpi/apei/apei-internal.h
+++ b/drivers/acpi/apei/apei-internal.h
@@ -7,6 +7,8 @@
#define APEI_INTERNAL_H
#include <linux/cper.h>
+#include <linux/acpi.h>
+#include <linux/acpi_io.h>
struct apei_exec_context;
@@ -68,6 +70,13 @@ static inline int apei_exec_run_optional(struct apei_exec_context *ctx, u8 actio
/* IP has been set in instruction function */
#define APEI_EXEC_SET_IP 1
+int apei_map_generic_address(struct acpi_generic_address *reg);
+
+static inline void apei_unmap_generic_address(struct acpi_generic_address *reg)
+{
+ acpi_os_unmap_generic_address(reg);
+}
+
int apei_read(u64 *val, struct acpi_generic_address *reg);
int apei_write(u64 val, struct acpi_generic_address *reg);
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9b3cac0..1599566 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -301,7 +301,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
if (!ghes)
return ERR_PTR(-ENOMEM);
ghes->generic = generic;
- rc = acpi_os_map_generic_address(&generic->error_status_address);
+ rc = apei_map_generic_address(&generic->error_status_address);
if (rc)
goto err_free;
error_block_length = generic->error_block_length;
@@ -321,7 +321,7 @@ static struct ghes *ghes_new(struct acpi_hest_generic *generic)
return ghes;
err_unmap:
- acpi_os_unmap_generic_address(&generic->error_status_address);
+ apei_unmap_generic_address(&generic->error_status_address);
err_free:
kfree(ghes);
return ERR_PTR(rc);
@@ -330,7 +330,7 @@ err_free:
static void ghes_fini(struct ghes *ghes)
{
kfree(ghes->estatus);
- acpi_os_unmap_generic_address(&ghes->generic->error_status_address);
+ apei_unmap_generic_address(&ghes->generic->error_status_address);
}
enum {
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index f3decb3..47a8caa 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -224,6 +224,7 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr,
/*
* Suspend / resume control
*/
+static int acpi_idle_suspend;
static u32 saved_bm_rld;
static void acpi_idle_bm_rld_save(void)
@@ -242,13 +243,21 @@ static void acpi_idle_bm_rld_restore(void)
int acpi_processor_suspend(struct acpi_device * device, pm_message_t state)
{
+ if (acpi_idle_suspend == 1)
+ return 0;
+
acpi_idle_bm_rld_save();
+ acpi_idle_suspend = 1;
return 0;
}
int acpi_processor_resume(struct acpi_device * device)
{
+ if (acpi_idle_suspend == 0)
+ return 0;
+
acpi_idle_bm_rld_restore();
+ acpi_idle_suspend = 0;
return 0;
}
@@ -754,6 +763,12 @@ static int acpi_idle_enter_c1(struct cpuidle_device *dev,
local_irq_disable();
+ if (acpi_idle_suspend) {
+ local_irq_enable();
+ cpu_relax();
+ return -EBUSY;
+ }
+
lapic_timer_state_broadcast(pr, cx, 1);
kt1 = ktime_get_real();
acpi_idle_do_entry(cx);
@@ -823,6 +838,12 @@ static int acpi_idle_enter_simple(struct cpuidle_device *dev,
local_irq_disable();
+ if (acpi_idle_suspend) {
+ local_irq_enable();
+ cpu_relax();
+ return -EBUSY;
+ }
+
if (cx->entry_method != ACPI_CSTATE_FFH) {
current_thread_info()->status &= ~TS_POLLING;
/*
@@ -907,14 +928,21 @@ static int acpi_idle_enter_bm(struct cpuidle_device *dev,
drv, drv->safe_state_index);
} else {
local_irq_disable();
- acpi_safe_halt();
+ if (!acpi_idle_suspend)
+ acpi_safe_halt();
local_irq_enable();
- return -EINVAL;
+ return -EBUSY;
}
}
local_irq_disable();
+ if (acpi_idle_suspend) {
+ local_irq_enable();
+ cpu_relax();
+ return -EBUSY;
+ }
+
if (cx->entry_method != ACPI_CSTATE_FFH) {
current_thread_info()->status &= ~TS_POLLING;
/*
diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c
index 9f66181..240a244 100644
--- a/drivers/acpi/sysfs.c
+++ b/drivers/acpi/sysfs.c
@@ -173,7 +173,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp)
{
int result = 0;
- if (!strncmp(val, "enable", strlen("enable") - 1)) {
+ if (!strncmp(val, "enable", strlen("enable"))) {
result = acpi_debug_trace(trace_method_name, trace_debug_level,
trace_debug_layer, 0);
if (result)
@@ -181,7 +181,7 @@ static int param_set_trace_state(const char *val, struct kernel_param *kp)
goto exit;
}
- if (!strncmp(val, "disable", strlen("disable") - 1)) {
+ if (!strncmp(val, "disable", strlen("disable"))) {
int name = 0;
result = acpi_debug_trace((char *)&name, trace_debug_level,
trace_debug_layer, 0);
diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index a576575..1e0a9e1 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c
@@ -558,6 +558,8 @@ acpi_video_bus_DOS(struct acpi_video_bus *video, int bios_flag, int lcd_flag)
union acpi_object arg0 = { ACPI_TYPE_INTEGER };
struct acpi_object_list args = { 1, &arg0 };
+ if (!video->cap._DOS)
+ return 0;
if (bios_flag < 0 || bios_flag > 3 || lcd_flag < 0 || lcd_flag > 1)
return -EINVAL;
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index e0fb5b0..9cb845e 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1031,7 +1031,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
dpm_wait_for_children(dev, async);
if (async_error)
- return 0;
+ goto Complete;
pm_runtime_get_noresume(dev);
if (pm_runtime_barrier(dev) && device_may_wakeup(dev))
@@ -1040,7 +1040,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
if (pm_wakeup_pending()) {
pm_runtime_put_sync(dev);
async_error = -EBUSY;
- return 0;
+ goto Complete;
}
device_lock(dev);
@@ -1097,6 +1097,8 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
}
device_unlock(dev);
+
+ Complete:
complete_all(&dev->power.completion);
if (error) {
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index b5c5ff5..fcb956b 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -1475,10 +1475,17 @@ void _drbd_bm_set_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
first_word = 0;
spin_lock_irq(&b->bm_lock);
}
-
/* last page (respectively only page, for first page == last page) */
last_word = MLPP(el >> LN2_BPL);
- bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
+
+ /* consider bitmap->bm_bits = 32768, bitmap->bm_number_of_pages = 1. (or multiples).
+ * ==> e = 32767, el = 32768, last_page = 2,
+ * and now last_word = 0.
+ * We do not want to touch last_page in this case,
+ * as we did not allocate it, it is not present in bitmap->bm_pages.
+ */
+ if (last_word)
+ bm_set_full_words_within_one_page(mdev->bitmap, last_page, first_word, last_word);
/* possibly trailing bits.
* example: (e & 63) == 63, el will be e+1.
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 9c5c849..8e93a6a 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -472,12 +472,17 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;
- D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+ if (req->rq_state & RQ_LOCAL_ABORTED) {
+ _req_may_be_done(req, m);
+ break;
+ }
__drbd_chk_io_error(mdev, false);
goto_queue_for_net_read:
+ D_ASSERT(!(req->rq_state & RQ_NET_MASK));
+
/* no point in retrying if there is no good remote data,
* or we have no connection. */
if (mdev->state.pdsk != D_UP_TO_DATE) {
@@ -765,6 +770,40 @@ static int drbd_may_do_local_read(struct drbd_conf *mdev, sector_t sector, int s
return 0 == drbd_bm_count_bits(mdev, sbnr, ebnr);
}
+static void maybe_pull_ahead(struct drbd_conf *mdev)
+{
+ int congested = 0;
+
+ /* If I don't even have good local storage, we can not reasonably try
+ * to pull ahead of the peer. We also need the local reference to make
+ * sure mdev->act_log is there.
+ * Note: caller has to make sure that net_conf is there.
+ */
+ if (!get_ldev_if_state(mdev, D_UP_TO_DATE))
+ return;
+
+ if (mdev->net_conf->cong_fill &&
+ atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
+ dev_info(DEV, "Congestion-fill threshold reached\n");
+ congested = 1;
+ }
+
+ if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
+ dev_info(DEV, "Congestion-extents threshold reached\n");
+ congested = 1;
+ }
+
+ if (congested) {
+ queue_barrier(mdev); /* last barrier, after mirrored writes */
+
+ if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
+ _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
+ else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
+ _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
+ }
+ put_ldev(mdev);
+}
+
static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio, unsigned long start_time)
{
const int rw = bio_rw(bio);
@@ -972,29 +1011,8 @@ allocate_barrier:
_req_mod(req, queue_for_send_oos);
if (remote &&
- mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96) {
- int congested = 0;
-
- if (mdev->net_conf->cong_fill &&
- atomic_read(&mdev->ap_in_flight) >= mdev->net_conf->cong_fill) {
- dev_info(DEV, "Congestion-fill threshold reached\n");
- congested = 1;
- }
-
- if (mdev->act_log->used >= mdev->net_conf->cong_extents) {
- dev_info(DEV, "Congestion-extents threshold reached\n");
- congested = 1;
- }
-
- if (congested) {
- queue_barrier(mdev); /* last barrier, after mirrored writes */
-
- if (mdev->net_conf->on_congestion == OC_PULL_AHEAD)
- _drbd_set_state(_NS(mdev, conn, C_AHEAD), 0, NULL);
- else /*mdev->net_conf->on_congestion == OC_DISCONNECT */
- _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), 0, NULL);
- }
- }
+ mdev->net_conf->on_congestion != OC_BLOCK && mdev->agreed_pro_version >= 96)
+ maybe_pull_ahead(mdev);
spin_unlock_irq(&mdev->req_lock);
kfree(b); /* if someone else has beaten us to it... */
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index cce7df3..553f43a 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -671,6 +671,7 @@ static void __reschedule_timeout(int drive, const char *message)
if (drive == current_reqD)
drive = current_drive;
+ __cancel_delayed_work(&fd_timeout);
if (drive < 0 || drive >= N_DRIVE) {
delay = 20UL * HZ;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 264bc77..a8fddeb 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -37,6 +37,7 @@
#include <linux/kthread.h>
#include <../drivers/ata/ahci.h>
#include <linux/export.h>
+#include <linux/debugfs.h>
#include "mtip32xx.h"
#define HW_CMD_SLOT_SZ (MTIP_MAX_COMMAND_SLOTS * 32)
@@ -85,6 +86,7 @@ static int instance;
* allocated in mtip_init().
*/
static int mtip_major;
+static struct dentry *dfs_parent;
static DEFINE_SPINLOCK(rssd_index_lock);
static DEFINE_IDA(rssd_index_ida);
@@ -2546,7 +2548,7 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
}
/*
- * Sysfs register/status dump.
+ * Sysfs status dump.
*
* @dev Pointer to the device structure, passed by the kernrel.
* @attr Pointer to the device_attribute structure passed by the kernel.
@@ -2555,45 +2557,68 @@ static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
* return value
* The size, in bytes, of the data copied into buf.
*/
-static ssize_t mtip_hw_show_registers(struct device *dev,
+static ssize_t mtip_hw_show_status(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- u32 group_allocated;
struct driver_data *dd = dev_to_disk(dev)->private_data;
int size = 0;
+
+ if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
+ size += sprintf(buf, "%s", "thermal_shutdown\n");
+ else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
+ size += sprintf(buf, "%s", "write_protect\n");
+ else
+ size += sprintf(buf, "%s", "online\n");
+
+ return size;
+}
+
+static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
+
+static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
+ size_t len, loff_t *offset)
+{
+ struct driver_data *dd = (struct driver_data *)f->private_data;
+ char buf[MTIP_DFS_MAX_BUF_SIZE];
+ u32 group_allocated;
+ int size = *offset;
int n;
- size += sprintf(&buf[size], "Hardware\n--------\n");
- size += sprintf(&buf[size], "S ACTive : [ 0x");
+ if (!len || size)
+ return 0;
+
+ if (size < 0)
+ return -EINVAL;
+
+ size += sprintf(&buf[size], "H/ S ACTive : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--)
size += sprintf(&buf[size], "%08X ",
readl(dd->port->s_active[n]));
size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "Command Issue : [ 0x");
+ size += sprintf(&buf[size], "H/ Command Issue : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--)
size += sprintf(&buf[size], "%08X ",
readl(dd->port->cmd_issue[n]));
size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "Completed : [ 0x");
+ size += sprintf(&buf[size], "H/ Completed : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--)
size += sprintf(&buf[size], "%08X ",
readl(dd->port->completed[n]));
size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "PORT IRQ STAT : [ 0x%08X ]\n",
+ size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n",
readl(dd->port->mmio + PORT_IRQ_STAT));
- size += sprintf(&buf[size], "HOST IRQ STAT : [ 0x%08X ]\n",
+ size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n",
readl(dd->mmio + HOST_IRQ_STAT));
size += sprintf(&buf[size], "\n");
- size += sprintf(&buf[size], "Local\n-----\n");
- size += sprintf(&buf[size], "Allocated : [ 0x");
+ size += sprintf(&buf[size], "L/ Allocated : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--) {
if (sizeof(long) > sizeof(u32))
@@ -2605,7 +2630,7 @@ static ssize_t mtip_hw_show_registers(struct device *dev,
}
size += sprintf(&buf[size], "]\n");
- size += sprintf(&buf[size], "Commands in Q: [ 0x");
+ size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");
for (n = dd->slot_groups-1; n >= 0; n--) {
if (sizeof(long) > sizeof(u32))
@@ -2617,44 +2642,53 @@ static ssize_t mtip_hw_show_registers(struct device *dev,
}
size += sprintf(&buf[size], "]\n");
- return size;
+ *offset = size <= len ? size : len;
+ size = copy_to_user(ubuf, buf, *offset);
+ if (size)
+ return -EFAULT;
+
+ return *offset;
}
-static ssize_t mtip_hw_show_status(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
+ size_t len, loff_t *offset)
{
- struct driver_data *dd = dev_to_disk(dev)->private_data;
- int size = 0;
+ struct driver_data *dd = (struct driver_data *)f->private_data;
+ char buf[MTIP_DFS_MAX_BUF_SIZE];
+ int size = *offset;
- if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
- size += sprintf(buf, "%s", "thermal_shutdown\n");
- else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
- size += sprintf(buf, "%s", "write_protect\n");
- else
- size += sprintf(buf, "%s", "online\n");
-
- return size;
-}
+ if (!len || size)
+ return 0;
-static ssize_t mtip_hw_show_flags(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct driver_data *dd = dev_to_disk(dev)->private_data;
- int size = 0;
+ if (size < 0)
+ return -EINVAL;
- size += sprintf(&buf[size], "Flag in port struct : [ %08lX ]\n",
+ size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
dd->port->flags);
- size += sprintf(&buf[size], "Flag in dd struct : [ %08lX ]\n",
+ size += sprintf(&buf[size], "Flag-dd : [ %08lX ]\n",
dd->dd_flag);
- return size;
+ *offset = size <= len ? size : len;
+ size = copy_to_user(ubuf, buf, *offset);
+ if (size)
+ return -EFAULT;
+
+ return *offset;
}
-static DEVICE_ATTR(registers, S_IRUGO, mtip_hw_show_registers, NULL);
-static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
-static DEVICE_ATTR(flags, S_IRUGO, mtip_hw_show_flags, NULL);
+static const struct file_operations mtip_regs_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = mtip_hw_read_registers,
+ .llseek = no_llseek,
+};
+
+static const struct file_operations mtip_flags_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = mtip_hw_read_flags,
+ .llseek = no_llseek,
+};
/*
* Create the sysfs related attributes.
@@ -2671,15 +2705,9 @@ static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
if (!kobj || !dd)
return -EINVAL;
- if (sysfs_create_file(kobj, &dev_attr_registers.attr))
- dev_warn(&dd->pdev->dev,
- "Error creating 'registers' sysfs entry\n");
if (sysfs_create_file(kobj, &dev_attr_status.attr))
dev_warn(&dd->pdev->dev,
"Error creating 'status' sysfs entry\n");
- if (sysfs_create_file(kobj, &dev_attr_flags.attr))
- dev_warn(&dd->pdev->dev,
- "Error creating 'flags' sysfs entry\n");
return 0;
}
@@ -2698,13 +2726,39 @@ static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
if (!kobj || !dd)
return -EINVAL;
- sysfs_remove_file(kobj, &dev_attr_registers.attr);
sysfs_remove_file(kobj, &dev_attr_status.attr);
- sysfs_remove_file(kobj, &dev_attr_flags.attr);
return 0;
}
+static int mtip_hw_debugfs_init(struct driver_data *dd)
+{
+ if (!dfs_parent)
+ return -1;
+
+ dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent);
+ if (IS_ERR_OR_NULL(dd->dfs_node)) {
+ dev_warn(&dd->pdev->dev,
+ "Error creating node %s under debugfs\n",
+ dd->disk->disk_name);
+ dd->dfs_node = NULL;
+ return -1;
+ }
+
+ debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
+ &mtip_flags_fops);
+ debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
+ &mtip_regs_fops);
+
+ return 0;
+}
+
+static void mtip_hw_debugfs_exit(struct driver_data *dd)
+{
+ debugfs_remove_recursive(dd->dfs_node);
+}
+
+
/*
* Perform any init/resume time hardware setup
*
@@ -3730,6 +3784,7 @@ skip_create_disk:
mtip_hw_sysfs_init(dd, kobj);
kobject_put(kobj);
}
+ mtip_hw_debugfs_init(dd);
if (dd->mtip_svc_handler) {
set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
@@ -3755,6 +3810,8 @@ start_service_thread:
return rv;
kthread_run_error:
+ mtip_hw_debugfs_exit(dd);
+
/* Delete our gendisk. This also removes the device from /dev */
del_gendisk(dd->disk);
@@ -3805,6 +3862,7 @@ static int mtip_block_remove(struct driver_data *dd)
kobject_put(kobj);
}
}
+ mtip_hw_debugfs_exit(dd);
/*
* Delete our gendisk structure. This also removes the device
@@ -4152,10 +4210,20 @@ static int __init mtip_init(void)
}
mtip_major = error;
+ if (!dfs_parent) {
+ dfs_parent = debugfs_create_dir("rssd", NULL);
+ if (IS_ERR_OR_NULL(dfs_parent)) {
+ printk(KERN_WARNING "Error creating debugfs parent\n");
+ dfs_parent = NULL;
+ }
+ }
+
/* Register our PCI operations. */
error = pci_register_driver(&mtip_pci_driver);
- if (error)
+ if (error) {
+ debugfs_remove(dfs_parent);
unregister_blkdev(mtip_major, MTIP_DRV_NAME);
+ }
return error;
}
@@ -4172,6 +4240,8 @@ static int __init mtip_init(void)
*/
static void __exit mtip_exit(void)
{
+ debugfs_remove_recursive(dfs_parent);
+
/* Release the allocated major block device number. */
unregister_blkdev(mtip_major, MTIP_DRV_NAME);
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index b2c88da..f51fc23 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -26,7 +26,6 @@
#include <linux/ata.h>
#include <linux/interrupt.h>
#include <linux/genhd.h>
-#include <linux/version.h>
/* Offset of Subsystem Device ID in pci confoguration space */
#define PCI_SUBSYSTEM_DEVICEID 0x2E
@@ -111,6 +110,8 @@
#define dbg_printk(format, arg...)
#endif
+#define MTIP_DFS_MAX_BUF_SIZE 1024
+
#define __force_bit2int (unsigned int __force)
enum {
@@ -447,6 +448,8 @@ struct driver_data {
unsigned long dd_flag; /* NOTE: use atomic bit operations on this */
struct task_struct *mtip_svc_handler; /* task_struct of svc thd */
+
+ struct dentry *dfs_node;
};
#endif
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index aa27120..9a72277 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -513,6 +513,44 @@ static void process_page(unsigned long data)
}
}
+struct mm_plug_cb {
+ struct blk_plug_cb cb;
+ struct cardinfo *card;
+};
+
+static void mm_unplug(struct blk_plug_cb *cb)
+{
+ struct mm_plug_cb *mmcb = container_of(cb, struct mm_plug_cb, cb);
+
+ spin_lock_irq(&mmcb->card->lock);
+ activate(mmcb->card);
+ spin_unlock_irq(&mmcb->card->lock);
+ kfree(mmcb);
+}
+
+static int mm_check_plugged(struct cardinfo *card)
+{
+ struct blk_plug *plug = current->plug;
+ struct mm_plug_cb *mmcb;
+
+ if (!plug)
+ return 0;
+
+ list_for_each_entry(mmcb, &plug->cb_list, cb.list) {
+ if (mmcb->cb.callback == mm_unplug && mmcb->card == card)
+ return 1;
+ }
+ /* Not currently on the callback list */
+ mmcb = kmalloc(sizeof(*mmcb), GFP_ATOMIC);
+ if (!mmcb)
+ return 0;
+
+ mmcb->card = card;
+ mmcb->cb.callback = mm_unplug;
+ list_add(&mmcb->cb.list, &plug->cb_list);
+ return 1;
+}
+
static void mm_make_request(struct request_queue *q, struct bio *bio)
{
struct cardinfo *card = q->queuedata;
@@ -523,6 +561,8 @@ static void mm_make_request(struct request_queue *q, struct bio *bio)
*card->biotail = bio;
bio->bi_next = NULL;
card->biotail = &bio->bi_next;
+ if (bio->bi_rw & REQ_SYNC || !mm_check_plugged(card))
+ activate(card);
spin_unlock_irq(&card->lock);
return;
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 773cf27..9ad3b5e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -257,6 +257,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
@@ -287,6 +288,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 60eed4b..e4fb337 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -141,14 +141,36 @@ static int get_id_from_freelist(struct blkfront_info *info)
return free;
}
-static void add_id_to_freelist(struct blkfront_info *info,
+static int add_id_to_freelist(struct blkfront_info *info,
unsigned long id)
{
+ if (info->shadow[id].req.u.rw.id != id)
+ return -EINVAL;
+ if (info->shadow[id].request == NULL)
+ return -EINVAL;
info->shadow[id].req.u.rw.id = info->shadow_free;
info->shadow[id].request = NULL;
info->shadow_free = id;
+ return 0;
}
+static const char *op_name(int op)
+{
+ static const char *const names[] = {
+ [BLKIF_OP_READ] = "read",
+ [BLKIF_OP_WRITE] = "write",
+ [BLKIF_OP_WRITE_BARRIER] = "barrier",
+ [BLKIF_OP_FLUSH_DISKCACHE] = "flush",
+ [BLKIF_OP_DISCARD] = "discard" };
+
+ if (op < 0 || op >= ARRAY_SIZE(names))
+ return "unknown";
+
+ if (!names[op])
+ return "reserved";
+
+ return names[op];
+}
static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
{
unsigned int end = minor + nr;
@@ -746,20 +768,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
+ /*
+ * The backend has messed up and given us an id that we would
+ * never have given to it (we stamp it up to BLK_RING_SIZE -
+ * look in get_id_from_freelist.
+ */
+ if (id >= BLK_RING_SIZE) {
+ WARN(1, "%s: response to %s has incorrect id (%ld)\n",
+ info->gd->disk_name, op_name(bret->operation), id);
+ /* We can't safely get the 'struct request' as
+ * the id is busted. */
+ continue;
+ }
req = info->shadow[id].request;
if (bret->operation != BLKIF_OP_DISCARD)
blkif_completion(&info->shadow[id]);
- add_id_to_freelist(info, id);
+ if (add_id_to_freelist(info, id)) {
+ WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
+ info->gd->disk_name, op_name(bret->operation), id);
+ continue;
+ }
error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
- printk(KERN_WARNING "blkfront: %s: discard op failed\n",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
@@ -771,18 +809,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
- printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
- info->flush_op == BLKIF_OP_WRITE_BARRIER ?
- "barrier" : "flush disk cache",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
info->shadow[id].req.u.rw.nr_segments == 0)) {
- printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
- info->flush_op == BLKIF_OP_WRITE_BARRIER ?
- "barrier" : "flush disk cache",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
if (unlikely(error)) {
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index dcbe056..9a1eb0c 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -1067,26 +1067,24 @@ static int __clk_set_parent(struct clk *clk, struct clk *parent)
old_parent = clk->parent;
- /* find index of new parent clock using cached parent ptrs */
- if (clk->parents)
- for (i = 0; i < clk->num_parents; i++)
- if (clk->parents[i] == parent)
- break;
- else
+ if (!clk->parents)
clk->parents = kzalloc((sizeof(struct clk*) * clk->num_parents),
GFP_KERNEL);
/*
- * find index of new parent clock using string name comparison
- * also try to cache the parent to avoid future calls to __clk_lookup
+ * find index of new parent clock using cached parent ptrs,
+ * or if not yet cached, use string name comparison and cache
+ * them now to avoid future calls to __clk_lookup.
*/
- if (i == clk->num_parents)
- for (i = 0; i < clk->num_parents; i++)
- if (!strcmp(clk->parent_names[i], parent->name)) {
- if (clk->parents)
- clk->parents[i] = __clk_lookup(parent->name);
- break;
- }
+ for (i = 0; i < clk->num_parents; i++) {
+ if (clk->parents && clk->parents[i] == parent)
+ break;
+ else if (!strcmp(clk->parent_names[i], parent->name)) {
+ if (clk->parents)
+ clk->parents[i] = __clk_lookup(parent->name);
+ break;
+ }
+ }
if (i == clk->num_parents) {
pr_debug("%s: clock %s is not a possible parent of clock %s\n",
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 5873e48..a8743c3 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1039,6 +1039,24 @@ mode_in_range(const struct drm_display_mode *mode, struct edid *edid,
return true;
}
+static bool valid_inferred_mode(const struct drm_connector *connector,
+ const struct drm_display_mode *mode)
+{
+ struct drm_display_mode *m;
+ bool ok = false;
+
+ list_for_each_entry(m, &connector->probed_modes, head) {
+ if (mode->hdisplay == m->hdisplay &&
+ mode->vdisplay == m->vdisplay &&
+ drm_mode_vrefresh(mode) == drm_mode_vrefresh(m))
+ return false; /* duplicated */
+ if (mode->hdisplay <= m->hdisplay &&
+ mode->vdisplay <= m->vdisplay)
+ ok = true;
+ }
+ return ok;
+}
+
static int
drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid,
struct detailed_timing *timing)
@@ -1048,7 +1066,8 @@ drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid,
struct drm_device *dev = connector->dev;
for (i = 0; i < drm_num_dmt_modes; i++) {
- if (mode_in_range(drm_dmt_modes + i, edid, timing)) {
+ if (mode_in_range(drm_dmt_modes + i, edid, timing) &&
+ valid_inferred_mode(connector, drm_dmt_modes + i)) {
newmode = drm_mode_duplicate(dev, &drm_dmt_modes[i]);
if (newmode) {
drm_mode_probed_add(connector, newmode);
@@ -1088,7 +1107,8 @@ drm_gtf_modes_for_range(struct drm_connector *connector, struct edid *edid,
return modes;
fixup_mode_1366x768(newmode);
- if (!mode_in_range(newmode, edid, timing)) {
+ if (!mode_in_range(newmode, edid, timing) ||
+ !valid_inferred_mode(connector, newmode)) {
drm_mode_destroy(dev, newmode);
continue;
}
@@ -1116,7 +1136,8 @@ drm_cvt_modes_for_range(struct drm_connector *connector, struct edid *edid,
return modes;
fixup_mode_1366x768(newmode);
- if (!mode_in_range(newmode, edid, timing)) {
+ if (!mode_in_range(newmode, edid, timing) ||
+ !valid_inferred_mode(connector, newmode)) {
drm_mode_destroy(dev, newmode);
continue;
}
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index f947926..36822b9 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1401,6 +1401,27 @@ i915_mtrr_setup(struct drm_i915_private *dev_priv, unsigned long base,
}
}
+static void i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv)
+{
+ struct apertures_struct *ap;
+ struct pci_dev *pdev = dev_priv->dev->pdev;
+ bool primary;
+
+ ap = alloc_apertures(1);
+ if (!ap)
+ return;
+
+ ap->ranges[0].base = dev_priv->dev->agp->base;
+ ap->ranges[0].size =
+ dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
+ primary =
+ pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
+
+ remove_conflicting_framebuffers(ap, "inteldrmfb", primary);
+
+ kfree(ap);
+}
+
/**
* i915_driver_load - setup chip and create an initial config
* @dev: DRM device
@@ -1446,6 +1467,15 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
goto free_priv;
}
+ dev_priv->mm.gtt = intel_gtt_get();
+ if (!dev_priv->mm.gtt) {
+ DRM_ERROR("Failed to initialize GTT\n");
+ ret = -ENODEV;
+ goto put_bridge;
+ }
+
+ i915_kick_out_firmware_fb(dev_priv);
+
pci_set_master(dev->pdev);
/* overlay on gen2 is broken and can't address above 1G */
@@ -1471,13 +1501,6 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
goto put_bridge;
}
- dev_priv->mm.gtt = intel_gtt_get();
- if (!dev_priv->mm.gtt) {
- DRM_ERROR("Failed to initialize GTT\n");
- ret = -ENODEV;
- goto out_rmmap;
- }
-
aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
dev_priv->mm.gtt_mapping =
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 59d4493..84b648a 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -289,8 +289,9 @@ int radeon_vm_manager_init(struct radeon_device *rdev)
rdev->vm_manager.enabled = false;
/* mark first vm as always in use, it's the system one */
+ /* allocate enough for 2 full VM pts */
r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
- rdev->vm_manager.max_pfn * 8,
+ rdev->vm_manager.max_pfn * 8 * 2,
RADEON_GEM_DOMAIN_VRAM);
if (r) {
dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
@@ -633,7 +634,15 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
mutex_init(&vm->mutex);
INIT_LIST_HEAD(&vm->list);
INIT_LIST_HEAD(&vm->va);
- vm->last_pfn = 0;
+ /* SI requires equal sized PTs for all VMs, so always set
+ * last_pfn to max_pfn. cayman allows variable sized
+ * pts so we can grow then as needed. Once we switch
+ * to two level pts we can unify this again.
+ */
+ if (rdev->family >= CHIP_TAHITI)
+ vm->last_pfn = rdev->vm_manager.max_pfn;
+ else
+ vm->last_pfn = 0;
/* map the ib pool buffer at 0 in virtual address space, set
* read only
*/
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index f28bd4b..21ec9f5 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -292,6 +292,7 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
+ struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_busy *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
@@ -317,13 +318,14 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
break;
}
drm_gem_object_unreference_unlocked(gobj);
- r = radeon_gem_handle_lockup(robj->rdev, r);
+ r = radeon_gem_handle_lockup(rdev, r);
return r;
}
int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
+ struct radeon_device *rdev = dev->dev_private;
struct drm_radeon_gem_wait_idle *args = data;
struct drm_gem_object *gobj;
struct radeon_bo *robj;
@@ -336,10 +338,10 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
robj = gem_to_radeon_bo(gobj);
r = radeon_bo_wait(robj, NULL, false);
/* callback hw specific functions if any */
- if (robj->rdev->asic->ioctl_wait_idle)
- robj->rdev->asic->ioctl_wait_idle(robj->rdev, robj);
+ if (rdev->asic->ioctl_wait_idle)
+ robj->rdev->asic->ioctl_wait_idle(rdev, robj);
drm_gem_object_unreference_unlocked(gobj);
- r = radeon_gem_handle_lockup(robj->rdev, r);
+ r = radeon_gem_handle_lockup(rdev, r);
return r;
}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index c7b61f1..0b02792 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2365,12 +2365,12 @@ int si_pcie_gart_enable(struct radeon_device *rdev)
WREG32(0x15DC, 0);
/* empty context1-15 */
- /* FIXME start with 1G, once using 2 level pt switch to full
+ /* FIXME start with 4G, once using 2 level pt switch to full
* vm size space
*/
/* set vm size, must be a multiple of 4 */
WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
- WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
+ WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
for (i = 1; i < 16; i++) {
if (i < 8)
WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 7f1feb2..637c51c 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -693,7 +693,7 @@ static void __cpuinit get_core_online(unsigned int cpu)
* sensors. We check this bit only, all the early CPUs
* without thermal sensors will be filtered out.
*/
- if (!cpu_has(c, X86_FEATURE_DTS))
+ if (!cpu_has(c, X86_FEATURE_DTHERM))
return;
if (!pdev) {
@@ -794,7 +794,7 @@ static struct notifier_block coretemp_cpu_notifier __refdata = {
};
static const struct x86_cpu_id coretemp_ids[] = {
- { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTS },
+ { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM },
{}
};
MODULE_DEVICE_TABLE(x86cpu, coretemp_ids);
diff --git a/drivers/ieee802154/at86rf230.c b/drivers/ieee802154/at86rf230.c
index 4d033d4..902e38b 100644
--- a/drivers/ieee802154/at86rf230.c
+++ b/drivers/ieee802154/at86rf230.c
@@ -585,7 +585,6 @@ err:
static int at86rf230_rx(struct at86rf230_local *lp)
{
u8 len = 128, lqi = 0;
- int rc;
struct sk_buff *skb;
skb = alloc_skb(len, GFP_KERNEL);
@@ -607,7 +606,7 @@ static int at86rf230_rx(struct at86rf230_local *lp)
ieee802154_rx_irqsafe(lp->dev, skb, lqi);
- dev_dbg(&lp->spi->dev, "READ_FBUF: %d %d %x\n", rc, len, lqi);
+ dev_dbg(&lp->spi->dev, "READ_FBUF: %d %x\n", len, lqi);
return 0;
err:
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 740dcc0..77b6b18 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -1374,7 +1374,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
goto reject;
}
dst = &rt->dst;
- l2t = t3_l2t_get(tdev, dst, NULL);
+ l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip);
if (!l2t) {
printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
__func__);
@@ -1942,7 +1942,8 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
goto fail3;
}
ep->dst = &rt->dst;
- ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL);
+ ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL,
+ &cm_id->remote_addr.sin_addr.s_addr);
if (!ep->l2t) {
printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
err = -ENOMEM;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 3530c41..8a3a203 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -718,26 +718,53 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
return ret;
}
+struct mlx4_ib_steering {
+ struct list_head list;
+ u64 reg_id;
+ union ib_gid gid;
+};
+
static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
{
int err;
struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
struct mlx4_ib_qp *mqp = to_mqp(ibqp);
+ u64 reg_id;
+ struct mlx4_ib_steering *ib_steering = NULL;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
+ if (!ib_steering)
+ return -ENOMEM;
+ }
- err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
- !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
- MLX4_PROT_IB_IPV6);
+ err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
+ !!(mqp->flags &
+ MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
+ MLX4_PROT_IB_IPV6, &reg_id);
if (err)
- return err;
+ goto err_malloc;
err = add_gid_entry(ibqp, gid);
if (err)
goto err_add;
+ if (ib_steering) {
+ memcpy(ib_steering->gid.raw, gid->raw, 16);
+ ib_steering->reg_id = reg_id;
+ mutex_lock(&mqp->mutex);
+ list_add(&ib_steering->list, &mqp->steering_rules);
+ mutex_unlock(&mqp->mutex);
+ }
return 0;
err_add:
- mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6);
+ mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ MLX4_PROT_IB_IPV6, reg_id);
+err_malloc:
+ kfree(ib_steering);
+
return err;
}
@@ -765,9 +792,30 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
u8 mac[6];
struct net_device *ndev;
struct mlx4_ib_gid_entry *ge;
+ u64 reg_id = 0;
+
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ struct mlx4_ib_steering *ib_steering;
+
+ mutex_lock(&mqp->mutex);
+ list_for_each_entry(ib_steering, &mqp->steering_rules, list) {
+ if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
+ list_del(&ib_steering->list);
+ break;
+ }
+ }
+ mutex_unlock(&mqp->mutex);
+ if (&ib_steering->list == &mqp->steering_rules) {
+ pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
+ return -EINVAL;
+ }
+ reg_id = ib_steering->reg_id;
+ kfree(ib_steering);
+ }
- err = mlx4_multicast_detach(mdev->dev,
- &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6);
+ err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
+ MLX4_PROT_IB_IPV6, reg_id);
if (err)
return err;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index ff36655..42df4f7 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -163,6 +163,7 @@ struct mlx4_ib_qp {
u8 state;
int mlx_type;
struct list_head gid_list;
+ struct list_head steering_rules;
};
struct mlx4_ib_srq {
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 8d4ed24..6af19f6 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -495,6 +495,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
spin_lock_init(&qp->sq.lock);
spin_lock_init(&qp->rq.lock);
INIT_LIST_HEAD(&qp->gid_list);
+ INIT_LIST_HEAD(&qp->steering_rules);
qp->state = IB_QPS_RESET;
if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3974c29..bbee4b2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -715,7 +715,7 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
if (likely(skb_dst(skb))) {
- n = dst_get_neighbour_noref(skb_dst(skb));
+ n = dst_neigh_lookup_skb(skb_dst(skb), skb);
if (!n) {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
@@ -797,6 +797,8 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
}
unlock:
+ if (n)
+ neigh_release(n);
rcu_read_unlock();
return NETDEV_TX_OK;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 20ebc6f..7cecb16 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -658,9 +658,15 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
+ struct dst_entry *dst = skb_dst(skb);
struct ipoib_mcast *mcast;
+ struct neighbour *n;
unsigned long flags;
+ n = NULL;
+ if (dst)
+ n = dst_neigh_lookup_skb(dst, skb);
+
spin_lock_irqsave(&priv->lock, flags);
if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags) ||
@@ -715,29 +721,28 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
out:
if (mcast && mcast->ah) {
- struct dst_entry *dst = skb_dst(skb);
- struct neighbour *n = NULL;
-
- rcu_read_lock();
- if (dst)
- n = dst_get_neighbour_noref(dst);
- if (n && !*to_ipoib_neigh(n)) {
- struct ipoib_neigh *neigh = ipoib_neigh_alloc(n,
- skb->dev);
-
- if (neigh) {
- kref_get(&mcast->ah->ref);
- neigh->ah = mcast->ah;
- list_add_tail(&neigh->list, &mcast->neigh_list);
+ if (n) {
+ if (!*to_ipoib_neigh(n)) {
+ struct ipoib_neigh *neigh;
+
+ neigh = ipoib_neigh_alloc(n, skb->dev);
+ if (neigh) {
+ kref_get(&mcast->ah->ref);
+ neigh->ah = mcast->ah;
+ list_add_tail(&neigh->list,
+ &mcast->neigh_list);
+ }
}
+ neigh_release(n);
}
- rcu_read_unlock();
spin_unlock_irqrestore(&priv->lock, flags);
ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
return;
}
unlock:
+ if (n)
+ neigh_release(n);
spin_unlock_irqrestore(&priv->lock, flags);
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 37fdaf8..ce59824 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2292,6 +2292,13 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
if (r)
return r;
+ r = dm_pool_commit_metadata(pool->pmd);
+ if (r) {
+ DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+ __func__, r);
+ return r;
+ }
+
r = dm_pool_reserve_metadata_snap(pool->pmd);
if (r)
DMWARN("reserve_metadata_snap message failed.");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1c2f904..a4c219e 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5784,8 +5784,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
super_types[mddev->major_version].
validate_super(mddev, rdev);
if ((info->state & (1<<MD_DISK_SYNC)) &&
- (!test_bit(In_sync, &rdev->flags) ||
- rdev->raid_disk != info->raid_disk)) {
+ rdev->raid_disk != info->raid_disk) {
/* This was a hot-add request, but events doesn't
* match, so reject it.
*/
@@ -6751,7 +6750,7 @@ struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev
thread->tsk = kthread_run(md_thread, thread,
"%s_%s",
mdname(thread->mddev),
- name ?: mddev->pers->name);
+ name);
if (IS_ERR(thread->tsk)) {
kfree(thread);
return NULL;
@@ -7298,6 +7297,7 @@ void md_do_sync(struct mddev *mddev)
int skipped = 0;
struct md_rdev *rdev;
char *desc;
+ struct blk_plug plug;
/* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7447,6 +7447,7 @@ void md_do_sync(struct mddev *mddev)
}
mddev->curr_resync_completed = j;
+ blk_start_plug(&plug);
while (j < max_sectors) {
sector_t sectors;
@@ -7552,6 +7553,7 @@ void md_do_sync(struct mddev *mddev)
* this also signals 'finished resyncing' to md_stop
*/
out:
+ blk_finish_plug(&plug);
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
/* tell personality that we are finished */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 9339e67..61a1833 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -474,7 +474,8 @@ static int multipath_run (struct mddev *mddev)
}
{
- mddev->thread = md_register_thread(multipathd, mddev, NULL);
+ mddev->thread = md_register_thread(multipathd, mddev,
+ "multipath");
if (!mddev->thread) {
printk(KERN_ERR "multipath: couldn't allocate thread"
" for %s\n", mdname(mddev));
diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c
index 50ed53b..fc90c11 100644
--- a/drivers/md/persistent-data/dm-space-map-checker.c
+++ b/drivers/md/persistent-data/dm-space-map-checker.c
@@ -8,6 +8,7 @@
#include <linux/device-mapper.h>
#include <linux/export.h>
+#include <linux/vmalloc.h>
#ifdef CONFIG_DM_DEBUG_SPACE_MAPS
@@ -89,13 +90,23 @@ static int ca_create(struct count_array *ca, struct dm_space_map *sm)
ca->nr = nr_blocks;
ca->nr_free = nr_blocks;
- ca->counts = kzalloc(sizeof(*ca->counts) * nr_blocks, GFP_KERNEL);
- if (!ca->counts)
- return -ENOMEM;
+
+ if (!nr_blocks)
+ ca->counts = NULL;
+ else {
+ ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks);
+ if (!ca->counts)
+ return -ENOMEM;
+ }
return 0;
}
+static void ca_destroy(struct count_array *ca)
+{
+ vfree(ca->counts);
+}
+
static int ca_load(struct count_array *ca, struct dm_space_map *sm)
{
int r;
@@ -126,12 +137,14 @@ static int ca_load(struct count_array *ca, struct dm_space_map *sm)
static int ca_extend(struct count_array *ca, dm_block_t extra_blocks)
{
dm_block_t nr_blocks = ca->nr + extra_blocks;
- uint32_t *counts = kzalloc(sizeof(*counts) * nr_blocks, GFP_KERNEL);
+ uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks);
if (!counts)
return -ENOMEM;
- memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
- kfree(ca->counts);
+ if (ca->counts) {
+ memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
+ ca_destroy(ca);
+ }
ca->nr = nr_blocks;
ca->nr_free += extra_blocks;
ca->counts = counts;
@@ -151,11 +164,6 @@ static int ca_commit(struct count_array *old, struct count_array *new)
return 0;
}
-static void ca_destroy(struct count_array *ca)
-{
- kfree(ca->counts);
-}
-
/*----------------------------------------------------------------*/
struct sm_checker {
@@ -343,25 +351,25 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
int r;
struct sm_checker *smc;
- if (!sm)
- return NULL;
+ if (IS_ERR_OR_NULL(sm))
+ return ERR_PTR(-EINVAL);
smc = kmalloc(sizeof(*smc), GFP_KERNEL);
if (!smc)
- return NULL;
+ return ERR_PTR(-ENOMEM);
memcpy(&smc->sm, &ops_, sizeof(smc->sm));
r = ca_create(&smc->old_counts, sm);
if (r) {
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
r = ca_create(&smc->counts, sm);
if (r) {
ca_destroy(&smc->old_counts);
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
smc->real_sm = sm;
@@ -371,7 +379,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
ca_destroy(&smc->counts);
ca_destroy(&smc->old_counts);
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
r = ca_commit(&smc->old_counts, &smc->counts);
@@ -379,7 +387,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
ca_destroy(&smc->counts);
ca_destroy(&smc->old_counts);
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
return &smc->sm;
@@ -391,25 +399,25 @@ struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
int r;
struct sm_checker *smc;
- if (!sm)
- return NULL;
+ if (IS_ERR_OR_NULL(sm))
+ return ERR_PTR(-EINVAL);
smc = kmalloc(sizeof(*smc), GFP_KERNEL);
if (!smc)
- return NULL;
+ return ERR_PTR(-ENOMEM);
memcpy(&smc->sm, &ops_, sizeof(smc->sm));
r = ca_create(&smc->old_counts, sm);
if (r) {
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
r = ca_create(&smc->counts, sm);
if (r) {
ca_destroy(&smc->old_counts);
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
smc->real_sm = sm;
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index fc469ba..3d0ed53 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -290,7 +290,16 @@ struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
dm_block_t nr_blocks)
{
struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks);
- return dm_sm_checker_create_fresh(sm);
+ struct dm_space_map *smc;
+
+ if (IS_ERR_OR_NULL(sm))
+ return sm;
+
+ smc = dm_sm_checker_create_fresh(sm);
+ if (IS_ERR(smc))
+ dm_sm_destroy(sm);
+
+ return smc;
}
EXPORT_SYMBOL_GPL(dm_sm_disk_create);
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 400fe14..e5604b3 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -138,6 +138,9 @@ EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone);
void dm_tm_destroy(struct dm_transaction_manager *tm)
{
+ if (!tm->is_clone)
+ wipe_shadow_table(tm);
+
kfree(tm);
}
EXPORT_SYMBOL_GPL(dm_tm_destroy);
@@ -344,8 +347,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm,
}
*sm = dm_sm_checker_create(inner);
- if (!*sm)
+ if (IS_ERR(*sm)) {
+ r = PTR_ERR(*sm);
goto bad2;
+ }
} else {
r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location,
@@ -364,8 +369,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm,
}
*sm = dm_sm_checker_create(inner);
- if (!*sm)
+ if (IS_ERR(*sm)) {
+ r = PTR_ERR(*sm);
goto bad2;
+ }
}
return 0;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a9c7981..8c2754f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -517,8 +517,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
int bad_sectors;
int disk = start_disk + i;
- if (disk >= conf->raid_disks)
- disk -= conf->raid_disks;
+ if (disk >= conf->raid_disks * 2)
+ disk -= conf->raid_disks * 2;
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
@@ -883,7 +883,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
struct md_rdev *blocked_rdev;
- int plugged;
int first_clone;
int sectors_handled;
int max_sectors;
@@ -1034,7 +1033,6 @@ read_again:
* the bad blocks. Each set of writes gets it's own r1bio
* with a set of bios attached.
*/
- plugged = mddev_check_plugged(mddev);
disks = conf->raid_disks * 2;
retry_write:
@@ -1191,6 +1189,8 @@ read_again:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev))
+ md_wakeup_thread(mddev->thread);
}
/* Mustn't call r1_bio_write_done before this next test,
* as it could result in the bio being freed.
@@ -1213,9 +1213,6 @@ read_again:
/* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
-
- if (do_sync || !bitmap || !plugged)
- md_wakeup_thread(mddev->thread);
}
static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2621,7 +2618,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
goto abort;
}
err = -ENOMEM;
- conf->thread = md_register_thread(raid1d, mddev, NULL);
+ conf->thread = md_register_thread(raid1d, mddev, "raid1");
if (!conf->thread) {
printk(KERN_ERR
"md/raid1:%s: couldn't allocate thread\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 99ae606..8da6282 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1039,7 +1039,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
unsigned long flags;
struct md_rdev *blocked_rdev;
- int plugged;
int sectors_handled;
int max_sectors;
int sectors;
@@ -1239,7 +1238,6 @@ read_again:
* of r10_bios is recored in bio->bi_phys_segments just as with
* the read case.
*/
- plugged = mddev_check_plugged(mddev);
r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
raid10_find_phys(conf, r10_bio);
@@ -1396,6 +1394,8 @@ retry_write:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev))
+ md_wakeup_thread(mddev->thread);
if (!r10_bio->devs[i].repl_bio)
continue;
@@ -1423,6 +1423,8 @@ retry_write:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev))
+ md_wakeup_thread(mddev->thread);
}
/* Don't remove the bias on 'remaining' (one_write_done) until
@@ -1448,9 +1450,6 @@ retry_write:
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
-
- if (do_sync || !mddev->bitmap || !plugged)
- md_wakeup_thread(mddev->thread);
}
static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2310,7 +2309,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
- s<<9, conf->tmppage, WRITE)
+ s, conf->tmppage, WRITE)
== 0) {
/* Well, this device is dead */
printk(KERN_NOTICE
@@ -2349,7 +2348,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
switch (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
- s<<9, conf->tmppage,
+ s, conf->tmppage,
READ)) {
case 0:
/* Well, this device is dead */
@@ -2512,7 +2511,7 @@ read_more:
slot = r10_bio->read_slot;
printk_ratelimited(
KERN_ERR
- "md/raid10:%s: %s: redirecting"
+ "md/raid10:%s: %s: redirecting "
"sector %llu to another mirror\n",
mdname(mddev),
bdevname(rdev->bdev, b),
@@ -2661,7 +2660,8 @@ static void raid10d(struct mddev *mddev)
blk_start_plug(&plug);
for (;;) {
- flush_pending_writes(conf);
+ if (atomic_read(&mddev->plug_cnt) == 0)
+ flush_pending_writes(conf);
spin_lock_irqsave(&conf->device_lock, flags);
if (list_empty(head)) {
@@ -2890,6 +2890,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
/* want to reconstruct this device */
rb2 = r10_bio;
sect = raid10_find_virt(conf, sector_nr, i);
+ if (sect >= mddev->resync_max_sectors) {
+ /* last stripe is not complete - don't
+ * try to recover this sector.
+ */
+ continue;
+ }
/* Unless we are doing a full sync, or a replacement
* we only need to recover the block if it is set in
* the bitmap
@@ -3421,7 +3427,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
spin_lock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
- conf->thread = md_register_thread(raid10d, mddev, NULL);
+ conf->thread = md_register_thread(raid10d, mddev, "raid10");
if (!conf->thread)
goto out;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d267672..04348d7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
BUG_ON(!list_empty(&sh->lru));
BUG_ON(atomic_read(&conf->active_stripes)==0);
if (test_bit(STRIPE_HANDLE, &sh->state)) {
- if (test_bit(STRIPE_DELAYED, &sh->state))
+ if (test_bit(STRIPE_DELAYED, &sh->state) &&
+ !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
list_add_tail(&sh->lru, &conf->delayed_list);
else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0)
list_add_tail(&sh->lru, &conf->bitmap_list);
else {
+ clear_bit(STRIPE_DELAYED, &sh->state);
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
}
@@ -606,6 +608,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
* a chance*/
md_check_recovery(conf->mddev);
}
+ /*
+ * Because md_wait_for_blocked_rdev
+ * will dec nr_pending, we must
+ * increment it first.
+ */
+ atomic_inc(&rdev->nr_pending);
md_wait_for_blocked_rdev(rdev, conf->mddev);
} else {
/* Acknowledged bad block - skip the write */
@@ -1737,6 +1745,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
} else {
const char *bdn = bdevname(rdev->bdev, b);
int retry = 0;
+ int set_bad = 0;
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
@@ -1748,7 +1757,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (conf->mddev->degraded >= conf->max_degraded)
+ else if (conf->mddev->degraded >= conf->max_degraded) {
+ set_bad = 1;
printk_ratelimited(
KERN_WARNING
"md/raid:%s: read error not correctable "
@@ -1756,8 +1766,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+ } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
/* Oh, no!!! */
+ set_bad = 1;
printk_ratelimited(
KERN_WARNING
"md/raid:%s: read error NOT corrected!! "
@@ -1765,7 +1776,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (atomic_read(&rdev->read_errors)
+ } else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
printk(KERN_WARNING
"md/raid:%s: Too many read errors, failing device %s.\n",
@@ -1777,7 +1788,11 @@ static void raid5_end_read_request(struct bio * bi, int error)
else {
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
- md_error(conf->mddev, rdev);
+ if (!(set_bad
+ && test_bit(In_sync, &rdev->flags)
+ && rdev_set_badblocks(
+ rdev, sh->sector, STRIPE_SECTORS, 0)))
+ md_error(conf->mddev, rdev);
}
}
rdev_dec_pending(rdev, conf->mddev);
@@ -3582,8 +3597,18 @@ static void handle_stripe(struct stripe_head *sh)
finish:
/* wait for this device to become unblocked */
- if (conf->mddev->external && unlikely(s.blocked_rdev))
- md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+ if (unlikely(s.blocked_rdev)) {
+ if (conf->mddev->external)
+ md_wait_for_blocked_rdev(s.blocked_rdev,
+ conf->mddev);
+ else
+ /* Internal metadata will immediately
+ * be written by raid5d, so we don't
+ * need to wait here.
+ */
+ rdev_dec_pending(s.blocked_rdev,
+ conf->mddev);
+ }
if (s.handle_bad_blocks)
for (i = disks; i--; ) {
@@ -3881,8 +3906,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
raid_bio->bi_next = (void*)rdev;
align_bi->bi_bdev = rdev->bdev;
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
- /* No reshape active, so we can trust rdev->data_offset */
- align_bi->bi_sector += rdev->data_offset;
if (!bio_fits_rdev(align_bi) ||
is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
@@ -3893,6 +3916,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
return 0;
}
+ /* No reshape active, so we can trust rdev->data_offset */
+ align_bi->bi_sector += rdev->data_offset;
+
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0,
@@ -3971,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
struct stripe_head *sh;
const int rw = bio_data_dir(bi);
int remaining;
- int plugged;
if (unlikely(bi->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bi);
@@ -3990,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
- plugged = mddev_check_plugged(mddev);
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
int previous;
@@ -4092,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if ((bi->bi_rw & REQ_SYNC) &&
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
+ mddev_check_plugged(mddev);
release_stripe(sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
@@ -4099,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
finish_wait(&conf->wait_for_overlap, &w);
break;
}
-
}
- if (!plugged)
- md_wakeup_thread(mddev->thread);
spin_lock_irq(&conf->device_lock);
remaining = raid5_dec_bi_phys_segments(bi);
@@ -4823,6 +4845,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
int raid_disk, memory, max_disks;
struct md_rdev *rdev;
struct disk_info *disk;
+ char pers_name[6];
if (mddev->new_level != 5
&& mddev->new_level != 4
@@ -4946,7 +4969,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
mdname(mddev), memory);
- conf->thread = md_register_thread(raid5d, mddev, NULL);
+ sprintf(pers_name, "raid%d", mddev->new_level);
+ conf->thread = md_register_thread(raid5d, mddev, pers_name);
if (!conf->thread) {
printk(KERN_ERR
"md/raid:%s: couldn't allocate thread.\n",
@@ -5465,10 +5489,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk >= 0 &&
rdev->saved_raid_disk >= first &&
conf->disks[rdev->saved_raid_disk].rdev == NULL)
- disk = rdev->saved_raid_disk;
- else
- disk = first;
- for ( ; disk <= last ; disk++) {
+ first = rdev->saved_raid_disk;
+
+ for (disk = first; disk <= last; disk++) {
p = conf->disks + disk;
if (p->rdev == NULL) {
clear_bit(In_sync, &rdev->flags);
@@ -5477,8 +5500,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk != disk)
conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev);
- break;
+ goto out;
}
+ }
+ for (disk = first; disk <= last; disk++) {
+ p = conf->disks + disk;
if (test_bit(WantReplacement, &p->rdev->flags) &&
p->replacement == NULL) {
clear_bit(In_sync, &rdev->flags);
@@ -5490,6 +5516,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
}
+out:
print_raid5_conf(conf);
return err;
}
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 9eb7624..1901da1 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -6250,7 +6250,7 @@ bnx2_enable_msix(struct bnx2 *bp, int msix_vecs)
static int
bnx2_setup_int_mode(struct bnx2 *bp, int dis_msi)
{
- int cpus = num_online_cpus();
+ int cpus = netif_get_num_default_rss_queues();
int msix_vecs;
if (!bp->num_req_rx_rings)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index daa894b..53659f3 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -822,7 +822,8 @@ static inline int bnx2x_calc_num_queues(struct bnx2x *bp)
{
return num_queues ?
min_t(int, num_queues, BNX2X_MAX_QUEUES(bp)) :
- min_t(int, num_online_cpus(), BNX2X_MAX_QUEUES(bp));
+ min_t(int, netif_get_num_default_rss_queues(),
+ BNX2X_MAX_QUEUES(bp));
}
static inline void bnx2x_clear_sge_mask_next_elems(struct bnx2x_fastpath *fp)
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index e47ff8b..6cbab03 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -9908,7 +9908,7 @@ static bool tg3_enable_msix(struct tg3 *tp)
int i, rc;
struct msix_entry msix_ent[tp->irq_max];
- tp->irq_cnt = num_online_cpus();
+ tp->irq_cnt = netif_get_num_default_rss_queues();
if (tp->irq_cnt > 1) {
/* We want as many rx rings enabled as there are cpus.
* In multiqueue MSI-X mode, the first MSI-X vector
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index abb6ce7..9b08749 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -3050,7 +3050,7 @@ static struct pci_error_handlers t3_err_handler = {
static void set_nqsets(struct adapter *adap)
{
int i, j = 0;
- int num_cpus = num_online_cpus();
+ int num_cpus = netif_get_num_default_rss_queues();
int hwports = adap->params.nports;
int nqsets = adap->msix_nvectors - 1;
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
index 55cf72a..2dbbcbb 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_offload.c
@@ -62,7 +62,9 @@ static const unsigned int MAX_ATIDS = 64 * 1024;
static const unsigned int ATID_BASE = 0x10000;
static void cxgb_neigh_update(struct neighbour *neigh);
-static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new);
+static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh,
+ struct dst_entry *new, struct neighbour *new_neigh,
+ const void *daddr);
static inline int offload_activated(struct t3cdev *tdev)
{
@@ -968,8 +970,10 @@ static int nb_callback(struct notifier_block *self, unsigned long event,
}
case (NETEVENT_REDIRECT):{
struct netevent_redirect *nr = ctx;
- cxgb_redirect(nr->old, nr->new);
- cxgb_neigh_update(dst_get_neighbour_noref(nr->new));
+ cxgb_redirect(nr->old, nr->old_neigh,
+ nr->new, nr->new_neigh,
+ nr->daddr);
+ cxgb_neigh_update(nr->new_neigh);
break;
}
default:
@@ -1107,10 +1111,11 @@ static void set_l2t_ix(struct t3cdev *tdev, u32 tid, struct l2t_entry *e)
tdev->send(tdev, skb);
}
-static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new)
+static void cxgb_redirect(struct dst_entry *old, struct neighbour *old_neigh,
+ struct dst_entry *new, struct neighbour *new_neigh,
+ const void *daddr)
{
struct net_device *olddev, *newdev;
- struct neighbour *n;
struct tid_info *ti;
struct t3cdev *tdev;
u32 tid;
@@ -1118,15 +1123,8 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new)
struct l2t_entry *e;
struct t3c_tid_entry *te;
- n = dst_get_neighbour_noref(old);
- if (!n)
- return;
- olddev = n->dev;
-
- n = dst_get_neighbour_noref(new);
- if (!n)
- return;
- newdev = n->dev;
+ olddev = old_neigh->dev;
+ newdev = new_neigh->dev;
if (!is_offloading(olddev))
return;
@@ -1144,7 +1142,7 @@ static void cxgb_redirect(struct dst_entry *old, struct dst_entry *new)
}
/* Add new L2T entry */
- e = t3_l2t_get(tdev, new, newdev);
+ e = t3_l2t_get(tdev, new, newdev, daddr);
if (!e) {
printk(KERN_ERR "%s: couldn't allocate new l2t entry!\n",
__func__);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.c b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
index 3fa3c88..8d53438 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/l2t.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.c
@@ -299,7 +299,7 @@ static inline void reuse_entry(struct l2t_entry *e, struct neighbour *neigh)
}
struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst,
- struct net_device *dev)
+ struct net_device *dev, const void *daddr)
{
struct l2t_entry *e = NULL;
struct neighbour *neigh;
@@ -311,7 +311,7 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst,
int smt_idx;
rcu_read_lock();
- neigh = dst_get_neighbour_noref(dst);
+ neigh = dst_neigh_lookup(dst, daddr);
if (!neigh)
goto done_rcu;
@@ -360,6 +360,8 @@ struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst,
done_unlock:
write_unlock_bh(&d->lock);
done_rcu:
+ if (neigh)
+ neigh_release(neigh);
rcu_read_unlock();
return e;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb3/l2t.h b/drivers/net/ethernet/chelsio/cxgb3/l2t.h
index c4e8643..8cffcdf 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/l2t.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/l2t.h
@@ -110,7 +110,7 @@ static inline void set_arp_failure_handler(struct sk_buff *skb,
void t3_l2e_free(struct l2t_data *d, struct l2t_entry *e);
void t3_l2t_update(struct t3cdev *dev, struct neighbour *neigh);
struct l2t_entry *t3_l2t_get(struct t3cdev *cdev, struct dst_entry *dst,
- struct net_device *dev);
+ struct net_device *dev, const void *daddr);
int t3_l2t_send_slow(struct t3cdev *dev, struct sk_buff *skb,
struct l2t_entry *e);
void t3_l2t_send_event(struct t3cdev *dev, struct l2t_entry *e);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e1f96fb..5ed49af 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3493,8 +3493,8 @@ static void __devinit cfg_queues(struct adapter *adap)
*/
if (n10g)
q10g = (MAX_ETH_QSETS - (adap->params.nports - n10g)) / n10g;
- if (q10g > num_online_cpus())
- q10g = num_online_cpus();
+ if (q10g > netif_get_num_default_rss_queues())
+ q10g = netif_get_num_default_rss_queues();
for_each_port(adap, i) {
struct port_info *pi = adap2pinfo(adap, i);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index edce7af..2141bd7 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -2172,12 +2172,14 @@ static void be_msix_disable(struct be_adapter *adapter)
static uint be_num_rss_want(struct be_adapter *adapter)
{
+ u32 num = 0;
if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
!sriov_want(adapter) && be_physfn(adapter) &&
- !be_is_mc(adapter))
- return (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
- else
- return 0;
+ !be_is_mc(adapter)) {
+ num = (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
+ num = min_t(u32, num, (u32)netif_get_num_default_rss_queues());
+ }
+ return num;
}
static void be_msix_enable(struct be_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index 351a409..76edbc1 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -103,6 +103,7 @@
#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */
#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */
#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */
+#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */
#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */
#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index ba86b3f..a166efc 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -496,7 +496,7 @@ static void e1000_receive_skb(struct e1000_adapter *adapter,
* @sk_buff: socket buffer with received data
**/
static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
- __le16 csum, struct sk_buff *skb)
+ struct sk_buff *skb)
{
u16 status = (u16)status_err;
u8 errors = (u8)(status_err >> 24);
@@ -511,8 +511,8 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
if (status & E1000_RXD_STAT_IXSM)
return;
- /* TCP/UDP checksum error bit is set */
- if (errors & E1000_RXD_ERR_TCPE) {
+ /* TCP/UDP checksum error bit or IP checksum error bit is set */
+ if (errors & (E1000_RXD_ERR_TCPE | E1000_RXD_ERR_IPE)) {
/* let the stack verify checksum errors */
adapter->hw_csum_err++;
return;
@@ -523,19 +523,7 @@ static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
return;
/* It must be a TCP or UDP packet with a valid checksum */
- if (status & E1000_RXD_STAT_TCPCS) {
- /* TCP checksum is good */
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- } else {
- /*
- * IP fragment with UDP payload
- * Hardware complements the payload checksum, so we undo it
- * and then put the value in host order for further stack use.
- */
- __sum16 sum = (__force __sum16)swab16((__force u16)csum);
- skb->csum = csum_unfold(~sum);
- skb->ip_summed = CHECKSUM_COMPLETE;
- }
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
adapter->hw_csum_good++;
}
@@ -954,8 +942,7 @@ static bool e1000_clean_rx_irq(struct e1000_ring *rx_ring, int *work_done,
skb_put(skb, length);
/* Receive Checksum Offload */
- e1000_rx_checksum(adapter, staterr,
- rx_desc->wb.lower.hi_dword.csum_ip.csum, skb);
+ e1000_rx_checksum(adapter, staterr, skb);
e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb);
@@ -1341,8 +1328,7 @@ copydone:
total_rx_bytes += skb->len;
total_rx_packets++;
- e1000_rx_checksum(adapter, staterr,
- rx_desc->wb.lower.hi_dword.csum_ip.csum, skb);
+ e1000_rx_checksum(adapter, staterr, skb);
e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb);
@@ -1512,9 +1498,8 @@ static bool e1000_clean_jumbo_rx_irq(struct e1000_ring *rx_ring, int *work_done,
}
}
- /* Receive Checksum Offload XXX recompute due to CRC strip? */
- e1000_rx_checksum(adapter, staterr,
- rx_desc->wb.lower.hi_dword.csum_ip.csum, skb);
+ /* Receive Checksum Offload */
+ e1000_rx_checksum(adapter, staterr, skb);
e1000_rx_hash(netdev, rx_desc->wb.lower.hi_dword.rss, skb);
@@ -3098,19 +3083,10 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
/* Enable Receive Checksum Offload for TCP and UDP */
rxcsum = er32(RXCSUM);
- if (adapter->netdev->features & NETIF_F_RXCSUM) {
+ if (adapter->netdev->features & NETIF_F_RXCSUM)
rxcsum |= E1000_RXCSUM_TUOFL;
-
- /*
- * IPv4 payload checksum for UDP fragments must be
- * used in conjunction with packet-split.
- */
- if (adapter->rx_ps_pages)
- rxcsum |= E1000_RXCSUM_IPPCSE;
- } else {
+ else
rxcsum &= ~E1000_RXCSUM_TUOFL;
- /* no need to clear IPPCSE as it defaults to 0 */
- }
ew32(RXCSUM, rxcsum);
if (adapter->hw.mac.type == e1000_pch2lan) {
@@ -5241,22 +5217,10 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
/* Jumbo frame support */
- if (max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) {
- if (!(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) {
- e_err("Jumbo Frames not supported.\n");
- return -EINVAL;
- }
-
- /*
- * IP payload checksum (enabled with jumbos/packet-split when
- * Rx checksum is enabled) and generation of RSS hash is
- * mutually exclusive in the hardware.
- */
- if ((netdev->features & NETIF_F_RXCSUM) &&
- (netdev->features & NETIF_F_RXHASH)) {
- e_err("Jumbo frames cannot be enabled when both receive checksum offload and receive hashing are enabled. Disable one of the receive offload features before enabling jumbos.\n");
- return -EINVAL;
- }
+ if ((max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) &&
+ !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) {
+ e_err("Jumbo Frames not supported.\n");
+ return -EINVAL;
}
/* Supported frame sizes */
@@ -6030,17 +5994,6 @@ static int e1000_set_features(struct net_device *netdev,
NETIF_F_RXALL)))
return 0;
- /*
- * IP payload checksum (enabled with jumbos/packet-split when Rx
- * checksum is enabled) and generation of RSS hash is mutually
- * exclusive in the hardware.
- */
- if (adapter->rx_ps_pages &&
- (features & NETIF_F_RXCSUM) && (features & NETIF_F_RXHASH)) {
- e_err("Enabling both receive checksum offload and receive hashing is not possible with jumbo frames. Disable jumbos or enable only one of the receive offload features.\n");
- return -EINVAL;
- }
-
if (changed & NETIF_F_RXFCS) {
if (features & NETIF_F_RXFCS) {
adapter->flags2 &= ~FLAG2_CRC_STRIPPING;
diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c
index 8ce6706..90eef07 100644
--- a/drivers/net/ethernet/intel/igbvf/ethtool.c
+++ b/drivers/net/ethernet/intel/igbvf/ethtool.c
@@ -357,21 +357,28 @@ static int igbvf_set_coalesce(struct net_device *netdev,
struct igbvf_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
- if ((ec->rx_coalesce_usecs > IGBVF_MAX_ITR_USECS) ||
- ((ec->rx_coalesce_usecs > 3) &&
- (ec->rx_coalesce_usecs < IGBVF_MIN_ITR_USECS)) ||
- (ec->rx_coalesce_usecs == 2))
- return -EINVAL;
-
- /* convert to rate of irq's per second */
- if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3) {
+ if ((ec->rx_coalesce_usecs >= IGBVF_MIN_ITR_USECS) &&
+ (ec->rx_coalesce_usecs <= IGBVF_MAX_ITR_USECS)) {
+ adapter->current_itr = ec->rx_coalesce_usecs << 2;
+ adapter->requested_itr = 1000000000 /
+ (adapter->current_itr * 256);
+ } else if ((ec->rx_coalesce_usecs == 3) ||
+ (ec->rx_coalesce_usecs == 2)) {
adapter->current_itr = IGBVF_START_ITR;
adapter->requested_itr = ec->rx_coalesce_usecs;
- } else {
- adapter->current_itr = ec->rx_coalesce_usecs << 2;
+ } else if (ec->rx_coalesce_usecs == 0) {
+ /*
+ * The user's desire is to turn off interrupt throttling
+ * altogether, but due to HW limitations, we can't do that.
+ * Instead we set a very small value in EITR, which would
+ * allow ~967k interrupts per second, but allow the adapter's
+ * internal clocking to still function properly.
+ */
+ adapter->current_itr = 4;
adapter->requested_itr = 1000000000 /
(adapter->current_itr * 256);
- }
+ } else
+ return -EINVAL;
writel(adapter->current_itr,
hw->hw_addr + adapter->rx_ring->itr_register);
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 842c8ce..7e94987d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1080,6 +1080,25 @@ static struct mlx4_cmd_info cmd_info[] = {
.verify = NULL,
.wrapper = NULL
},
+ /* flow steering commands */
+ {
+ .opcode = MLX4_QP_FLOW_STEERING_ATTACH,
+ .has_inbox = true,
+ .has_outbox = false,
+ .out_is_imm = true,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_FLOW_STEERING_ATTACH_wrapper
+ },
+ {
+ .opcode = MLX4_QP_FLOW_STEERING_DETACH,
+ .has_inbox = false,
+ .has_outbox = false,
+ .out_is_imm = false,
+ .encode_slave_id = false,
+ .verify = NULL,
+ .wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper
+ },
};
static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 72901ce..dd6a77b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -38,6 +38,10 @@
#include "mlx4_en.h"
#include "en_port.h"
+#define EN_ETHTOOL_QP_ATTACH (1ull << 63)
+#define EN_ETHTOOL_MAC_MASK 0xffffffffffffULL
+#define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff)
+#define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff)
static void
mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo)
@@ -599,16 +603,369 @@ static int mlx4_en_set_rxfh_indir(struct net_device *dev,
return err;
}
+#define all_zeros_or_all_ones(field) \
+ ((field) == 0 || (field) == (__force typeof(field))-1)
+
+static int mlx4_en_validate_flow(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_usrip4_spec *l3_mask;
+ struct ethtool_tcpip4_spec *l4_mask;
+ struct ethhdr *eth_mask;
+ u64 full_mac = ~0ull;
+ u64 zero_mac = 0;
+
+ if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+ return -EINVAL;
+
+ switch (cmd->fs.flow_type & ~FLOW_EXT) {
+ case TCP_V4_FLOW:
+ case UDP_V4_FLOW:
+ if (cmd->fs.m_u.tcp_ip4_spec.tos)
+ return -EINVAL;
+ l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
+ /* don't allow mask which isn't all 0 or 1 */
+ if (!all_zeros_or_all_ones(l4_mask->ip4src) ||
+ !all_zeros_or_all_ones(l4_mask->ip4dst) ||
+ !all_zeros_or_all_ones(l4_mask->psrc) ||
+ !all_zeros_or_all_ones(l4_mask->pdst))
+ return -EINVAL;
+ break;
+ case IP_USER_FLOW:
+ l3_mask = &cmd->fs.m_u.usr_ip4_spec;
+ if (l3_mask->l4_4_bytes || l3_mask->tos || l3_mask->proto ||
+ cmd->fs.h_u.usr_ip4_spec.ip_ver != ETH_RX_NFC_IP4 ||
+ (!l3_mask->ip4src && !l3_mask->ip4dst) ||
+ !all_zeros_or_all_ones(l3_mask->ip4src) ||
+ !all_zeros_or_all_ones(l3_mask->ip4dst))
+ return -EINVAL;
+ break;
+ case ETHER_FLOW:
+ eth_mask = &cmd->fs.m_u.ether_spec;
+ /* source mac mask must not be set */
+ if (memcmp(eth_mask->h_source, &zero_mac, ETH_ALEN))
+ return -EINVAL;
+
+ /* dest mac mask must be ff:ff:ff:ff:ff:ff */
+ if (memcmp(eth_mask->h_dest, &full_mac, ETH_ALEN))
+ return -EINVAL;
+
+ if (!all_zeros_or_all_ones(eth_mask->h_proto))
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((cmd->fs.flow_type & FLOW_EXT)) {
+ if (cmd->fs.m_ext.vlan_etype ||
+ !(cmd->fs.m_ext.vlan_tci == 0 ||
+ cmd->fs.m_ext.vlan_tci == cpu_to_be16(0xfff)))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int add_ip_rule(struct mlx4_en_priv *priv,
+ struct ethtool_rxnfc *cmd,
+ struct list_head *list_h)
+{
+ struct mlx4_spec_list *spec_l3;
+ struct ethtool_usrip4_spec *l3_mask = &cmd->fs.m_u.usr_ip4_spec;
+
+ spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
+ if (!spec_l3) {
+ en_err(priv, "Fail to alloc ethtool rule.\n");
+ return -ENOMEM;
+ }
+
+ spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+ spec_l3->ipv4.src_ip = cmd->fs.h_u.usr_ip4_spec.ip4src;
+ if (l3_mask->ip4src)
+ spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
+ spec_l3->ipv4.dst_ip = cmd->fs.h_u.usr_ip4_spec.ip4dst;
+ if (l3_mask->ip4dst)
+ spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
+ list_add_tail(&spec_l3->list, list_h);
+
+ return 0;
+}
+
+static int add_tcp_udp_rule(struct mlx4_en_priv *priv,
+ struct ethtool_rxnfc *cmd,
+ struct list_head *list_h, int proto)
+{
+ struct mlx4_spec_list *spec_l3;
+ struct mlx4_spec_list *spec_l4;
+ struct ethtool_tcpip4_spec *l4_mask = &cmd->fs.m_u.tcp_ip4_spec;
+
+ spec_l3 = kzalloc(sizeof *spec_l3, GFP_KERNEL);
+ spec_l4 = kzalloc(sizeof *spec_l4, GFP_KERNEL);
+ if (!spec_l4 || !spec_l3) {
+ en_err(priv, "Fail to alloc ethtool rule.\n");
+ kfree(spec_l3);
+ kfree(spec_l4);
+ return -ENOMEM;
+ }
+
+ spec_l3->id = MLX4_NET_TRANS_RULE_ID_IPV4;
+
+ if (proto == TCP_V4_FLOW) {
+ spec_l4->id = MLX4_NET_TRANS_RULE_ID_TCP;
+ spec_l3->ipv4.src_ip = cmd->fs.h_u.tcp_ip4_spec.ip4src;
+ spec_l3->ipv4.dst_ip = cmd->fs.h_u.tcp_ip4_spec.ip4dst;
+ spec_l4->tcp_udp.src_port = cmd->fs.h_u.tcp_ip4_spec.psrc;
+ spec_l4->tcp_udp.dst_port = cmd->fs.h_u.tcp_ip4_spec.pdst;
+ } else {
+ spec_l4->id = MLX4_NET_TRANS_RULE_ID_UDP;
+ spec_l3->ipv4.src_ip = cmd->fs.h_u.udp_ip4_spec.ip4src;
+ spec_l3->ipv4.dst_ip = cmd->fs.h_u.udp_ip4_spec.ip4dst;
+ spec_l4->tcp_udp.src_port = cmd->fs.h_u.udp_ip4_spec.psrc;
+ spec_l4->tcp_udp.dst_port = cmd->fs.h_u.udp_ip4_spec.pdst;
+ }
+
+ if (l4_mask->ip4src)
+ spec_l3->ipv4.src_ip_msk = EN_ETHTOOL_WORD_MASK;
+ if (l4_mask->ip4dst)
+ spec_l3->ipv4.dst_ip_msk = EN_ETHTOOL_WORD_MASK;
+
+ if (l4_mask->psrc)
+ spec_l4->tcp_udp.src_port_msk = EN_ETHTOOL_SHORT_MASK;
+ if (l4_mask->pdst)
+ spec_l4->tcp_udp.dst_port_msk = EN_ETHTOOL_SHORT_MASK;
+
+ list_add_tail(&spec_l3->list, list_h);
+ list_add_tail(&spec_l4->list, list_h);
+
+ return 0;
+}
+
+static int mlx4_en_ethtool_to_net_trans_rule(struct net_device *dev,
+ struct ethtool_rxnfc *cmd,
+ struct list_head *rule_list_h)
+{
+ int err;
+ u64 mac;
+ __be64 be_mac;
+ struct ethhdr *eth_spec;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_spec_list *spec_l2;
+ __be64 mac_msk = cpu_to_be64(EN_ETHTOOL_MAC_MASK << 16);
+
+ err = mlx4_en_validate_flow(dev, cmd);
+ if (err)
+ return err;
+
+ spec_l2 = kzalloc(sizeof *spec_l2, GFP_KERNEL);
+ if (!spec_l2)
+ return -ENOMEM;
+
+ mac = priv->mac & EN_ETHTOOL_MAC_MASK;
+ be_mac = cpu_to_be64(mac << 16);
+
+ spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_l2->eth.dst_mac_msk, &mac_msk, ETH_ALEN);
+ if ((cmd->fs.flow_type & ~FLOW_EXT) != ETHER_FLOW)
+ memcpy(spec_l2->eth.dst_mac, &be_mac, ETH_ALEN);
+
+ if ((cmd->fs.flow_type & FLOW_EXT) && cmd->fs.m_ext.vlan_tci) {
+ spec_l2->eth.vlan_id = cmd->fs.h_ext.vlan_tci;
+ spec_l2->eth.vlan_id_msk = cpu_to_be16(0xfff);
+ }
+
+ list_add_tail(&spec_l2->list, rule_list_h);
+
+ switch (cmd->fs.flow_type & ~FLOW_EXT) {
+ case ETHER_FLOW:
+ eth_spec = &cmd->fs.h_u.ether_spec;
+ memcpy(&spec_l2->eth.dst_mac, eth_spec->h_dest, ETH_ALEN);
+ spec_l2->eth.ether_type = eth_spec->h_proto;
+ if (eth_spec->h_proto)
+ spec_l2->eth.ether_type_enable = 1;
+ break;
+ case IP_USER_FLOW:
+ err = add_ip_rule(priv, cmd, rule_list_h);
+ break;
+ case TCP_V4_FLOW:
+ err = add_tcp_udp_rule(priv, cmd, rule_list_h, TCP_V4_FLOW);
+ break;
+ case UDP_V4_FLOW:
+ err = add_tcp_udp_rule(priv, cmd, rule_list_h, UDP_V4_FLOW);
+ break;
+ }
+
+ return err;
+}
+
+static int mlx4_en_flow_replace(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ int err;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct ethtool_flow_id *loc_rule;
+ struct mlx4_spec_list *spec, *tmp_spec;
+ u32 qpn;
+ u64 reg_id;
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_PROMISC_NONE,
+ };
+
+ rule.port = priv->port;
+ rule.priority = MLX4_DOMAIN_ETHTOOL | cmd->fs.location;
+ INIT_LIST_HEAD(&rule.list);
+
+ /* Allow direct QP attaches if the EN_ETHTOOL_QP_ATTACH flag is set */
+ if (cmd->fs.ring_cookie == RX_CLS_FLOW_DISC)
+ qpn = priv->drop_qp.qpn;
+ else if (cmd->fs.ring_cookie & EN_ETHTOOL_QP_ATTACH) {
+ qpn = cmd->fs.ring_cookie & (EN_ETHTOOL_QP_ATTACH - 1);
+ } else {
+ if (cmd->fs.ring_cookie >= priv->rx_ring_num) {
+ en_warn(priv, "rxnfc: RX ring (%llu) doesn't exist.\n",
+ cmd->fs.ring_cookie);
+ return -EINVAL;
+ }
+ qpn = priv->rss_map.qps[cmd->fs.ring_cookie].qpn;
+ if (!qpn) {
+ en_warn(priv, "rxnfc: RX ring (%llu) is inactive.\n",
+ cmd->fs.ring_cookie);
+ return -EINVAL;
+ }
+ }
+ rule.qpn = qpn;
+ err = mlx4_en_ethtool_to_net_trans_rule(dev, cmd, &rule.list);
+ if (err)
+ goto out_free_list;
+
+ loc_rule = &priv->ethtool_rules[cmd->fs.location];
+ if (loc_rule->id) {
+ err = mlx4_flow_detach(priv->mdev->dev, loc_rule->id);
+ if (err) {
+ en_err(priv, "Fail to detach network rule at location %d. registration id = %llx\n",
+ cmd->fs.location, loc_rule->id);
+ goto out_free_list;
+ }
+ loc_rule->id = 0;
+ memset(&loc_rule->flow_spec, 0,
+ sizeof(struct ethtool_rx_flow_spec));
+ }
+ err = mlx4_flow_attach(priv->mdev->dev, &rule, &reg_id);
+ if (err) {
+ en_err(priv, "Fail to attach network rule at location %d.\n",
+ cmd->fs.location);
+ goto out_free_list;
+ }
+ loc_rule->id = reg_id;
+ memcpy(&loc_rule->flow_spec, &cmd->fs,
+ sizeof(struct ethtool_rx_flow_spec));
+
+out_free_list:
+ list_for_each_entry_safe(spec, tmp_spec, &rule.list, list) {
+ list_del(&spec->list);
+ kfree(spec);
+ }
+ return err;
+}
+
+static int mlx4_en_flow_detach(struct net_device *dev,
+ struct ethtool_rxnfc *cmd)
+{
+ int err = 0;
+ struct ethtool_flow_id *rule;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+ return -EINVAL;
+
+ rule = &priv->ethtool_rules[cmd->fs.location];
+ if (!rule->id) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ err = mlx4_flow_detach(priv->mdev->dev, rule->id);
+ if (err) {
+ en_err(priv, "Fail to detach network rule at location %d. registration id = 0x%llx\n",
+ cmd->fs.location, rule->id);
+ goto out;
+ }
+ rule->id = 0;
+ memset(&rule->flow_spec, 0, sizeof(struct ethtool_rx_flow_spec));
+out:
+ return err;
+
+}
+
+static int mlx4_en_get_flow(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ int loc)
+{
+ int err = 0;
+ struct ethtool_flow_id *rule;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+
+ if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES)
+ return -EINVAL;
+
+ rule = &priv->ethtool_rules[loc];
+ if (rule->id)
+ memcpy(&cmd->fs, &rule->flow_spec,
+ sizeof(struct ethtool_rx_flow_spec));
+ else
+ err = -ENOENT;
+
+ return err;
+}
+
+static int mlx4_en_get_num_flows(struct mlx4_en_priv *priv)
+{
+
+ int i, res = 0;
+ for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) {
+ if (priv->ethtool_rules[i].id)
+ res++;
+ }
+ return res;
+
+}
+
static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
u32 *rule_locs)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
int err = 0;
+ int i = 0, priority = 0;
+
+ if ((cmd->cmd == ETHTOOL_GRXCLSRLCNT ||
+ cmd->cmd == ETHTOOL_GRXCLSRULE ||
+ cmd->cmd == ETHTOOL_GRXCLSRLALL) &&
+ mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EINVAL;
switch (cmd->cmd) {
case ETHTOOL_GRXRINGS:
cmd->data = priv->rx_ring_num;
break;
+ case ETHTOOL_GRXCLSRLCNT:
+ cmd->rule_cnt = mlx4_en_get_num_flows(priv);
+ break;
+ case ETHTOOL_GRXCLSRULE:
+ err = mlx4_en_get_flow(dev, cmd, cmd->fs.location);
+ break;
+ case ETHTOOL_GRXCLSRLALL:
+ while ((!err || err == -ENOENT) && priority < cmd->rule_cnt) {
+ err = mlx4_en_get_flow(dev, cmd, i);
+ if (!err)
+ rule_locs[priority++] = i;
+ i++;
+ }
+ err = 0;
+ break;
default:
err = -EOPNOTSUPP;
break;
@@ -617,6 +974,30 @@ static int mlx4_en_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
return err;
}
+static int mlx4_en_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+ int err = 0;
+ struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_dev *mdev = priv->mdev;
+
+ if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EINVAL;
+
+ switch (cmd->cmd) {
+ case ETHTOOL_SRXCLSRLINS:
+ err = mlx4_en_flow_replace(dev, cmd);
+ break;
+ case ETHTOOL_SRXCLSRLDEL:
+ err = mlx4_en_flow_detach(dev, cmd);
+ break;
+ default:
+ en_warn(priv, "Unsupported ethtool command. (%d)\n", cmd->cmd);
+ return -EINVAL;
+ }
+
+ return err;
+}
+
const struct ethtool_ops mlx4_en_ethtool_ops = {
.get_drvinfo = mlx4_en_get_drvinfo,
.get_settings = mlx4_en_get_settings,
@@ -637,6 +1018,7 @@ const struct ethtool_ops mlx4_en_ethtool_ops = {
.get_ringparam = mlx4_en_get_ringparam,
.set_ringparam = mlx4_en_set_ringparam,
.get_rxnfc = mlx4_en_get_rxnfc,
+ .set_rxnfc = mlx4_en_set_rxnfc,
.get_rxfh_indir_size = mlx4_en_get_rxfh_indir_size,
.get_rxfh_indir = mlx4_en_get_rxfh_indir,
.set_rxfh_indir = mlx4_en_set_rxfh_indir,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 073b85b..94375a8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -170,33 +170,81 @@ static void mlx4_en_do_set_mac(struct work_struct *work)
static void mlx4_en_clear_list(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
+ struct mlx4_en_mc_list *tmp, *mc_to_del;
- kfree(priv->mc_addrs);
- priv->mc_addrs = NULL;
- priv->mc_addrs_cnt = 0;
+ list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) {
+ list_del(&mc_to_del->list);
+ kfree(mc_to_del);
+ }
}
static void mlx4_en_cache_mclist(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct netdev_hw_addr *ha;
- char *mc_addrs;
- int mc_addrs_cnt = netdev_mc_count(dev);
- int i;
+ struct mlx4_en_mc_list *tmp;
- mc_addrs = kmalloc(mc_addrs_cnt * ETH_ALEN, GFP_ATOMIC);
- if (!mc_addrs) {
- en_err(priv, "failed to allocate multicast list\n");
- return;
- }
- i = 0;
- netdev_for_each_mc_addr(ha, dev)
- memcpy(mc_addrs + i++ * ETH_ALEN, ha->addr, ETH_ALEN);
mlx4_en_clear_list(dev);
- priv->mc_addrs = mc_addrs;
- priv->mc_addrs_cnt = mc_addrs_cnt;
+ netdev_for_each_mc_addr(ha, dev) {
+ tmp = kzalloc(sizeof(struct mlx4_en_mc_list), GFP_ATOMIC);
+ if (!tmp) {
+ en_err(priv, "failed to allocate multicast list\n");
+ mlx4_en_clear_list(dev);
+ return;
+ }
+ memcpy(tmp->addr, ha->addr, ETH_ALEN);
+ list_add_tail(&tmp->list, &priv->mc_list);
+ }
}
+static void update_mclist_flags(struct mlx4_en_priv *priv,
+ struct list_head *dst,
+ struct list_head *src)
+{
+ struct mlx4_en_mc_list *dst_tmp, *src_tmp, *new_mc;
+ bool found;
+
+ /* Find all the entries that should be removed from dst,
+ * These are the entries that are not found in src
+ */
+ list_for_each_entry(dst_tmp, dst, list) {
+ found = false;
+ list_for_each_entry(src_tmp, src, list) {
+ if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ dst_tmp->action = MCLIST_REM;
+ }
+
+ /* Add entries that exist in src but not in dst
+ * mark them as need to add
+ */
+ list_for_each_entry(src_tmp, src, list) {
+ found = false;
+ list_for_each_entry(dst_tmp, dst, list) {
+ if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
+ dst_tmp->action = MCLIST_NONE;
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ new_mc = kmalloc(sizeof(struct mlx4_en_mc_list),
+ GFP_KERNEL);
+ if (!new_mc) {
+ en_err(priv, "Failed to allocate current multicast list\n");
+ return;
+ }
+ memcpy(new_mc, src_tmp,
+ sizeof(struct mlx4_en_mc_list));
+ new_mc->action = MCLIST_ADD;
+ list_add_tail(&new_mc->list, dst);
+ }
+ }
+}
static void mlx4_en_set_multicast(struct net_device *dev)
{
@@ -214,9 +262,10 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
mcast_task);
struct mlx4_en_dev *mdev = priv->mdev;
struct net_device *dev = priv->dev;
+ struct mlx4_en_mc_list *mclist, *tmp;
u64 mcast_addr = 0;
u8 mc_list[16] = {0};
- int err;
+ int err = 0;
mutex_lock(&mdev->state_lock);
if (!mdev->device_up) {
@@ -251,16 +300,46 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
priv->flags |= MLX4_EN_FLAG_PROMISC;
/* Enable promiscouos mode */
- if (!(mdev->dev->caps.flags &
- MLX4_DEV_CAP_FLAG_VEP_UC_STEER))
- err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
- priv->base_qpn, 1);
- else
- err = mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn,
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_add(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ MLX4_FS_PROMISC_UPLINK);
+ if (err)
+ en_err(priv, "Failed enabling promiscuous mode\n");
+ priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_unicast_promisc_add(mdev->dev,
+ priv->base_qpn,
priv->port);
- if (err)
- en_err(priv, "Failed enabling "
- "promiscuous mode\n");
+ if (err)
+ en_err(priv, "Failed enabling unicast promiscuous mode\n");
+
+ /* Add the default qp number as multicast
+ * promisc
+ */
+ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
+ err = mlx4_multicast_promisc_add(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ if (err)
+ en_err(priv, "Failed enabling multicast promiscuous mode\n");
+ priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
+ }
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ err = mlx4_SET_PORT_qpn_calc(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ 1);
+ if (err)
+ en_err(priv, "Failed enabling promiscuous mode\n");
+ break;
+ }
/* Disable port multicast filter (unconditionally) */
err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
@@ -269,15 +348,6 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
en_err(priv, "Failed disabling "
"multicast filter\n");
- /* Add the default qp number as multicast promisc */
- if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
- err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn,
- priv->port);
- if (err)
- en_err(priv, "Failed entering multicast promisc mode\n");
- priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
- }
-
/* Disable port VLAN filter */
err = mlx4_SET_VLAN_FLTR(mdev->dev, priv);
if (err)
@@ -296,22 +366,40 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
priv->flags &= ~MLX4_EN_FLAG_PROMISC;
/* Disable promiscouos mode */
- if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER))
- err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port,
- priv->base_qpn, 0);
- else
- err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn,
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_PROMISC_UPLINK);
+ if (err)
+ en_err(priv, "Failed disabling promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_unicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
priv->port);
- if (err)
- en_err(priv, "Failed disabling promiscuous mode\n");
+ if (err)
+ en_err(priv, "Failed disabling unicast promiscuous mode\n");
+ /* Disable Multicast promisc */
+ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
+ err = mlx4_multicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ if (err)
+ en_err(priv, "Failed disabling multicast promiscuous mode\n");
+ priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ }
+ break;
- /* Disable Multicast promisc */
- if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
- err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn,
- priv->port);
+ case MLX4_STEERING_MODE_A0:
+ err = mlx4_SET_PORT_qpn_calc(mdev->dev,
+ priv->port,
+ priv->base_qpn, 0);
if (err)
- en_err(priv, "Failed disabling multicast promiscuous mode\n");
- priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
+ en_err(priv, "Failed disabling promiscuous mode\n");
+ break;
}
/* Enable port VLAN filter */
@@ -329,18 +417,46 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
/* Add the default qp number as multicast promisc */
if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
- err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn,
- priv->port);
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_add(mdev->dev,
+ priv->port,
+ priv->base_qpn,
+ MLX4_FS_PROMISC_ALL_MULTI);
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_multicast_promisc_add(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ break;
+ }
if (err)
en_err(priv, "Failed entering multicast promisc mode\n");
priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
}
} else {
- int i;
/* Disable Multicast promisc */
if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
- err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn,
- priv->port);
+ switch (mdev->dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ err = mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_PROMISC_ALL_MULTI);
+ break;
+
+ case MLX4_STEERING_MODE_B0:
+ err = mlx4_multicast_promisc_remove(mdev->dev,
+ priv->base_qpn,
+ priv->port);
+ break;
+
+ case MLX4_STEERING_MODE_A0:
+ break;
+ }
if (err)
en_err(priv, "Failed disabling multicast promiscuous mode\n");
priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
@@ -351,13 +467,6 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
if (err)
en_err(priv, "Failed disabling multicast filter\n");
- /* Detach our qp from all the multicast addresses */
- for (i = 0; i < priv->mc_addrs_cnt; i++) {
- memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN);
- mc_list[5] = priv->port;
- mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp,
- mc_list, MLX4_PROT_ETH);
- }
/* Flush mcast filter and init it with broadcast address */
mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST,
1, MLX4_MCAST_CONFIG);
@@ -367,13 +476,8 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
netif_tx_lock_bh(dev);
mlx4_en_cache_mclist(dev);
netif_tx_unlock_bh(dev);
- for (i = 0; i < priv->mc_addrs_cnt; i++) {
- mcast_addr =
- mlx4_en_mac_to_u64(priv->mc_addrs + i * ETH_ALEN);
- memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN);
- mc_list[5] = priv->port;
- mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp,
- mc_list, 0, MLX4_PROT_ETH);
+ list_for_each_entry(mclist, &priv->mc_list, list) {
+ mcast_addr = mlx4_en_mac_to_u64(mclist->addr);
mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
mcast_addr, 0, MLX4_MCAST_CONFIG);
}
@@ -381,6 +485,42 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
0, MLX4_MCAST_ENABLE);
if (err)
en_err(priv, "Failed enabling multicast filter\n");
+
+ update_mclist_flags(priv, &priv->curr_list, &priv->mc_list);
+ list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) {
+ if (mclist->action == MCLIST_REM) {
+ /* detach this address and delete from list */
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
+ mc_list[5] = priv->port;
+ err = mlx4_multicast_detach(mdev->dev,
+ &priv->rss_map.indir_qp,
+ mc_list,
+ MLX4_PROT_ETH,
+ mclist->reg_id);
+ if (err)
+ en_err(priv, "Fail to detach multicast address\n");
+
+ /* remove from list */
+ list_del(&mclist->list);
+ kfree(mclist);
+ }
+
+ if (mclist->action == MCLIST_ADD) {
+ /* attach the address */
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
+ /* needed for B0 steering support */
+ mc_list[5] = priv->port;
+ err = mlx4_multicast_attach(mdev->dev,
+ &priv->rss_map.indir_qp,
+ mc_list,
+ priv->port, 0,
+ MLX4_PROT_ETH,
+ &mclist->reg_id);
+ if (err)
+ en_err(priv, "Fail to attach multicast address\n");
+
+ }
+ }
}
out:
mutex_unlock(&mdev->state_lock);
@@ -605,6 +745,9 @@ int mlx4_en_start_port(struct net_device *dev)
return 0;
}
+ INIT_LIST_HEAD(&priv->mc_list);
+ INIT_LIST_HEAD(&priv->curr_list);
+
/* Calculate Rx buf size */
dev->mtu = min(dev->mtu, priv->max_mtu);
mlx4_en_calc_rx_buf(dev);
@@ -653,6 +796,10 @@ int mlx4_en_start_port(struct net_device *dev)
goto mac_err;
}
+ err = mlx4_en_create_drop_qp(priv);
+ if (err)
+ goto rss_err;
+
/* Configure tx cq's and rings */
for (i = 0; i < priv->tx_ring_num; i++) {
/* Configure cq */
@@ -720,13 +867,23 @@ int mlx4_en_start_port(struct net_device *dev)
/* Attach rx QP to bradcast address */
memset(&mc_list[10], 0xff, ETH_ALEN);
- mc_list[5] = priv->port;
+ mc_list[5] = priv->port; /* needed for B0 steering support */
if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
- 0, MLX4_PROT_ETH))
+ priv->port, 0, MLX4_PROT_ETH,
+ &priv->broadcast_id))
mlx4_warn(mdev, "Failed Attaching Broadcast\n");
/* Must redo promiscuous mode setup. */
priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC);
+ if (mdev->dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_PROMISC_UPLINK);
+ mlx4_flow_steer_promisc_remove(mdev->dev,
+ priv->port,
+ MLX4_FS_PROMISC_ALL_MULTI);
+ }
/* Schedule multicast task to populate multicast list */
queue_work(mdev->workqueue, &priv->mcast_task);
@@ -742,7 +899,8 @@ tx_err:
mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[tx_index]);
mlx4_en_deactivate_cq(priv, &priv->tx_cq[tx_index]);
}
-
+ mlx4_en_destroy_drop_qp(priv);
+rss_err:
mlx4_en_release_rss_steer(priv);
mac_err:
mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn);
@@ -760,6 +918,7 @@ void mlx4_en_stop_port(struct net_device *dev)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_dev *mdev = priv->mdev;
+ struct mlx4_en_mc_list *mclist, *tmp;
int i;
u8 mc_list[16] = {0};
@@ -778,19 +937,26 @@ void mlx4_en_stop_port(struct net_device *dev)
/* Detach All multicasts */
memset(&mc_list[10], 0xff, ETH_ALEN);
- mc_list[5] = priv->port;
+ mc_list[5] = priv->port; /* needed for B0 steering support */
mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
- MLX4_PROT_ETH);
- for (i = 0; i < priv->mc_addrs_cnt; i++) {
- memcpy(&mc_list[10], priv->mc_addrs + i * ETH_ALEN, ETH_ALEN);
+ MLX4_PROT_ETH, priv->broadcast_id);
+ list_for_each_entry(mclist, &priv->curr_list, list) {
+ memcpy(&mc_list[10], mclist->addr, ETH_ALEN);
mc_list[5] = priv->port;
mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp,
- mc_list, MLX4_PROT_ETH);
+ mc_list, MLX4_PROT_ETH, mclist->reg_id);
}
mlx4_en_clear_list(dev);
+ list_for_each_entry_safe(mclist, tmp, &priv->curr_list, list) {
+ list_del(&mclist->list);
+ kfree(mclist);
+ }
+
/* Flush multicast filter */
mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG);
+ mlx4_en_destroy_drop_qp(priv);
+
/* Free TX Rings */
for (i = 0; i < priv->tx_ring_num; i++) {
mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index d49a7ac..a04cbf7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -844,6 +844,36 @@ out:
return err;
}
+int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
+{
+ int err;
+ u32 qpn;
+
+ err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn);
+ if (err) {
+ en_err(priv, "Failed reserving drop qpn\n");
+ return err;
+ }
+ err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp);
+ if (err) {
+ en_err(priv, "Failed allocating drop qp\n");
+ mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
+ return err;
+ }
+
+ return 0;
+}
+
+void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv)
+{
+ u32 qpn;
+
+ qpn = priv->drop_qp.qpn;
+ mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
+ mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
+ mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
+}
+
/* Allocate rx qp's and configure them according to rss map */
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
{
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 9c83bb8..1d70657 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -123,7 +123,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
static const char * const fname[] = {
[0] = "RSS support",
[1] = "RSS Toeplitz Hash Function support",
- [2] = "RSS XOR Hash Function support"
+ [2] = "RSS XOR Hash Function support",
+ [3] = "Device manage flow steering support"
};
int i;
@@ -391,6 +392,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
#define QUERY_DEV_CAP_RSVD_XRC_OFFSET 0x66
#define QUERY_DEV_CAP_MAX_XRC_OFFSET 0x67
#define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET 0x68
+#define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET 0x76
+#define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET 0x77
#define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET 0x80
#define QUERY_DEV_CAP_QPC_ENTRY_SZ_OFFSET 0x82
#define QUERY_DEV_CAP_AUX_ENTRY_SZ_OFFSET 0x84
@@ -474,6 +477,12 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev_cap->num_ports = field & 0xf;
MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET);
dev_cap->max_msg_sz = 1 << (field & 0x1f);
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
+ if (field & 0x80)
+ dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
+ dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
+ MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
+ dev_cap->fs_max_num_qp_per_entry = field;
MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
dev_cap->stat_rate_support = stat_rate;
MLX4_GET(ext_flags, outbox, QUERY_DEV_CAP_EXT_FLAGS_OFFSET);
@@ -1061,6 +1070,15 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
#define INIT_HCA_LOG_MC_HASH_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x16)
#define INIT_HCA_UC_STEERING_OFFSET (INIT_HCA_MCAST_OFFSET + 0x18)
#define INIT_HCA_LOG_MC_TABLE_SZ_OFFSET (INIT_HCA_MCAST_OFFSET + 0x1b)
+#define INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN 0x6
+#define INIT_HCA_FS_PARAM_OFFSET 0x1d0
+#define INIT_HCA_FS_BASE_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x00)
+#define INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x12)
+#define INIT_HCA_FS_LOG_TABLE_SZ_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x1b)
+#define INIT_HCA_FS_ETH_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x21)
+#define INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x22)
+#define INIT_HCA_FS_IB_BITS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x25)
+#define INIT_HCA_FS_IB_NUM_ADDRS_OFFSET (INIT_HCA_FS_PARAM_OFFSET + 0x26)
#define INIT_HCA_TPT_OFFSET 0x0f0
#define INIT_HCA_DMPT_BASE_OFFSET (INIT_HCA_TPT_OFFSET + 0x00)
#define INIT_HCA_LOG_MPT_SZ_OFFSET (INIT_HCA_TPT_OFFSET + 0x0b)
@@ -1119,14 +1137,44 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
MLX4_PUT(inbox, param->rdmarc_base, INIT_HCA_RDMARC_BASE_OFFSET);
MLX4_PUT(inbox, param->log_rd_per_qp, INIT_HCA_LOG_RD_OFFSET);
- /* multicast attributes */
-
- MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
- MLX4_PUT(inbox, param->log_mc_entry_sz, INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
- MLX4_PUT(inbox, param->log_mc_hash_sz, INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
- MLX4_PUT(inbox, (u8) (1 << 3), INIT_HCA_UC_STEERING_OFFSET);
- MLX4_PUT(inbox, param->log_mc_table_sz, INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ /* steering attributes */
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ *(inbox + INIT_HCA_FLAGS_OFFSET / 4) |=
+ cpu_to_be32(1 <<
+ INIT_HCA_DEVICE_MANAGED_FLOW_STEERING_EN);
+
+ MLX4_PUT(inbox, param->mc_base, INIT_HCA_FS_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_entry_sz,
+ INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_table_sz,
+ INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+ /* Enable Ethernet flow steering
+ * with udp unicast and tcp unicast
+ */
+ MLX4_PUT(inbox, param->fs_hash_enable_bits,
+ INIT_HCA_FS_ETH_BITS_OFFSET);
+ MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
+ INIT_HCA_FS_ETH_NUM_ADDRS_OFFSET);
+ /* Enable IPoIB flow steering
+ * with udp unicast and tcp unicast
+ */
+ MLX4_PUT(inbox, param->fs_hash_enable_bits,
+ INIT_HCA_FS_IB_BITS_OFFSET);
+ MLX4_PUT(inbox, (u16) MLX4_FS_NUM_OF_L2_ADDR,
+ INIT_HCA_FS_IB_NUM_ADDRS_OFFSET);
+ } else {
+ MLX4_PUT(inbox, param->mc_base, INIT_HCA_MC_BASE_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_entry_sz,
+ INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_hash_sz,
+ INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+ MLX4_PUT(inbox, param->log_mc_table_sz,
+ INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_B0)
+ MLX4_PUT(inbox, (u8) (1 << 3),
+ INIT_HCA_UC_STEERING_OFFSET);
+ }
/* TPT attributes */
@@ -1188,15 +1236,24 @@ int mlx4_QUERY_HCA(struct mlx4_dev *dev,
MLX4_GET(param->rdmarc_base, outbox, INIT_HCA_RDMARC_BASE_OFFSET);
MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
- /* multicast attributes */
+ /* steering attributes */
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
- MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET);
- MLX4_GET(param->log_mc_entry_sz, outbox,
- INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
- MLX4_GET(param->log_mc_hash_sz, outbox,
- INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
- MLX4_GET(param->log_mc_table_sz, outbox,
- INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_FS_BASE_OFFSET);
+ MLX4_GET(param->log_mc_entry_sz, outbox,
+ INIT_HCA_FS_LOG_ENTRY_SZ_OFFSET);
+ MLX4_GET(param->log_mc_table_sz, outbox,
+ INIT_HCA_FS_LOG_TABLE_SZ_OFFSET);
+ } else {
+ MLX4_GET(param->mc_base, outbox, INIT_HCA_MC_BASE_OFFSET);
+ MLX4_GET(param->log_mc_entry_sz, outbox,
+ INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+ MLX4_GET(param->log_mc_hash_sz, outbox,
+ INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+ MLX4_GET(param->log_mc_table_sz, outbox,
+ INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+ }
/* TPT attributes */
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 64c0399..83fcbbf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -78,6 +78,8 @@ struct mlx4_dev_cap {
u16 wavelength[MLX4_MAX_PORTS + 1];
u64 trans_code[MLX4_MAX_PORTS + 1];
u16 stat_rate_support;
+ int fs_log_max_ucast_qp_range_size;
+ int fs_max_num_qp_per_entry;
u64 flags;
u64 flags2;
int reserved_uars;
@@ -165,6 +167,7 @@ struct mlx4_init_hca_param {
u8 log_mpt_sz;
u8 log_uar_sz;
u8 uar_page_sz; /* log pg sz in 4k chunks */
+ u8 fs_hash_enable_bits;
};
struct mlx4_init_ib_param {
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index a0313de..4264516 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -41,6 +41,7 @@
#include <linux/slab.h>
#include <linux/io-mapping.h>
#include <linux/delay.h>
+#include <linux/netdevice.h>
#include <linux/mlx4/device.h>
#include <linux/mlx4/doorbell.h>
@@ -90,7 +91,9 @@ module_param_named(log_num_mgm_entry_size,
MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
" of qp per mcg, for example:"
" 10 gives 248.range: 9<="
- " log_num_mgm_entry_size <= 12");
+ " log_num_mgm_entry_size <= 12."
+ " Not in use with device managed"
+ " flow steering");
#define MLX4_VF (1 << 0)
@@ -243,7 +246,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.reserved_srqs = dev_cap->reserved_srqs;
dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz;
dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz;
- dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
/*
* Subtract 1 from the limit because we need to allocate a
* spare CQE so the HCA HW can tell the difference between an
@@ -274,6 +276,28 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
dev->caps.max_gso_sz = dev_cap->max_gso_sz;
dev->caps.max_rss_tbl_sz = dev_cap->max_rss_tbl_sz;
+ if (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN) {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_DEVICE_MANAGED;
+ dev->caps.num_qp_per_mgm = dev_cap->fs_max_num_qp_per_entry;
+ dev->caps.fs_log_max_ucast_qp_range_size =
+ dev_cap->fs_log_max_ucast_qp_range_size;
+ } else {
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER &&
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_B0;
+ } else {
+ dev->caps.steering_mode = MLX4_STEERING_MODE_A0;
+
+ if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER ||
+ dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER)
+ mlx4_warn(dev, "Must have UC_STEER and MC_STEER flags "
+ "set to use B0 steering. Falling back to A0 steering mode.\n");
+ }
+ dev->caps.num_qp_per_mgm = mlx4_get_qp_per_mgm(dev);
+ }
+ mlx4_dbg(dev, "Steering mode is: %s\n",
+ mlx4_steering_mode_str(dev->caps.steering_mode));
+
/* Sense port always allowed on supported devices for ConnectX1 and 2 */
if (dev->pdev->device != 0x1003)
dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
@@ -967,9 +991,11 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
}
/*
- * It's not strictly required, but for simplicity just map the
- * whole multicast group table now. The table isn't very big
- * and it's a lot easier than trying to track ref counts.
+ * For flow steering device managed mode it is required to use
+ * mlx4_init_icm_table. For B0 steering mode it's not strictly
+ * required, but for simplicity just map the whole multicast
+ * group table now. The table isn't very big and it's a lot
+ * easier than trying to track ref counts.
*/
err = mlx4_init_icm_table(dev, &priv->mcg_table.table,
init_hca->mc_base,
@@ -1205,7 +1231,26 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
goto err_stop_fw;
}
+ priv->fs_hash_mode = MLX4_FS_L2_HASH;
+
+ switch (priv->fs_hash_mode) {
+ case MLX4_FS_L2_HASH:
+ init_hca.fs_hash_enable_bits = 0;
+ break;
+
+ case MLX4_FS_L2_L3_L4_HASH:
+ /* Enable flow steering with
+ * udp unicast and tcp unicast
+ */
+ init_hca.fs_hash_enable_bits =
+ MLX4_FS_UDP_UC_EN | MLX4_FS_TCP_UC_EN;
+ break;
+ }
+
profile = default_profile;
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ profile.num_mcg = MLX4_FS_NUM_MCG;
icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
&init_hca);
@@ -1539,8 +1584,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
struct msix_entry *entries;
int nreq = min_t(int, dev->caps.num_ports *
- min_t(int, num_online_cpus() + 1, MAX_MSIX_P_PORT)
- + MSIX_LEGACY_SZ, MAX_MSIX);
+ min_t(int, netif_get_num_default_rss_queues() + 1,
+ MAX_MSIX_P_PORT) + MSIX_LEGACY_SZ, MAX_MSIX);
int err;
int i;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index f4a8f98..bc62f53 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -41,6 +41,7 @@
#define MGM_QPN_MASK 0x00FFFFFF
#define MGM_BLCK_LB_BIT 30
+#define MLX4_MAC_MASK 0xffffffffffffULL
static const u8 zero_gid[16]; /* automatically initialized to 0 */
@@ -54,7 +55,12 @@ struct mlx4_mgm {
int mlx4_get_mgm_entry_size(struct mlx4_dev *dev)
{
- return min((1 << mlx4_log_num_mgm_entry_size), MLX4_MAX_MGM_ENTRY_SIZE);
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return 1 << MLX4_FS_MGM_LOG_ENTRY_SIZE;
+ else
+ return min((1 << mlx4_log_num_mgm_entry_size),
+ MLX4_MAX_MGM_ENTRY_SIZE);
}
int mlx4_get_qp_per_mgm(struct mlx4_dev *dev)
@@ -62,6 +68,35 @@ int mlx4_get_qp_per_mgm(struct mlx4_dev *dev)
return 4 * (mlx4_get_mgm_entry_size(dev) / 16 - 2);
}
+static int mlx4_QP_FLOW_STEERING_ATTACH(struct mlx4_dev *dev,
+ struct mlx4_cmd_mailbox *mailbox,
+ u32 size,
+ u64 *reg_id)
+{
+ u64 imm;
+ int err = 0;
+
+ err = mlx4_cmd_imm(dev, mailbox->dma, &imm, size, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+ *reg_id = imm;
+
+ return err;
+}
+
+static int mlx4_QP_FLOW_STEERING_DETACH(struct mlx4_dev *dev, u64 regid)
+{
+ int err = 0;
+
+ err = mlx4_cmd(dev, regid, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ return err;
+}
+
static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index,
struct mlx4_cmd_mailbox *mailbox)
{
@@ -614,6 +649,311 @@ static int find_entry(struct mlx4_dev *dev, u8 port,
return err;
}
+struct mlx4_net_trans_rule_hw_ctrl {
+ __be32 ctrl;
+ __be32 vf_vep_port;
+ __be32 qpn;
+ __be32 reserved;
+};
+
+static void trans_rule_ctrl_to_hw(struct mlx4_net_trans_rule *ctrl,
+ struct mlx4_net_trans_rule_hw_ctrl *hw)
+{
+ static const u8 __promisc_mode[] = {
+ [MLX4_FS_PROMISC_NONE] = 0x0,
+ [MLX4_FS_PROMISC_UPLINK] = 0x1,
+ [MLX4_FS_PROMISC_FUNCTION_PORT] = 0x2,
+ [MLX4_FS_PROMISC_ALL_MULTI] = 0x3,
+ };
+
+ u32 dw = 0;
+
+ dw = ctrl->queue_mode == MLX4_NET_TRANS_Q_LIFO ? 1 : 0;
+ dw |= ctrl->exclusive ? (1 << 2) : 0;
+ dw |= ctrl->allow_loopback ? (1 << 3) : 0;
+ dw |= __promisc_mode[ctrl->promisc_mode] << 8;
+ dw |= ctrl->priority << 16;
+
+ hw->ctrl = cpu_to_be32(dw);
+ hw->vf_vep_port = cpu_to_be32(ctrl->port);
+ hw->qpn = cpu_to_be32(ctrl->qpn);
+}
+
+struct mlx4_net_trans_rule_hw_ib {
+ u8 size;
+ u8 rsvd1;
+ __be16 id;
+ u32 rsvd2;
+ __be32 qpn;
+ __be32 qpn_mask;
+ u8 dst_gid[16];
+ u8 dst_gid_msk[16];
+} __packed;
+
+struct mlx4_net_trans_rule_hw_eth {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ u8 rsvd1[6];
+ u8 dst_mac[6];
+ u16 rsvd2;
+ u8 dst_mac_msk[6];
+ u16 rsvd3;
+ u8 src_mac[6];
+ u16 rsvd4;
+ u8 src_mac_msk[6];
+ u8 rsvd5;
+ u8 ether_type_enable;
+ __be16 ether_type;
+ __be16 vlan_id_msk;
+ __be16 vlan_id;
+} __packed;
+
+struct mlx4_net_trans_rule_hw_tcp_udp {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ __be16 rsvd1[3];
+ __be16 dst_port;
+ __be16 rsvd2;
+ __be16 dst_port_msk;
+ __be16 rsvd3;
+ __be16 src_port;
+ __be16 rsvd4;
+ __be16 src_port_msk;
+} __packed;
+
+struct mlx4_net_trans_rule_hw_ipv4 {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ __be32 rsvd1;
+ __be32 dst_ip;
+ __be32 dst_ip_msk;
+ __be32 src_ip;
+ __be32 src_ip_msk;
+} __packed;
+
+struct _rule_hw {
+ union {
+ struct {
+ u8 size;
+ u8 rsvd;
+ __be16 id;
+ };
+ struct mlx4_net_trans_rule_hw_eth eth;
+ struct mlx4_net_trans_rule_hw_ib ib;
+ struct mlx4_net_trans_rule_hw_ipv4 ipv4;
+ struct mlx4_net_trans_rule_hw_tcp_udp tcp_udp;
+ };
+};
+
+static int parse_trans_rule(struct mlx4_dev *dev, struct mlx4_spec_list *spec,
+ struct _rule_hw *rule_hw)
+{
+ static const u16 __sw_id_hw[] = {
+ [MLX4_NET_TRANS_RULE_ID_ETH] = 0xE001,
+ [MLX4_NET_TRANS_RULE_ID_IB] = 0xE005,
+ [MLX4_NET_TRANS_RULE_ID_IPV6] = 0xE003,
+ [MLX4_NET_TRANS_RULE_ID_IPV4] = 0xE002,
+ [MLX4_NET_TRANS_RULE_ID_TCP] = 0xE004,
+ [MLX4_NET_TRANS_RULE_ID_UDP] = 0xE006
+ };
+
+ static const size_t __rule_hw_sz[] = {
+ [MLX4_NET_TRANS_RULE_ID_ETH] =
+ sizeof(struct mlx4_net_trans_rule_hw_eth),
+ [MLX4_NET_TRANS_RULE_ID_IB] =
+ sizeof(struct mlx4_net_trans_rule_hw_ib),
+ [MLX4_NET_TRANS_RULE_ID_IPV6] = 0,
+ [MLX4_NET_TRANS_RULE_ID_IPV4] =
+ sizeof(struct mlx4_net_trans_rule_hw_ipv4),
+ [MLX4_NET_TRANS_RULE_ID_TCP] =
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp),
+ [MLX4_NET_TRANS_RULE_ID_UDP] =
+ sizeof(struct mlx4_net_trans_rule_hw_tcp_udp)
+ };
+ if (spec->id > MLX4_NET_TRANS_RULE_NUM) {
+ mlx4_err(dev, "Invalid network rule id. id = %d\n", spec->id);
+ return -EINVAL;
+ }
+ memset(rule_hw, 0, __rule_hw_sz[spec->id]);
+ rule_hw->id = cpu_to_be16(__sw_id_hw[spec->id]);
+ rule_hw->size = __rule_hw_sz[spec->id] >> 2;
+
+ switch (spec->id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ memcpy(rule_hw->eth.dst_mac, spec->eth.dst_mac, ETH_ALEN);
+ memcpy(rule_hw->eth.dst_mac_msk, spec->eth.dst_mac_msk,
+ ETH_ALEN);
+ memcpy(rule_hw->eth.src_mac, spec->eth.src_mac, ETH_ALEN);
+ memcpy(rule_hw->eth.src_mac_msk, spec->eth.src_mac_msk,
+ ETH_ALEN);
+ if (spec->eth.ether_type_enable) {
+ rule_hw->eth.ether_type_enable = 1;
+ rule_hw->eth.ether_type = spec->eth.ether_type;
+ }
+ rule_hw->eth.vlan_id = spec->eth.vlan_id;
+ rule_hw->eth.vlan_id_msk = spec->eth.vlan_id_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ rule_hw->ib.qpn = spec->ib.r_qpn;
+ rule_hw->ib.qpn_mask = spec->ib.qpn_msk;
+ memcpy(&rule_hw->ib.dst_gid, &spec->ib.dst_gid, 16);
+ memcpy(&rule_hw->ib.dst_gid_msk, &spec->ib.dst_gid_msk, 16);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV6:
+ return -EOPNOTSUPP;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ rule_hw->ipv4.src_ip = spec->ipv4.src_ip;
+ rule_hw->ipv4.src_ip_msk = spec->ipv4.src_ip_msk;
+ rule_hw->ipv4.dst_ip = spec->ipv4.dst_ip;
+ rule_hw->ipv4.dst_ip_msk = spec->ipv4.dst_ip_msk;
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ rule_hw->tcp_udp.dst_port = spec->tcp_udp.dst_port;
+ rule_hw->tcp_udp.dst_port_msk = spec->tcp_udp.dst_port_msk;
+ rule_hw->tcp_udp.src_port = spec->tcp_udp.src_port;
+ rule_hw->tcp_udp.src_port_msk = spec->tcp_udp.src_port_msk;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return __rule_hw_sz[spec->id];
+}
+
+static void mlx4_err_rule(struct mlx4_dev *dev, char *str,
+ struct mlx4_net_trans_rule *rule)
+{
+#define BUF_SIZE 256
+ struct mlx4_spec_list *cur;
+ char buf[BUF_SIZE];
+ int len = 0;
+
+ mlx4_err(dev, "%s", str);
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "port = %d prio = 0x%x qp = 0x%x ",
+ rule->port, rule->priority, rule->qpn);
+
+ list_for_each_entry(cur, &rule->list, list) {
+ switch (cur->id) {
+ case MLX4_NET_TRANS_RULE_ID_ETH:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dmac = %pM ", &cur->eth.dst_mac);
+ if (cur->eth.ether_type)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "ethertype = 0x%x ",
+ be16_to_cpu(cur->eth.ether_type));
+ if (cur->eth.vlan_id)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "vlan-id = %d ",
+ be16_to_cpu(cur->eth.vlan_id));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV4:
+ if (cur->ipv4.src_ip)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "src-ip = %pI4 ",
+ &cur->ipv4.src_ip);
+ if (cur->ipv4.dst_ip)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-ip = %pI4 ",
+ &cur->ipv4.dst_ip);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_TCP:
+ case MLX4_NET_TRANS_RULE_ID_UDP:
+ if (cur->tcp_udp.src_port)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "src-port = %d ",
+ be16_to_cpu(cur->tcp_udp.src_port));
+ if (cur->tcp_udp.dst_port)
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-port = %d ",
+ be16_to_cpu(cur->tcp_udp.dst_port));
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IB:
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-gid = %pI6\n", cur->ib.dst_gid);
+ len += snprintf(buf + len, BUF_SIZE - len,
+ "dst-gid-mask = %pI6\n",
+ cur->ib.dst_gid_msk);
+ break;
+
+ case MLX4_NET_TRANS_RULE_ID_IPV6:
+ break;
+
+ default:
+ break;
+ }
+ }
+ len += snprintf(buf + len, BUF_SIZE - len, "\n");
+ mlx4_err(dev, "%s", buf);
+
+ if (len >= BUF_SIZE)
+ mlx4_err(dev, "Network rule error message was truncated, print buffer is too small.\n");
+}
+
+int mlx4_flow_attach(struct mlx4_dev *dev,
+ struct mlx4_net_trans_rule *rule, u64 *reg_id)
+{
+ struct mlx4_cmd_mailbox *mailbox;
+ struct mlx4_spec_list *cur;
+ u32 size = 0;
+ int ret;
+
+ mailbox = mlx4_alloc_cmd_mailbox(dev);
+ if (IS_ERR(mailbox))
+ return PTR_ERR(mailbox);
+
+ memset(mailbox->buf, 0, sizeof(struct mlx4_net_trans_rule_hw_ctrl));
+ trans_rule_ctrl_to_hw(rule, mailbox->buf);
+
+ size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+
+ list_for_each_entry(cur, &rule->list, list) {
+ ret = parse_trans_rule(dev, cur, mailbox->buf + size);
+ if (ret < 0) {
+ mlx4_free_cmd_mailbox(dev, mailbox);
+ return -EINVAL;
+ }
+ size += ret;
+ }
+
+ ret = mlx4_QP_FLOW_STEERING_ATTACH(dev, mailbox, size >> 2, reg_id);
+ if (ret == -ENOMEM)
+ mlx4_err_rule(dev,
+ "mcg table is full. Fail to register network rule.\n",
+ rule);
+ else if (ret)
+ mlx4_err_rule(dev, "Fail to register network rule.\n", rule);
+
+ mlx4_free_cmd_mailbox(dev, mailbox);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_attach);
+
+int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
+{
+ int err;
+
+ err = mlx4_QP_FLOW_STEERING_DETACH(dev, reg_id);
+ if (err)
+ mlx4_err(dev, "Fail to detach network rule. registration id = 0x%llx\n",
+ reg_id);
+ return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_detach);
+
int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
int block_mcast_loopback, enum mlx4_protocol prot,
enum mlx4_steer_type steer)
@@ -866,49 +1206,159 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp,
}
int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- int block_mcast_loopback, enum mlx4_protocol prot)
+ u8 port, int block_mcast_loopback,
+ enum mlx4_protocol prot, u64 *reg_id)
{
- if (prot == MLX4_PROT_ETH &&
- !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER))
- return 0;
- if (prot == MLX4_PROT_ETH)
- gid[7] |= (MLX4_MC_STEER << 1);
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ if (prot == MLX4_PROT_ETH)
+ return 0;
+
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_MC_STEER << 1);
+
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 1,
+ block_mcast_loopback, prot);
+ return mlx4_qp_attach_common(dev, qp, gid,
+ block_mcast_loopback, prot,
+ MLX4_MC_STEER);
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ struct mlx4_spec_list spec = { {NULL} };
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .promisc_mode = MLX4_FS_PROMISC_NONE,
+ .priority = MLX4_DOMAIN_NIC,
+ };
+
+ rule.allow_loopback = ~block_mcast_loopback;
+ rule.port = port;
+ rule.qpn = qp->qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ switch (prot) {
+ case MLX4_PROT_ETH:
+ spec.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec.eth.dst_mac, &gid[10], ETH_ALEN);
+ memcpy(spec.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+ break;
- if (mlx4_is_mfunc(dev))
- return mlx4_QP_ATTACH(dev, qp, gid, 1,
- block_mcast_loopback, prot);
+ case MLX4_PROT_IB_IPV6:
+ spec.id = MLX4_NET_TRANS_RULE_ID_IB;
+ memcpy(spec.ib.dst_gid, gid, 16);
+ memset(&spec.ib.dst_gid_msk, 0xff, 16);
+ break;
+ default:
+ return -EINVAL;
+ }
+ list_add_tail(&spec.list, &rule.list);
- return mlx4_qp_attach_common(dev, qp, gid, block_mcast_loopback,
- prot, MLX4_MC_STEER);
+ return mlx4_flow_attach(dev, &rule, reg_id);
+ }
+
+ default:
+ return -EINVAL;
+ }
}
EXPORT_SYMBOL_GPL(mlx4_multicast_attach);
int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- enum mlx4_protocol prot)
+ enum mlx4_protocol prot, u64 reg_id)
{
- if (prot == MLX4_PROT_ETH &&
- !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER))
- return 0;
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ if (prot == MLX4_PROT_ETH)
+ return 0;
- if (prot == MLX4_PROT_ETH)
- gid[7] |= (MLX4_MC_STEER << 1);
+ case MLX4_STEERING_MODE_B0:
+ if (prot == MLX4_PROT_ETH)
+ gid[7] |= (MLX4_MC_STEER << 1);
- if (mlx4_is_mfunc(dev))
- return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot);
+ if (mlx4_is_mfunc(dev))
+ return mlx4_QP_ATTACH(dev, qp, gid, 0, 0, prot);
+
+ return mlx4_qp_detach_common(dev, qp, gid, prot,
+ MLX4_MC_STEER);
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return mlx4_flow_detach(dev, reg_id);
- return mlx4_qp_detach_common(dev, qp, gid, prot, MLX4_MC_STEER);
+ default:
+ return -EINVAL;
+ }
}
EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
+int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port,
+ u32 qpn, enum mlx4_net_trans_promisc_mode mode)
+{
+ struct mlx4_net_trans_rule rule;
+ u64 *regid_p;
+
+ switch (mode) {
+ case MLX4_FS_PROMISC_UPLINK:
+ case MLX4_FS_PROMISC_FUNCTION_PORT:
+ regid_p = &dev->regid_promisc_array[port];
+ break;
+ case MLX4_FS_PROMISC_ALL_MULTI:
+ regid_p = &dev->regid_allmulti_array[port];
+ break;
+ default:
+ return -1;
+ }
+
+ if (*regid_p != 0)
+ return -1;
+
+ rule.promisc_mode = mode;
+ rule.port = port;
+ rule.qpn = qpn;
+ INIT_LIST_HEAD(&rule.list);
+ mlx4_err(dev, "going promisc on %x\n", port);
+
+ return mlx4_flow_attach(dev, &rule, regid_p);
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_add);
+
+int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
+ enum mlx4_net_trans_promisc_mode mode)
+{
+ int ret;
+ u64 *regid_p;
+
+ switch (mode) {
+ case MLX4_FS_PROMISC_UPLINK:
+ case MLX4_FS_PROMISC_FUNCTION_PORT:
+ regid_p = &dev->regid_promisc_array[port];
+ break;
+ case MLX4_FS_PROMISC_ALL_MULTI:
+ regid_p = &dev->regid_allmulti_array[port];
+ break;
+ default:
+ return -1;
+ }
+
+ if (*regid_p == 0)
+ return -1;
+
+ ret = mlx4_flow_detach(dev, *regid_p);
+ if (ret == 0)
+ *regid_p = 0;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mlx4_flow_steer_promisc_remove);
+
int mlx4_unicast_attach(struct mlx4_dev *dev,
struct mlx4_qp *qp, u8 gid[16],
int block_mcast_loopback, enum mlx4_protocol prot)
{
- if (prot == MLX4_PROT_ETH &&
- !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER))
- return 0;
-
if (prot == MLX4_PROT_ETH)
gid[7] |= (MLX4_UC_STEER << 1);
@@ -924,10 +1374,6 @@ EXPORT_SYMBOL_GPL(mlx4_unicast_attach);
int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
u8 gid[16], enum mlx4_protocol prot)
{
- if (prot == MLX4_PROT_ETH &&
- !(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER))
- return 0;
-
if (prot == MLX4_PROT_ETH)
gid[7] |= (MLX4_UC_STEER << 1);
@@ -968,9 +1414,6 @@ static int mlx4_PROMISC(struct mlx4_dev *dev, u32 qpn,
int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
{
- if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER))
- return 0;
-
if (mlx4_is_mfunc(dev))
return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 1, port);
@@ -980,9 +1423,6 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_add);
int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
{
- if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER))
- return 0;
-
if (mlx4_is_mfunc(dev))
return mlx4_PROMISC(dev, qpn, MLX4_MC_STEER, 0, port);
@@ -992,9 +1432,6 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_promisc_remove);
int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port)
{
- if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER))
- return 0;
-
if (mlx4_is_mfunc(dev))
return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 1, port);
@@ -1004,9 +1441,6 @@ EXPORT_SYMBOL_GPL(mlx4_unicast_promisc_add);
int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port)
{
- if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER))
- return 0;
-
if (mlx4_is_mfunc(dev))
return mlx4_PROMISC(dev, qpn, MLX4_UC_STEER, 0, port);
@@ -1019,6 +1453,10 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
int err;
+ /* No need for mcg_table when fw managed the mcg table*/
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return 0;
err = mlx4_bitmap_init(&priv->mcg_table.bitmap, dev->caps.num_amgms,
dev->caps.num_amgms - 1, 0, 0);
if (err)
@@ -1031,5 +1469,7 @@ int mlx4_init_mcg_table(struct mlx4_dev *dev)
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev)
{
- mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ mlx4_bitmap_cleanup(&mlx4_priv(dev)->mcg_table.bitmap);
}
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index e5d2022..d2c436b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -39,6 +39,7 @@
#include <linux/mutex.h>
#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/semaphore.h>
#include <linux/workqueue.h>
@@ -53,6 +54,17 @@
#define DRV_VERSION "1.1"
#define DRV_RELDATE "Dec, 2011"
+#define MLX4_FS_UDP_UC_EN (1 << 1)
+#define MLX4_FS_TCP_UC_EN (1 << 2)
+#define MLX4_FS_NUM_OF_L2_ADDR 8
+#define MLX4_FS_MGM_LOG_ENTRY_SIZE 7
+#define MLX4_FS_NUM_MCG (1 << 17)
+
+enum {
+ MLX4_FS_L2_HASH = 0,
+ MLX4_FS_L2_L3_L4_HASH,
+};
+
#define MLX4_NUM_UP 8
#define MLX4_NUM_TC 8
#define MLX4_RATELIMIT_UNITS 3 /* 100 Mbps */
@@ -137,6 +149,7 @@ enum mlx4_resource {
RES_VLAN,
RES_EQ,
RES_COUNTER,
+ RES_FS_RULE,
MLX4_NUM_OF_RESOURCE_TYPE
};
@@ -509,7 +522,7 @@ struct slave_list {
struct mlx4_resource_tracker {
spinlock_t lock;
/* tree for each resources */
- struct radix_tree_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
+ struct rb_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
/* num_of_slave's lists, one per slave */
struct slave_list *slave_list;
};
@@ -703,6 +716,7 @@ struct mlx4_set_port_rqp_calc_context {
struct mlx4_mac_entry {
u64 mac;
+ u64 reg_id;
};
struct mlx4_port_info {
@@ -776,6 +790,7 @@ struct mlx4_priv {
struct mutex bf_mutex;
struct io_mapping *bf_mapping;
int reserved_mtts;
+ int fs_hash_mode;
};
static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev)
@@ -1032,7 +1047,7 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
/* resource tracker functions*/
int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
enum mlx4_resource resource_type,
- int resource_id, int *slave);
+ u64 resource_id, int *slave);
void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id);
int mlx4_init_resource_tracker(struct mlx4_dev *dev);
@@ -1117,6 +1132,16 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
struct mlx4_cmd_mailbox *inbox,
struct mlx4_cmd_mailbox *outbox,
struct mlx4_cmd_info *cmd);
+int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
+int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd);
int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 225c20d..a126321 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -75,6 +75,7 @@
#define STAMP_SHIFT 31
#define STAMP_VAL 0x7fffffff
#define STATS_DELAY (HZ / 4)
+#define MAX_NUM_OF_FS_RULES 256
/* Typical TSO descriptor with 16 gather entries is 352 bytes... */
#define MAX_DESC_SIZE 512
@@ -404,6 +405,19 @@ struct mlx4_en_perf_stats {
#define NUM_PERF_COUNTERS 6
};
+enum mlx4_en_mclist_act {
+ MCLIST_NONE,
+ MCLIST_REM,
+ MCLIST_ADD,
+};
+
+struct mlx4_en_mc_list {
+ struct list_head list;
+ enum mlx4_en_mclist_act action;
+ u8 addr[ETH_ALEN];
+ u64 reg_id;
+};
+
struct mlx4_en_frag_info {
u16 frag_size;
u16 frag_prefix_size;
@@ -422,6 +436,11 @@ struct mlx4_en_frag_info {
#endif
+struct ethtool_flow_id {
+ struct ethtool_rx_flow_spec flow_spec;
+ u64 id;
+};
+
struct mlx4_en_priv {
struct mlx4_en_dev *mdev;
struct mlx4_en_port_profile *prof;
@@ -431,6 +450,7 @@ struct mlx4_en_priv {
struct net_device_stats ret_stats;
struct mlx4_en_port_state port_state;
spinlock_t stats_lock;
+ struct ethtool_flow_id ethtool_rules[MAX_NUM_OF_FS_RULES];
unsigned long last_moder_packets[MAX_RX_RINGS];
unsigned long last_moder_tx_packets;
@@ -480,6 +500,7 @@ struct mlx4_en_priv {
struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS];
struct mlx4_en_cq *tx_cq;
struct mlx4_en_cq rx_cq[MAX_RX_RINGS];
+ struct mlx4_qp drop_qp;
struct work_struct mcast_task;
struct work_struct mac_task;
struct work_struct watchdog_task;
@@ -489,8 +510,9 @@ struct mlx4_en_priv {
struct mlx4_en_pkt_stats pkstats;
struct mlx4_en_port_stats port_stats;
u64 stats_bitmap;
- char *mc_addrs;
- int mc_addrs_cnt;
+ struct list_head mc_list;
+ struct list_head curr_list;
+ u64 broadcast_id;
struct mlx4_en_stat_out_mbox hw_stats;
int vids[128];
bool wol;
@@ -565,6 +587,8 @@ void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
void mlx4_en_calc_rx_buf(struct net_device *dev);
int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
+int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv);
+void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv);
int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
void mlx4_en_rx_irq(struct mlx4_cq *mcq);
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index a8fb529..a51d1b9b 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -75,21 +75,54 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
table->total = 0;
}
-static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
+static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port,
+ u64 mac, int *qpn, u64 *reg_id)
{
- struct mlx4_qp qp;
- u8 gid[16] = {0};
__be64 be_mac;
int err;
- qp.qpn = *qpn;
-
- mac &= 0xffffffffffffULL;
+ mac &= MLX4_MAC_MASK;
be_mac = cpu_to_be64(mac << 16);
- memcpy(&gid[10], &be_mac, ETH_ALEN);
- gid[5] = port;
- err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_B0: {
+ struct mlx4_qp qp;
+ u8 gid[16] = {0};
+
+ qp.qpn = *qpn;
+ memcpy(&gid[10], &be_mac, ETH_ALEN);
+ gid[5] = port;
+
+ err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
+ break;
+ }
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ struct mlx4_spec_list spec_eth = { {NULL} };
+ __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16);
+
+ struct mlx4_net_trans_rule rule = {
+ .queue_mode = MLX4_NET_TRANS_Q_FIFO,
+ .exclusive = 0,
+ .allow_loopback = 1,
+ .promisc_mode = MLX4_FS_PROMISC_NONE,
+ .priority = MLX4_DOMAIN_NIC,
+ };
+
+ rule.port = port;
+ rule.qpn = *qpn;
+ INIT_LIST_HEAD(&rule.list);
+
+ spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH;
+ memcpy(spec_eth.eth.dst_mac, &be_mac, ETH_ALEN);
+ memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN);
+ list_add_tail(&spec_eth.list, &rule.list);
+
+ err = mlx4_flow_attach(dev, &rule, reg_id);
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
if (err)
mlx4_warn(dev, "Failed Attaching Unicast\n");
@@ -97,19 +130,30 @@ static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
}
static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port,
- u64 mac, int qpn)
+ u64 mac, int qpn, u64 reg_id)
{
- struct mlx4_qp qp;
- u8 gid[16] = {0};
- __be64 be_mac;
+ switch (dev->caps.steering_mode) {
+ case MLX4_STEERING_MODE_B0: {
+ struct mlx4_qp qp;
+ u8 gid[16] = {0};
+ __be64 be_mac;
- qp.qpn = qpn;
- mac &= 0xffffffffffffULL;
- be_mac = cpu_to_be64(mac << 16);
- memcpy(&gid[10], &be_mac, ETH_ALEN);
- gid[5] = port;
+ qp.qpn = qpn;
+ mac &= MLX4_MAC_MASK;
+ be_mac = cpu_to_be64(mac << 16);
+ memcpy(&gid[10], &be_mac, ETH_ALEN);
+ gid[5] = port;
- mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
+ mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
+ break;
+ }
+ case MLX4_STEERING_MODE_DEVICE_MANAGED: {
+ mlx4_flow_detach(dev, reg_id);
+ break;
+ }
+ default:
+ mlx4_err(dev, "Invalid steering mode.\n");
+ }
}
static int validate_index(struct mlx4_dev *dev,
@@ -144,6 +188,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
struct mlx4_mac_entry *entry;
int index = 0;
int err = 0;
+ u64 reg_id;
mlx4_dbg(dev, "Registering MAC: 0x%llx for adding\n",
(unsigned long long) mac);
@@ -155,7 +200,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
return err;
}
- if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) {
+ if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) {
*qpn = info->base_qpn + index;
return 0;
}
@@ -167,7 +212,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
goto qp_err;
}
- err = mlx4_uc_steer_add(dev, port, mac, qpn);
+ err = mlx4_uc_steer_add(dev, port, mac, qpn, &reg_id);
if (err)
goto steer_err;
@@ -177,6 +222,7 @@ int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
goto alloc_err;
}
entry->mac = mac;
+ entry->reg_id = reg_id;
err = radix_tree_insert(&info->mac_tree, *qpn, entry);
if (err)
goto insert_err;
@@ -186,7 +232,7 @@ insert_err:
kfree(entry);
alloc_err:
- mlx4_uc_steer_release(dev, port, mac, *qpn);
+ mlx4_uc_steer_release(dev, port, mac, *qpn, reg_id);
steer_err:
mlx4_qp_release_range(dev, *qpn, 1);
@@ -206,13 +252,14 @@ void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn)
(unsigned long long) mac);
mlx4_unregister_mac(dev, port, mac);
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
entry = radix_tree_lookup(&info->mac_tree, qpn);
if (entry) {
mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx,"
" qpn %d\n", port,
(unsigned long long) mac, qpn);
- mlx4_uc_steer_release(dev, port, entry->mac, qpn);
+ mlx4_uc_steer_release(dev, port, entry->mac,
+ qpn, entry->reg_id);
mlx4_qp_release_range(dev, qpn, 1);
radix_tree_delete(&info->mac_tree, qpn);
kfree(entry);
@@ -359,15 +406,18 @@ int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
int index = qpn - info->base_qpn;
int err = 0;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0) {
entry = radix_tree_lookup(&info->mac_tree, qpn);
if (!entry)
return -EINVAL;
- mlx4_uc_steer_release(dev, port, entry->mac, qpn);
+ mlx4_uc_steer_release(dev, port, entry->mac,
+ qpn, entry->reg_id);
mlx4_unregister_mac(dev, port, entry->mac);
entry->mac = new_mac;
+ entry->reg_id = 0;
mlx4_register_mac(dev, port, new_mac);
- err = mlx4_uc_steer_add(dev, port, entry->mac, &qpn);
+ err = mlx4_uc_steer_add(dev, port, entry->mac,
+ &qpn, &entry->reg_id);
return err;
}
@@ -803,8 +853,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
MCAST_DIRECT : MCAST_DEFAULT;
- if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER &&
- dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)
+ if (dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
return 0;
mailbox = mlx4_alloc_cmd_mailbox(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c
index b83bc92..9ee4725 100644
--- a/drivers/net/ethernet/mellanox/mlx4/profile.c
+++ b/drivers/net/ethernet/mellanox/mlx4/profile.c
@@ -237,13 +237,19 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
init_hca->mtt_base = profile[i].start;
break;
case MLX4_RES_MCG:
- dev->caps.num_mgms = profile[i].num >> 1;
- dev->caps.num_amgms = profile[i].num >> 1;
init_hca->mc_base = profile[i].start;
init_hca->log_mc_entry_sz =
ilog2(mlx4_get_mgm_entry_size(dev));
init_hca->log_mc_table_sz = profile[i].log_num;
- init_hca->log_mc_hash_sz = profile[i].log_num - 1;
+ if (dev->caps.steering_mode ==
+ MLX4_STEERING_MODE_DEVICE_MANAGED) {
+ dev->caps.num_mgms = profile[i].num;
+ } else {
+ init_hca->log_mc_hash_sz =
+ profile[i].log_num - 1;
+ dev->caps.num_mgms = profile[i].num >> 1;
+ dev->caps.num_amgms = profile[i].num >> 1;
+ }
break;
default:
break;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 766b8c5..c3fa919 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -57,7 +57,8 @@ struct mac_res {
struct res_common {
struct list_head list;
- u32 res_id;
+ struct rb_node node;
+ u64 res_id;
int owner;
int state;
int from_state;
@@ -189,6 +190,58 @@ struct res_xrcdn {
int port;
};
+enum res_fs_rule_states {
+ RES_FS_RULE_BUSY = RES_ANY_BUSY,
+ RES_FS_RULE_ALLOCATED,
+};
+
+struct res_fs_rule {
+ struct res_common com;
+};
+
+static void *res_tracker_lookup(struct rb_root *root, u64 res_id)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct res_common *res = container_of(node, struct res_common,
+ node);
+
+ if (res_id < res->res_id)
+ node = node->rb_left;
+ else if (res_id > res->res_id)
+ node = node->rb_right;
+ else
+ return res;
+ }
+ return NULL;
+}
+
+static int res_tracker_insert(struct rb_root *root, struct res_common *res)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct res_common *this = container_of(*new, struct res_common,
+ node);
+
+ parent = *new;
+ if (res->res_id < this->res_id)
+ new = &((*new)->rb_left);
+ else if (res->res_id > this->res_id)
+ new = &((*new)->rb_right);
+ else
+ return -EEXIST;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&res->node, parent, new);
+ rb_insert_color(&res->node, root);
+
+ return 0;
+}
+
/* For Debug uses */
static const char *ResourceType(enum mlx4_resource rt)
{
@@ -201,6 +254,7 @@ static const char *ResourceType(enum mlx4_resource rt)
case RES_MAC: return "RES_MAC";
case RES_EQ: return "RES_EQ";
case RES_COUNTER: return "RES_COUNTER";
+ case RES_FS_RULE: return "RES_FS_RULE";
case RES_XRCD: return "RES_XRCD";
default: return "Unknown resource type !!!";
};
@@ -228,8 +282,7 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev)
mlx4_dbg(dev, "Started init_resource_tracker: %ld slaves\n",
dev->num_slaves);
for (i = 0 ; i < MLX4_NUM_OF_RESOURCE_TYPE; i++)
- INIT_RADIX_TREE(&priv->mfunc.master.res_tracker.res_tree[i],
- GFP_ATOMIC|__GFP_NOWARN);
+ priv->mfunc.master.res_tracker.res_tree[i] = RB_ROOT;
spin_lock_init(&priv->mfunc.master.res_tracker.lock);
return 0 ;
@@ -277,11 +330,11 @@ static void *find_res(struct mlx4_dev *dev, int res_id,
{
struct mlx4_priv *priv = mlx4_priv(dev);
- return radix_tree_lookup(&priv->mfunc.master.res_tracker.res_tree[type],
- res_id);
+ return res_tracker_lookup(&priv->mfunc.master.res_tracker.res_tree[type],
+ res_id);
}
-static int get_res(struct mlx4_dev *dev, int slave, int res_id,
+static int get_res(struct mlx4_dev *dev, int slave, u64 res_id,
enum mlx4_resource type,
void *res)
{
@@ -307,7 +360,7 @@ static int get_res(struct mlx4_dev *dev, int slave, int res_id,
r->from_state = r->state;
r->state = RES_ANY_BUSY;
- mlx4_dbg(dev, "res %s id 0x%x to busy\n",
+ mlx4_dbg(dev, "res %s id 0x%llx to busy\n",
ResourceType(type), r->res_id);
if (res)
@@ -320,7 +373,7 @@ exit:
int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
enum mlx4_resource type,
- int res_id, int *slave)
+ u64 res_id, int *slave)
{
struct res_common *r;
@@ -341,7 +394,7 @@ int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
return err;
}
-static void put_res(struct mlx4_dev *dev, int slave, int res_id,
+static void put_res(struct mlx4_dev *dev, int slave, u64 res_id,
enum mlx4_resource type)
{
struct res_common *r;
@@ -473,7 +526,21 @@ static struct res_common *alloc_xrcdn_tr(int id)
return &ret->com;
}
-static struct res_common *alloc_tr(int id, enum mlx4_resource type, int slave,
+static struct res_common *alloc_fs_rule_tr(u64 id)
+{
+ struct res_fs_rule *ret;
+
+ ret = kzalloc(sizeof *ret, GFP_KERNEL);
+ if (!ret)
+ return NULL;
+
+ ret->com.res_id = id;
+ ret->com.state = RES_FS_RULE_ALLOCATED;
+
+ return &ret->com;
+}
+
+static struct res_common *alloc_tr(u64 id, enum mlx4_resource type, int slave,
int extra)
{
struct res_common *ret;
@@ -506,6 +573,9 @@ static struct res_common *alloc_tr(int id, enum mlx4_resource type, int slave,
case RES_XRCD:
ret = alloc_xrcdn_tr(id);
break;
+ case RES_FS_RULE:
+ ret = alloc_fs_rule_tr(id);
+ break;
default:
return NULL;
}
@@ -515,7 +585,7 @@ static struct res_common *alloc_tr(int id, enum mlx4_resource type, int slave,
return ret;
}
-static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count,
+static int add_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
enum mlx4_resource type, int extra)
{
int i;
@@ -523,7 +593,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count,
struct mlx4_priv *priv = mlx4_priv(dev);
struct res_common **res_arr;
struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
- struct radix_tree_root *root = &tracker->res_tree[type];
+ struct rb_root *root = &tracker->res_tree[type];
res_arr = kzalloc(count * sizeof *res_arr, GFP_KERNEL);
if (!res_arr)
@@ -546,7 +616,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count,
err = -EEXIST;
goto undo;
}
- err = radix_tree_insert(root, base + i, res_arr[i]);
+ err = res_tracker_insert(root, res_arr[i]);
if (err)
goto undo;
list_add_tail(&res_arr[i]->list,
@@ -559,7 +629,7 @@ static int add_res_range(struct mlx4_dev *dev, int slave, int base, int count,
undo:
for (--i; i >= base; --i)
- radix_tree_delete(&tracker->res_tree[type], i);
+ rb_erase(&res_arr[i]->node, root);
spin_unlock_irq(mlx4_tlock(dev));
@@ -638,6 +708,16 @@ static int remove_xrcdn_ok(struct res_xrcdn *res)
return 0;
}
+static int remove_fs_rule_ok(struct res_fs_rule *res)
+{
+ if (res->com.state == RES_FS_RULE_BUSY)
+ return -EBUSY;
+ else if (res->com.state != RES_FS_RULE_ALLOCATED)
+ return -EPERM;
+
+ return 0;
+}
+
static int remove_cq_ok(struct res_cq *res)
{
if (res->com.state == RES_CQ_BUSY)
@@ -679,15 +759,17 @@ static int remove_ok(struct res_common *res, enum mlx4_resource type, int extra)
return remove_counter_ok((struct res_counter *)res);
case RES_XRCD:
return remove_xrcdn_ok((struct res_xrcdn *)res);
+ case RES_FS_RULE:
+ return remove_fs_rule_ok((struct res_fs_rule *)res);
default:
return -EINVAL;
}
}
-static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count,
+static int rem_res_range(struct mlx4_dev *dev, int slave, u64 base, int count,
enum mlx4_resource type, int extra)
{
- int i;
+ u64 i;
int err;
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_resource_tracker *tracker = &priv->mfunc.master.res_tracker;
@@ -695,7 +777,7 @@ static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count,
spin_lock_irq(mlx4_tlock(dev));
for (i = base; i < base + count; ++i) {
- r = radix_tree_lookup(&tracker->res_tree[type], i);
+ r = res_tracker_lookup(&tracker->res_tree[type], i);
if (!r) {
err = -ENOENT;
goto out;
@@ -710,8 +792,8 @@ static int rem_res_range(struct mlx4_dev *dev, int slave, int base, int count,
}
for (i = base; i < base + count; ++i) {
- r = radix_tree_lookup(&tracker->res_tree[type], i);
- radix_tree_delete(&tracker->res_tree[type], i);
+ r = res_tracker_lookup(&tracker->res_tree[type], i);
+ rb_erase(&r->node, &tracker->res_tree[type]);
list_del(&r->list);
kfree(r);
}
@@ -733,7 +815,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn,
int err = 0;
spin_lock_irq(mlx4_tlock(dev));
- r = radix_tree_lookup(&tracker->res_tree[RES_QP], qpn);
+ r = res_tracker_lookup(&tracker->res_tree[RES_QP], qpn);
if (!r)
err = -ENOENT;
else if (r->com.owner != slave)
@@ -741,7 +823,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn,
else {
switch (state) {
case RES_QP_BUSY:
- mlx4_dbg(dev, "%s: failed RES_QP, 0x%x\n",
+ mlx4_dbg(dev, "%s: failed RES_QP, 0x%llx\n",
__func__, r->com.res_id);
err = -EBUSY;
break;
@@ -750,7 +832,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn,
if (r->com.state == RES_QP_MAPPED && !alloc)
break;
- mlx4_dbg(dev, "failed RES_QP, 0x%x\n", r->com.res_id);
+ mlx4_dbg(dev, "failed RES_QP, 0x%llx\n", r->com.res_id);
err = -EINVAL;
break;
@@ -759,7 +841,7 @@ static int qp_res_start_move_to(struct mlx4_dev *dev, int slave, int qpn,
r->com.state == RES_QP_HW)
break;
else {
- mlx4_dbg(dev, "failed RES_QP, 0x%x\n",
+ mlx4_dbg(dev, "failed RES_QP, 0x%llx\n",
r->com.res_id);
err = -EINVAL;
}
@@ -797,7 +879,7 @@ static int mr_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
int err = 0;
spin_lock_irq(mlx4_tlock(dev));
- r = radix_tree_lookup(&tracker->res_tree[RES_MPT], index);
+ r = res_tracker_lookup(&tracker->res_tree[RES_MPT], index);
if (!r)
err = -ENOENT;
else if (r->com.owner != slave)
@@ -850,7 +932,7 @@ static int eq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
int err = 0;
spin_lock_irq(mlx4_tlock(dev));
- r = radix_tree_lookup(&tracker->res_tree[RES_EQ], index);
+ r = res_tracker_lookup(&tracker->res_tree[RES_EQ], index);
if (!r)
err = -ENOENT;
else if (r->com.owner != slave)
@@ -898,7 +980,7 @@ static int cq_res_start_move_to(struct mlx4_dev *dev, int slave, int cqn,
int err;
spin_lock_irq(mlx4_tlock(dev));
- r = radix_tree_lookup(&tracker->res_tree[RES_CQ], cqn);
+ r = res_tracker_lookup(&tracker->res_tree[RES_CQ], cqn);
if (!r)
err = -ENOENT;
else if (r->com.owner != slave)
@@ -952,7 +1034,7 @@ static int srq_res_start_move_to(struct mlx4_dev *dev, int slave, int index,
int err = 0;
spin_lock_irq(mlx4_tlock(dev));
- r = radix_tree_lookup(&tracker->res_tree[RES_SRQ], index);
+ r = res_tracker_lookup(&tracker->res_tree[RES_SRQ], index);
if (!r)
err = -ENOENT;
else if (r->com.owner != slave)
@@ -1001,7 +1083,7 @@ static void res_abort_move(struct mlx4_dev *dev, int slave,
struct res_common *r;
spin_lock_irq(mlx4_tlock(dev));
- r = radix_tree_lookup(&tracker->res_tree[type], id);
+ r = res_tracker_lookup(&tracker->res_tree[type], id);
if (r && (r->owner == slave))
r->state = r->from_state;
spin_unlock_irq(mlx4_tlock(dev));
@@ -1015,7 +1097,7 @@ static void res_end_move(struct mlx4_dev *dev, int slave,
struct res_common *r;
spin_lock_irq(mlx4_tlock(dev));
- r = radix_tree_lookup(&tracker->res_tree[type], id);
+ r = res_tracker_lookup(&tracker->res_tree[type], id);
if (r && (r->owner == slave))
r->state = r->to_state;
spin_unlock_irq(mlx4_tlock(dev));
@@ -2695,6 +2777,60 @@ ex_put:
return err;
}
+int mlx4_QP_FLOW_STEERING_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EOPNOTSUPP;
+
+ err = mlx4_cmd_imm(dev, inbox->dma, &vhcr->out_param,
+ vhcr->in_modifier, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ if (err)
+ return err;
+
+ err = add_res_range(dev, slave, vhcr->out_param, 1, RES_FS_RULE, 0);
+ if (err) {
+ mlx4_err(dev, "Fail to add flow steering resources.\n ");
+ /* detach rule*/
+ mlx4_cmd(dev, vhcr->out_param, 0, 0,
+ MLX4_QP_FLOW_STEERING_ATTACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ }
+ return err;
+}
+
+int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
+ struct mlx4_vhcr *vhcr,
+ struct mlx4_cmd_mailbox *inbox,
+ struct mlx4_cmd_mailbox *outbox,
+ struct mlx4_cmd_info *cmd)
+{
+ int err;
+
+ if (dev->caps.steering_mode !=
+ MLX4_STEERING_MODE_DEVICE_MANAGED)
+ return -EOPNOTSUPP;
+
+ err = rem_res_range(dev, slave, vhcr->in_param, 1, RES_FS_RULE, 0);
+ if (err) {
+ mlx4_err(dev, "Fail to remove flow steering resources.\n ");
+ return err;
+ }
+
+ err = mlx4_cmd(dev, vhcr->in_param, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH, MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+ return err;
+}
+
enum {
BUSY_MAX_RETRIES = 10
};
@@ -2751,7 +2887,7 @@ static int _move_all_busy(struct mlx4_dev *dev, int slave,
if (r->state == RES_ANY_BUSY) {
if (print)
mlx4_dbg(dev,
- "%s id 0x%x is busy\n",
+ "%s id 0x%llx is busy\n",
ResourceType(type),
r->res_id);
++busy;
@@ -2817,8 +2953,8 @@ static void rem_slave_qps(struct mlx4_dev *dev, int slave)
switch (state) {
case RES_QP_RESERVED:
spin_lock_irq(mlx4_tlock(dev));
- radix_tree_delete(&tracker->res_tree[RES_QP],
- qp->com.res_id);
+ rb_erase(&qp->com.node,
+ &tracker->res_tree[RES_QP]);
list_del(&qp->com.list);
spin_unlock_irq(mlx4_tlock(dev));
kfree(qp);
@@ -2888,8 +3024,8 @@ static void rem_slave_srqs(struct mlx4_dev *dev, int slave)
case RES_SRQ_ALLOCATED:
__mlx4_srq_free_icm(dev, srqn);
spin_lock_irq(mlx4_tlock(dev));
- radix_tree_delete(&tracker->res_tree[RES_SRQ],
- srqn);
+ rb_erase(&srq->com.node,
+ &tracker->res_tree[RES_SRQ]);
list_del(&srq->com.list);
spin_unlock_irq(mlx4_tlock(dev));
kfree(srq);
@@ -2954,8 +3090,8 @@ static void rem_slave_cqs(struct mlx4_dev *dev, int slave)
case RES_CQ_ALLOCATED:
__mlx4_cq_free_icm(dev, cqn);
spin_lock_irq(mlx4_tlock(dev));
- radix_tree_delete(&tracker->res_tree[RES_CQ],
- cqn);
+ rb_erase(&cq->com.node,
+ &tracker->res_tree[RES_CQ]);
list_del(&cq->com.list);
spin_unlock_irq(mlx4_tlock(dev));
kfree(cq);
@@ -3017,8 +3153,8 @@ static void rem_slave_mrs(struct mlx4_dev *dev, int slave)
case RES_MPT_RESERVED:
__mlx4_mr_release(dev, mpt->key);
spin_lock_irq(mlx4_tlock(dev));
- radix_tree_delete(&tracker->res_tree[RES_MPT],
- mptn);
+ rb_erase(&mpt->com.node,
+ &tracker->res_tree[RES_MPT]);
list_del(&mpt->com.list);
spin_unlock_irq(mlx4_tlock(dev));
kfree(mpt);
@@ -3086,8 +3222,8 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave)
__mlx4_free_mtt_range(dev, base,
mtt->order);
spin_lock_irq(mlx4_tlock(dev));
- radix_tree_delete(&tracker->res_tree[RES_MTT],
- base);
+ rb_erase(&mtt->com.node,
+ &tracker->res_tree[RES_MTT]);
list_del(&mtt->com.list);
spin_unlock_irq(mlx4_tlock(dev));
kfree(mtt);
@@ -3104,6 +3240,58 @@ static void rem_slave_mtts(struct mlx4_dev *dev, int slave)
spin_unlock_irq(mlx4_tlock(dev));
}
+static void rem_slave_fs_rule(struct mlx4_dev *dev, int slave)
+{
+ struct mlx4_priv *priv = mlx4_priv(dev);
+ struct mlx4_resource_tracker *tracker =
+ &priv->mfunc.master.res_tracker;
+ struct list_head *fs_rule_list =
+ &tracker->slave_list[slave].res_list[RES_FS_RULE];
+ struct res_fs_rule *fs_rule;
+ struct res_fs_rule *tmp;
+ int state;
+ u64 base;
+ int err;
+
+ err = move_all_busy(dev, slave, RES_FS_RULE);
+ if (err)
+ mlx4_warn(dev, "rem_slave_fs_rule: Could not move all mtts to busy for slave %d\n",
+ slave);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ list_for_each_entry_safe(fs_rule, tmp, fs_rule_list, com.list) {
+ spin_unlock_irq(mlx4_tlock(dev));
+ if (fs_rule->com.owner == slave) {
+ base = fs_rule->com.res_id;
+ state = fs_rule->com.from_state;
+ while (state != 0) {
+ switch (state) {
+ case RES_FS_RULE_ALLOCATED:
+ /* detach rule */
+ err = mlx4_cmd(dev, base, 0, 0,
+ MLX4_QP_FLOW_STEERING_DETACH,
+ MLX4_CMD_TIME_CLASS_A,
+ MLX4_CMD_NATIVE);
+
+ spin_lock_irq(mlx4_tlock(dev));
+ rb_erase(&fs_rule->com.node,
+ &tracker->res_tree[RES_FS_RULE]);
+ list_del(&fs_rule->com.list);
+ spin_unlock_irq(mlx4_tlock(dev));
+ kfree(fs_rule);
+ state = 0;
+ break;
+
+ default:
+ state = 0;
+ }
+ }
+ }
+ spin_lock_irq(mlx4_tlock(dev));
+ }
+ spin_unlock_irq(mlx4_tlock(dev));
+}
+
static void rem_slave_eqs(struct mlx4_dev *dev, int slave)
{
struct mlx4_priv *priv = mlx4_priv(dev);
@@ -3133,8 +3321,8 @@ static void rem_slave_eqs(struct mlx4_dev *dev, int slave)
switch (state) {
case RES_EQ_RESERVED:
spin_lock_irq(mlx4_tlock(dev));
- radix_tree_delete(&tracker->res_tree[RES_EQ],
- eqn);
+ rb_erase(&eq->com.node,
+ &tracker->res_tree[RES_EQ]);
list_del(&eq->com.list);
spin_unlock_irq(mlx4_tlock(dev));
kfree(eq);
@@ -3191,7 +3379,8 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave)
list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
if (counter->com.owner == slave) {
index = counter->com.res_id;
- radix_tree_delete(&tracker->res_tree[RES_COUNTER], index);
+ rb_erase(&counter->com.node,
+ &tracker->res_tree[RES_COUNTER]);
list_del(&counter->com.list);
kfree(counter);
__mlx4_counter_free(dev, index);
@@ -3220,7 +3409,7 @@ static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)
list_for_each_entry_safe(xrcd, tmp, xrcdn_list, com.list) {
if (xrcd->com.owner == slave) {
xrcdn = xrcd->com.res_id;
- radix_tree_delete(&tracker->res_tree[RES_XRCD], xrcdn);
+ rb_erase(&xrcd->com.node, &tracker->res_tree[RES_XRCD]);
list_del(&xrcd->com.list);
kfree(xrcd);
__mlx4_xrcd_free(dev, xrcdn);
@@ -3244,5 +3433,6 @@ void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave)
rem_slave_mtts(dev, slave);
rem_slave_counters(dev, slave);
rem_slave_xrcdns(dev, slave);
+ rem_slave_fs_rule(dev, slave);
mutex_unlock(&priv->mfunc.master.res_tracker.slave_list[slave].mutex);
}
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 90153fc..fa85cf1 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -3775,7 +3775,7 @@ static void myri10ge_probe_slices(struct myri10ge_priv *mgp)
mgp->num_slices = 1;
msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
- ncpus = num_online_cpus();
+ ncpus = netif_get_num_default_rss_queues();
if (myri10ge_max_slices == 1 || msix_cap == 0 ||
(myri10ge_max_slices == -1 && ncpus < 2))
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 2578eb1..2fd1edb 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3687,7 +3687,8 @@ static int __devinit vxge_config_vpaths(
return 0;
if (!driver_config->g_no_cpus)
- driver_config->g_no_cpus = num_online_cpus();
+ driver_config->g_no_cpus =
+ netif_get_num_default_rss_queues();
driver_config->vpath_per_dev = driver_config->g_no_cpus >> 1;
if (!driver_config->vpath_per_dev)
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 09d8d33..3c3499d 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -4649,7 +4649,7 @@ static int __devinit qlge_probe(struct pci_dev *pdev,
int err = 0;
ndev = alloc_etherdev_mq(sizeof(struct ql_adapter),
- min(MAX_CPUS, (int)num_online_cpus()));
+ min(MAX_CPUS, netif_get_num_default_rss_queues()));
if (!ndev)
return -ENOMEM;
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index d1827e8..9acc026 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -1256,7 +1256,6 @@ static void __devexit r6040_remove_one(struct pci_dev *pdev)
kfree(lp->mii_bus->irq);
mdiobus_free(lp->mii_bus);
netif_napi_del(&lp->napi);
- pci_set_drvdata(pdev, NULL);
pci_iounmap(pdev, lp->base);
pci_release_regions(pdev);
free_netdev(dev);
diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index 3ae80ec..6564c32 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -358,14 +358,30 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
padlen = ((skb->len + 4) & (dev->maxpacket - 1)) ? 0 : 4;
- if ((!skb_cloned(skb)) &&
- ((headroom + tailroom) >= (4 + padlen))) {
- if ((headroom < 4) || (tailroom < padlen)) {
+ /* We need to push 4 bytes in front of frame (packet_len)
+ * and maybe add 4 bytes after the end (if padlen is 4)
+ *
+ * Avoid skb_copy_expand() expensive call, using following rules :
+ * - We are allowed to push 4 bytes in headroom if skb_header_cloned()
+ * is false (and if we have 4 bytes of headroom)
+ * - We are allowed to put 4 bytes at tail if skb_cloned()
+ * is false (and if we have 4 bytes of tailroom)
+ *
+ * TCP packets for example are cloned, but skb_header_release()
+ * was called in tcp stack, allowing us to use headroom for our needs.
+ */
+ if (!skb_header_cloned(skb) &&
+ !(padlen && skb_cloned(skb)) &&
+ headroom + tailroom >= 4 + padlen) {
+ /* following should not happen, but better be safe */
+ if (headroom < 4 ||
+ tailroom < padlen) {
skb->data = memmove(skb->head + 4, skb->data, skb->len);
skb_set_tail_pointer(skb, skb->len);
}
} else {
struct sk_buff *skb2;
+
skb2 = skb_copy_expand(skb, 4, padlen, flags);
dev_kfree_skb_any(skb);
skb = skb2;
@@ -373,8 +389,8 @@ static struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
return NULL;
}
+ packet_len = ((skb->len ^ 0x0000ffff) << 16) + skb->len;
skb_push(skb, 4);
- packet_len = (((skb->len - 4) ^ 0x0000ffff) << 16) + (skb->len - 4);
cpu_to_le32s(&packet_len);
skb_copy_to_linear_data(skb, &packet_len, sizeof(packet_len));
@@ -880,6 +896,8 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf)
dev->net->netdev_ops = &ax88172_netdev_ops;
dev->net->ethtool_ops = &ax88172_ethtool_ops;
+ dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */
+ dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */
asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET);
asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE,
@@ -1075,6 +1093,8 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
dev->net->netdev_ops = &ax88772_netdev_ops;
dev->net->ethtool_ops = &ax88772_ethtool_ops;
+ dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */
+ dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */
embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0);
diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c
index da14432..efc4b7f 100644
--- a/drivers/oprofile/oprofile_perf.c
+++ b/drivers/oprofile/oprofile_perf.c
@@ -25,7 +25,7 @@ static int oprofile_perf_enabled;
static DEFINE_MUTEX(oprofile_perf_mutex);
static struct op_counter_config *counter_config;
-static struct perf_event **perf_events[nr_cpumask_bits];
+static struct perf_event **perf_events[NR_CPUS];
static int num_counters;
/*
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 7be5e97..73ac63d 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2700,10 +2700,11 @@ int inline qeth_l3_get_cast_type(struct qeth_card *card, struct sk_buff *skb)
rcu_read_lock();
dst = skb_dst(skb);
if (dst)
- n = dst_get_neighbour_noref(dst);
+ n = dst_neigh_lookup_skb(dst, skb);
if (n) {
cast_type = n->type;
rcu_read_unlock();
+ neigh_release(n);
if ((cast_type == RTN_BROADCAST) ||
(cast_type == RTN_MULTICAST) ||
(cast_type == RTN_ANYCAST))
diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
index 36739da..49692a1 100644
--- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
+++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c
@@ -966,7 +966,8 @@ static int init_act_open(struct cxgbi_sock *csk)
csk->saddr.sin_addr.s_addr = chba->ipv4addr;
csk->rss_qid = 0;
- csk->l2t = t3_l2t_get(t3dev, dst, ndev);
+ csk->l2t = t3_l2t_get(t3dev, dst, ndev,
+ &csk->daddr.sin_addr.s_addr);
if (!csk->l2t) {
pr_err("NO l2t available.\n");
return -EINVAL;
diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
index 5a4a3bf..cc9a068 100644
--- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
+++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c
@@ -1142,7 +1142,7 @@ static int init_act_open(struct cxgbi_sock *csk)
cxgbi_sock_set_flag(csk, CTPF_HAS_ATID);
cxgbi_sock_get(csk);
- n = dst_get_neighbour_noref(csk->dst);
+ n = dst_neigh_lookup(csk->dst, &csk->daddr.sin_addr.s_addr);
if (!n) {
pr_err("%s, can't get neighbour of csk->dst.\n", ndev->name);
goto rel_resource;
@@ -1182,9 +1182,12 @@ static int init_act_open(struct cxgbi_sock *csk)
cxgbi_sock_set_state(csk, CTP_ACTIVE_OPEN);
send_act_open_req(csk, skb, csk->l2t);
+ neigh_release(n);
return 0;
rel_resource:
+ if (n)
+ neigh_release(n);
if (skb)
__kfree_skb(skb);
return -EINVAL;
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index d9253db..b44c1cf 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -494,7 +494,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr)
goto err_out;
}
dst = &rt->dst;
- n = dst_get_neighbour_noref(dst);
+ n = dst_neigh_lookup(dst, &daddr->sin_addr.s_addr);
if (!n) {
err = -ENODEV;
goto rel_rt;
@@ -506,7 +506,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr)
&daddr->sin_addr.s_addr, ntohs(daddr->sin_port),
ndev->name);
err = -ENETUNREACH;
- goto rel_rt;
+ goto rel_neigh;
}
if (ndev->flags & IFF_LOOPBACK) {
@@ -521,7 +521,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr)
pr_info("dst %pI4, %s, NOT cxgbi device.\n",
&daddr->sin_addr.s_addr, ndev->name);
err = -ENETUNREACH;
- goto rel_rt;
+ goto rel_neigh;
}
log_debug(1 << CXGBI_DBG_SOCK,
"route to %pI4 :%u, ndev p#%d,%s, cdev 0x%p.\n",
@@ -531,7 +531,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr)
csk = cxgbi_sock_create(cdev);
if (!csk) {
err = -ENOMEM;
- goto rel_rt;
+ goto rel_neigh;
}
csk->cdev = cdev;
csk->port_id = port;
@@ -541,9 +541,13 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr)
csk->daddr.sin_port = daddr->sin_port;
csk->daddr.sin_family = daddr->sin_family;
csk->saddr.sin_addr.s_addr = fl4.saddr;
+ neigh_release(n);
return csk;
+rel_neigh:
+ neigh_release(n);
+
rel_rt:
ip_rt_put(rt);
if (csk)
diff --git a/fs/splice.c b/fs/splice.c
index c9f1318..7bf08fa 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -273,13 +273,16 @@ void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
* Check if we need to grow the arrays holding pages and partial page
* descriptions.
*/
-int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
+int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
{
- if (pipe->buffers <= PIPE_DEF_BUFFERS)
+ unsigned int buffers = ACCESS_ONCE(pipe->buffers);
+
+ spd->nr_pages_max = buffers;
+ if (buffers <= PIPE_DEF_BUFFERS)
return 0;
- spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL);
- spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL);
+ spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL);
+ spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL);
if (spd->pages && spd->partial)
return 0;
@@ -289,10 +292,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)
return -ENOMEM;
}
-void splice_shrink_spd(struct pipe_inode_info *pipe,
- struct splice_pipe_desc *spd)
+void splice_shrink_spd(struct splice_pipe_desc *spd)
{
- if (pipe->buffers <= PIPE_DEF_BUFFERS)
+ if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)
return;
kfree(spd->pages);
@@ -315,6 +317,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &page_cache_pipe_buf_ops,
.spd_release = spd_release_page,
@@ -326,7 +329,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
index = *ppos >> PAGE_CACHE_SHIFT;
loff = *ppos & ~PAGE_CACHE_MASK;
req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- nr_pages = min(req_pages, pipe->buffers);
+ nr_pages = min(req_pages, spd.nr_pages_max);
/*
* Lookup the (hopefully) full range of pages we need.
@@ -497,7 +500,7 @@ fill_it:
if (spd.nr_pages)
error = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return error;
}
@@ -598,6 +601,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &default_pipe_buf_ops,
.spd_release = spd_release_page,
@@ -608,8 +612,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
res = -ENOMEM;
vec = __vec;
- if (pipe->buffers > PIPE_DEF_BUFFERS) {
- vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL);
+ if (spd.nr_pages_max > PIPE_DEF_BUFFERS) {
+ vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);
if (!vec)
goto shrink_ret;
}
@@ -617,7 +621,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
offset = *ppos & ~PAGE_CACHE_MASK;
nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
- for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) {
+ for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
struct page *page;
page = alloc_page(GFP_USER);
@@ -665,7 +669,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
shrink_ret:
if (vec != __vec)
kfree(vec);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return res;
err:
@@ -1614,6 +1618,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &user_page_pipe_buf_ops,
.spd_release = spd_release_page,
@@ -1629,13 +1634,13 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages,
spd.partial, false,
- pipe->buffers);
+ spd.nr_pages_max);
if (spd.nr_pages <= 0)
ret = spd.nr_pages;
else
ret = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return ret;
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ba43f40..07954b0 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -827,7 +827,6 @@ extern bool __blk_end_request_err(struct request *rq, int error);
extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
extern void blk_abort_request(struct request *);
-extern void blk_abort_queue(struct request_queue *);
extern void blk_unprep_request(struct request *);
/*
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 61f5cec..a5261e3 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -301,8 +301,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d)
* @irq_pm_shutdown: function called from core code on shutdown once per chip
* @irq_print_chip: optional to print special chip info in show_interrupts
* @flags: chip specific flags
- *
- * @release: release function solely used by UML
*/
struct irq_chip {
const char *name;
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index 1f3860a..2606951 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -154,6 +154,10 @@ enum {
/* set port opcode modifiers */
MLX4_SET_PORT_PRIO2TC = 0x8,
MLX4_SET_PORT_SCHEDULER = 0x9,
+
+ /* register/delete flow steering network rules */
+ MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
+ MLX4_QP_FLOW_STEERING_DETACH = 0x66,
};
enum {
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6a8f002..6f0d133 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -70,6 +70,36 @@ enum {
MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1)
};
+/* Driver supports 3 diffrent device methods to manage traffic steering:
+ * -device managed - High level API for ib and eth flow steering. FW is
+ * managing flow steering tables.
+ * - B0 steering mode - Common low level API for ib and (if supported) eth.
+ * - A0 steering mode - Limited low level API for eth. In case of IB,
+ * B0 mode is in use.
+ */
+enum {
+ MLX4_STEERING_MODE_A0,
+ MLX4_STEERING_MODE_B0,
+ MLX4_STEERING_MODE_DEVICE_MANAGED
+};
+
+static inline const char *mlx4_steering_mode_str(int steering_mode)
+{
+ switch (steering_mode) {
+ case MLX4_STEERING_MODE_A0:
+ return "A0 steering";
+
+ case MLX4_STEERING_MODE_B0:
+ return "B0 steering";
+
+ case MLX4_STEERING_MODE_DEVICE_MANAGED:
+ return "Device managed flow steering";
+
+ default:
+ return "Unrecognize steering mode";
+ }
+}
+
enum {
MLX4_DEV_CAP_FLAG_RC = 1LL << 0,
MLX4_DEV_CAP_FLAG_UC = 1LL << 1,
@@ -102,7 +132,8 @@ enum {
enum {
MLX4_DEV_CAP_FLAG2_RSS = 1LL << 0,
MLX4_DEV_CAP_FLAG2_RSS_TOP = 1LL << 1,
- MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2
+ MLX4_DEV_CAP_FLAG2_RSS_XOR = 1LL << 2,
+ MLX4_DEV_CAP_FLAG2_FS_EN = 1LL << 3
};
#define MLX4_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90)
@@ -295,6 +326,8 @@ struct mlx4_caps {
int num_amgms;
int reserved_mcgs;
int num_qp_per_mgm;
+ int steering_mode;
+ int fs_log_max_ucast_qp_range_size;
int num_pds;
int reserved_pds;
int max_xrcds;
@@ -509,6 +542,8 @@ struct mlx4_dev {
u8 rev_id;
char board_id[MLX4_BOARD_ID_LEN];
int num_vfs;
+ u64 regid_promisc_array[MLX4_MAX_PORTS + 1];
+ u64 regid_allmulti_array[MLX4_MAX_PORTS + 1];
};
struct mlx4_init_port_param {
@@ -623,9 +658,99 @@ int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
enum mlx4_protocol prot);
int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- int block_mcast_loopback, enum mlx4_protocol protocol);
+ u8 port, int block_mcast_loopback,
+ enum mlx4_protocol protocol, u64 *reg_id);
int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
- enum mlx4_protocol protocol);
+ enum mlx4_protocol protocol, u64 reg_id);
+
+enum {
+ MLX4_DOMAIN_UVERBS = 0x1000,
+ MLX4_DOMAIN_ETHTOOL = 0x2000,
+ MLX4_DOMAIN_RFS = 0x3000,
+ MLX4_DOMAIN_NIC = 0x5000,
+};
+
+enum mlx4_net_trans_rule_id {
+ MLX4_NET_TRANS_RULE_ID_ETH = 0,
+ MLX4_NET_TRANS_RULE_ID_IB,
+ MLX4_NET_TRANS_RULE_ID_IPV6,
+ MLX4_NET_TRANS_RULE_ID_IPV4,
+ MLX4_NET_TRANS_RULE_ID_TCP,
+ MLX4_NET_TRANS_RULE_ID_UDP,
+ MLX4_NET_TRANS_RULE_NUM, /* should be last */
+};
+
+enum mlx4_net_trans_promisc_mode {
+ MLX4_FS_PROMISC_NONE = 0,
+ MLX4_FS_PROMISC_UPLINK,
+ /* For future use. Not implemented yet */
+ MLX4_FS_PROMISC_FUNCTION_PORT,
+ MLX4_FS_PROMISC_ALL_MULTI,
+};
+
+struct mlx4_spec_eth {
+ u8 dst_mac[6];
+ u8 dst_mac_msk[6];
+ u8 src_mac[6];
+ u8 src_mac_msk[6];
+ u8 ether_type_enable;
+ __be16 ether_type;
+ __be16 vlan_id_msk;
+ __be16 vlan_id;
+};
+
+struct mlx4_spec_tcp_udp {
+ __be16 dst_port;
+ __be16 dst_port_msk;
+ __be16 src_port;
+ __be16 src_port_msk;
+};
+
+struct mlx4_spec_ipv4 {
+ __be32 dst_ip;
+ __be32 dst_ip_msk;
+ __be32 src_ip;
+ __be32 src_ip_msk;
+};
+
+struct mlx4_spec_ib {
+ __be32 r_qpn;
+ __be32 qpn_msk;
+ u8 dst_gid[16];
+ u8 dst_gid_msk[16];
+};
+
+struct mlx4_spec_list {
+ struct list_head list;
+ enum mlx4_net_trans_rule_id id;
+ union {
+ struct mlx4_spec_eth eth;
+ struct mlx4_spec_ib ib;
+ struct mlx4_spec_ipv4 ipv4;
+ struct mlx4_spec_tcp_udp tcp_udp;
+ };
+};
+
+enum mlx4_net_trans_hw_rule_queue {
+ MLX4_NET_TRANS_Q_FIFO,
+ MLX4_NET_TRANS_Q_LIFO,
+};
+
+struct mlx4_net_trans_rule {
+ struct list_head list;
+ enum mlx4_net_trans_hw_rule_queue queue_mode;
+ bool exclusive;
+ bool allow_loopback;
+ enum mlx4_net_trans_promisc_mode promisc_mode;
+ u8 port;
+ u16 priority;
+ u32 qpn;
+};
+
+int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port, u32 qpn,
+ enum mlx4_net_trans_promisc_mode mode);
+int mlx4_flow_steer_promisc_remove(struct mlx4_dev *dev, u8 port,
+ enum mlx4_net_trans_promisc_mode mode);
int mlx4_multicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
int mlx4_multicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
@@ -668,4 +793,8 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
+int mlx4_flow_attach(struct mlx4_dev *dev,
+ struct mlx4_net_trans_rule *rule, u64 *reg_id);
+int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
+
#endif /* MLX4_DEVICE_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2c2ecea..ab0251d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2119,6 +2119,9 @@ static inline int netif_copy_real_num_queues(struct net_device *to_dev,
#endif
}
+#define DEFAULT_MAX_NUM_RSS_QUEUES (8)
+extern int netif_get_num_default_rss_queues(void);
+
/* Use this variant when it is known for sure that it
* is executing from hardware interrupt context or with hardware interrupts
* disabled.
diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 7688833..f649f74 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -7,6 +7,8 @@ enum cntl_msg_types {
IPCTNL_MSG_CT_GET,
IPCTNL_MSG_CT_DELETE,
IPCTNL_MSG_CT_GET_CTRZERO,
+ IPCTNL_MSG_CT_GET_STATS_CPU,
+ IPCTNL_MSG_CT_GET_STATS,
IPCTNL_MSG_MAX
};
@@ -15,6 +17,7 @@ enum ctnl_exp_msg_types {
IPCTNL_MSG_EXP_NEW,
IPCTNL_MSG_EXP_GET,
IPCTNL_MSG_EXP_DELETE,
+ IPCTNL_MSG_EXP_GET_STATS_CPU,
IPCTNL_MSG_EXP_MAX
};
@@ -203,4 +206,39 @@ enum ctattr_secctx {
};
#define CTA_SECCTX_MAX (__CTA_SECCTX_MAX - 1)
+enum ctattr_stats_cpu {
+ CTA_STATS_UNSPEC,
+ CTA_STATS_SEARCHED,
+ CTA_STATS_FOUND,
+ CTA_STATS_NEW,
+ CTA_STATS_INVALID,
+ CTA_STATS_IGNORE,
+ CTA_STATS_DELETE,
+ CTA_STATS_DELETE_LIST,
+ CTA_STATS_INSERT,
+ CTA_STATS_INSERT_FAILED,
+ CTA_STATS_DROP,
+ CTA_STATS_EARLY_DROP,
+ CTA_STATS_ERROR,
+ CTA_STATS_SEARCH_RESTART,
+ __CTA_STATS_MAX,
+};
+#define CTA_STATS_MAX (__CTA_STATS_MAX - 1)
+
+enum ctattr_stats_global {
+ CTA_STATS_GLOBAL_UNSPEC,
+ CTA_STATS_GLOBAL_ENTRIES,
+ __CTA_STATS_GLOBAL_MAX,
+};
+#define CTA_STATS_GLOBAL_MAX (__CTA_STATS_GLOBAL_MAX - 1)
+
+enum ctattr_expect_stats {
+ CTA_STATS_EXP_UNSPEC,
+ CTA_STATS_EXP_NEW,
+ CTA_STATS_EXP_CREATE,
+ CTA_STATS_EXP_DELETE,
+ __CTA_STATS_EXP_MAX,
+};
+#define CTA_STATS_EXP_MAX (__CTA_STATS_EXP_MAX - 1)
+
#endif /* _IPCONNTRACK_NETLINK_H */
diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h
index e0d8fd8..3b1c136 100644
--- a/include/linux/netfilter/nfnetlink_queue.h
+++ b/include/linux/netfilter/nfnetlink_queue.h
@@ -95,5 +95,6 @@ enum nfqnl_attr_config {
/* Flags for NFQA_CFG_FLAGS */
#define NFQA_CFG_F_FAIL_OPEN (1 << 0)
#define NFQA_CFG_F_CONNTRACK (1 << 1)
+#define NFQA_CFG_F_MAX (1 << 2)
#endif /* _NFNETLINK_QUEUE_H */
diff --git a/include/linux/splice.h b/include/linux/splice.h
index 26e5b61..09a545a 100644
--- a/include/linux/splice.h
+++ b/include/linux/splice.h
@@ -51,7 +51,8 @@ struct partial_page {
struct splice_pipe_desc {
struct page **pages; /* page map */
struct partial_page *partial; /* pages[] may not be contig */
- int nr_pages; /* number of pages in map */
+ int nr_pages; /* number of populated pages in map */
+ unsigned int nr_pages_max; /* pages[] & partial[] arrays size */
unsigned int flags; /* splice flags */
const struct pipe_buf_operations *ops;/* ops associated with output pipe */
void (*spd_release)(struct splice_pipe_desc *, unsigned int);
@@ -85,9 +86,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
/*
* for dynamic pipe sizing
*/
-extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *);
-extern void splice_shrink_spd(struct pipe_inode_info *,
- struct splice_pipe_desc *);
+extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *);
+extern void splice_shrink_spd(struct splice_pipe_desc *);
extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
diff --git a/include/net/arp.h b/include/net/arp.h
index 4a1f3fb..4617d98 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -15,24 +15,34 @@ static inline u32 arp_hashfn(u32 key, const struct net_device *dev, u32 hash_rnd
return val * hash_rnd;
}
-static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key)
+static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key)
{
- struct neigh_hash_table *nht;
+ struct neigh_hash_table *nht = rcu_dereference_bh(arp_tbl.nht);
struct neighbour *n;
u32 hash_val;
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(arp_tbl.nht);
+ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
+ key = 0;
+
hash_val = arp_hashfn(key, dev, nht->hash_rnd[0]) >> (32 - nht->hash_shift);
for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
n != NULL;
n = rcu_dereference_bh(n->next)) {
- if (n->dev == dev && *(u32 *)n->primary_key == key) {
- if (!atomic_inc_not_zero(&n->refcnt))
- n = NULL;
- break;
- }
+ if (n->dev == dev && *(u32 *)n->primary_key == key)
+ return n;
}
+
+ return NULL;
+}
+
+static inline struct neighbour *__ipv4_neigh_lookup(struct net_device *dev, u32 key)
+{
+ struct neighbour *n;
+
+ rcu_read_lock_bh();
+ n = __ipv4_neigh_lookup_noref(dev, key);
+ if (n && !atomic_inc_not_zero(&n->refcnt))
+ n = NULL;
rcu_read_unlock_bh();
return n;
diff --git a/include/net/dn_route.h b/include/net/dn_route.h
index c507e05..4f7d6a1 100644
--- a/include/net/dn_route.h
+++ b/include/net/dn_route.h
@@ -67,6 +67,8 @@ extern void dn_rt_cache_flush(int delay);
struct dn_route {
struct dst_entry dst;
+ struct neighbour *n;
+
struct flowidn fld;
__le16 rt_saddr;
diff --git a/include/net/dst.h b/include/net/dst.h
index f0bf3b8..b2634e4 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -42,7 +42,7 @@ struct dst_entry {
struct dst_entry *from;
};
struct dst_entry *path;
- struct neighbour __rcu *_neighbour;
+ void *__pad0;
#ifdef CONFIG_XFRM
struct xfrm_state *xfrm;
#else
@@ -51,7 +51,7 @@ struct dst_entry {
int (*input)(struct sk_buff *);
int (*output)(struct sk_buff *);
- int flags;
+ unsigned short flags;
#define DST_HOST 0x0001
#define DST_NOXFRM 0x0002
#define DST_NOPOLICY 0x0004
@@ -62,6 +62,8 @@ struct dst_entry {
#define DST_FAKE_RTABLE 0x0080
#define DST_XFRM_TUNNEL 0x0100
+ unsigned short pending_confirm;
+
short error;
short obsolete;
unsigned short header_len; /* more space at head required */
@@ -94,21 +96,6 @@ struct dst_entry {
};
};
-static inline struct neighbour *dst_get_neighbour_noref(struct dst_entry *dst)
-{
- return rcu_dereference(dst->_neighbour);
-}
-
-static inline struct neighbour *dst_get_neighbour_noref_raw(struct dst_entry *dst)
-{
- return rcu_dereference_raw(dst->_neighbour);
-}
-
-static inline void dst_set_neighbour(struct dst_entry *dst, struct neighbour *neigh)
-{
- rcu_assign_pointer(dst->_neighbour, neigh);
-}
-
extern u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
extern const u32 dst_default_metrics[RTAX_MAX];
@@ -371,7 +358,8 @@ static inline struct dst_entry *skb_dst_pop(struct sk_buff *skb)
extern int dst_discard(struct sk_buff *skb);
extern void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
- int initial_ref, int initial_obsolete, int flags);
+ int initial_ref, int initial_obsolete,
+ unsigned short flags);
extern void __dst_free(struct dst_entry *dst);
extern struct dst_entry *dst_destroy(struct dst_entry *dst);
@@ -395,19 +383,35 @@ static inline void dst_rcu_free(struct rcu_head *head)
static inline void dst_confirm(struct dst_entry *dst)
{
- if (dst) {
- struct neighbour *n;
+ dst->pending_confirm = 1;
+}
+
+static inline int dst_neigh_output(struct dst_entry *dst, struct neighbour *n,
+ struct sk_buff *skb)
+{
+ struct hh_cache *hh;
- rcu_read_lock();
- n = dst_get_neighbour_noref(dst);
- neigh_confirm(n);
- rcu_read_unlock();
+ if (unlikely(dst->pending_confirm)) {
+ n->confirmed = jiffies;
+ dst->pending_confirm = 0;
}
+
+ hh = &n->hh;
+ if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
+ return neigh_hh_output(hh, skb);
+ else
+ return n->output(n, skb);
}
static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
{
- return dst->ops->neigh_lookup(dst, daddr);
+ return dst->ops->neigh_lookup(dst, NULL, daddr);
+}
+
+static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
+ struct sk_buff *skb)
+{
+ return dst->ops->neigh_lookup(dst, skb, NULL);
}
static inline void dst_link_failure(struct sk_buff *skb)
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 3682a0a..4badc86 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -26,7 +26,9 @@ struct dst_ops {
void (*link_failure)(struct sk_buff *);
void (*update_pmtu)(struct dst_entry *dst, u32 mtu);
int (*local_out)(struct sk_buff *skb);
- struct neighbour * (*neigh_lookup)(const struct dst_entry *dst, const void *daddr);
+ struct neighbour * (*neigh_lookup)(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr);
struct kmem_cache *kmem_cachep;
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index a192f78..0fedbd8 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -86,6 +86,8 @@ struct fib6_table;
struct rt6_info {
struct dst_entry dst;
+ struct neighbour *n;
+
/*
* Tail elements of dst_entry (__refcnt etc.)
* and these elements (rarely used in hot path) are in
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 3dc7c96..539c672 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -220,11 +220,33 @@ extern void __net_exit fib4_rules_exit(struct net *net);
extern u32 fib_rules_tclass(const struct fib_result *res);
#endif
-extern int fib_lookup(struct net *n, struct flowi4 *flp, struct fib_result *res);
-
extern struct fib_table *fib_new_table(struct net *net, u32 id);
extern struct fib_table *fib_get_table(struct net *net, u32 id);
+extern int __fib_lookup(struct net *net, struct flowi4 *flp,
+ struct fib_result *res);
+
+static inline int fib_lookup(struct net *net, struct flowi4 *flp,
+ struct fib_result *res)
+{
+ if (!net->ipv4.fib_has_custom_rules) {
+ if (net->ipv4.fib_local &&
+ !fib_table_lookup(net->ipv4.fib_local, flp, res,
+ FIB_LOOKUP_NOREF))
+ return 0;
+ if (net->ipv4.fib_main &&
+ !fib_table_lookup(net->ipv4.fib_main, flp, res,
+ FIB_LOOKUP_NOREF))
+ return 0;
+ if (net->ipv4.fib_default &&
+ !fib_table_lookup(net->ipv4.fib_default, flp, res,
+ FIB_LOOKUP_NOREF))
+ return 0;
+ return -ENETUNREACH;
+ }
+ return __fib_lookup(net, flp, res);
+}
+
#endif /* CONFIG_IP_MULTIPLE_TABLES */
/* Exported by fib_frontend.c */
@@ -236,9 +258,15 @@ extern int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
struct in_device *idev, u32 *itag);
extern void fib_select_default(struct fib_result *res);
#ifdef CONFIG_IP_ROUTE_CLASSID
-extern int fib_num_tclassid_users;
+static inline int fib_num_tclassid_users(struct net *net)
+{
+ return net->ipv4.fib_num_tclassid_users;
+}
#else
-#define fib_num_tclassid_users 0
+static inline int fib_num_tclassid_users(struct net *net)
+{
+ return 0;
+}
#endif
/* Exported by fib_semantics.c */
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 6cdfeed..344d898 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -202,9 +202,16 @@ extern struct neighbour * neigh_lookup(struct neigh_table *tbl,
extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl,
struct net *net,
const void *pkey);
-extern struct neighbour * neigh_create(struct neigh_table *tbl,
+extern struct neighbour * __neigh_create(struct neigh_table *tbl,
+ const void *pkey,
+ struct net_device *dev,
+ bool want_ref);
+static inline struct neighbour *neigh_create(struct neigh_table *tbl,
const void *pkey,
- struct net_device *dev);
+ struct net_device *dev)
+{
+ return __neigh_create(tbl, pkey, dev, true);
+}
extern void neigh_destroy(struct neighbour *neigh);
extern int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb);
extern int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
@@ -302,12 +309,6 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh)
#define neigh_hold(n) atomic_inc(&(n)->refcnt)
-static inline void neigh_confirm(struct neighbour *neigh)
-{
- if (neigh)
- neigh->confirmed = jiffies;
-}
-
static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
unsigned long now = jiffies;
@@ -351,15 +352,6 @@ static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
return dev_queue_xmit(skb);
}
-static inline int neigh_output(struct neighbour *n, struct sk_buff *skb)
-{
- struct hh_cache *hh = &n->hh;
- if ((n->nud_state & NUD_CONNECTED) && hh->hh_len)
- return neigh_hh_output(hh, skb);
- else
- return n->output(n, skb);
-}
-
static inline struct neighbour *
__neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev, int creat)
{
diff --git a/include/net/netevent.h b/include/net/netevent.h
index 086f8a5..3ce4988 100644
--- a/include/net/netevent.h
+++ b/include/net/netevent.h
@@ -12,10 +12,14 @@
*/
struct dst_entry;
+struct neighbour;
struct netevent_redirect {
struct dst_entry *old;
+ struct neighbour *old_neigh;
struct dst_entry *new;
+ struct neighbour *new_neigh;
+ const void *daddr;
};
enum netevent_notif_type {
diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 81c52b5..c3be4ae 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -97,7 +97,10 @@ struct nf_conntrack_l4proto {
#endif
int *net_id;
/* Init l4proto pernet data */
- int (*init_net)(struct net *net);
+ int (*init_net)(struct net *net, u_int16_t proto);
+
+ /* Return the per-net protocol part. */
+ struct nf_proto_net *(*get_net_proto)(struct net *net);
/* Protocol name */
const char *name;
@@ -124,6 +127,14 @@ extern int nf_conntrack_l4proto_register(struct net *net,
extern void nf_conntrack_l4proto_unregister(struct net *net,
struct nf_conntrack_l4proto *proto);
+static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn)
+{
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ kfree(pn->ctl_compat_table);
+ pn->ctl_compat_table = NULL;
+#endif
+}
+
/* Generic netlink helpers */
extern int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 227f0cd..599e48f 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -11,6 +11,7 @@ struct ctl_table_header;
struct ipv4_devconf;
struct fib_rules_ops;
struct hlist_head;
+struct fib_table;
struct sock;
struct netns_ipv4 {
@@ -24,6 +25,13 @@ struct netns_ipv4 {
struct ipv4_devconf *devconf_dflt;
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_rules_ops *rules_ops;
+ bool fib_has_custom_rules;
+ struct fib_table *fib_local;
+ struct fib_table *fib_main;
+ struct fib_table *fib_default;
+#endif
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ int fib_num_tclassid_users;
#endif
struct hlist_head *fib_table_hash;
struct sock *fibnl;
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index e4652fe..fecdf31 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -912,6 +912,9 @@ struct sctp_transport {
/* Is this structure kfree()able? */
malloced:1;
+ /* Has this transport moved the ctsn since we last sacked */
+ __u32 sack_generation;
+
struct flowi fl;
/* This is the peer's IP address and port. */
@@ -1584,6 +1587,7 @@ struct sctp_association {
*/
__u8 sack_needed; /* Do we need to sack the peer? */
__u32 sack_cnt;
+ __u32 sack_generation;
/* These are capabilities which our peer advertised. */
__u8 ecn_capable:1, /* Can peer do ECN? */
diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h
index e7728bc..2c5d2b4 100644
--- a/include/net/sctp/tsnmap.h
+++ b/include/net/sctp/tsnmap.h
@@ -117,7 +117,8 @@ void sctp_tsnmap_free(struct sctp_tsnmap *map);
int sctp_tsnmap_check(const struct sctp_tsnmap *, __u32 tsn);
/* Mark this TSN as seen. */
-int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn);
+int sctp_tsnmap_mark(struct sctp_tsnmap *, __u32 tsn,
+ struct sctp_transport *trans);
/* Mark this TSN and all lower as seen. */
void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn);
diff --git a/kernel/printk.c b/kernel/printk.c
index a2276b9..dba1821 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -193,12 +193,19 @@ static int console_may_schedule;
* separated by ',', and find the message after the ';' character.
*/
+enum log_flags {
+ LOG_DEFAULT = 0,
+ LOG_NOCONS = 1, /* already flushed, do not print to console */
+};
+
struct log {
u64 ts_nsec; /* timestamp in nanoseconds */
u16 len; /* length of entire record */
u16 text_len; /* length of text buffer */
u16 dict_len; /* length of dictionary buffer */
- u16 level; /* syslog level + facility */
+ u8 facility; /* syslog facility */
+ u8 flags:5; /* internal record flags */
+ u8 level:3; /* syslog level */
};
/*
@@ -286,6 +293,7 @@ static u32 log_next(u32 idx)
/* insert record into the buffer, discard old ones, update heads */
static void log_store(int facility, int level,
+ enum log_flags flags, u64 ts_nsec,
const char *dict, u16 dict_len,
const char *text, u16 text_len)
{
@@ -329,8 +337,13 @@ static void log_store(int facility, int level,
msg->text_len = text_len;
memcpy(log_dict(msg), dict, dict_len);
msg->dict_len = dict_len;
- msg->level = (facility << 3) | (level & 7);
- msg->ts_nsec = local_clock();
+ msg->facility = facility;
+ msg->level = level & 7;
+ msg->flags = flags & 0x1f;
+ if (ts_nsec > 0)
+ msg->ts_nsec = ts_nsec;
+ else
+ msg->ts_nsec = local_clock();
memset(log_dict(msg) + dict_len, 0, pad_len);
msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
@@ -446,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
ts_usec = msg->ts_nsec;
do_div(ts_usec, 1000);
len = sprintf(user->buf, "%u,%llu,%llu;",
- msg->level, user->seq, ts_usec);
+ (msg->facility << 3) | msg->level, user->seq, ts_usec);
/* escape non-printable characters */
for (i = 0; i < msg->text_len; i++) {
@@ -787,6 +800,21 @@ static bool printk_time;
#endif
module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
+static size_t print_time(u64 ts, char *buf)
+{
+ unsigned long rem_nsec;
+
+ if (!printk_time)
+ return 0;
+
+ if (!buf)
+ return 15;
+
+ rem_nsec = do_div(ts, 1000000000);
+ return sprintf(buf, "[%5lu.%06lu] ",
+ (unsigned long)ts, rem_nsec / 1000);
+}
+
static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
{
size_t len = 0;
@@ -803,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
}
}
- if (printk_time) {
- if (buf) {
- unsigned long long ts = msg->ts_nsec;
- unsigned long rem_nsec = do_div(ts, 1000000000);
-
- len += sprintf(buf + len, "[%5lu.%06lu] ",
- (unsigned long) ts, rem_nsec / 1000);
- } else {
- len += 15;
- }
- }
-
+ len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
return len;
}
@@ -862,28 +879,49 @@ static int syslog_print(char __user *buf, int size)
{
char *text;
struct log *msg;
- int len;
+ int len = 0;
text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
if (!text)
return -ENOMEM;
- raw_spin_lock_irq(&logbuf_lock);
- if (syslog_seq < log_first_seq) {
- /* messages are gone, move to first one */
- syslog_seq = log_first_seq;
- syslog_idx = log_first_idx;
- }
- msg = log_from_idx(syslog_idx);
- len = msg_print_text(msg, true, text, LOG_LINE_MAX);
- syslog_idx = log_next(syslog_idx);
- syslog_seq++;
- raw_spin_unlock_irq(&logbuf_lock);
+ while (size > 0) {
+ size_t n;
- if (len > size)
- len = -EINVAL;
- else if (len > 0 && copy_to_user(buf, text, len))
- len = -EFAULT;
+ raw_spin_lock_irq(&logbuf_lock);
+ if (syslog_seq < log_first_seq) {
+ /* messages are gone, move to first one */
+ syslog_seq = log_first_seq;
+ syslog_idx = log_first_idx;
+ }
+ if (syslog_seq == log_next_seq) {
+ raw_spin_unlock_irq(&logbuf_lock);
+ break;
+ }
+ msg = log_from_idx(syslog_idx);
+ n = msg_print_text(msg, true, text, LOG_LINE_MAX);
+ if (n <= size) {
+ syslog_idx = log_next(syslog_idx);
+ syslog_seq++;
+ } else
+ n = 0;
+ raw_spin_unlock_irq(&logbuf_lock);
+
+ if (!n)
+ break;
+
+ len += n;
+ size -= n;
+ buf += n;
+ n = copy_to_user(buf - n, text, n);
+
+ if (n) {
+ len -= n;
+ if (!len)
+ len = -EFAULT;
+ break;
+ }
+ }
kfree(text);
return len;
@@ -1040,6 +1078,7 @@ int do_syslog(int type, char __user *buf, int len, bool from_file)
/* Clear ring buffer */
case SYSLOG_ACTION_CLEAR:
syslog_print_all(NULL, 0, true);
+ break;
/* Disable logging to console */
case SYSLOG_ACTION_CONSOLE_OFF:
if (saved_console_loglevel == -1)
@@ -1272,15 +1311,92 @@ static inline void printk_delay(void)
}
}
+/*
+ * Continuation lines are buffered, and not committed to the record buffer
+ * until the line is complete, or a race forces it. The line fragments
+ * though, are printed immediately to the consoles to ensure everything has
+ * reached the console in case of a kernel crash.
+ */
+static struct cont {
+ char buf[LOG_LINE_MAX];
+ size_t len; /* length == 0 means unused buffer */
+ size_t cons; /* bytes written to console */
+ struct task_struct *owner; /* task of first print*/
+ u64 ts_nsec; /* time of first print */
+ u8 level; /* log level of first message */
+ u8 facility; /* log level of first message */
+ bool flushed:1; /* buffer sealed and committed */
+} cont;
+
+static void cont_flush(void)
+{
+ if (cont.flushed)
+ return;
+ if (cont.len == 0)
+ return;
+
+ log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec,
+ NULL, 0, cont.buf, cont.len);
+
+ cont.flushed = true;
+}
+
+static bool cont_add(int facility, int level, const char *text, size_t len)
+{
+ if (cont.len && cont.flushed)
+ return false;
+
+ if (cont.len + len > sizeof(cont.buf)) {
+ cont_flush();
+ return false;
+ }
+
+ if (!cont.len) {
+ cont.facility = facility;
+ cont.level = level;
+ cont.owner = current;
+ cont.ts_nsec = local_clock();
+ cont.cons = 0;
+ cont.flushed = false;
+ }
+
+ memcpy(cont.buf + cont.len, text, len);
+ cont.len += len;
+ return true;
+}
+
+static size_t cont_print_text(char *text, size_t size)
+{
+ size_t textlen = 0;
+ size_t len;
+
+ if (cont.cons == 0) {
+ textlen += print_time(cont.ts_nsec, text);
+ size -= textlen;
+ }
+
+ len = cont.len - cont.cons;
+ if (len > 0) {
+ if (len+1 > size)
+ len = size-1;
+ memcpy(text + textlen, cont.buf + cont.cons, len);
+ textlen += len;
+ cont.cons = cont.len;
+ }
+
+ if (cont.flushed) {
+ text[textlen++] = '\n';
+ /* got everything, release buffer */
+ cont.len = 0;
+ }
+ return textlen;
+}
+
asmlinkage int vprintk_emit(int facility, int level,
const char *dict, size_t dictlen,
const char *fmt, va_list args)
{
static int recursion_bug;
- static char cont_buf[LOG_LINE_MAX];
- static size_t cont_len;
- static int cont_level;
- static struct task_struct *cont_task;
static char textbuf[LOG_LINE_MAX];
char *text = textbuf;
size_t text_len;
@@ -1326,7 +1442,8 @@ asmlinkage int vprintk_emit(int facility, int level,
recursion_bug = 0;
printed_len += strlen(recursion_msg);
/* emit KERN_CRIT message */
- log_store(0, 2, NULL, 0, recursion_msg, printed_len);
+ log_store(0, 2, LOG_DEFAULT, 0,
+ NULL, 0, recursion_msg, printed_len);
}
/*
@@ -1364,55 +1481,37 @@ asmlinkage int vprintk_emit(int facility, int level,
}
if (!newline) {
- if (cont_len && (prefix || cont_task != current)) {
- /*
- * Flush earlier buffer, which is either from a
- * different thread, or when we got a new prefix.
- */
- log_store(facility, cont_level, NULL, 0, cont_buf, cont_len);
- cont_len = 0;
- }
-
- if (!cont_len) {
- cont_level = level;
- cont_task = current;
- }
+ /*
+ * Flush the conflicting buffer. An earlier newline was missing,
+ * or another task also prints continuation lines.
+ */
+ if (cont.len && (prefix || cont.owner != current))
+ cont_flush();
- /* buffer or append to earlier buffer from the same thread */
- if (cont_len + text_len > sizeof(cont_buf))
- text_len = sizeof(cont_buf) - cont_len;
- memcpy(cont_buf + cont_len, text, text_len);
- cont_len += text_len;
+ /* buffer line if possible, otherwise store it right away */
+ if (!cont_add(facility, level, text, text_len))
+ log_store(facility, level, LOG_DEFAULT, 0,
+ dict, dictlen, text, text_len);
} else {
- if (cont_len && cont_task == current) {
- if (prefix) {
- /*
- * New prefix from the same thread; flush. We
- * either got no earlier newline, or we race
- * with an interrupt.
- */
- log_store(facility, cont_level,
- NULL, 0, cont_buf, cont_len);
- cont_len = 0;
- }
+ bool stored = false;
- /* append to the earlier buffer and flush */
- if (cont_len + text_len > sizeof(cont_buf))
- text_len = sizeof(cont_buf) - cont_len;
- memcpy(cont_buf + cont_len, text, text_len);
- cont_len += text_len;
- log_store(facility, cont_level,
- NULL, 0, cont_buf, cont_len);
- cont_len = 0;
- cont_task = NULL;
- printed_len = cont_len;
- } else {
- /* ordinary single and terminated line */
- log_store(facility, level,
- dict, dictlen, text, text_len);
- printed_len = text_len;
+ /*
+ * If an earlier newline was missing and it was the same task,
+ * either merge it with the current buffer and flush, or if
+ * there was a race with interrupts (prefix == true) then just
+ * flush it out and store this line separately.
+ */
+ if (cont.len && cont.owner == current) {
+ if (!prefix)
+ stored = cont_add(facility, level, text, text_len);
+ cont_flush();
}
+
+ if (!stored)
+ log_store(facility, level, LOG_DEFAULT, 0,
+ dict, dictlen, text, text_len);
}
+ printed_len += text_len;
/*
* Try to acquire and then immediately release the console semaphore.
@@ -1499,11 +1598,18 @@ EXPORT_SYMBOL(printk);
#else
#define LOG_LINE_MAX 0
+static struct cont {
+ size_t len;
+ size_t cons;
+ u8 level;
+ bool flushed:1;
+} cont;
static struct log *log_from_idx(u32 idx) { return NULL; }
static u32 log_next(u32 idx) { return 0; }
static void call_console_drivers(int level, const char *text, size_t len) {}
static size_t msg_print_text(const struct log *msg, bool syslog,
char *buf, size_t size) { return 0; }
+static size_t cont_print_text(char *text, size_t size) { return 0; }
#endif /* CONFIG_PRINTK */
@@ -1795,6 +1901,7 @@ static u32 console_idx;
*/
void console_unlock(void)
{
+ static char text[LOG_LINE_MAX];
static u64 seen_seq;
unsigned long flags;
bool wake_klogd = false;
@@ -1807,10 +1914,23 @@ void console_unlock(void)
console_may_schedule = 0;
+ /* flush buffered message fragment immediately to console */
+ raw_spin_lock_irqsave(&logbuf_lock, flags);
+ if (cont.len && (cont.cons < cont.len || cont.flushed)) {
+ size_t len;
+
+ len = cont_print_text(text, sizeof(text));
+ raw_spin_unlock(&logbuf_lock);
+ stop_critical_timings();
+ call_console_drivers(cont.level, text, len);
+ start_critical_timings();
+ local_irq_restore(flags);
+ } else
+ raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+
again:
for (;;) {
struct log *msg;
- static char text[LOG_LINE_MAX];
size_t len;
int level;
@@ -1825,13 +1945,22 @@ again:
console_seq = log_first_seq;
console_idx = log_first_idx;
}
-
+skip:
if (console_seq == log_next_seq)
break;
msg = log_from_idx(console_idx);
- level = msg->level & 7;
+ if (msg->flags & LOG_NOCONS) {
+ /*
+ * Skip record we have buffered and already printed
+ * directly to the console when we received it.
+ */
+ console_idx = log_next(console_idx);
+ console_seq++;
+ goto skip;
+ }
+ level = msg->level;
len = msg_print_text(msg, false, text, sizeof(text));
console_idx = log_next(console_idx);
@@ -2409,7 +2538,7 @@ EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
* kmsg_dump_get_buffer - copy kmsg log lines
* @dumper: registered kmsg dumper
* @syslog: include the "<4>" prefixes
- * @line: buffer to copy the line to
+ * @buf: buffer to copy the line to
* @size: maximum size of the buffer
* @len: length of line placed into buffer
*
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 3b0f133..38ecdda 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1530,7 +1530,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
{
unsigned long flags;
struct rcu_head *next, *list, **tail;
- int bl, count, count_lazy;
+ int bl, count, count_lazy, i;
/* If no callbacks are ready, just return.*/
if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -1553,9 +1553,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
*rdp->nxttail[RCU_DONE_TAIL] = NULL;
tail = rdp->nxttail[RCU_DONE_TAIL];
- for (count = RCU_NEXT_SIZE - 1; count >= 0; count--)
- if (rdp->nxttail[count] == rdp->nxttail[RCU_DONE_TAIL])
- rdp->nxttail[count] = &rdp->nxtlist;
+ for (i = RCU_NEXT_SIZE - 1; i >= 0; i--)
+ if (rdp->nxttail[i] == rdp->nxttail[RCU_DONE_TAIL])
+ rdp->nxttail[i] = &rdp->nxtlist;
local_irq_restore(flags);
/* Invoke callbacks. */
@@ -1583,9 +1583,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
if (list != NULL) {
*tail = rdp->nxtlist;
rdp->nxtlist = list;
- for (count = 0; count < RCU_NEXT_SIZE; count++)
- if (&rdp->nxtlist == rdp->nxttail[count])
- rdp->nxttail[count] = tail;
+ for (i = 0; i < RCU_NEXT_SIZE; i++)
+ if (&rdp->nxtlist == rdp->nxttail[i])
+ rdp->nxttail[i] = tail;
else
break;
}
diff --git a/kernel/relay.c b/kernel/relay.c
index ab56a17..e8cd202 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in,
struct splice_pipe_desc spd = {
.pages = pages,
.nr_pages = 0,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.partial = partial,
.flags = flags,
.ops = &relay_pipe_buf_ops,
@@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in,
ret += padding;
out:
- splice_shrink_spd(pipe, &spd);
- return ret;
+ splice_shrink_spd(&spd);
+ return ret;
}
static ssize_t relay_file_splice_read(struct file *in,
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 49249c2..a7fa070 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3609,6 +3609,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
.pages = pages_def,
.partial = partial_def,
.nr_pages = 0, /* This gets updated below. */
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &tracing_pipe_buf_ops,
.spd_release = tracing_spd_release_pipe,
@@ -3680,7 +3681,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
ret = splice_to_pipe(pipe, &spd);
out:
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
return ret;
out_err:
@@ -4231,6 +4232,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
struct splice_pipe_desc spd = {
.pages = pages_def,
.partial = partial_def,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &buffer_pipe_buf_ops,
.spd_release = buffer_spd_release,
@@ -4318,7 +4320,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
}
ret = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
out:
return ret;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index a15a466..4ce02e0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1594,6 +1594,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = PIPE_DEF_BUFFERS,
.flags = flags,
.ops = &page_cache_pipe_buf_ops,
.spd_release = spd_release_page,
@@ -1682,7 +1683,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
if (spd.nr_pages)
error = splice_to_pipe(pipe, &spd);
- splice_shrink_spd(pipe, &spd);
+ splice_shrink_spd(&spd);
if (error > 0) {
*ppos += error;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 20fa719..b98d3d7 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -120,7 +120,9 @@ static u32 *fake_cow_metrics(struct dst_entry *dst, unsigned long old)
return NULL;
}
-static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *fake_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
return NULL;
}
@@ -373,19 +375,29 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
if (!skb->dev)
goto free_skb;
dst = skb_dst(skb);
- neigh = dst_get_neighbour_noref(dst);
- if (neigh->hh.hh_len) {
- neigh_hh_bridge(&neigh->hh, skb);
- skb->dev = nf_bridge->physindev;
- return br_handle_frame_finish(skb);
- } else {
- /* the neighbour function below overwrites the complete
- * MAC header, so we save the Ethernet source address and
- * protocol number. */
- skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
- /* tell br_dev_xmit to continue with forwarding */
- nf_bridge->mask |= BRNF_BRIDGED_DNAT;
- return neigh->output(neigh, skb);
+ neigh = dst_neigh_lookup_skb(dst, skb);
+ if (neigh) {
+ int ret;
+
+ if (neigh->hh.hh_len) {
+ neigh_hh_bridge(&neigh->hh, skb);
+ skb->dev = nf_bridge->physindev;
+ ret = br_handle_frame_finish(skb);
+ } else {
+ /* the neighbour function below overwrites the complete
+ * MAC header, so we save the Ethernet source address and
+ * protocol number.
+ */
+ skb_copy_from_linear_data_offset(skb,
+ -(ETH_HLEN-ETH_ALEN),
+ skb->nf_bridge->data,
+ ETH_HLEN-ETH_ALEN);
+ /* tell br_dev_xmit to continue with forwarding */
+ nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+ ret = neigh->output(neigh, skb);
+ }
+ neigh_release(neigh);
+ return ret;
}
free_skb:
kfree_skb(skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index ed674e2..69f7a1a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1793,6 +1793,17 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
+/* netif_get_num_default_rss_queues - default number of RSS queues
+ *
+ * This routine should set an upper limit on the number of RSS queues
+ * used by default by multiqueue devices.
+ */
+int netif_get_num_default_rss_queues()
+{
+ return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+}
+EXPORT_SYMBOL(netif_get_num_default_rss_queues);
+
static inline void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
diff --git a/net/core/dst.c b/net/core/dst.c
index 43d94ce..07bacff 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -152,7 +152,7 @@ EXPORT_SYMBOL(dst_discard);
const u32 dst_default_metrics[RTAX_MAX];
void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
- int initial_ref, int initial_obsolete, int flags)
+ int initial_ref, int initial_obsolete, unsigned short flags)
{
struct dst_entry *dst;
@@ -171,7 +171,6 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst_init_metrics(dst, dst_default_metrics, true);
dst->expires = 0UL;
dst->path = dst;
- RCU_INIT_POINTER(dst->_neighbour, NULL);
#ifdef CONFIG_XFRM
dst->xfrm = NULL;
#endif
@@ -188,6 +187,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst->__use = 0;
dst->lastuse = jiffies;
dst->flags = flags;
+ dst->pending_confirm = 0;
dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
@@ -224,19 +224,12 @@ EXPORT_SYMBOL(__dst_free);
struct dst_entry *dst_destroy(struct dst_entry * dst)
{
struct dst_entry *child;
- struct neighbour *neigh;
smp_rmb();
again:
- neigh = rcu_dereference_protected(dst->_neighbour, 1);
child = dst->child;
- if (neigh) {
- RCU_INIT_POINTER(dst->_neighbour, NULL);
- neigh_release(neigh);
- }
-
if (!(dst->flags & DST_NOCOUNT))
dst_entries_add(dst->ops, -1);
@@ -360,19 +353,9 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst->output = dst_discard;
} else {
- struct neighbour *neigh;
-
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
dev_put(dev);
- rcu_read_lock();
- neigh = dst_get_neighbour_noref(dst);
- if (neigh && neigh->dev == dev) {
- neigh->dev = dst->dev;
- dev_hold(dst->dev);
- dev_put(dev);
- }
- rcu_read_unlock();
}
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d81d026..117afaf 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -474,8 +474,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
}
EXPORT_SYMBOL(neigh_lookup_nodev);
-struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
- struct net_device *dev)
+struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
+ struct net_device *dev, bool want_ref)
{
u32 hash_val;
int key_len = tbl->key_len;
@@ -535,14 +535,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
n1 = rcu_dereference_protected(n1->next,
lockdep_is_held(&tbl->lock))) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
- neigh_hold(n1);
+ if (want_ref)
+ neigh_hold(n1);
rc = n1;
goto out_tbl_unlock;
}
}
n->dead = 0;
- neigh_hold(n);
+ if (want_ref)
+ neigh_hold(n);
rcu_assign_pointer(n->next,
rcu_dereference_protected(nht->hash_buckets[hash_val],
lockdep_is_held(&tbl->lock)));
@@ -558,7 +560,7 @@ out_neigh_release:
neigh_release(n);
goto out;
}
-EXPORT_SYMBOL(neigh_create);
+EXPORT_SYMBOL(__neigh_create);
static u32 pneigh_hash(const void *pkey, int key_len)
{
@@ -1199,10 +1201,23 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
write_unlock_bh(&neigh->lock);
rcu_read_lock();
- /* On shaper/eql skb->dst->neighbour != neigh :( */
- if (dst && (n2 = dst_get_neighbour_noref(dst)) != NULL)
- n1 = n2;
+
+ /* Why not just use 'neigh' as-is? The problem is that
+ * things such as shaper, eql, and sch_teql can end up
+ * using alternative, different, neigh objects to output
+ * the packet in the output path. So what we need to do
+ * here is re-lookup the top-level neigh in the path so
+ * we can reinject the packet there.
+ */
+ n2 = NULL;
+ if (dst) {
+ n2 = dst_neigh_lookup_skb(dst, skb);
+ if (n2)
+ n1 = n2;
+ }
n1->output(n1, skb);
+ if (n2)
+ neigh_release(n2);
rcu_read_unlock();
write_lock_bh(&neigh->lock);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5b21522..5a789a8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1755,6 +1755,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
struct splice_pipe_desc spd = {
.pages = pages,
.partial = partial,
+ .nr_pages_max = MAX_SKB_FRAGS,
.flags = flags,
.ops = &sock_pipe_buf_ops,
.spd_release = sock_spd_release,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9991be0..02162cf 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -239,7 +239,6 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
struct inet6_request_sock *ireq6 = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff *skb;
- struct ipv6_txoptions *opt = NULL;
struct in6_addr *final_p, final;
struct flowi6 fl6;
int err = -1;
@@ -255,9 +254,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
fl6.fl6_sport = inet_rsk(req)->loc_port;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
- opt = np->opt;
- final_p = fl6_update_dst(&fl6, opt, &final);
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
if (IS_ERR(dst)) {
@@ -274,13 +272,11 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
&ireq6->loc_addr,
&ireq6->rmt_addr);
fl6.daddr = ireq6->rmt_addr;
- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
done:
- if (opt != NULL && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
return err;
}
@@ -475,7 +471,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
struct sock *newsk;
- struct ipv6_txoptions *opt;
if (skb->protocol == htons(ETH_P_IP)) {
/*
@@ -520,7 +515,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
return newsk;
}
- opt = np->opt;
if (sk_acceptq_is_full(sk))
goto out_overflow;
@@ -532,7 +526,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_DCCP;
fl6.daddr = ireq6->rmt_addr;
- final_p = fl6_update_dst(&fl6, opt, &final);
+ final_p = fl6_update_dst(&fl6, np->opt, &final);
fl6.saddr = ireq6->loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.fl6_dport = inet_rsk(req)->rmt_port;
@@ -597,11 +591,8 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
* Yes, keeping reference count would be much more clever, but we make
* one more one thing there: reattach optmem to newsk.
*/
- if (opt != NULL) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- }
+ if (np->opt != NULL)
+ newnp->opt = ipv6_dup_options(newsk, np->opt);
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt != NULL)
@@ -627,8 +618,6 @@ out_nonewsk:
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
- if (opt != NULL && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
return NULL;
}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 8e9a35b..3aede1b 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -202,7 +202,7 @@ static int dn_neigh_output_packet(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
- struct neighbour *neigh = dst_get_neighbour_noref(dst);
+ struct neighbour *neigh = rt->n;
struct net_device *dev = neigh->dev;
char mac_addr[ETH_ALEN];
unsigned int seq;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 2493ed5..6e74b3f 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -114,10 +114,13 @@ static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
static unsigned int dn_dst_mtu(const struct dst_entry *dst);
static void dn_dst_destroy(struct dst_entry *);
+static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
static void dn_dst_link_failure(struct sk_buff *);
static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu);
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr);
+static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr);
static int dn_route_input(struct sk_buff *);
static void dn_run_flush(unsigned long dummy);
@@ -138,6 +141,7 @@ static struct dst_ops dn_dst_ops = {
.mtu = dn_dst_mtu,
.cow_metrics = dst_cow_metrics_generic,
.destroy = dn_dst_destroy,
+ .ifdown = dn_dst_ifdown,
.negative_advice = dn_dst_negative_advice,
.link_failure = dn_dst_link_failure,
.update_pmtu = dn_dst_update_pmtu,
@@ -146,9 +150,27 @@ static struct dst_ops dn_dst_ops = {
static void dn_dst_destroy(struct dst_entry *dst)
{
+ struct dn_route *rt = (struct dn_route *) dst;
+
+ if (rt->n)
+ neigh_release(rt->n);
dst_destroy_metrics_generic(dst);
}
+static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how)
+{
+ if (how) {
+ struct dn_route *rt = (struct dn_route *) dst;
+ struct neighbour *n = rt->n;
+
+ if (n && n->dev == dev) {
+ n->dev = dev_net(dev)->loopback_dev;
+ dev_hold(n->dev);
+ dev_put(dev);
+ }
+ }
+}
+
static __inline__ unsigned int dn_hash(__le16 src, __le16 dst)
{
__u16 tmp = (__u16 __force)(src ^ dst);
@@ -244,7 +266,8 @@ static int dn_dst_gc(struct dst_ops *ops)
*/
static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
{
- struct neighbour *n = dst_get_neighbour_noref(dst);
+ struct dn_route *rt = (struct dn_route *) dst;
+ struct neighbour *n = rt->n;
u32 min_mtu = 230;
struct dn_dev *dn;
@@ -713,7 +736,8 @@ out:
static int dn_to_neigh_output(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- struct neighbour *n = dst_get_neighbour_noref(dst);
+ struct dn_route *rt = (struct dn_route *) dst;
+ struct neighbour *n = rt->n;
return n->output(n, skb);
}
@@ -727,7 +751,7 @@ static int dn_output(struct sk_buff *skb)
int err = -EINVAL;
- if (dst_get_neighbour_noref(dst) == NULL)
+ if (rt->n == NULL)
goto error;
skb->dev = dev;
@@ -828,7 +852,9 @@ static unsigned int dn_dst_mtu(const struct dst_entry *dst)
return mtu ? : dst->dev->mtu;
}
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev);
}
@@ -848,11 +874,11 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
}
rt->rt_type = res->type;
- if (dev != NULL && dst_get_neighbour_noref(&rt->dst) == NULL) {
+ if (dev != NULL && rt->n == NULL) {
n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
if (IS_ERR(n))
return PTR_ERR(n);
- dst_set_neighbour(&rt->dst, n);
+ rt->n = n;
}
if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
@@ -1159,7 +1185,7 @@ make_route:
rt->rt_dst_map = fld.daddr;
rt->rt_src_map = fld.saddr;
- dst_set_neighbour(&rt->dst, neigh);
+ rt->n = neigh;
neigh = NULL;
rt->dst.lastuse = jiffies;
@@ -1429,7 +1455,7 @@ make_route:
rt->fld.flowidn_iif = in_dev->ifindex;
rt->fld.flowidn_mark = fld.flowidn_mark;
- dst_set_neighbour(&rt->dst, neigh);
+ rt->n = neigh;
rt->dst.lastuse = jiffies;
rt->dst.output = dn_rt_bug;
switch (res.type) {
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index cd5007f..f4070e5 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -55,7 +55,6 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
#include <net/af_ieee802154.h>
#include <net/ieee802154.h>
#include <net/ieee802154_netdev.h>
@@ -936,6 +935,19 @@ drop:
return -EINVAL;
}
+static int lowpan_set_address(struct net_device *dev, void *p)
+{
+ struct sockaddr *sa = p;
+
+ if (netif_running(dev))
+ return -EBUSY;
+
+ /* TODO: validate addr */
+ memcpy(dev->dev_addr, sa->sa_data, dev->addr_len);
+
+ return 0;
+}
+
static int lowpan_get_mac_header_length(struct sk_buff *skb)
{
/*
@@ -1078,7 +1090,7 @@ static struct header_ops lowpan_header_ops = {
static const struct net_device_ops lowpan_netdev_ops = {
.ndo_start_xmit = lowpan_xmit,
- .ndo_set_mac_address = eth_mac_addr,
+ .ndo_set_mac_address = lowpan_set_address,
};
static struct ieee802154_mlme_ops lowpan_mlme = {
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3e11ea2..81f8571 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -86,6 +86,24 @@ struct fib_table *fib_new_table(struct net *net, u32 id)
tb = fib_trie_table(id);
if (!tb)
return NULL;
+
+ switch (id) {
+ case RT_TABLE_LOCAL:
+ net->ipv4.fib_local = tb;
+ break;
+
+ case RT_TABLE_MAIN:
+ net->ipv4.fib_main = tb;
+ break;
+
+ case RT_TABLE_DEFAULT:
+ net->ipv4.fib_default = tb;
+ break;
+
+ default:
+ break;
+ }
+
h = id & (FIB_TABLE_HASHSZ - 1);
hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]);
return tb;
@@ -218,10 +236,6 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
return inet_select_addr(dev, ip_hdr(skb)->saddr, scope);
}
-#ifdef CONFIG_IP_ROUTE_CLASSID
-int fib_num_tclassid_users __read_mostly;
-#endif
-
/* Given (packet source, input interface) and optional (dst, oif, tos):
* - (main) check, that source is valid i.e. not broadcast or our local
* address.
@@ -312,7 +326,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
{
int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
- if (!r && !fib_num_tclassid_users) {
+ if (!r && !fib_num_tclassid_users(dev_net(dev))) {
*itag = 0;
return 0;
}
@@ -1134,6 +1148,9 @@ static int __net_init fib_net_init(struct net *net)
{
int error;
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ net->ipv4.fib_num_tclassid_users = 0;
+#endif
error = ip_fib_net_init(net);
if (error < 0)
goto out;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index b23fd95..c06da93 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -54,7 +54,7 @@ u32 fib_rules_tclass(const struct fib_result *res)
}
#endif
-int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
+int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
{
struct fib_lookup_arg arg = {
.result = res,
@@ -67,7 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
return err;
}
-EXPORT_SYMBOL_GPL(fib_lookup);
+EXPORT_SYMBOL_GPL(__fib_lookup);
static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
@@ -172,7 +172,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
if (tb[FRA_FLOW]) {
rule4->tclassid = nla_get_u32(tb[FRA_FLOW]);
if (rule4->tclassid)
- fib_num_tclassid_users++;
+ net->ipv4.fib_num_tclassid_users++;
}
#endif
@@ -182,6 +182,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule4->dstmask = inet_make_mask(rule4->dst_len);
rule4->tos = frh->tos;
+ net->ipv4.fib_has_custom_rules = true;
err = 0;
errout:
return err;
@@ -189,12 +190,14 @@ errout:
static void fib4_rule_delete(struct fib_rule *rule)
{
+ struct net *net = rule->fr_net;
#ifdef CONFIG_IP_ROUTE_CLASSID
struct fib4_rule *rule4 = (struct fib4_rule *) rule;
if (rule4->tclassid)
- fib_num_tclassid_users--;
+ net->ipv4.fib_num_tclassid_users--;
#endif
+ net->ipv4.fib_has_custom_rules = true;
}
static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
@@ -309,6 +312,7 @@ int __net_init fib4_rules_init(struct net *net)
if (err < 0)
goto fail;
net->ipv4.rules_ops = ops;
+ net->ipv4.fib_has_custom_rules = false;
return 0;
fail:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c46c20b..ae301c8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -166,7 +166,7 @@ void free_fib_info(struct fib_info *fi)
#ifdef CONFIG_IP_ROUTE_CLASSID
change_nexthops(fi) {
if (nexthop_nh->nh_tclassid)
- fib_num_tclassid_users--;
+ fi->fib_net->ipv4.fib_num_tclassid_users--;
} endfor_nexthops(fi);
#endif
call_rcu(&fi->rcu, free_fib_info_rcu);
@@ -428,7 +428,7 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
nla = nla_find(attrs, attrlen, RTA_FLOW);
nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
if (nexthop_nh->nh_tclassid)
- fib_num_tclassid_users++;
+ fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif
}
@@ -824,7 +824,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid = cfg->fc_flow;
if (nh->nh_tclassid)
- fib_num_tclassid_users++;
+ fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
nh->nh_weight = 1;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 766dfe56..a19d647 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -242,6 +242,15 @@ void ip_options_fragment(struct sk_buff *skb)
opt->ts_needtime = 0;
}
+/* helper used by ip_options_compile() to call fib_compute_spec_dst()
+ * at most one time.
+ */
+static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb)
+{
+ if (*spec_dst == htonl(INADDR_ANY))
+ *spec_dst = fib_compute_spec_dst(skb);
+}
+
/*
* Verify options and fill pointers in struct options.
* Caller should clear *opt, and set opt->data.
@@ -251,14 +260,15 @@ void ip_options_fragment(struct sk_buff *skb)
int ip_options_compile(struct net *net,
struct ip_options *opt, struct sk_buff *skb)
{
- __be32 spec_dst = (__force __be32) 0;
+ __be32 spec_dst = htonl(INADDR_ANY);
unsigned char *pp_ptr = NULL;
+ struct rtable *rt = NULL;
unsigned char *optptr;
unsigned char *iph;
int optlen, l;
if (skb != NULL) {
- spec_dst = fib_compute_spec_dst(skb);
+ rt = skb_rtable(skb);
optptr = (unsigned char *)&(ip_hdr(skb)[1]);
} else
optptr = opt->__data;
@@ -330,7 +340,8 @@ int ip_options_compile(struct net *net,
pp_ptr = optptr + 2;
goto error;
}
- if (skb) {
+ if (rt) {
+ spec_dst_fill(&spec_dst, skb);
memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
opt->is_changed = 1;
}
@@ -372,7 +383,8 @@ int ip_options_compile(struct net *net,
goto error;
}
opt->ts = optptr - iph;
- if (skb) {
+ if (rt) {
+ spec_dst_fill(&spec_dst, skb);
memcpy(&optptr[optptr[2]-1], &spec_dst, 4);
timeptr = &optptr[optptr[2]+3];
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 2630900..cc52679 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -170,6 +170,7 @@ static inline int ip_finish_output2(struct sk_buff *skb)
struct net_device *dev = dst->dev;
unsigned int hh_len = LL_RESERVED_SPACE(dev);
struct neighbour *neigh;
+ u32 nexthop;
if (rt->rt_type == RTN_MULTICAST) {
IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
@@ -191,15 +192,18 @@ static inline int ip_finish_output2(struct sk_buff *skb)
skb = skb2;
}
- rcu_read_lock();
- neigh = dst_get_neighbour_noref(dst);
+ rcu_read_lock_bh();
+ nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr;
+ neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
+ if (unlikely(!neigh))
+ neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
if (neigh) {
- int res = neigh_output(neigh, skb);
+ int res = dst_neigh_output(dst, neigh, skb);
- rcu_read_unlock();
+ rcu_read_unlock_bh();
return res;
}
- rcu_read_unlock();
+ rcu_read_unlock_bh();
net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
__func__);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 041923c..5241d99 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -337,34 +337,62 @@ static struct ctl_table icmp_compat_sysctl_table[] = {
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
-static int icmp_init_net(struct net *net)
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
{
- struct nf_icmp_net *in = icmp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)in;
- in->timeout = nf_ct_icmp_timeout;
-
#ifdef CONFIG_SYSCTL
pn->ctl_table = kmemdup(icmp_sysctl_table,
sizeof(icmp_sysctl_table),
GFP_KERNEL);
if (!pn->ctl_table)
return -ENOMEM;
+
pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
sizeof(icmp_compat_sysctl_table),
GFP_KERNEL);
- if (!pn->ctl_compat_table) {
- kfree(pn->ctl_table);
- pn->ctl_table = NULL;
+ if (!pn->ctl_compat_table)
return -ENOMEM;
- }
+
pn->ctl_compat_table[0].data = &in->timeout;
#endif
#endif
return 0;
}
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+ int ret;
+ struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmp_timeout;
+
+ ret = icmp_kmemdup_compat_sysctl_table(pn, in);
+ if (ret < 0)
+ return ret;
+
+ ret = icmp_kmemdup_sysctl_table(pn, in);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+
+ return ret;
+}
+
+static struct nf_proto_net *icmp_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp.pn;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
.l3proto = PF_INET,
@@ -395,4 +423,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
.init_net = icmp_init_net,
+ .get_net_proto = icmp_get_net_proto,
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6a5afc7..72e88c2 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -188,7 +188,9 @@ static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
return p;
}
-static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr);
+static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr);
static struct dst_ops ipv4_dst_ops = {
.family = AF_INET,
@@ -418,13 +420,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
"HHUptod\tSpecDst");
else {
struct rtable *r = v;
- struct neighbour *n;
- int len, HHUptod;
-
- rcu_read_lock();
- n = dst_get_neighbour_noref(&r->dst);
- HHUptod = (n && (n->nud_state & NUD_CONNECTED)) ? 1 : 0;
- rcu_read_unlock();
+ int len;
seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
"%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
@@ -438,9 +434,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
(int)((dst_metric(&r->dst, RTAX_RTT) >> 3) +
dst_metric(&r->dst, RTAX_RTTVAR)),
r->rt_key_tos,
- -1,
- HHUptod,
- 0, &len);
+ -1, 0, 0, &len);
seq_printf(seq, "%*s\n", 127 - len, "");
}
@@ -1096,20 +1090,20 @@ static int slow_chain_length(const struct rtable *head)
return length >> FRACT_BITS;
}
-static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
- static const __be32 inaddr_any = 0;
struct net_device *dev = dst->dev;
const __be32 *pkey = daddr;
const struct rtable *rt;
struct neighbour *n;
rt = (const struct rtable *) dst;
-
- if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
- pkey = &inaddr_any;
- else if (rt->rt_gateway)
+ if (rt->rt_gateway)
pkey = (const __be32 *) &rt->rt_gateway;
+ else if (skb)
+ pkey = &ip_hdr(skb)->daddr;
n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
if (n)
@@ -1117,16 +1111,6 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, const vo
return neigh_create(&arp_tbl, pkey, dev);
}
-static int rt_bind_neighbour(struct rtable *rt)
-{
- struct neighbour *n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
- if (IS_ERR(n))
- return PTR_ERR(n);
- dst_set_neighbour(&rt->dst, n);
-
- return 0;
-}
-
static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt,
struct sk_buff *skb, int ifindex)
{
@@ -1135,7 +1119,6 @@ static struct rtable *rt_intern_hash(unsigned int hash, struct rtable *rt,
unsigned long now;
u32 min_score;
int chain_length;
- int attempts = !in_softirq();
restart:
chain_length = 0;
@@ -1162,15 +1145,6 @@ restart:
*/
rt->dst.flags |= DST_NOCACHE;
- if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
- int err = rt_bind_neighbour(rt);
- if (err) {
- net_warn_ratelimited("Neighbour table failure & not caching routes\n");
- ip_rt_put(rt);
- return ERR_PTR(err);
- }
- }
-
goto skip_hashing;
}
@@ -1253,40 +1227,6 @@ restart:
}
}
- /* Try to bind route to arp only if it is output
- route or unicast forwarding path.
- */
- if (rt->rt_type == RTN_UNICAST || rt_is_output_route(rt)) {
- int err = rt_bind_neighbour(rt);
- if (err) {
- spin_unlock_bh(rt_hash_lock_addr(hash));
-
- if (err != -ENOBUFS) {
- rt_drop(rt);
- return ERR_PTR(err);
- }
-
- /* Neighbour tables are full and nothing
- can be released. Try to shrink route cache,
- it is most likely it holds some neighbour records.
- */
- if (attempts-- > 0) {
- int saved_elasticity = ip_rt_gc_elasticity;
- int saved_int = ip_rt_gc_min_interval;
- ip_rt_gc_elasticity = 1;
- ip_rt_gc_min_interval = 0;
- rt_garbage_collect(&ipv4_dst_ops);
- ip_rt_gc_min_interval = saved_int;
- ip_rt_gc_elasticity = saved_elasticity;
- goto restart;
- }
-
- net_warn_ratelimited("Neighbour table overflow\n");
- rt_drop(rt);
- return ERR_PTR(-ENOBUFS);
- }
- }
-
rt->dst.rt_next = rt_hash_table[hash].chain;
/*
@@ -1394,26 +1334,24 @@ static void check_peer_redir(struct dst_entry *dst, struct inet_peer *peer)
{
struct rtable *rt = (struct rtable *) dst;
__be32 orig_gw = rt->rt_gateway;
- struct neighbour *n, *old_n;
+ struct neighbour *n;
dst_confirm(&rt->dst);
rt->rt_gateway = peer->redirect_learned.a4;
- n = ipv4_neigh_lookup(&rt->dst, &rt->rt_gateway);
- if (IS_ERR(n)) {
+ n = ipv4_neigh_lookup(&rt->dst, NULL, &rt->rt_gateway);
+ if (!n) {
rt->rt_gateway = orig_gw;
return;
}
- old_n = xchg(&rt->dst._neighbour, n);
- if (old_n)
- neigh_release(old_n);
if (!(n->nud_state & NUD_VALID)) {
neigh_event_send(n, NULL);
} else {
rt->rt_flags |= RTCF_REDIRECTED;
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
}
+ neigh_release(n);
}
/* called in rcu_read_lock() section */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8416f8a..ca0d0e7 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -740,13 +740,13 @@ void tcp_update_metrics(struct sock *sk)
if (sysctl_tcp_nometrics_save)
return;
- dst_confirm(dst);
-
if (dst && (dst->flags & DST_HOST)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
int m;
unsigned long rtt;
+ dst_confirm(dst);
+
if (icsk->icsk_backoff || !tp->srtt) {
/* This session failed to estimate rtt. Why?
* Probably, no packets returned in time.
@@ -3869,9 +3869,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
tcp_cong_avoid(sk, ack, prior_in_flight);
}
- if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
- dst_confirm(__sk_dst_get(sk));
-
+ if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
+ struct dst_entry *dst = __sk_dst_get(sk);
+ if (dst)
+ dst_confirm(dst);
+ }
return 1;
no_queue:
@@ -6140,9 +6142,14 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
case TCP_FIN_WAIT1:
if (tp->snd_una == tp->write_seq) {
+ struct dst_entry *dst;
+
tcp_set_state(sk, TCP_FIN_WAIT2);
sk->sk_shutdown |= SEND_SHUTDOWN;
- dst_confirm(__sk_dst_get(sk));
+
+ dst = __sk_dst_get(sk);
+ if (dst)
+ dst_confirm(dst);
if (!sock_flag(sk, SOCK_DEAD))
/* Wake up lingering close() */
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a233a7c..c6af596 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -88,6 +88,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
struct neighbour *neigh;
+ struct rt6_info *rt;
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
@@ -123,9 +124,10 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock();
- neigh = dst_get_neighbour_noref(dst);
+ rt = (struct rt6_info *) dst;
+ neigh = rt->n;
if (neigh) {
- int res = neigh_output(neigh, skb);
+ int res = dst_neigh_output(dst, neigh, skb);
rcu_read_unlock();
return res;
@@ -944,6 +946,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
struct neighbour *n;
+ struct rt6_info *rt;
#endif
int err;
@@ -972,7 +975,8 @@ static int ip6_dst_lookup_tail(struct sock *sk,
* dst entry of the nexthop router
*/
rcu_read_lock();
- n = dst_get_neighbour_noref(*dst);
+ rt = (struct rt6_info *) *dst;
+ n = rt->n;
if (n && !(n->nud_state & NUD_VALID)) {
struct inet6_ifaddr *ifp;
struct flowi6 fl_gw6;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 63ed012..2d54b20 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -333,22 +333,36 @@ static struct ctl_table icmpv6_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static int icmpv6_init_net(struct net *net)
+static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
{
- struct nf_icmp_net *in = icmpv6_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)in;
- in->timeout = nf_ct_icmpv6_timeout;
#ifdef CONFIG_SYSCTL
pn->ctl_table = kmemdup(icmpv6_sysctl_table,
sizeof(icmpv6_sysctl_table),
GFP_KERNEL);
if (!pn->ctl_table)
return -ENOMEM;
+
pn->ctl_table[0].data = &in->timeout;
#endif
return 0;
}
+static int icmpv6_init_net(struct net *net, u_int16_t proto)
+{
+ struct nf_icmp_net *in = icmpv6_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmpv6_timeout;
+
+ return icmpv6_kmemdup_sysctl_table(pn, in);
+}
+
+static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmpv6.pn;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
{
.l3proto = PF_INET6,
@@ -377,4 +391,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
.init_net = icmpv6_init_net,
+ .get_net_proto = icmpv6_get_net_proto,
};
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c518e4e..6cc6c88 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -120,21 +120,27 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
return p;
}
-static inline const void *choose_neigh_daddr(struct rt6_info *rt, const void *daddr)
+static inline const void *choose_neigh_daddr(struct rt6_info *rt,
+ struct sk_buff *skb,
+ const void *daddr)
{
struct in6_addr *p = &rt->rt6i_gateway;
if (!ipv6_addr_any(p))
return (const void *) p;
+ else if (skb)
+ return &ipv6_hdr(skb)->daddr;
return daddr;
}
-static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
struct rt6_info *rt = (struct rt6_info *) dst;
struct neighbour *n;
- daddr = choose_neigh_daddr(rt, daddr);
+ daddr = choose_neigh_daddr(rt, skb, daddr);
n = __ipv6_neigh_lookup(&nd_tbl, dst->dev, daddr);
if (n)
return n;
@@ -149,7 +155,7 @@ static int rt6_bind_neighbour(struct rt6_info *rt, struct net_device *dev)
if (IS_ERR(n))
return PTR_ERR(n);
}
- dst_set_neighbour(&rt->dst, n);
+ rt->n = n;
return 0;
}
@@ -267,7 +273,7 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
0, 0, flags);
if (rt) {
- memset(&rt->rt6i_table, 0,
+ memset(&rt->n, 0,
sizeof(*rt) - sizeof(struct dst_entry));
rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
}
@@ -279,6 +285,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
struct rt6_info *rt = (struct rt6_info *)dst;
struct inet6_dev *idev = rt->rt6i_idev;
+ if (rt->n)
+ neigh_release(rt->n);
+
if (!(rt->dst.flags & DST_HOST))
dst_destroy_metrics_generic(dst);
@@ -329,12 +338,19 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;
- if (dev != loopback_dev && idev && idev->dev == dev) {
- struct inet6_dev *loopback_idev =
- in6_dev_get(loopback_dev);
- if (loopback_idev) {
- rt->rt6i_idev = loopback_idev;
- in6_dev_put(idev);
+ if (dev != loopback_dev) {
+ if (idev && idev->dev == dev) {
+ struct inet6_dev *loopback_idev =
+ in6_dev_get(loopback_dev);
+ if (loopback_idev) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
+ }
+ }
+ if (rt->n && rt->n->dev == dev) {
+ rt->n->dev = loopback_dev;
+ dev_hold(loopback_dev);
+ dev_put(dev);
}
}
}
@@ -424,7 +440,7 @@ static void rt6_probe(struct rt6_info *rt)
* to no more than one per minute.
*/
rcu_read_lock();
- neigh = rt ? dst_get_neighbour_noref(&rt->dst) : NULL;
+ neigh = rt ? rt->n : NULL;
if (!neigh || (neigh->nud_state & NUD_VALID))
goto out;
read_lock_bh(&neigh->lock);
@@ -471,7 +487,7 @@ static inline int rt6_check_neigh(struct rt6_info *rt)
int m;
rcu_read_lock();
- neigh = dst_get_neighbour_noref(&rt->dst);
+ neigh = rt->n;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
m = 1;
@@ -818,7 +834,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort,
if (rt) {
rt->rt6i_flags |= RTF_CACHE;
- dst_set_neighbour(&rt->dst, neigh_clone(dst_get_neighbour_noref_raw(&ort->dst)));
+ rt->n = neigh_clone(ort->n);
}
return rt;
}
@@ -852,7 +868,7 @@ restart:
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
- if (!dst_get_neighbour_noref_raw(&rt->dst) && !(rt->rt6i_flags & RTF_NONEXTHOP))
+ if (!rt->n && !(rt->rt6i_flags & RTF_NONEXTHOP))
nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
else if (!(rt->dst.flags & DST_HOST))
nrt = rt6_alloc_clone(rt, &fl6->daddr);
@@ -1162,7 +1178,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (neigh)
neigh_hold(neigh);
else {
- neigh = ip6_neigh_lookup(&rt->dst, &fl6->daddr);
+ neigh = ip6_neigh_lookup(&rt->dst, NULL, &fl6->daddr);
if (IS_ERR(neigh)) {
in6_dev_put(idev);
dst_free(&rt->dst);
@@ -1172,7 +1188,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
- dst_set_neighbour(&rt->dst, neigh);
+ rt->n = neigh;
atomic_set(&rt->dst.__refcnt, 1);
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
@@ -1681,6 +1697,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
struct rt6_info *rt, *nrt = NULL;
struct netevent_redirect netevent;
struct net *net = dev_net(neigh->dev);
+ struct neighbour *old_neigh;
rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
@@ -1708,7 +1725,8 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
dst_confirm(&rt->dst);
/* Duplicate redirect: silently ignore. */
- if (neigh == dst_get_neighbour_noref_raw(&rt->dst))
+ old_neigh = rt->n;
+ if (neigh == old_neigh)
goto out;
nrt = ip6_rt_copy(rt, dest);
@@ -1720,13 +1738,16 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
nrt->rt6i_flags &= ~RTF_GATEWAY;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
+ nrt->n = neigh_clone(neigh);
if (ip6_ins_rt(nrt))
goto out;
netevent.old = &rt->dst;
+ netevent.old_neigh = old_neigh;
netevent.new = &nrt->dst;
+ netevent.new_neigh = neigh;
+ netevent.daddr = dest;
call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
if (rt->rt6i_flags & RTF_CACHE) {
@@ -2431,7 +2452,7 @@ static int rt6_fill_node(struct net *net,
goto nla_put_failure;
rcu_read_lock();
- n = dst_get_neighbour_noref(&rt->dst);
+ n = rt->n;
if (n) {
if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) {
rcu_read_unlock();
@@ -2655,7 +2676,7 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg)
seq_puts(m, "00000000000000000000000000000000 00 ");
#endif
rcu_read_lock();
- n = dst_get_neighbour_noref(&rt->dst);
+ n = rt->n;
if (n) {
seq_printf(m, "%pi6", n->primary_key);
} else {
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 8e951d8..7bf3cc4 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -21,9 +21,6 @@
#include <net/ipv6.h>
#include <net/tcp.h>
-extern int sysctl_tcp_syncookies;
-extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
-
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9c06eaf..6cc67ed 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -486,7 +486,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff * skb;
- struct ipv6_txoptions *opt = np->opt;
int err = -ENOMEM;
/* First, grab a route. */
@@ -500,13 +499,11 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
fl6->daddr = treq->rmt_addr;
skb_set_queue_mapping(skb, queue_mapping);
- err = ip6_xmit(sk, skb, fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
done:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
return err;
}
@@ -1229,7 +1226,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
- struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
#endif
@@ -1290,7 +1286,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
}
treq = inet6_rsk(req);
- opt = np->opt;
if (sk_acceptq_is_full(sk))
goto out_overflow;
@@ -1359,11 +1354,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (opt) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
- }
+ if (np->opt)
+ newnp->opt = ipv6_dup_options(newsk, np->opt);
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (newnp->opt)
@@ -1410,8 +1402,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d749484..bb02038 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -103,6 +103,7 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
/* Sheit... I remember I did this right. Apparently,
* it was magically lost, so this code needs audit */
+ xdst->u.rt6.n = neigh_clone(rt->n);
xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST |
RTF_LOCAL);
xdst->u.rt6.rt6i_metric = rt->rt6i_metric;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 819c342..9730882 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -640,6 +640,14 @@ find_free_id(const char *name, ip_set_id_t *index, struct ip_set **set)
}
static int
+ip_set_none(struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[])
+{
+ return -EOPNOTSUPP;
+}
+
+static int
ip_set_create(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
@@ -1539,6 +1547,10 @@ nlmsg_failure:
}
static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
+ [IPSET_CMD_NONE] = {
+ .call = ip_set_none,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ },
[IPSET_CMD_CREATE] = {
.call = ip_set_create,
.attr_count = IPSET_ATTR_CMD_MAX,
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index ee86394..d5d3607 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -38,30 +38,6 @@ struct iface_node {
#define iface_data(n) (rb_entry(n, struct iface_node, node)->iface)
-static inline long
-ifname_compare(const char *_a, const char *_b)
-{
- const long *a = (const long *)_a;
- const long *b = (const long *)_b;
-
- BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
- if (a[0] != b[0])
- return a[0] - b[0];
- if (IFNAMSIZ > sizeof(long)) {
- if (a[1] != b[1])
- return a[1] - b[1];
- }
- if (IFNAMSIZ > 2 * sizeof(long)) {
- if (a[2] != b[2])
- return a[2] - b[2];
- }
- if (IFNAMSIZ > 3 * sizeof(long)) {
- if (a[3] != b[3])
- return a[3] - b[3];
- }
- return 0;
-}
-
static void
rbtree_destroy(struct rb_root *root)
{
@@ -99,7 +75,7 @@ iface_test(struct rb_root *root, const char **iface)
while (n) {
const char *d = iface_data(n);
- long res = ifname_compare(*iface, d);
+ int res = strcmp(*iface, d);
if (res < 0)
n = n->rb_left;
@@ -121,7 +97,7 @@ iface_add(struct rb_root *root, const char **iface)
while (*n) {
char *ifname = iface_data(*n);
- long res = ifname_compare(*iface, ifname);
+ int res = strcmp(*iface, ifname);
p = *n;
if (res < 0)
@@ -366,7 +342,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
struct hash_netiface4_elem data = { .cidr = HOST_MASK };
u32 ip = 0, ip_to, last;
u32 timeout = h->timeout;
- char iface[IFNAMSIZ] = {};
+ char iface[IFNAMSIZ];
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
@@ -663,7 +639,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface6_elem data = { .cidr = HOST_MASK };
u32 timeout = h->timeout;
- char iface[IFNAMSIZ] = {};
+ char iface[IFNAMSIZ];
int ret;
if (unlikely(!tb[IPSET_ATTR_IP] ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index dd811b8..d43e3c1 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -76,19 +76,19 @@ static void __ip_vs_del_service(struct ip_vs_service *svc);
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(struct net *net,
- const struct in6_addr *addr)
+static bool __ip_vs_addr_is_local_v6(struct net *net,
+ const struct in6_addr *addr)
{
- struct rt6_info *rt;
struct flowi6 fl6 = {
.daddr = *addr,
};
+ struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
+ bool is_local;
- rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
- if (rt && rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))
- return 1;
+ is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK);
- return 0;
+ dst_release(dst);
+ return is_local;
}
#endif
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b9b8f4a..14f67a2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -4,7 +4,7 @@
* (C) 2001 by Jay Schulist <jschlst@samba.org>
* (C) 2002-2006 by Harald Welte <laforge@gnumonks.org>
* (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005-2011 by Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2005-2012 by Pablo Neira Ayuso <pablo@netfilter.org>
*
* Initial connection tracking via netlink development funded and
* generally made possible by Network Robots, Inc. (www.networkrobots.com)
@@ -1627,6 +1627,155 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
return err;
}
+static int
+ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+ __u16 cpu, const struct ip_conntrack_stat *st)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+
+ event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = htons(cpu);
+
+ if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) ||
+ nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
+ nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) ||
+ nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
+ nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) ||
+ nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) ||
+ nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) ||
+ nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
+ nla_put_be32(skb, CTA_STATS_INSERT_FAILED,
+ htonl(st->insert_failed)) ||
+ nla_put_be32(skb, CTA_STATS_DROP, htonl(st->drop)) ||
+ nla_put_be32(skb, CTA_STATS_EARLY_DROP, htonl(st->early_drop)) ||
+ nla_put_be32(skb, CTA_STATS_ERROR, htonl(st->error)) ||
+ nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
+ htonl(st->search_restart)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nla_put_failure:
+nlmsg_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int cpu;
+ struct net *net = sock_net(skb->sk);
+
+ if (cb->args[0] == nr_cpu_ids)
+ return 0;
+
+ for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+ const struct ip_conntrack_stat *st;
+
+ if (!cpu_possible(cpu))
+ continue;
+
+ st = per_cpu_ptr(net->ct.stat, cpu);
+ if (ctnetlink_ct_stat_cpu_fill_info(skb,
+ NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ cpu, st) < 0)
+ break;
+ }
+ cb->args[0] = cpu;
+
+ return skb->len;
+}
+
+static int
+ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
+{
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_ct_stat_cpu_dump,
+ };
+ return netlink_dump_start(ctnl, skb, nlh, &c);
+ }
+
+ return 0;
+}
+
+static int
+ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+ struct net *net)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
+
+ event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nla_put_failure:
+nlmsg_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
+{
+ struct sk_buff *skb2;
+ int err;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL)
+ return -ENOMEM;
+
+ err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid,
+ nlh->nlmsg_seq,
+ NFNL_MSG_TYPE(nlh->nlmsg_type),
+ sock_net(skb->sk));
+ if (err <= 0)
+ goto free;
+
+ err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+ if (err < 0)
+ goto out;
+
+ return 0;
+
+free:
+ kfree_skb(skb2);
+out:
+ /* this avoids a loop in nfnetlink. */
+ return err == -EAGAIN ? -ENOBUFS : err;
+}
+
#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
static size_t
ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
@@ -2440,6 +2589,79 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
}
+static int
+ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu,
+ const struct ip_conntrack_stat *st)
+{
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfmsg;
+ unsigned int flags = pid ? NLM_F_MULTI : 0, event;
+
+ event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = AF_UNSPEC;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = htons(cpu);
+
+ if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) ||
+ nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) ||
+ nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete)))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return skb->len;
+
+nla_put_failure:
+nlmsg_failure:
+ nlmsg_cancel(skb, nlh);
+ return -1;
+}
+
+static int
+ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ int cpu;
+ struct net *net = sock_net(skb->sk);
+
+ if (cb->args[0] == nr_cpu_ids)
+ return 0;
+
+ for (cpu = cb->args[0]; cpu < nr_cpu_ids; cpu++) {
+ const struct ip_conntrack_stat *st;
+
+ if (!cpu_possible(cpu))
+ continue;
+
+ st = per_cpu_ptr(net->ct.stat, cpu);
+ if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ cb->nlh->nlmsg_seq,
+ cpu, st) < 0)
+ break;
+ }
+ cb->args[0] = cpu;
+
+ return skb->len;
+}
+
+static int
+ctnetlink_stat_exp_cpu(struct sock *ctnl, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ const struct nlattr * const cda[])
+{
+ if (nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = ctnetlink_exp_stat_cpu_dump,
+ };
+ return netlink_dump_start(ctnl, skb, nlh, &c);
+ }
+
+ return 0;
+}
+
#ifdef CONFIG_NF_CONNTRACK_EVENTS
static struct nf_ct_event_notifier ctnl_notifier = {
.fcn = ctnetlink_conntrack_event,
@@ -2463,6 +2685,8 @@ static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
[IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
.attr_count = CTA_MAX,
.policy = ct_nla_policy },
+ [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu },
+ [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct },
};
static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
@@ -2475,6 +2699,7 @@ static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
[IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
.attr_count = CTA_EXPECT_MAX,
.policy = exp_nla_policy },
+ [IPCTNL_MSG_EXP_GET_STATS_CPU] = { .call = ctnetlink_stat_exp_cpu },
};
static const struct nfnetlink_subsystem ctnl_subsys = {
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1ea9194..0dc6385 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -39,16 +39,13 @@ static int
nf_ct_register_sysctl(struct net *net,
struct ctl_table_header **header,
const char *path,
- struct ctl_table *table,
- unsigned int *users)
+ struct ctl_table *table)
{
if (*header == NULL) {
*header = register_net_sysctl(net, path, table);
if (*header == NULL)
return -ENOMEM;
}
- if (users != NULL)
- (*users)++;
return 0;
}
@@ -56,9 +53,9 @@ nf_ct_register_sysctl(struct net *net,
static void
nf_ct_unregister_sysctl(struct ctl_table_header **header,
struct ctl_table **table,
- unsigned int *users)
+ unsigned int users)
{
- if (users != NULL && --*users > 0)
+ if (users > 0)
return;
unregister_net_sysctl_table(*header);
@@ -191,8 +188,7 @@ static int nf_ct_l3proto_register_sysctl(struct net *net,
err = nf_ct_register_sysctl(net,
&in->ctl_table_header,
l3proto->ctl_table_path,
- in->ctl_table,
- NULL);
+ in->ctl_table);
if (err < 0) {
kfree(in->ctl_table);
in->ctl_table = NULL;
@@ -213,7 +209,7 @@ static void nf_ct_l3proto_unregister_sysctl(struct net *net,
if (in->ctl_table_header != NULL)
nf_ct_unregister_sysctl(&in->ctl_table_header,
&in->ctl_table,
- NULL);
+ 0);
#endif
}
@@ -253,18 +249,23 @@ int nf_conntrack_l3proto_register(struct net *net,
{
int ret = 0;
- if (net == &init_net)
- ret = nf_conntrack_l3proto_register_net(proto);
+ if (proto->init_net) {
+ ret = proto->init_net(net);
+ if (ret < 0)
+ return ret;
+ }
+ ret = nf_ct_l3proto_register_sysctl(net, proto);
if (ret < 0)
return ret;
- if (proto->init_net) {
- ret = proto->init_net(net);
+ if (net == &init_net) {
+ ret = nf_conntrack_l3proto_register_net(proto);
if (ret < 0)
- return ret;
+ nf_ct_l3proto_unregister_sysctl(net, proto);
}
- return nf_ct_l3proto_register_sysctl(net, proto);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register);
@@ -302,93 +303,77 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister);
static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
struct nf_conntrack_l4proto *l4proto)
{
- switch (l4proto->l4proto) {
- case IPPROTO_TCP:
- return (struct nf_proto_net *)&net->ct.nf_ct_proto.tcp;
- case IPPROTO_UDP:
- return (struct nf_proto_net *)&net->ct.nf_ct_proto.udp;
- case IPPROTO_ICMP:
- return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmp;
- case IPPROTO_ICMPV6:
- return (struct nf_proto_net *)&net->ct.nf_ct_proto.icmpv6;
- case 255: /* l4proto_generic */
- return (struct nf_proto_net *)&net->ct.nf_ct_proto.generic;
- default:
- if (l4proto->net_id)
- return net_generic(net, *l4proto->net_id);
- else
- return NULL;
+ if (l4proto->get_net_proto) {
+ /* statically built-in protocols use static per-net */
+ return l4proto->get_net_proto(net);
+ } else if (l4proto->net_id) {
+ /* ... and loadable protocols use dynamic per-net */
+ return net_generic(net, *l4proto->net_id);
}
return NULL;
}
static
int nf_ct_l4proto_register_sysctl(struct net *net,
+ struct nf_proto_net *pn,
struct nf_conntrack_l4proto *l4proto)
{
int err = 0;
- struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
- if (pn == NULL)
- return 0;
#ifdef CONFIG_SYSCTL
if (pn->ctl_table != NULL) {
err = nf_ct_register_sysctl(net,
&pn->ctl_table_header,
"net/netfilter",
- pn->ctl_table,
- &pn->users);
+ pn->ctl_table);
if (err < 0) {
if (!pn->users) {
kfree(pn->ctl_table);
pn->ctl_table = NULL;
}
- goto out;
}
}
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) {
+ if (err < 0) {
+ nf_ct_kfree_compat_sysctl_table(pn);
+ goto out;
+ }
err = nf_ct_register_sysctl(net,
&pn->ctl_compat_header,
"net/ipv4/netfilter",
- pn->ctl_compat_table,
- NULL);
+ pn->ctl_compat_table);
if (err == 0)
goto out;
- kfree(pn->ctl_compat_table);
- pn->ctl_compat_table = NULL;
+ nf_ct_kfree_compat_sysctl_table(pn);
nf_ct_unregister_sysctl(&pn->ctl_table_header,
&pn->ctl_table,
- &pn->users);
+ pn->users);
}
-#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
out:
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
return err;
}
static
void nf_ct_l4proto_unregister_sysctl(struct net *net,
+ struct nf_proto_net *pn,
struct nf_conntrack_l4proto *l4proto)
{
- struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
- if (pn == NULL)
- return;
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
nf_ct_unregister_sysctl(&pn->ctl_table_header,
&pn->ctl_table,
- &pn->users);
+ pn->users);
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL)
nf_ct_unregister_sysctl(&pn->ctl_compat_header,
&pn->ctl_compat_table,
- NULL);
+ 0);
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
-#else
- pn->users--;
#endif /* CONFIG_SYSCTL */
}
@@ -454,19 +439,33 @@ int nf_conntrack_l4proto_register(struct net *net,
struct nf_conntrack_l4proto *l4proto)
{
int ret = 0;
- if (net == &init_net)
- ret = nf_conntrack_l4proto_register_net(l4proto);
+ struct nf_proto_net *pn = NULL;
- if (ret < 0)
- return ret;
+ if (l4proto->init_net) {
+ ret = l4proto->init_net(net, l4proto->l3proto);
+ if (ret < 0)
+ goto out;
+ }
- if (l4proto->init_net)
- ret = l4proto->init_net(net);
+ pn = nf_ct_l4proto_net(net, l4proto);
+ if (pn == NULL)
+ goto out;
+ ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
if (ret < 0)
- return ret;
+ goto out;
- return nf_ct_l4proto_register_sysctl(net, l4proto);
+ if (net == &init_net) {
+ ret = nf_conntrack_l4proto_register_net(l4proto);
+ if (ret < 0) {
+ nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
+ goto out;
+ }
+ }
+
+ pn->users++;
+out:
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register);
@@ -490,10 +489,18 @@ nf_conntrack_l4proto_unregister_net(struct nf_conntrack_l4proto *l4proto)
void nf_conntrack_l4proto_unregister(struct net *net,
struct nf_conntrack_l4proto *l4proto)
{
+ struct nf_proto_net *pn = NULL;
+
if (net == &init_net)
nf_conntrack_l4proto_unregister_net(l4proto);
- nf_ct_l4proto_unregister_sysctl(net, l4proto);
+ pn = nf_ct_l4proto_net(net, l4proto);
+ if (pn == NULL)
+ return;
+
+ pn->users--;
+ nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
+
/* Remove all contrack entries for this protocol */
rtnl_lock();
nf_ct_iterate_cleanup(net, kill_l4proto, l4proto);
@@ -505,10 +512,15 @@ int nf_conntrack_proto_init(struct net *net)
{
unsigned int i;
int err;
- err = nf_conntrack_l4proto_generic.init_net(net);
+ struct nf_proto_net *pn = nf_ct_l4proto_net(net,
+ &nf_conntrack_l4proto_generic);
+
+ err = nf_conntrack_l4proto_generic.init_net(net,
+ nf_conntrack_l4proto_generic.l3proto);
if (err < 0)
return err;
err = nf_ct_l4proto_register_sysctl(net,
+ pn,
&nf_conntrack_l4proto_generic);
if (err < 0)
return err;
@@ -518,13 +530,20 @@ int nf_conntrack_proto_init(struct net *net)
rcu_assign_pointer(nf_ct_l3protos[i],
&nf_conntrack_l3proto_generic);
}
+
+ pn->users++;
return 0;
}
void nf_conntrack_proto_fini(struct net *net)
{
unsigned int i;
+ struct nf_proto_net *pn = nf_ct_l4proto_net(net,
+ &nf_conntrack_l4proto_generic);
+
+ pn->users--;
nf_ct_l4proto_unregister_sysctl(net,
+ pn,
&nf_conntrack_l4proto_generic);
if (net == &init_net) {
/* free l3proto protocol tables */
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index c33f76af..6535326 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -387,7 +387,7 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
/* this module per-net specifics */
static int dccp_net_id __read_mostly;
struct dccp_net {
- struct nf_proto_net np;
+ struct nf_proto_net pn;
int dccp_loose;
unsigned int dccp_timeout[CT_DCCP_MAX + 1];
};
@@ -815,16 +815,37 @@ static struct ctl_table dccp_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static int dccp_init_net(struct net *net)
+static int dccp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct dccp_net *dn)
{
- struct dccp_net *dn = dccp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)dn;
-
#ifdef CONFIG_SYSCTL
- if (!pn->ctl_table) {
-#else
- if (!pn->users++) {
+ if (pn->ctl_table)
+ return 0;
+
+ pn->ctl_table = kmemdup(dccp_sysctl_table,
+ sizeof(dccp_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST];
+ pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND];
+ pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN];
+ pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN];
+ pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ];
+ pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
+ pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
+ pn->ctl_table[7].data = &dn->dccp_loose;
#endif
+ return 0;
+}
+
+static int dccp_init_net(struct net *net, u_int16_t proto)
+{
+ struct dccp_net *dn = dccp_pernet(net);
+ struct nf_proto_net *pn = &dn->pn;
+
+ if (!pn->users) {
/* default values */
dn->dccp_loose = 1;
dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL;
@@ -834,24 +855,9 @@ static int dccp_init_net(struct net *net)
dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ;
dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ;
dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
-#ifdef CONFIG_SYSCTL
- pn->ctl_table = kmemdup(dccp_sysctl_table,
- sizeof(dccp_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
-
- pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST];
- pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND];
- pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN];
- pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN];
- pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ];
- pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
- pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
- pn->ctl_table[7].data = &dn->dccp_loose;
-#endif
}
- return 0;
+
+ return dccp_kmemdup_sysctl_table(pn, dn);
}
static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index bb0e74f..d25f293 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -135,34 +135,62 @@ static struct ctl_table generic_compat_sysctl_table[] = {
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
-static int generic_init_net(struct net *net)
+static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_generic_net *gn)
{
- struct nf_generic_net *gn = generic_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)gn;
- gn->timeout = nf_ct_generic_timeout;
#ifdef CONFIG_SYSCTL
pn->ctl_table = kmemdup(generic_sysctl_table,
sizeof(generic_sysctl_table),
GFP_KERNEL);
if (!pn->ctl_table)
return -ENOMEM;
+
pn->ctl_table[0].data = &gn->timeout;
+#endif
+ return 0;
+}
+static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_generic_net *gn)
+{
+#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table,
sizeof(generic_compat_sysctl_table),
GFP_KERNEL);
- if (!pn->ctl_compat_table) {
- kfree(pn->ctl_table);
- pn->ctl_table = NULL;
+ if (!pn->ctl_compat_table)
return -ENOMEM;
- }
+
pn->ctl_compat_table[0].data = &gn->timeout;
#endif
#endif
return 0;
}
+static int generic_init_net(struct net *net, u_int16_t proto)
+{
+ int ret;
+ struct nf_generic_net *gn = generic_pernet(net);
+ struct nf_proto_net *pn = &gn->pn;
+
+ gn->timeout = nf_ct_generic_timeout;
+
+ ret = generic_kmemdup_compat_sysctl_table(pn, gn);
+ if (ret < 0)
+ return ret;
+
+ ret = generic_kmemdup_sysctl_table(pn, gn);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+
+ return ret;
+}
+
+static struct nf_proto_net *generic_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.generic.pn;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
{
.l3proto = PF_UNSPEC,
@@ -184,4 +212,5 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
.init_net = generic_init_net,
+ .get_net_proto = generic_get_net_proto,
};
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 5cac41c..b09b7af 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -348,7 +348,7 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
};
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
-static int gre_init_net(struct net *net)
+static int gre_init_net(struct net *net, u_int16_t proto)
{
struct netns_proto_gre *net_gre = gre_pernet(net);
int i;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 8fb0582..c746d61 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -707,23 +707,10 @@ static struct ctl_table sctp_compat_sysctl_table[] = {
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif
-static void sctp_init_net_data(struct sctp_net *sn)
-{
- int i;
-#ifdef CONFIG_SYSCTL
- if (!sn->pn.ctl_table) {
-#else
- if (!sn->pn.users++) {
-#endif
- for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
- sn->timeouts[i] = sctp_timeouts[i];
- }
-}
-
-static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn)
+static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct sctp_net *sn)
{
#ifdef CONFIG_SYSCTL
- struct sctp_net *sn = (struct sctp_net *)pn;
if (pn->ctl_table)
return 0;
@@ -744,11 +731,11 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
+static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct sctp_net *sn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- struct sctp_net *sn = (struct sctp_net *)pn;
pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table,
sizeof(sctp_compat_sysctl_table),
GFP_KERNEL);
@@ -767,41 +754,33 @@ static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-static int sctpv4_init_net(struct net *net)
+static int sctp_init_net(struct net *net, u_int16_t proto)
{
int ret;
struct sctp_net *sn = sctp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)sn;
+ struct nf_proto_net *pn = &sn->pn;
- sctp_init_net_data(sn);
+ if (!pn->users) {
+ int i;
- ret = sctp_kmemdup_compat_sysctl_table(pn);
- if (ret < 0)
- return ret;
+ for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
+ sn->timeouts[i] = sctp_timeouts[i];
+ }
- ret = sctp_kmemdup_sysctl_table(pn);
+ if (proto == AF_INET) {
+ ret = sctp_kmemdup_compat_sysctl_table(pn, sn);
+ if (ret < 0)
+ return ret;
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (ret < 0) {
+ ret = sctp_kmemdup_sysctl_table(pn, sn);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+ } else
+ ret = sctp_kmemdup_sysctl_table(pn, sn);
- kfree(pn->ctl_compat_table);
- pn->ctl_compat_table = NULL;
- }
-#endif
-#endif
return ret;
}
-static int sctpv6_init_net(struct net *net)
-{
- struct sctp_net *sn = sctp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)sn;
-
- sctp_init_net_data(sn);
- return sctp_kmemdup_sysctl_table(pn);
-}
-
static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
.l3proto = PF_INET,
.l4proto = IPPROTO_SCTP,
@@ -833,7 +812,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
.net_id = &sctp_net_id,
- .init_net = sctpv4_init_net,
+ .init_net = sctp_init_net,
};
static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
@@ -867,7 +846,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
#endif
.net_id = &sctp_net_id,
- .init_net = sctpv6_init_net,
+ .init_net = sctp_init_net,
};
static int sctp_net_init(struct net *net)
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 99caa13..a5ac11e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -821,7 +821,7 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl,
static unsigned int *tcp_get_timeouts(struct net *net)
{
- return tcp_timeouts;
+ return tcp_pernet(net)->timeouts;
}
/* Returns verdict for packet, or -1 for invalid. */
@@ -1533,11 +1533,10 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
-static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn)
+static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
- struct nf_tcp_net *tn = (struct nf_tcp_net *)pn;
-
if (pn->ctl_table)
return 0;
@@ -1564,11 +1563,11 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
+static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_tcp_net *tn)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- struct nf_tcp_net *tn = (struct nf_tcp_net *)pn;
pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table,
sizeof(tcp_compat_sysctl_table),
GFP_KERNEL);
@@ -1593,18 +1592,15 @@ static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-static int tcpv4_init_net(struct net *net)
+static int tcp_init_net(struct net *net, u_int16_t proto)
{
- int i;
- int ret = 0;
+ int ret;
struct nf_tcp_net *tn = tcp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)tn;
+ struct nf_proto_net *pn = &tn->pn;
+
+ if (!pn->users) {
+ int i;
-#ifdef CONFIG_SYSCTL
- if (!pn->ctl_table) {
-#else
- if (!pn->users++) {
-#endif
for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
tn->timeouts[i] = tcp_timeouts[i];
@@ -1613,43 +1609,23 @@ static int tcpv4_init_net(struct net *net)
tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
}
- ret = tcp_kmemdup_compat_sysctl_table(pn);
-
- if (ret < 0)
- return ret;
+ if (proto == AF_INET) {
+ ret = tcp_kmemdup_compat_sysctl_table(pn, tn);
+ if (ret < 0)
+ return ret;
- ret = tcp_kmemdup_sysctl_table(pn);
+ ret = tcp_kmemdup_sysctl_table(pn, tn);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+ } else
+ ret = tcp_kmemdup_sysctl_table(pn, tn);
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (ret < 0) {
- kfree(pn->ctl_compat_table);
- pn->ctl_compat_table = NULL;
- }
-#endif
-#endif
return ret;
}
-static int tcpv6_init_net(struct net *net)
+static struct nf_proto_net *tcp_get_net_proto(struct net *net)
{
- int i;
- struct nf_tcp_net *tn = tcp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)tn;
-
-#ifdef CONFIG_SYSCTL
- if (!pn->ctl_table) {
-#else
- if (!pn->users++) {
-#endif
- for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
- tn->timeouts[i] = tcp_timeouts[i];
- tn->tcp_loose = nf_ct_tcp_loose;
- tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
- tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
- }
-
- return tcp_kmemdup_sysctl_table(pn);
+ return &net->ct.nf_ct_proto.tcp.pn;
}
struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
@@ -1684,7 +1660,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
.nla_policy = tcp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .init_net = tcpv4_init_net,
+ .init_net = tcp_init_net,
+ .get_net_proto = tcp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
@@ -1720,6 +1697,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
.nla_policy = tcp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .init_net = tcpv6_init_net,
+ .init_net = tcp_init_net,
+ .get_net_proto = tcp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index a83cf93..59623cc 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -235,10 +235,10 @@ static struct ctl_table udp_compat_sysctl_table[] = {
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
-static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn)
+static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_udp_net *un)
{
#ifdef CONFIG_SYSCTL
- struct nf_udp_net *un = (struct nf_udp_net *)pn;
if (pn->ctl_table)
return 0;
pn->ctl_table = kmemdup(udp_sysctl_table,
@@ -252,11 +252,11 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
+static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_udp_net *un)
{
#ifdef CONFIG_SYSCTL
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- struct nf_udp_net *un = (struct nf_udp_net *)pn;
pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table,
sizeof(udp_compat_sysctl_table),
GFP_KERNEL);
@@ -270,50 +270,36 @@ static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn)
return 0;
}
-static void udp_init_net_data(struct nf_udp_net *un)
+static int udp_init_net(struct net *net, u_int16_t proto)
{
- int i;
-#ifdef CONFIG_SYSCTL
- if (!un->pn.ctl_table) {
-#else
- if (!un->pn.users++) {
-#endif
+ int ret;
+ struct nf_udp_net *un = udp_pernet(net);
+ struct nf_proto_net *pn = &un->pn;
+
+ if (!pn->users) {
+ int i;
+
for (i = 0; i < UDP_CT_MAX; i++)
un->timeouts[i] = udp_timeouts[i];
}
-}
-
-static int udpv4_init_net(struct net *net)
-{
- int ret;
- struct nf_udp_net *un = udp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)un;
- udp_init_net_data(un);
+ if (proto == AF_INET) {
+ ret = udp_kmemdup_compat_sysctl_table(pn, un);
+ if (ret < 0)
+ return ret;
- ret = udp_kmemdup_compat_sysctl_table(pn);
- if (ret < 0)
- return ret;
+ ret = udp_kmemdup_sysctl_table(pn, un);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+ } else
+ ret = udp_kmemdup_sysctl_table(pn, un);
- ret = udp_kmemdup_sysctl_table(pn);
-#ifdef CONFIG_SYSCTL
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- if (ret < 0) {
- kfree(pn->ctl_compat_table);
- pn->ctl_compat_table = NULL;
- }
-#endif
-#endif
return ret;
}
-static int udpv6_init_net(struct net *net)
+static struct nf_proto_net *udp_get_net_proto(struct net *net)
{
- struct nf_udp_net *un = udp_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)un;
-
- udp_init_net_data(un);
- return udp_kmemdup_sysctl_table(pn);
+ return &net->ct.nf_ct_proto.udp.pn;
}
struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
@@ -343,7 +329,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
.nla_policy = udp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .init_net = udpv4_init_net,
+ .init_net = udp_init_net,
+ .get_net_proto = udp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp4);
@@ -374,6 +361,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
.nla_policy = udp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
- .init_net = udpv6_init_net,
+ .init_net = udp_init_net,
+ .get_net_proto = udp_get_net_proto,
};
EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_udp6);
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index b32e700f..4b66df2 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -234,29 +234,38 @@ static struct ctl_table udplite_sysctl_table[] = {
};
#endif /* CONFIG_SYSCTL */
-static int udplite_init_net(struct net *net)
+static int udplite_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct udplite_net *un)
{
- int i;
- struct udplite_net *un = udplite_pernet(net);
- struct nf_proto_net *pn = (struct nf_proto_net *)un;
#ifdef CONFIG_SYSCTL
- if (!pn->ctl_table) {
-#else
- if (!pn->users++) {
+ if (pn->ctl_table)
+ return 0;
+
+ pn->ctl_table = kmemdup(udplite_sysctl_table,
+ sizeof(udplite_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED];
+ pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED];
#endif
+ return 0;
+}
+
+static int udplite_init_net(struct net *net, u_int16_t proto)
+{
+ struct udplite_net *un = udplite_pernet(net);
+ struct nf_proto_net *pn = &un->pn;
+
+ if (!pn->users) {
+ int i;
+
for (i = 0 ; i < UDPLITE_CT_MAX; i++)
un->timeouts[i] = udplite_timeouts[i];
-#ifdef CONFIG_SYSCTL
- pn->ctl_table = kmemdup(udplite_sysctl_table,
- sizeof(udplite_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
- pn->ctl_table[0].data = &un->timeouts[UDPLITE_CT_UNREPLIED];
- pn->ctl_table[1].data = &un->timeouts[UDPLITE_CT_REPLIED];
-#endif
}
- return 0;
+
+ return udplite_kmemdup_sysctl_table(pn, un);
}
static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 5a2132b..a265033 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -178,8 +178,10 @@ replay:
err = nla_parse(cda, ss->cb[cb_id].attr_count,
attr, attrlen, ss->cb[cb_id].policy);
- if (err < 0)
+ if (err < 0) {
+ rcu_read_unlock();
return err;
+ }
if (nc->call_rcu) {
err = nc->call_rcu(net->nfnl, skb, nlh,
@@ -193,9 +195,11 @@ replay:
lockdep_is_held(&nfnl_mutex)) != ss ||
nfnetlink_find_client(type, ss) != nc)
err = -EAGAIN;
- else
+ else if (nc->call)
err = nc->call(net->nfnl, skb, nlh,
(const struct nlattr **)cda);
+ else
+ err = -EINVAL;
nfnl_unlock();
}
if (err == -EAGAIN)
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index a0b6492..c0496a5 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -910,6 +910,11 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
+ if (flags >= NFQA_CFG_F_MAX) {
+ ret = -EOPNOTSUPP;
+ goto err_out_unlock;
+ }
+
spin_lock_bh(&queue->lock);
queue->flags &= ~mask;
queue->flags |= flags & mask;
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index ca0c296..4741671 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -67,7 +67,6 @@ struct teql_master {
struct teql_sched_data {
struct Qdisc *next;
struct teql_master *m;
- struct neighbour *ncache;
struct sk_buff_head q;
};
@@ -134,7 +133,6 @@ teql_reset(struct Qdisc *sch)
skb_queue_purge(&dat->q);
sch->q.qlen = 0;
- teql_neigh_release(xchg(&dat->ncache, NULL));
}
static void
@@ -166,7 +164,6 @@ teql_destroy(struct Qdisc *sch)
}
}
skb_queue_purge(&dat->q);
- teql_neigh_release(xchg(&dat->ncache, NULL));
break;
}
@@ -225,21 +222,25 @@ static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
struct net_device *dev, struct netdev_queue *txq,
- struct neighbour *mn)
+ struct dst_entry *dst)
{
- struct teql_sched_data *q = qdisc_priv(txq->qdisc);
- struct neighbour *n = q->ncache;
+ struct neighbour *n;
+ int err = 0;
- if (mn->tbl == NULL)
- return -EINVAL;
- if (n && n->tbl == mn->tbl &&
- memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
- atomic_inc(&n->refcnt);
- } else {
- n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
- if (IS_ERR(n))
- return PTR_ERR(n);
+ n = dst_neigh_lookup_skb(dst, skb);
+ if (!n)
+ return -ENOENT;
+
+ if (dst->dev != dev) {
+ struct neighbour *mn;
+
+ mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
+ neigh_release(n);
+ if (IS_ERR(mn))
+ return PTR_ERR(mn);
+ n = mn;
}
+
if (neigh_event_send(n, skb_res) == 0) {
int err;
char haddr[MAX_ADDR_LEN];
@@ -248,15 +249,13 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
NULL, skb->len);
- if (err < 0) {
- neigh_release(n);
- return -EINVAL;
- }
- teql_neigh_release(xchg(&q->ncache, n));
- return 0;
+ if (err < 0)
+ err = -EINVAL;
+ } else {
+ err = (skb_res == NULL) ? -EAGAIN : 1;
}
neigh_release(n);
- return (skb_res == NULL) ? -EAGAIN : 1;
+ return err;
}
static inline int teql_resolve(struct sk_buff *skb,
@@ -265,7 +264,6 @@ static inline int teql_resolve(struct sk_buff *skb,
struct netdev_queue *txq)
{
struct dst_entry *dst = skb_dst(skb);
- struct neighbour *mn;
int res;
if (txq->qdisc == &noop_qdisc)
@@ -275,8 +273,7 @@ static inline int teql_resolve(struct sk_buff *skb,
return 0;
rcu_read_lock();
- mn = dst_get_neighbour_noref(dst);
- res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
+ res = __teql_resolve(skb, skb_res, dev, txq, dst);
rcu_read_unlock();
return res;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5bc9ab1..b16517e 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -271,6 +271,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
*/
asoc->peer.sack_needed = 1;
asoc->peer.sack_cnt = 0;
+ asoc->peer.sack_generation = 1;
/* Assume that the peer will tell us if he recognizes ASCONF
* as part of INIT exchange.
diff --git a/net/sctp/output.c b/net/sctp/output.c
index f1b7d4b..6ae47ac 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -248,6 +248,11 @@ static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
/* If the SACK timer is running, we have a pending SACK */
if (timer_pending(timer)) {
struct sctp_chunk *sack;
+
+ if (pkt->transport->sack_generation !=
+ pkt->transport->asoc->peer.sack_generation)
+ return retval;
+
asoc->a_rwnd = asoc->rwnd;
sack = sctp_make_sack(asoc);
if (sack) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index a85eeeb..b6de71e 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -734,8 +734,10 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
int len;
__u32 ctsn;
__u16 num_gabs, num_dup_tsns;
+ struct sctp_association *aptr = (struct sctp_association *)asoc;
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
struct sctp_gap_ack_block gabs[SCTP_MAX_GABS];
+ struct sctp_transport *trans;
memset(gabs, 0, sizeof(gabs));
ctsn = sctp_tsnmap_get_ctsn(map);
@@ -805,6 +807,20 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc)
sctp_addto_chunk(retval, sizeof(__u32) * num_dup_tsns,
sctp_tsnmap_get_dups(map));
+ /* Once we have a sack generated, check to see what our sack
+ * generation is, if its 0, reset the transports to 0, and reset
+ * the association generation to 1
+ *
+ * The idea is that zero is never used as a valid generation for the
+ * association so no transport will match after a wrap event like this,
+ * Until the next sack
+ */
+ if (++aptr->peer.sack_generation == 0) {
+ list_for_each_entry(trans, &asoc->peer.transport_addr_list,
+ transports)
+ trans->sack_generation = 0;
+ aptr->peer.sack_generation = 1;
+ }
nodata:
return retval;
}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index c96d1a8..8716da1 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1268,7 +1268,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_REPORT_TSN:
/* Record the arrival of a TSN. */
error = sctp_tsnmap_mark(&asoc->peer.tsn_map,
- cmd->obj.u32);
+ cmd->obj.u32, NULL);
break;
case SCTP_CMD_REPORT_FWDTSN:
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index b026ba0..1dcceb6 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -68,6 +68,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
memset(&peer->saddr, 0, sizeof(union sctp_addr));
+ peer->sack_generation = 0;
+
/* From 6.3.1 RTO Calculation:
*
* C1) Until an RTT measurement has been made for a packet sent to the
diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c
index f1e40ceb..b5fb7c40 100644
--- a/net/sctp/tsnmap.c
+++ b/net/sctp/tsnmap.c
@@ -114,7 +114,8 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn)
/* Mark this TSN as seen. */
-int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn)
+int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn,
+ struct sctp_transport *trans)
{
u16 gap;
@@ -133,6 +134,9 @@ int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn)
*/
map->max_tsn_seen++;
map->cumulative_tsn_ack_point++;
+ if (trans)
+ trans->sack_generation =
+ trans->asoc->peer.sack_generation;
map->base_tsn++;
} else {
/* Either we already have a gap, or about to record a gap, so
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 8a84017..33d8947 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -715,7 +715,8 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
* can mark it as received so the tsn_map is updated correctly.
*/
if (sctp_tsnmap_mark(&asoc->peer.tsn_map,
- ntohl(chunk->subh.data_hdr->tsn)))
+ ntohl(chunk->subh.data_hdr->tsn),
+ chunk->transport))
goto fail_mark;
/* First calculate the padding, so we don't inadvertently
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index f2d1de7..f5a6a4f 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -1051,7 +1051,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
if (chunk && (freed >= needed)) {
__u32 tsn;
tsn = ntohl(chunk->subh.data_hdr->tsn);
- sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn);
+ sctp_tsnmap_mark(&asoc->peer.tsn_map, tsn, chunk->transport);
sctp_ulpq_tail_data(ulpq, chunk, gfp);
sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 890b03f..62d0dac 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1014,9 +1014,6 @@ static void xs_udp_data_ready(struct sock *sk, int len)
UDPX_INC_STATS_BH(sk, UDP_MIB_INDATAGRAMS);
- /* Something worked... */
- dst_confirm(skb_dst(skb));
-
xprt_adjust_cwnd(task, copied);
xprt_complete_rqst(task, copied);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ccfbd32..6e97855 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1500,9 +1500,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (!dev)
goto free_dst;
- /* Copy neighbour for reachability confirmation */
- dst_set_neighbour(dst0, neigh_clone(dst_get_neighbour_noref(dst)));
-
xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
xfrm_init_pmtu(dst_prev);
@@ -2404,9 +2401,11 @@ static unsigned int xfrm_mtu(const struct dst_entry *dst)
return mtu ? : dst_mtu(dst->path);
}
-static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, const void *daddr)
+static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
+ struct sk_buff *skb,
+ const void *daddr)
{
- return dst_neigh_lookup(dst->path, daddr);
+ return dst->path->ops->neigh_lookup(dst, skb, daddr);
}
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
diff --git a/security/security.c b/security/security.c
index 3efc9b1..860aeb3 100644
--- a/security/security.c
+++ b/security/security.c
@@ -23,6 +23,7 @@
#include <linux/mman.h>
#include <linux/mount.h>
#include <linux/personality.h>
+#include <linux/backing-dev.h>
#include <net/flow.h>
#define MAX_LSM_EVM_XATTR 2
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5ccf10a..aa4c25e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -6688,6 +6688,31 @@ static const struct alc_model_fixup alc662_fixup_models[] = {
{}
};
+static void alc662_fill_coef(struct hda_codec *codec)
+{
+ int val, coef;
+
+ coef = alc_get_coef0(codec);
+
+ switch (codec->vendor_id) {
+ case 0x10ec0662:
+ if ((coef & 0x00f0) == 0x0030) {
+ val = alc_read_coef_idx(codec, 0x4); /* EAPD Ctrl */
+ alc_write_coef_idx(codec, 0x4, val & ~(1<<10));
+ }
+ break;
+ case 0x10ec0272:
+ case 0x10ec0273:
+ case 0x10ec0663:
+ case 0x10ec0665:
+ case 0x10ec0670:
+ case 0x10ec0671:
+ case 0x10ec0672:
+ val = alc_read_coef_idx(codec, 0xd); /* EAPD Ctrl */
+ alc_write_coef_idx(codec, 0xd, val | (1<<14));
+ break;
+ }
+}
/*
*/
@@ -6707,6 +6732,9 @@ static int patch_alc662(struct hda_codec *codec)
alc_fix_pll_init(codec, 0x20, 0x04, 15);
+ spec->init_hook = alc662_fill_coef;
+ alc662_fill_coef(codec);
+
alc_pick_fixup(codec, alc662_fixup_models,
alc662_fixup_tbl, alc662_fixups);
alc_apply_fixup(codec, ALC_FIXUP_ACT_PRE_PROBE);
diff --git a/sound/soc/codecs/tlv320aic3x.c b/sound/soc/codecs/tlv320aic3x.c
index 64d2a4f..e9b62b5 100644
--- a/sound/soc/codecs/tlv320aic3x.c
+++ b/sound/soc/codecs/tlv320aic3x.c
@@ -935,9 +935,7 @@ static int aic3x_hw_params(struct snd_pcm_substream *substream,
}
found:
- data = snd_soc_read(codec, AIC3X_PLL_PROGA_REG);
- snd_soc_write(codec, AIC3X_PLL_PROGA_REG,
- data | (pll_p << PLLP_SHIFT));
+ snd_soc_update_bits(codec, AIC3X_PLL_PROGA_REG, PLLP_MASK, pll_p);
snd_soc_write(codec, AIC3X_OVRF_STATUS_AND_PLLR_REG,
pll_r << PLLR_SHIFT);
snd_soc_write(codec, AIC3X_PLL_PROGB_REG, pll_j << PLLJ_SHIFT);
diff --git a/sound/soc/codecs/tlv320aic3x.h b/sound/soc/codecs/tlv320aic3x.h
index 6f097fb..08c7f66 100644
--- a/sound/soc/codecs/tlv320aic3x.h
+++ b/sound/soc/codecs/tlv320aic3x.h
@@ -166,6 +166,7 @@
/* PLL registers bitfields */
#define PLLP_SHIFT 0
+#define PLLP_MASK 7
#define PLLQ_SHIFT 3
#define PLLR_SHIFT 0
#define PLLJ_SHIFT 2
diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c
index acbdc5f..32682c1 100644
--- a/sound/soc/codecs/wm2200.c
+++ b/sound/soc/codecs/wm2200.c
@@ -1491,6 +1491,7 @@ static int wm2200_bclk_rates_dat[WM2200_NUM_BCLK_RATES] = {
static int wm2200_bclk_rates_cd[WM2200_NUM_BCLK_RATES] = {
5644800,
+ 3763200,
2882400,
1881600,
1411200,
OpenPOWER on IntegriCloud